python reading the next n lines based on previous line using regex












1















CREATE TABLE `cluster_diagnostic_report`(
`run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',
`execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',
`module` string COMMENT 'Test Case Module',
`expected_result` string COMMENT 'Test Case Module expected Result',
`actual_result` string COMMENT 'Test Case Module actual Result',
`validation_result` string COMMENT 'Test Case Module validation Result',
`start_time` string COMMENT 'Test Case Module Start Time',
`end_time` string COMMENT 'Test Case Module Elapsed Time',
`elapsed_time` string COMMENT 'from deserializer',
`total_time_seconds` int COMMENT 'total elapsed time for this step')
PARTITIONED BY (
`cluster_name` string,
`rptg_dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'


From the above context I need to get only the partition column names and types.
For the above example I want to get the details as follows:



col_name = cluster_name, type = string
col_name = rptg_dt, type = string


What I have tried is given below, but it is returning None:



partitionResult = re.match(r"PARTITIONED\s\w+\s\((\n){2}",line)
if partitionResult == None:
    pass
else:
    print(partitionResult.group(1),sep='\t')


Can anyone please suggest what to do?










share|improve this question



























    1















    CREATE TABLE `cluster_diagnostic_report`(
    `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',
    `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',
    `module` string COMMENT 'Test Case Module',
    `expected_result` string COMMENT 'Test Case Module expected Result',
    `actual_result` string COMMENT 'Test Case Module actual Result',
    `validation_result` string COMMENT 'Test Case Module validation Result',
    `start_time` string COMMENT 'Test Case Module Start Time',
    `end_time` string COMMENT 'Test Case Module Elapsed Time',
    `elapsed_time` string COMMENT 'from deserializer',
    `total_time_seconds` int COMMENT 'total elapsed time for this step')
    PARTITIONED BY (
    `cluster_name` string,
    `rptg_dt` string)
    ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.orc.OrcSerde'


    from the above contect i nees to get only the partitioned column name and type.
    for the above example i want to get details as follows :



    col_name = cluster_name, type = string
    rptg_dt= cluster_name, type = string


    what i have tried is given below buyt it is returning None:



    partitionResult = re.match(r"PARTITIONED\s\w+\s\((\n){2}",line)
    if partitionResult == None:
    pass
    else:
    print(partitionResult.group(1),sep='t')


    can anyone please suggest what to do?










    share|improve this question

























      1












      1








      1








      CREATE TABLE `cluster_diagnostic_report`(
      `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',
      `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',
      `module` string COMMENT 'Test Case Module',
      `expected_result` string COMMENT 'Test Case Module expected Result',
      `actual_result` string COMMENT 'Test Case Module actual Result',
      `validation_result` string COMMENT 'Test Case Module validation Result',
      `start_time` string COMMENT 'Test Case Module Start Time',
      `end_time` string COMMENT 'Test Case Module Elapsed Time',
      `elapsed_time` string COMMENT 'from deserializer',
      `total_time_seconds` int COMMENT 'total elapsed time for this step')
      PARTITIONED BY (
      `cluster_name` string,
      `rptg_dt` string)
      ROW FORMAT SERDE
      'org.apache.hadoop.hive.ql.io.orc.OrcSerde'


      from the above contect i nees to get only the partitioned column name and type.
      for the above example i want to get details as follows :



      col_name = cluster_name, type = string
      rptg_dt= cluster_name, type = string


      what i have tried is given below buyt it is returning None:



      partitionResult = re.match(r"PARTITIONED\s\w+\s\((\n){2}",line)
      if partitionResult == None:
      pass
      else:
      print(partitionResult.group(1),sep='t')


      can anyone please suggest what to do?










      share|improve this question














      CREATE TABLE `cluster_diagnostic_report`(
      `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',
      `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',
      `module` string COMMENT 'Test Case Module',
      `expected_result` string COMMENT 'Test Case Module expected Result',
      `actual_result` string COMMENT 'Test Case Module actual Result',
      `validation_result` string COMMENT 'Test Case Module validation Result',
      `start_time` string COMMENT 'Test Case Module Start Time',
      `end_time` string COMMENT 'Test Case Module Elapsed Time',
      `elapsed_time` string COMMENT 'from deserializer',
      `total_time_seconds` int COMMENT 'total elapsed time for this step')
      PARTITIONED BY (
      `cluster_name` string,
      `rptg_dt` string)
      ROW FORMAT SERDE
      'org.apache.hadoop.hive.ql.io.orc.OrcSerde'


      from the above contect i nees to get only the partitioned column name and type.
      for the above example i want to get details as follows :



      col_name = cluster_name, type = string
      rptg_dt= cluster_name, type = string


      what i have tried is given below buyt it is returning None:



      partitionResult = re.match(r"PARTITIONED\s\w+\s\((\n){2}",line)
      if partitionResult == None:
      pass
      else:
      print(partitionResult.group(1),sep='t')


      can anyone please suggest what to do?







      python regex






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 20 '18 at 17:33









      c0derc0der

      8121713




      8121713
























          2 Answers
          2






          active

          oldest

          votes


















          1














          Here is a solution that uses \G (continue from the start or from the end of the previous match) to match an arbitrary number of cluster cols/types:



          Online Test (needs to run in PCRE)



          Sample Code (requires alternative regex package for Python)



          import regex as re

          regex = r"(?|PARTITIONED\s+BY\s+\(\s+`(\w+)`\s+(\w+),?|\G\s*`(\w+)`\s+(\w+),?)\K"

          test_str = ("CREATE TABLE `cluster_diagnostic_report`(\n"
              " `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',\n"
              " `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',\n"
              " `module` string COMMENT 'Test Case Module',\n"
              " `expected_result` string COMMENT 'Test Case Module expected Result',\n"
              " `actual_result` string COMMENT 'Test Case Module actual Result',\n"
              " `validation_result` string COMMENT 'Test Case Module validation Result',\n"
              " `start_time` string COMMENT 'Test Case Module Start Time',\n"
              " `end_time` string COMMENT 'Test Case Module Elapsed Time',\n"
              " `elapsed_time` string COMMENT 'from deserializer',\n"
              " `total_time_seconds` int COMMENT 'total elapsed time for this step')\n"
              "PARTITIONED BY (\n"
              " `cluster_name` string,\n"
              " `cluster_name2` string,`rptg_dt` string,\n"
              "`cluster_name2` string,)\n"
              "ROW FORMAT SERDE\n"
              " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'")

          matches = re.finditer(regex, test_str, re.MULTILINE)

          for matchNum, match in enumerate(matches):
              for groupNum in range(0, len(match.groups())):
                  groupNum = groupNum + 1

                  print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


          Output:



          Group 1 found at 789-801: cluster_name
          Group 2 found at 803-809: string
          Group 1 found at 813-826: cluster_name2
          Group 2 found at 828-834: string
          Group 1 found at 836-843: rptg_dt
          Group 2 found at 845-851: string
          Group 1 found at 854-867: cluster_name2
          Group 2 found at 869-875: string





          share|improve this answer
























          • This is giving an error in Python: "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure". I need this for Python. What changes should I make?

            – c0der
            Nov 21 '18 at 18:06













          • Works as posted; test it here: repl.it/repls/ColorfulQuizzicalLeads. Have you installed the regex module?

            – wp78de
            Nov 21 '18 at 18:28













          • Actually, there is more than one CREATE statement of this type in the file, and I am reading through the file line by line. Is it possible to get the 'PARTITIONED BY' key and type within that for loop itself? This is the code and file: repl.it/@coder007/DeterminedFatPython. I want to add the PARTITIONED BY key and type to 'toOutputFile' in the script.

            – c0der
            Nov 22 '18 at 16:26













          • @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

            – wp78de
            Nov 22 '18 at 22:36



















          0














          Make use of a positive lookahead and a positive lookbehind (Example):



          pat = re.compile(r'(?<=PARTITIONED BY \(\n)(.*)(?=\))', re.S)
          # Look behind for "PARTITIONED BY (" and look ahead to ")"
          # use the re.S flag to match across lines with .*

          results = pat.search(text).group()

          # ' `cluster_name` string,\n `rptg_dt` string'

          # ... do what you need with the strings.

          [i.lstrip() for i in results.split('\n')]
          # ['`cluster_name` string,',
          # '`rptg_dt` string']





          share|improve this answer























            Your Answer






            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "1"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
            contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53398469%2fpython-reading-the-next-n-lines-based-on-previous-line-using-regex%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            2 Answers
            2






            active

            oldest

            votes








            2 Answers
            2






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            1














            Here is a solution that uses G (continue from the start or previous match) to match an arbitrary number of cluster cols/types:



            Online Test (needs to run in PCRE)



            Sample Code (requires alternative regex package for Python)



            import regex as re

            regex = r"(?|PARTITIONED\s+BY\s+\(\s+`(\w+)`\s+(\w+),?|\G\s*`(\w+)`\s+(\w+),?)\K"

            test_str = ("CREATE TABLE `cluster_diagnostic_report`(n"
            " `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',n"
            " `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',n"
            " `module` string COMMENT 'Test Case Module',n"
            " `expected_result` string COMMENT 'Test Case Module expected Result',n"
            " `actual_result` string COMMENT 'Test Case Module actual Result',n"
            " `validation_result` string COMMENT 'Test Case Module validation Result',n"
            " `start_time` string COMMENT 'Test Case Module Start Time',n"
            " `end_time` string COMMENT 'Test Case Module Elapsed Time',n"
            " `elapsed_time` string COMMENT 'from deserializer',n"
            " `total_time_seconds` int COMMENT 'total elapsed time for this step')n"
            "PARTITIONED BY (n"
            " `cluster_name` string,n"
            " `cluster_name2` string,`rptg_dt` string,n"
            "`cluster_name2` string,)n"
            "ROW FORMAT SERDEn"
            " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'")

            matches = re.finditer(regex, test_str, re.MULTILINE)

            for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
            groupNum = groupNum + 1

            print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


            Output:



            Group 1 found at 789-801: cluster_name
            Group 2 found at 803-809: string
            Group 1 found at 813-826: cluster_name2
            Group 2 found at 828-834: string
            Group 1 found at 836-843: rptg_dt
            Group 2 found at 845-851: string
            Group 1 found at 854-867: cluster_name2
            Group 2 found at 869-875: string





            share|improve this answer
























            • This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

              – c0der
              Nov 21 '18 at 18:06













            • Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

              – wp78de
              Nov 21 '18 at 18:28













            • actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

              – c0der
              Nov 22 '18 at 16:26













            • @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

              – wp78de
              Nov 22 '18 at 22:36
















            1














            Here is a solution that uses G (continue from the start or previous match) to match an arbitrary number of cluster cols/types:



            Online Test (needs to run in PCRE)



            Sample Code (requires alternative regex package for Python)



            import regex as re

            regex = r"(?|PARTITIONED\s+BY\s+\(\s+`(\w+)`\s+(\w+),?|\G\s*`(\w+)`\s+(\w+),?)\K"

            test_str = ("CREATE TABLE `cluster_diagnostic_report`(n"
            " `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',n"
            " `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',n"
            " `module` string COMMENT 'Test Case Module',n"
            " `expected_result` string COMMENT 'Test Case Module expected Result',n"
            " `actual_result` string COMMENT 'Test Case Module actual Result',n"
            " `validation_result` string COMMENT 'Test Case Module validation Result',n"
            " `start_time` string COMMENT 'Test Case Module Start Time',n"
            " `end_time` string COMMENT 'Test Case Module Elapsed Time',n"
            " `elapsed_time` string COMMENT 'from deserializer',n"
            " `total_time_seconds` int COMMENT 'total elapsed time for this step')n"
            "PARTITIONED BY (n"
            " `cluster_name` string,n"
            " `cluster_name2` string,`rptg_dt` string,n"
            "`cluster_name2` string,)n"
            "ROW FORMAT SERDEn"
            " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'")

            matches = re.finditer(regex, test_str, re.MULTILINE)

            for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
            groupNum = groupNum + 1

            print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


            Output:



            Group 1 found at 789-801: cluster_name
            Group 2 found at 803-809: string
            Group 1 found at 813-826: cluster_name2
            Group 2 found at 828-834: string
            Group 1 found at 836-843: rptg_dt
            Group 2 found at 845-851: string
            Group 1 found at 854-867: cluster_name2
            Group 2 found at 869-875: string





            share|improve this answer
























            • This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

              – c0der
              Nov 21 '18 at 18:06













            • Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

              – wp78de
              Nov 21 '18 at 18:28













            • actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

              – c0der
              Nov 22 '18 at 16:26













            • @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

              – wp78de
              Nov 22 '18 at 22:36














            1












            1








            1







            Here is a solution that uses G (continue from the start or previous match) to match an arbitrary number of cluster cols/types:



            Online Test (needs to run in PCRE)



            Sample Code (requires alternative regex package for Python)



            import regex as re

            regex = r"(?|PARTITIONED\s+BY\s+\(\s+`(\w+)`\s+(\w+),?|\G\s*`(\w+)`\s+(\w+),?)\K"

            test_str = ("CREATE TABLE `cluster_diagnostic_report`(n"
            " `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',n"
            " `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',n"
            " `module` string COMMENT 'Test Case Module',n"
            " `expected_result` string COMMENT 'Test Case Module expected Result',n"
            " `actual_result` string COMMENT 'Test Case Module actual Result',n"
            " `validation_result` string COMMENT 'Test Case Module validation Result',n"
            " `start_time` string COMMENT 'Test Case Module Start Time',n"
            " `end_time` string COMMENT 'Test Case Module Elapsed Time',n"
            " `elapsed_time` string COMMENT 'from deserializer',n"
            " `total_time_seconds` int COMMENT 'total elapsed time for this step')n"
            "PARTITIONED BY (n"
            " `cluster_name` string,n"
            " `cluster_name2` string,`rptg_dt` string,n"
            "`cluster_name2` string,)n"
            "ROW FORMAT SERDEn"
            " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'")

            matches = re.finditer(regex, test_str, re.MULTILINE)

            for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
            groupNum = groupNum + 1

            print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


            Output:



            Group 1 found at 789-801: cluster_name
            Group 2 found at 803-809: string
            Group 1 found at 813-826: cluster_name2
            Group 2 found at 828-834: string
            Group 1 found at 836-843: rptg_dt
            Group 2 found at 845-851: string
            Group 1 found at 854-867: cluster_name2
            Group 2 found at 869-875: string





            share|improve this answer













            Here is a solution that uses G (continue from the start or previous match) to match an arbitrary number of cluster cols/types:



            Online Test (needs to run in PCRE)



            Sample Code (requires alternative regex package for Python)



            import regex as re

            regex = r"(?|PARTITIONED\s+BY\s+\(\s+`(\w+)`\s+(\w+),?|\G\s*`(\w+)`\s+(\w+),?)\K"

            test_str = ("CREATE TABLE `cluster_diagnostic_report`(n"
            " `run_id` string COMMENT 'format: <hostname>_<datetime> - to uniquely identify the a particular execution instance of Cluster Diag job',n"
            " `execution_hostname` string COMMENT 'Machine Name from where Test Case Executed',n"
            " `module` string COMMENT 'Test Case Module',n"
            " `expected_result` string COMMENT 'Test Case Module expected Result',n"
            " `actual_result` string COMMENT 'Test Case Module actual Result',n"
            " `validation_result` string COMMENT 'Test Case Module validation Result',n"
            " `start_time` string COMMENT 'Test Case Module Start Time',n"
            " `end_time` string COMMENT 'Test Case Module Elapsed Time',n"
            " `elapsed_time` string COMMENT 'from deserializer',n"
            " `total_time_seconds` int COMMENT 'total elapsed time for this step')n"
            "PARTITIONED BY (n"
            " `cluster_name` string,n"
            " `cluster_name2` string,`rptg_dt` string,n"
            "`cluster_name2` string,)n"
            "ROW FORMAT SERDEn"
            " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'")

            matches = re.finditer(regex, test_str, re.MULTILINE)

            for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
            groupNum = groupNum + 1

            print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))


            Output:



            Group 1 found at 789-801: cluster_name
            Group 2 found at 803-809: string
            Group 1 found at 813-826: cluster_name2
            Group 2 found at 828-834: string
            Group 1 found at 836-843: rptg_dt
            Group 2 found at 845-851: string
            Group 1 found at 854-867: cluster_name2
            Group 2 found at 869-875: string






            share|improve this answer












            share|improve this answer



            share|improve this answer










            answered Nov 20 '18 at 18:29









            wp78dewp78de

            10.1k61939




            10.1k61939













            • This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

              – c0der
              Nov 21 '18 at 18:06













            • Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

              – wp78de
              Nov 21 '18 at 18:28













            • actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

              – c0der
              Nov 22 '18 at 16:26













            • @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

              – wp78de
              Nov 22 '18 at 22:36



















            • This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

              – c0der
              Nov 21 '18 at 18:06













            • Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

              – wp78de
              Nov 21 '18 at 18:28













            • actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

              – c0der
              Nov 22 '18 at 16:26













            • @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

              – wp78de
              Nov 22 '18 at 22:36

















            This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

            – c0der
            Nov 21 '18 at 18:06







            This is giving an error for python "(? Incomplete group structure | There are too many alternatives within the parent structure ) Incomplete group structure".i needed that for python.what changes shoulf make?

            – c0der
            Nov 21 '18 at 18:06















            Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

            – wp78de
            Nov 21 '18 at 18:28







            Works as posted, test it here: repl.it/repls/ColorfulQuizzicalLeads Have you installed the regex module?

            – wp78de
            Nov 21 '18 at 18:28















            actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

            – c0der
            Nov 22 '18 at 16:26







            actuall there are more that one this type CREATE statements in a file.and iam reading through the file line by line.Is it possible to get the the 'PARTITIONED BY' key and type within that forloop itself? this is the code and file: repl.it/@coder007/DeterminedFatPython .i want to add the PARTITIONED BY key and type to the 'toOutputFile' in the script

            – c0der
            Nov 22 '18 at 16:26















            @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

            – wp78de
            Nov 22 '18 at 22:36





            @c0der Many roads lead to Rome. I have suggested a solution to your original post that allows you to get multiple PARTITIONED BY blocks in a file (just read the entire file as string) and I'd rather like to stay on topic. Please ask a new question if required.

            – wp78de
            Nov 22 '18 at 22:36













            0














            Make use of positive look ahead and look behind (Example):



            pat = re.compile(r'(?<=PARTITIONED BY \(\n)(.*)(?=\))', re.S)
            # Look behind for "PARTITIONED BY (" and look ahead to ")"
            # use the re.S flag to match across lines with .*

            results = pat.search(text).group()

            # ' `cluster_name` string,n `rptg_dt` string'

            # ... do what you need with the strings.

            [i.lstrip() for i in results.split('n')]
            # ['`cluster_name` string,',
            # '`rptg_dt` string']





            share|improve this answer




























              0














              Make use of positive look ahead and look behind (Example):



              pat = re.compile(r'(?<=PARTITIONED BY \(\n)(.*)(?=\))', re.S)
              # Look behind for "PARTITIONED BY (" and look ahead to ")"
              # use the re.S flag to match across lines with .*

              results = pat.search(text).group()

              # ' `cluster_name` string,\n `rptg_dt` string'

              # ... do what you need with the strings.

              [i.lstrip() for i in results.split('\n')]
              # ['`cluster_name` string,',
              # '`rptg_dt` string']





              share|improve this answer


























                0












                0








                0







                Make use of positive look ahead and look behind (Example):



                pat = re.compile(r'(?<=PARTITIONED BY \(\n)(.*)(?=\))', re.S)
                # Look behind for "PARTITIONED BY (" and look ahead to ")"
                # use the re.S flag to match across lines with .*

                results = pat.search(text).group()

                # ' `cluster_name` string,\n `rptg_dt` string'

                # ... do what you need with the strings.

                [i.lstrip() for i in results.split('\n')]
                # ['`cluster_name` string,',
                # '`rptg_dt` string']





                share|improve this answer













                Make use of positive look ahead and look behind (Example):



                pat = re.compile(r'(?<=PARTITIONED BY \(\n)(.*)(?=\))', re.S)
                # Look behind for "PARTITIONED BY (" and look ahead to ")"
                # use the re.S flag to match across lines with .*

                results = pat.search(text).group()

                # ' `cluster_name` string,\n `rptg_dt` string'

                # ... do what you need with the strings.

                [i.lstrip() for i in results.split('\n')]
                # ['`cluster_name` string,',
                # '`rptg_dt` string']






                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered Nov 20 '18 at 18:05









                IdlehandsIdlehands

                4,4551518




                4,4551518






























                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Stack Overflow!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53398469%2fpython-reading-the-next-n-lines-based-on-previous-line-using-regex%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    'app-layout' is not a known element: how to share Component with different Modules

                    android studio warns about leanback feature tag usage required on manifest while using Unity exported app?

                    WPF add header to Image with URL pettitions [duplicate]