Created
December 2, 2023 03:49
-
-
Save thistleknot/57c0a9eb5aeda0764cb4d1712e184166 to your computer and use it in GitHub Desktop.
Parse Grep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd


def parse_grep_line(line):
    """Split one grep-style line into ``[filepath, content]``.

    The expected shapes are ``filepath:content`` and
    ``filepath:lineno:content``. When the text between the first and second
    colon is all digits it is treated as a line number and dropped.

    Args:
        line: One raw line of input, trailing newline included.

    Returns:
        ``[filepath, content]`` (content keeps its trailing newline), or
        ``None`` when the line contains no colon at all.
    """
    filepath, colon, remainder = line.partition(':')
    if not colon:
        return None
    maybe_number, second_colon, after_number = remainder.partition(':')
    if second_colon and maybe_number.isdigit():
        content = after_number  # Exclude line number
    else:
        content = remainder  # No line number present
    return [filepath.strip(), content]


def parse_grep_lines(lines):
    """Parse an iterable of grep-style lines into ``[filepath, content]`` pairs.

    Blank lines are ignored silently; lines without any colon are reported
    on stdout and skipped.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        List of ``[filepath, content]`` lists, in input order.
    """
    parsed = []
    for line in lines:
        if not line.strip():  # Skip empty lines
            continue
        entry = parse_grep_line(line)
        if entry is None:
            print(f"Skipping line: {line}")
            continue
        parsed.append(entry)
    return parsed


def build_frame(parsed_data):
    """Build the DataFrame with Filepath, Content, Path and Filename columns.

    The path is split on its last ``/`` with ``str.rpartition`` so that a
    file without any directory part gets an empty ``Path`` and keeps its
    name in ``Filename``. Splitting row by row also keeps this working for
    empty input, where a column-wise split would yield no columns to assign.

    Args:
        parsed_data: Iterable of ``[filepath, content]`` pairs.

    Returns:
        ``pandas.DataFrame`` with columns
        ``['Filepath', 'Content', 'Path', 'Filename']``.
    """
    rows = []
    for filepath, content in parsed_data:
        directory, _, filename = filepath.rpartition('/')
        rows.append([filepath, content, directory, filename])
    return pd.DataFrame(rows, columns=['Filepath', 'Content', 'Path', 'Filename'])


def format_output(df):
    """Render the frame as filename headers followed by their content lines.

    A new header is emitted whenever the full file path changes, so two
    adjacent files that share a basename but live in different directories
    are not merged into one group. Groups are separated by a blank line and
    whitespace-only content is omitted.

    Args:
        df: DataFrame with ``Filepath``, ``Filename`` and ``Content`` columns.

    Returns:
        The reformatted text with leading/trailing whitespace stripped.
    """
    output_data = []
    current_path = None
    for filepath, filename, content in zip(df['Filepath'], df['Filename'], df['Content']):
        if filepath != current_path:
            if current_path is not None:
                output_data.append('\n')
            current_path = filepath
            output_data.append(f'{filename}\n')
        if content.strip():  # Add content only if it's not empty
            output_data.append(content)
    # Joining the lines to form the final output string
    return ''.join(output_data).strip()


if __name__ == "__main__":
    # Step 1: Read and parse the input file
    with open('output.txt', 'r') as file:  # Replace 'output.txt' with your file path
        parsed_data = parse_grep_lines(file)

    # Step 2: Use Pandas for data structuring
    df = build_frame(parsed_data)

    # Step 3: Reformat the output without line numbers
    final_output = format_output(df)

    # Write to a new file
    with open('reformatted_output.txt', 'w') as file:  # Replace with your desired output file path
        file.write(final_output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment