Skip to content

Instantly share code, notes, and snippets.

@thistleknot
Created December 2, 2023 03:49
Show Gist options
  • Save thistleknot/57c0a9eb5aeda0764cb4d1712e184166 to your computer and use it in GitHub Desktop.
Save thistleknot/57c0a9eb5aeda0764cb4d1712e184166 to your computer and use it in GitHub Desktop.
Parse Grep
import pandas as pd
def parse_grep_line(line):
    """Split one line of grep output into ``(filepath, content)``.

    Accepts both ``path:lineno:content`` and ``path:content``. The line
    number, when present, is dropped. ``content`` keeps whatever trailing
    newline ``line`` had, so callers can concatenate contents directly.

    Returns ``None`` when the line contains no colon at all.
    """
    filepath, colon, remainder = line.partition(':')
    if not colon:
        return None
    maybe_lineno, colon, rest = remainder.partition(':')
    # Only treat the second field as a line number if it is purely digits;
    # otherwise the colon belongs to the matched content itself.
    content = rest if colon and maybe_lineno.isdigit() else remainder
    return filepath.strip(), content


def build_frame(parsed_rows):
    """Build a DataFrame with Filepath, Content, Path and Filename columns.

    ``parsed_rows`` is an iterable of ``(filepath, content)`` pairs.
    The path is split on its last ``/``. ``str.rpartition`` is used instead
    of ``Series.str.rsplit(expand=True)`` because the latter produces fewer
    than two columns for empty input or for paths without a ``/``, which
    either raises on assignment or leaves ``Filename`` as ``None``. Here a
    bare filename yields an empty ``Path`` and itself as ``Filename``.
    """
    rows = []
    for filepath, content in parsed_rows:
        path, _, filename = filepath.rpartition('/')
        rows.append([filepath, content, path, filename])
    return pd.DataFrame(rows, columns=['Filepath', 'Content', 'Path', 'Filename'])


def format_grouped(df):
    """Render ``df`` as text: a filename header followed by its contents.

    Consecutive rows sharing a ``Filename`` are grouped under one header;
    groups are separated by a blank line. Rows whose content is only
    whitespace are omitted. Leading/trailing whitespace of the final
    string is stripped.
    """
    output_data = []
    current_file = None
    for filename, content in zip(df['Filename'], df['Content']):
        if filename != current_file:
            if current_file is not None:
                output_data.append('\n')
            current_file = filename
            output_data.append(f'{current_file}\n')
        if content.strip():  # Add content only if it's not empty
            output_data.append(content)
    return ''.join(output_data).strip()


# Guarded so that importing this module does not touch the filesystem;
# running it as a script behaves as before.
if __name__ == "__main__":
    # Step 1: Read and parse the input file
    parsed_data = []
    with open('output.txt', 'r') as file:  # Replace 'output.txt' with your file path
        for line in file:
            if not line.strip():  # Skip empty lines
                continue
            parsed = parse_grep_line(line)
            if parsed is None:
                print(f"Skipping line: {line}")
            else:
                parsed_data.append(list(parsed))

    # Step 2: Use Pandas for data structuring
    df = build_frame(parsed_data)

    # Step 3: Reformat the output without line numbers
    final_output = format_grouped(df)

    # Write to a new file
    with open('reformatted_output.txt', 'w') as file:  # Replace with your desired output file path
        file.write(final_output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment