# thistleknot/parse_grep.py

## parse_grep.py
import pandas as pd

# Reformat `grep` output ("path:line_number:content" or "path:content") into
# blocks grouped per file: a header line with the file's base name followed by
# the matched lines with their line numbers removed.

# Default locations used when this file is run as a script.
INPUT_PATH = 'output.txt'  # Replace 'output.txt' with your file path
OUTPUT_PATH = 'reformatted_output.txt'  # Replace with your desired output file path


def parse_grep_line(line):
    """Split one line of grep output into ``(filepath, content)``.

    Args:
        line: A raw line, normally still carrying its trailing newline.

    Returns:
        A ``(filepath, content)`` tuple, or ``None`` when the line contains no
        ``':'`` separator. ``filepath`` is stripped of surrounding whitespace;
        ``content`` is everything after the path, minus an optional all-digit
        ``line_number:`` prefix, with its trailing newline kept.
    """
    filepath, colon, remainder = line.partition(':')
    if not colon:
        return None
    prefix, colon, rest = remainder.partition(':')
    # Only an all-digit field is treated as a line number; anything else is
    # part of the matched text and must be kept.
    if colon and prefix.isdigit():
        remainder = rest
    return filepath.strip(), remainder


def _build_frame(rows):
    """Return a DataFrame with Filepath, Content, Path and Filename columns."""
    frame = pd.DataFrame(rows, columns=['Filepath', 'Content'])
    # str.rpartition always yields three parts, so paths without a '/' (and an
    # empty frame) are handled without the column-count mismatch that
    # ``.str.rsplit(..., expand=True)`` produces in those cases.
    pieces = [path.rpartition('/') for path in frame['Filepath']]
    frame['Path'] = [head for head, _, _ in pieces]
    frame['Filename'] = [tail for _, _, tail in pieces]
    return frame


def _render(frame):
    """Render the parsed rows as header-plus-content blocks, one per file."""
    output_data = []
    current_path = None
    columns = zip(frame['Filepath'], frame['Filename'], frame['Content'])
    for filepath, filename, content in columns:
        # Compare the full path, not just the base name, so adjacent matches
        # from different directories are not merged under a single header.
        if filepath != current_path:
            if current_path is not None:
                output_data.append('\n')  # Blank line between file blocks
            current_path = filepath
            output_data.append(f'{filename}\n')
        if content.strip():  # Add content only if it's not empty
            output_data.append(content)
    return ''.join(output_data).strip()


def reformat_grep_lines(lines):
    """Convert an iterable of grep output lines into the grouped text.

    Blank lines are ignored. Lines without a ``':'`` are reported on stdout
    and skipped.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        The reformatted text, stripped of leading and trailing whitespace
        (an empty string when nothing could be parsed).
    """
    parsed_data = []
    for line in lines:
        if not line.strip():  # Skip empty lines
            continue
        parsed = parse_grep_line(line)
        if parsed is None:
            print(f"Skipping line: {line}")
            continue
        parsed_data.append(parsed)
    return _render(_build_frame(parsed_data))


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read grep output from ``input_path`` and write the grouped text.

    Args:
        input_path: File containing the raw grep output.
        output_path: File that receives the reformatted text.
    """
    # grep output can contain bytes that are not valid UTF-8; surrogateescape
    # lets them pass through unchanged instead of aborting the run.
    with open(input_path, 'r', encoding='utf-8', errors='surrogateescape') as source:
        final_output = reformat_grep_lines(source)
    with open(output_path, 'w', encoding='utf-8', errors='surrogateescape') as sink:
        sink.write(final_output)


if __name__ == '__main__':
    main()