# harrytormey/iterate_data.py

## iterate_data.py
import pandas as pd

# Parquet file that holds the dataset rows inspected by this script.
PARQUET_PATH = "./test-00000-of-00001-dc7762b94638c186.parquet"


def filter_django_entries(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of *df* whose ``repo`` value contains "django".

    The match is case-insensitive. Rows with a missing ``repo`` value are
    treated as non-matching (``na=False``) so the boolean mask never contains
    NaN, which would make the indexing below raise.
    """
    is_django = df["repo"].str.contains("Django", case=False, na=False)
    return df[is_django]


def format_created_at(created_at) -> str:
    """Return *created_at* formatted as ``YYYY-MM-DD HH:MM:SS``.

    *created_at* is parsed with ``pd.to_datetime``, so anything that function
    accepts for a single value (e.g. an ISO-8601 string) is accepted here.
    """
    return pd.to_datetime(created_at).strftime("%Y-%m-%d %H:%M:%S")


def count_patch_lines(patch: str) -> int:
    r"""Return the number of lines in *patch*.

    A trailing newline terminates the last line instead of starting a new
    one, so ``"a\nb"`` and ``"a\nb\n"`` both count as two lines. An empty
    patch has zero lines.
    """
    if not patch:
        return 0
    # Only add one for a final line that is not newline-terminated.
    unterminated_last_line = 0 if patch.endswith("\n") else 1
    return patch.count("\n") + unterminated_last_line


def main(parquet_path: str = PARQUET_PATH) -> None:
    """Print the problem statement, creation time and patch size of every
    Django entry found in the parquet file at *parquet_path*.

    Prints a single notice instead when no entry matches.
    """
    df = pd.read_parquet(parquet_path)
    django_entries = filter_django_entries(df)

    if django_entries.empty:
        print("No entries from the Django repository were found.")
        return

    for _, row in django_entries.iterrows():
        print(row["problem_statement"], "\n")
        print(f"Created At: {format_created_at(row['created_at'])}")
        print(f"Patch Size: {count_patch_lines(row['patch'])} lines\n")


if __name__ == "__main__":
    main()