Skip to content

Instantly share code, notes, and snippets.

@jspeed-meyers
Created May 28, 2023 23:03
Show Gist options
  • Save jspeed-meyers/8db7ee035473d63b64a364e61101c87c to your computer and use it in GitHub Desktop.
Save jspeed-meyers/8db7ee035473d63b64a364e61101c87c to your computer and use it in GitHub Desktop.
Collect funder finder data from multiple repos into a single CSV. Each line is one type of a contribution to one particular repo
# Collect data from multiple repos into a single JSON Lines (.jsonl) file.
# Each line is one type of contribution to one particular repo.
import json
import os
def run_python_command(command):
    """Run a shell command and parse its standard output into Python data.

    The command is expected to print either a JSON document or the ``repr``
    of a Python literal (for example a list of dicts written with ``print``).

    Args:
        command: Shell command line to execute via ``os.popen``.

    Returns:
        The parsed output of the command.

    Raises:
        json.JSONDecodeError: If the output (including empty output) cannot
            be parsed by any of the supported strategies.
    """
    import ast  # local import: only this function needs it

    # Close the pipe deterministically instead of leaving it to the GC.
    with os.popen(command) as pipe:
        output = pipe.read().strip()

    # 1. Output that is already valid JSON needs no rewriting at all.
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        pass

    # 2. Output that is a Python literal repr (single quotes, True/False/None)
    #    is parsed natively. Unlike textual replacement this keeps apostrophes
    #    and the words "True"/"False"/"None" inside string values intact.
    try:
        return ast.literal_eval(output)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # 3. Last resort: rewrite Python-style tokens into JSON and parse that.
    #    A failure here propagates as json.JSONDecodeError.
    output = output.replace("'", '"')
    output = output.replace("True", "true")
    output = output.replace("False", "false")
    output = output.replace("None", "null")
    return json.loads(output)
def append_to_jsonl_file(data, file_path):
    """Append ``data`` to ``file_path`` as one JSON-encoded line.

    Args:
        data: Any JSON-serializable object.
        file_path: Path of the JSON Lines file; created if it does not exist.
    """
    # Serialize first so a failed dump never opens the file.
    serialized = json.dumps(data)
    with open(file_path, 'a') as handle:
        # One record per line, terminated by a newline.
        handle.write(f"{serialized}\n")
if __name__ == "__main__":
    # Driver: run get_funders.py once per repo listed in repos.txt and append
    # every returned record, tagged with its repo, to a JSON Lines file.
    output_file = 'collect_repos_into_csv_output.jsonl'

    # Start from an empty output file on each run. Opening in 'w' mode already
    # truncates, so no explicit truncate() call is needed.
    if os.path.exists(output_file):
        with open(output_file, 'w'):
            pass

    # Read the repos to analyze, one per line. Strip the trailing newline so
    # it ends up neither in the shell command nor in the stored repo_name, and
    # skip blank lines so no run is started without a repo argument.
    with open("repos.txt", 'r') as file:
        repos = [line.strip() for line in file if line.strip()]

    # Analyze each repo, printing a 1-based counter as a progress indicator.
    for cnt, repo in enumerate(repos, start=1):
        print(cnt)
        # Run the command and get the result
        command = f"python3 ../funderfinder/get_funders.py {repo}"
        results = run_python_command(command)
        for result in results:
            # Keep track of what contributions are associated with what repo
            result["repo_name"] = repo
            # Append result to the JSONL file
            append_to_jsonl_file(result, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment