Skip to content

Instantly share code, notes, and snippets.

@yashprakash13
Created November 24, 2021 04:02
Show Gist options
  • Save yashprakash13/05a306ee549b72050a94f5c613c9c25e to your computer and use it in GitHub Desktop.
Save yashprakash13/05a306ee549b72050a94f5c613c9c25e to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
from prefect import task, Flow
import json
@task
def extract(url_from, timeout=10):
    """Fetch the JSON document at ``url_from`` and return its ``"results"`` entry.

    Args:
        url_from: URL to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The value stored under the ``"results"`` key of the decoded JSON body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        ValueError: If the body is not valid JSON, or the decoded JSON is not
            a mapping containing a ``"results"`` key.
    """
    response = requests.get(url_from, timeout=timeout)
    # Fail loudly on an error status rather than printing a message and
    # returning None, which would only surface later as an unrelated
    # TypeError in whatever consumes the result.
    response.raise_for_status()
    payload = response.json()
    if not isinstance(payload, dict) or "results" not in payload:
        raise ValueError(f"No 'results' entry in the response from {url_from}")
    return payload["results"]
@task
def transform(data_dict):
    """Flatten raw person records into a tabular DataFrame.

    Args:
        data_dict: Iterable of person mappings (despite the name, it is
            iterated as a sequence). Each mapping must provide ``"gender"``,
            ``"nat"`` and a ``"name"`` mapping with ``"title"``, ``"first"``
            and ``"last"``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``gender``, ``name`` and
        ``nat``, one row per person. The columns are present even when the
        input is empty, so the CSV header stays stable.

    Raises:
        KeyError: If a person record lacks one of the required keys.
    """
    columns = ["gender", "name", "nat"]
    name_parts = ("title", "first", "last")
    rows = [
        {
            "gender": person["gender"],
            # Separate the parts with spaces; plain concatenation produced
            # unreadable values such as "MrJohnDoe".
            "name": " ".join(person["name"][part] for part in name_parts),
            # Use the nationality supplied with each record instead of a
            # hard-coded "AU".
            "nat": person["nat"],
        }
        for person in data_dict
    ]
    return pd.DataFrame(rows, columns=columns)
@task
def load(data_df, filename):
    """Write ``data_df`` to ``<filename>.csv``, omitting the index column.

    Args:
        data_df: Object exposing ``to_csv`` (a DataFrame in this pipeline).
        filename: Output path without the ``.csv`` extension.
    """
    csv_path = f"{filename}.csv"
    data_df.to_csv(csv_path, index=False)
def start_data_collection(num_people_to_fetch):
    """Assemble the extract -> transform -> load flow and return it.

    The flow is only built here; the caller is responsible for running it.

    Args:
        num_people_to_fetch: Number of profiles requested from the Random
            User API; also used as the prefix of the output file name.

    Returns:
        The constructed ``Flow`` object.
    """
    source_url = f'https://randomuser.me/api/?inc=gender,name,nat&results={num_people_to_fetch}'
    output_name = f'{num_people_to_fetch}_people'

    with Flow("Random User API ETL:") as flow:
        # Request the profiles from the API.
        raw_people = extract(source_url)
        # Turn the raw records into a dataframe.
        people_frame = transform(raw_people)
        # Persist the dataframe to disk as CSV.
        load(people_frame, output_name)

    return flow
if __name__ == "__main__":
    # Script entry point: build the ETL flow for three profiles and run it.
    flow = start_data_collection(num_people_to_fetch=3)
    flow.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment