Skip to content

Instantly share code, notes, and snippets.

@kuriwaki
Last active February 11, 2021 14:41
Show Gist options
  • Save kuriwaki/1ec939d453b6c18a17cb5f5ba4610d98 to your computer and use it in GitHub Desktop.
Save kuriwaki/1ec939d453b6c18a17cb5f5ba4610d98 to your computer and use it in GitHub Desktop.
pyDataverse import code, updated for v0.3.0
# cell #1
import io
import pandas as pd
from pyDataverse.api import NativeApi
from pyDataverse.api import DataAccessApi
# Identifier of the dataset to inspect and the server it is requested from.
doi = "doi:10.7910/DVN/HIDLTK"
base_url = "https://dataverse.harvard.edu"

# Ask the native API for the dataset record.
api = NativeApi(base_url)
resp = api.get_dataset(doi)

# Walk the JSON payload down to the "files" entry of its "latestVersion".
latest_version = resp.json()["data"]["latestVersion"]
datafiles = latest_version["files"]
# cell #2
# Print the name and ID of every file entry so the ID of the file to download
# can be picked out by hand.
# The loop body must be indented; without it Python raises IndentationError.
for file_entry in datafiles:
    filename = file_entry["dataFile"]["filename"]
    datafile_id = file_entry["dataFile"]["id"]
    print(f'Filename is "{filename}", datafile ID is "{datafile_id}"')
# cell #3
da_api = DataAccessApi(base_url)

# ID of the file to download. It was originally "4274786"; this newer number
# appears to be us_county_confirmed_cases.tab.
datafile_id = "4360740"

resp = da_api.get_datafile(datafile_id)
# cell #4
# Decode the response body as UTF-8 and wrap it in an in-memory text buffer
# so pandas can read it like a file.
decoded_text = str(resp.content, "utf-8")
data = io.StringIO(decoded_text)

# Parse the buffer as tab-separated values.
# NOTE(review): the name says "states", but the ID chosen in cell #3 is
# described there as the county-level file -- confirm which one is intended.
us_states_cases = pd.read_csv(data, sep="\t")

# Show the first ten rows as a quick sanity check.
preview = us_states_cases.head(10)
print(preview)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment