Skip to content

Instantly share code, notes, and snippets.

@voigtjessica
Created April 23, 2021 15:13
Show Gist options
  • Save voigtjessica/1a39e00cf801d3c8087cd4fed92b6bfe to your computer and use it in GitHub Desktop.
Save voigtjessica/1a39e00cf801d3c8087cd4fed92b6bfe to your computer and use it in GitHub Desktop.
Getting data from E-vergabe NRW API
Display the source blob
Display the rendered blob
Raw
import requests
import pandas as pd
import numpy as np
import json
import janitor
from tabulate import tabulate
# Lift the display limits so printed DataFrames and NumPy arrays are never
# truncated: None removes the pandas column/row caps, and an infinite
# threshold stops NumPy from summarising large arrays.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)
np.set_printoptions(threshold=np.inf)
# REST endpoint queried for the data (aggregation search).
url = 'https://daten.vergabe.nrw.de/rest/vmp_rheinland_single/aggregation_search/'
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
# According to the documentation, there is a limit of 10,000
# (presumably the maximum number of results per request -- confirm).
params = {'size': 10000}
# Without an explicit timeout, requests waits indefinitely on a stalled server.
req = requests.post(url, headers=headers, params=params, timeout=60)
# Fail loudly on an HTTP error status instead of trying to parse an error
# page as JSON further down.
req.raise_for_status()
# Retrieving data
json_data = json.loads(req.text)
# Flatten the nested records stored under 'results' into a flat table.
df = pd.json_normalize(json_data['results'])
# Strip the 'result.' prefix from the column names. The match is a literal
# substring, so regex=False is passed explicitly: the default of the `regex`
# argument changed to False in pandas 2.0, where the former pattern
# 'result\.' would be searched for verbatim (backslash included) and never
# match, silently leaving every prefix in place.
nomes = df.columns.str.replace('result.', '', regex=False)
df.columns = nomes
# Keep only the columns at positions 1 through 18.
df = df.iloc[:, 1:19]
# clean_names() is not a pandas method; presumably it is the DataFrame
# method registered by the `janitor` import (pyjanitor) to normalise names.
df = df.clean_names()
# Report the table dimensions and the smallest / largest `created_at` value.
print(
    'GENERAL INFORMATION: \n\n rows and columns number:',
    df.shape,
    '\n older creation data:',
    df.created_at.min(),
    '\n newer creation data:',
    df.created_at.max(),
)
# Output recorded in the original script:
# GENERAL INFORMATION:
# rows and columns number: (5817, 18)
# older creation data: 2019-11-19T07:19:15.448Z
# newer creation data: 2021-04-03T22:19:51.775Z
# Count the missing values in each column and keep them as a one-column
# table indexed by column name.
nv = df.isnull().sum().to_frame(name='total_null_values')
# Bare expression: displays the table when run in a notebook / REPL cell.
nv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment