Skip to content

Instantly share code, notes, and snippets.

@lognaturel
Last active April 5, 2022 22:15
Show Gist options
  • Save lognaturel/b9420c4211682bdeb0c27a0eae25187d to your computer and use it in GitHub Desktop.
Save lognaturel/b9420c4211682bdeb0c27a0eae25187d to your computer and use it in GitHub Desktop.
A basic example of getting ODK data as a pandas dataframe
"""
Copyright 2022 ODK
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# !/usr/bin/env python3
from typing import Optional
import requests
import json
import pandas as pd
from pandas import DataFrame
def get_data(url: str, username: str, password: str, project: int, formid: str, table: Optional[str] = "Submissions",
             cache_file: Optional[str] = None) -> Optional[DataFrame]:
    """Get non-rejected data for a specific form as a pandas dataframe.

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password
        project: the numeric id of the project to get data from
        formid: the formid to get data from
        table (optional): if there are repeats, specify the repeat name to get the table for that repeat
        cache_file (optional): a file for caching the session token. This is recommended to minimize the login
            events logged on the server.

    Returns:
        Optional[DataFrame]: the dataframe, or None if the query returned no rows or the response could not be
            interpreted (the unexpected response is printed in that case)

    Raises:
        SystemExit: if no session token could be obtained (raised by get_token)
    """
    token = get_token(url, username, password, cache_file)
    response = requests.get(
        f"{url}/v1/projects/{project}/forms/{formid}.svc/{table}?$filter=__system/reviewState ne 'rejected'",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    )

    # Decode the body exactly once; a non-JSON body (e.g. an HTML error page from a proxy) makes
    # Response.json() raise a ValueError subclass, which is treated like any other failed query.
    try:
        payload = response.json()
    except ValueError:
        print(response.text)  # Something went wrong with the query
        return None

    # A successful query has a "value" property holding the rows. Anything else (an error document,
    # or JSON that is not an object at all) means the query failed.
    try:
        rows = payload['value']
    except (KeyError, TypeError):
        print(payload)  # Something went wrong with the query
        return None

    if not rows:
        return None
    return pd.json_normalize(rows, sep='/')
def get_token(url: str, username: str, password: str, cache_file: Optional[str] = None) -> str:
    """Get a verified session token with the provided credential.

    First tries the cache if a cache file is provided, then falls back to requesting a new session. A newly
    requested token is written to the cache file (when one is provided) so later calls can reuse it.

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password
        cache_file (optional): a file for caching the session token. This is recommended to minimize the login
            events logged on the server.

    Returns:
        str: the session token

    Raises:
        SystemExit: if neither the cache nor a new login produced a token
    """
    cached_token = get_verified_cached_token(url, cache_file)
    if cached_token:
        # The token was just read from the cache file, so there is nothing new to persist.
        return cached_token

    token = get_new_token(url, username, password)
    if not token:
        raise SystemExit("Unable to get session token")

    if cache_file is not None:
        write_to_cache(cache_file, "token", token)
    return token
def get_verified_cached_token(url: str, cache_file: Optional[str] = None) -> Optional[str]:
    """Try to read a Central session token from the "token" property of a JSON cache file with the given filename.

    The cached token is only returned if the server still accepts it, which is checked by requesting the current
    user's details with it.

    Parameters:
        url: the base URL of the Central server to connect to
        cache_file (optional): the JSON file to read the token from

    Returns:
        Optional[str]: the cached token if it exists and the server accepted it. None if no cache file was given,
            the file is missing, unreadable, not valid JSON or has no "token" property, or the server rejected
            the token.
    """
    if cache_file is None:
        return None

    # Only the cache read is guarded: any problem with the file just means "no usable cached token" so the
    # caller can fall back to a new login. ValueError covers json.JSONDecodeError (corrupt cache) and
    # TypeError covers a cache whose top-level JSON value is not an object.
    try:
        with open(cache_file) as file:
            token = json.load(file)["token"]
    except (OSError, ValueError, KeyError, TypeError):
        return None

    # The request is deliberately outside the try block so network failures still propagate to the caller
    # instead of being mistaken for a cache problem.
    user_details_response = requests.get(
        f"{url}/v1/users/current",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    )
    return token if user_details_response.ok else None
def get_new_token(url: str, username: str, password: str) -> Optional[str]:
    """Log in to Central to create a new session and return its token.

    See https://odkcentral.docs.apiary.io/#reference/authentication/session-authentication/logging-in

    Parameters:
        url: the base URL of the Central server to connect to
        username: the username of the Web User to auth with
        password: the Web User's password

    Returns:
        Optional[str]: the session token, or None if the server did not answer the login request with a 200
    """
    credentials = json.dumps({"email": username, "password": password})
    session_response = requests.post(
        f"{url}/v1/sessions",
        data=credentials,
        headers={"Content-Type": "application/json"},
    )

    # Any status other than 200 means no session was created.
    if session_response.status_code != 200:
        return None
    return session_response.json()["token"]
def write_to_cache(cache_file: str, key: str, value: str) -> None:
    """Add the given key/value pair to the provided cache file, preserving any other properties it may have.

    If the file does not exist yet, or its contents are not a JSON object (for example because it is empty or
    corrupt), a fresh cache containing only the given pair is written instead.

    Parameters:
        cache_file: the path of the JSON cache file to create or update
        key: the property name to set
        value: the value to store under that property
    """
    try:
        with open(cache_file) as file:
            cache = json.load(file)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError: an unreadable cache has nothing worth preserving.
        cache = {}

    # Valid JSON that is not an object (e.g. a list) cannot hold properties, so start over.
    if not isinstance(cache, dict):
        cache = {}

    cache[key] = value
    with open(cache_file, 'w') as outfile:
        json.dump(cache, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment