Skip to content

Instantly share code, notes, and snippets.

@ShenZhouHong
Created March 6, 2023 12:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ShenZhouHong/2498a04536d24a69a0e14f0e15416180 to your computer and use it in GitHub Desktop.
Save ShenZhouHong/2498a04536d24a69a0e14f0e15416180 to your computer and use it in GitHub Desktop.
Example file download routine for radiographs in the METRC PAIN study.
import os # To handle directory paths in a platform independent way
import requests.exceptions # To handle exceptions on download
import xml.etree.ElementTree as ET # To get additional information from request error
# Maps each radiograph file-upload column to the short view code that is
# embedded in the filename of the saved image.
column_to_view: dict[str, str] = {
    # Antero-posterior view
    "cfu_xrayupload": "antpr",
    # Lateral-medial view
    "cfu_xrayuploadlat": "later",
    # Oblique view
    "cfu_xrayuploadobl": "obliq",
    # Axial view
    "cfu_xrayuploadaxi": "axial",
}
# Download every uploaded radiograph referenced in df_labels and save it to
# disk as "{study_id}-{event_id}-{view}.{extension}",
# e.g. "1001-<event>-later.jpg".
#
# This section relies on names defined outside it: `pd` (pandas),
# `df_labels`, `radiograph_columns`, `project` and `column_to_view`.


def _redcap_error_message(exc: Exception) -> str:
    """Return the text of the ``<error>`` element carried by *exc*.

    The exception text is expected to look like the repr of a bytes XML
    payload (``b'<...>'``); the ``b'`` prefix and trailing quote are removed
    before parsing. If the text is not XML, or has no ``<error>`` element,
    the raw exception text is returned instead so that reporting a failed
    download can never raise by itself.
    """
    raw: str = str(exc)
    # Only strip the bytes-repr wrapper when it is actually present.
    payload: str = raw[2:-1] if raw[:2] in ("b'", 'b"') else raw
    try:
        error_node = ET.fromstring(payload).find("error")
    except ET.ParseError:
        return raw
    if error_node is None or error_node.text is None:
        return raw
    return error_node.text


# Directory that receives the downloaded images (created on first write).
dataset_directory: str = "dataset"

# Each row index is subscripted as a (study_id, event_id) pair below.
index: tuple[int, str]
label: pd.Series
# NOTE(review): the original comment here said iteration starts at iloc 49
# because iloc 46, 47, and 48 are corrupt, but the slice starts at 45. The
# slice is left unchanged -- confirm which start row is intended.
for index, label in df_labels.iloc[45:].iterrows():
    # Since PAIN is a longitudinal study with multiple indices, radiographs
    # are downloaded per record id *and* per event.
    study_id: int = index[0]
    event_id: str = index[1]

    # Determine which file upload fields are null. These are ignored.
    is_null: pd.Series = pd.isnull(label[radiograph_columns])

    column: str
    value_is_null: bool
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for column, value_is_null in is_null.items():
        if value_is_null:
            continue

        # Try to download the file using PyCap's Project.export_file method.
        try:
            downloaded_file: tuple[bytes, dict[str, str]] = project.export_file(
                record=str(study_id),
                field=column,
                event=event_id,
            )
        except requests.exceptions.RequestException as e:
            # E.g. the file does not exist on the REDCap server. Report the
            # failure and move on to the next field; without the `continue`
            # the code below would reuse the previous iteration's download
            # (or hit an undefined name on the very first failure).
            print(f"RequestException for: study_id: {str(study_id)}, event {event_id}")
            print(_redcap_error_message(e))
            continue

        # downloaded_file is a tuple: the first item is the binary image,
        # the second a dict that carries the original filename under 'name'.
        content, file_info = downloaded_file
        orig_filename: str = file_info['name']
        orig_extension: str = orig_filename.split(".")[-1]

        # Fall back to the column name for unmapped fields, so that two
        # unmapped columns cannot both be written to a "...-None.ext" file.
        view: str = column_to_view.get(column, column)
        new_filename: str = f"{study_id}-{event_id}-{view}.{orig_extension}"

        # Now that we have the new filename, save the image to disk. The
        # directory is created on demand so a fresh checkout does not fail.
        os.makedirs(dataset_directory, exist_ok=True)
        with open(os.path.join(dataset_directory, new_filename), "wb") as binary_file:
            binary_file.write(content)

        # Print filename on successful download
        print(new_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment