Created
March 6, 2023 12:59
-
-
Save ShenZhouHong/2498a04536d24a69a0e14f0e15416180 to your computer and use it in GitHub Desktop.
Example file download routine for radiographs in the METRC PAIN study.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os # To handle directory paths in a platform independent way | |
import requests.exceptions # To handle exceptions on download | |
import xml.etree.ElementTree as ET # To get additional information from request error | |
# Maps each radiograph file-upload field name to the short view code that is
# embedded in the filename of the saved image.
column_to_view: dict[str, str] = {
    "cfu_xrayupload": "antpr",     # Anteroposterior
    "cfu_xrayuploadlat": "later",  # Lateral-medial
    "cfu_xrayuploadobl": "obliq",  # Oblique
    "cfu_xrayuploadaxi": "axial",  # Axial
}
# Directory that downloaded radiographs are written to. Created up front so
# that the first successful download does not fail on a missing folder.
dataset_directory: str = "dataset"
os.makedirs(dataset_directory, exist_ok=True)


def _redcap_error_message(exc: Exception) -> str:
    """Return the most specific error text available for a failed export.

    The exception text is expected to be the repr of a bytes payload holding
    an XML document (i.e. wrapped in ``b'...'``) with an ``<error>`` child
    element. If the text does not have that shape, the unparsed text is
    returned instead of raising, so that one odd response cannot abort the
    whole download run.
    """
    text: str = str(exc)
    # Strip the b'...' / b"..." wrapper only when it is actually present.
    if len(text) >= 3 and text[:2] in ("b'", 'b"') and text[-1] == text[1]:
        text = text[2:-1]
    try:
        root = ET.fromstring(text)
    except ET.ParseError:
        return text
    message = root.findtext("error")
    return message if message else text


# For every (study_id, event) row and every radiograph file upload field.
# Since PAIN is a longitudinal study, the index is subscripted as
# (study_id, event_id): radiographs are downloaded per record AND per event.
index: tuple[int, str]
label: pd.Series
# NOTE(review): the original comment here said "we start at iloc 49 because
# iloc 46, 47, and 48 are corrupt", but the slice starts at 45. The offset is
# left unchanged -- confirm which value is intended.
for index, label in df_labels.iloc[45:].iterrows():
    study_id: int = index[0]
    event_id: str = index[1]

    # Determine which file upload fields are null. These are ignored.
    is_null: pd.Series = pd.isnull(label[radiograph_columns])

    column: str
    value_is_null: bool
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    for column, value_is_null in is_null.items():
        if value_is_null:
            continue

        # Try to download the file using PyCap's Project.export_file method.
        try:
            downloaded_file: tuple[bytes, dict[str, str]] = project.export_file(
                record=str(study_id),
                field=column,
                event=event_id,
            )
        except requests.exceptions.RequestException as e:
            # E.g. the file does not exist on the REDCap server. Report the
            # server's error message and move on to the next field; without
            # this `continue` the code below would either hit an unbound
            # `downloaded_file` or re-save the previous download under a new
            # name.
            print(f"RequestException for: study_id: {str(study_id)}, event {event_id}")
            print(_redcap_error_message(e))
            continue

        # Construct the output filename using the format
        # {study_id}-{event_id}-{view}.{extension}. The extension is taken
        # from the original filename reported alongside the download.
        orig_filename: str = downloaded_file[1]['name']
        orig_extension: str = orig_filename.split(".")[-1]
        # Fall back to the raw column name for unmapped fields so that their
        # files do not all collide on a literal "None" view code.
        view: str = column_to_view.get(column, column)
        new_filename: str = f"{study_id}-{event_id}-{view}.{orig_extension}"

        # Save the image to disk. `downloaded_file` is a tuple in which only
        # the first item is the binary image content.
        with open(os.path.join(dataset_directory, new_filename), "wb") as binary_file:
            binary_file.write(downloaded_file[0])

        # Print filename on successful download
        print(new_filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment