Created
August 1, 2023 20:27
-
-
Save tashrifbillah/dd4fdbcf4f3fd2ac29c418efc93160bb to your computer and use it in GitHub Desktop.
formqc troubleshooting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Troubleshoot formqc inputs by counting subjects with missing form exports.

Subject IDs are the basenames of the entries matched by RAW_SUBJECT_PATTERN.
Form CSVs are searched with a relative glob pattern, so the script has to be
run from the directory that holds the ``*-form_*-day*to*csv`` files.
"""

from glob import glob
from os.path import isfile, basename

import pandas as pd

RAW_SUBJECT_PATTERN = '/data/predict1/data_from_nda/Pronet/PHOENIX/PROTECTED/*/raw/*'
INCLUSION_FORM = 'inclusionexclusion_criteria_review'
DEMOGRAPHICS_FORM = 'sociodemographics'

# Variables whose presence is verified once both form files exist.
INCLUSION_VARIABLES = ('included_excluded', 'chrcrit_part')
DEMOGRAPHICS_VARIABLES = ('chrdemo_sexassigned',)


def form_pattern(subject, form):
    """Return the relative glob pattern for *form* exports of *subject*.

    The pattern is ``<subject[:2]>-<subject[:9]>-form_<form>-day*to*csv``.
    The two-character prefix is presumably the site code -- TODO confirm.
    """
    return subject[:2] + '-' + subject[:9] + '-form_' + form + '-day*to*csv'


def form_files(subject, form):
    """Return the paths in the current directory matching *form* for *subject*."""
    return glob(form_pattern(subject, form))


def lacks_required_variables(inclusion_df, demographics_df):
    """Return True if any required variable cannot be read from row 0.

    A variable counts as missing when its column is absent or when the frame
    has no row labelled 0 (e.g. a header-only CSV) -- the same situations in
    which ``df.loc[0, column]`` raises ``KeyError``.  A NaN value is *not*
    treated as missing; only existence is checked.

    Every variable is checked independently.  The previous implementation
    chained the lookups with ``or`` inside a bare ``except``, so a NaN in an
    earlier variable short-circuited the chain and hid a missing later one.
    """
    checks = (
        [(inclusion_df, name) for name in INCLUSION_VARIABLES]
        + [(demographics_df, name) for name in DEMOGRAPHICS_VARIABLES]
    )
    return any(
        column not in frame.columns or 0 not in frame.index
        for frame, column in checks
    )


def main():
    """Print the missing-form and missing-variable counts for all subjects."""
    subjects = [basename(path) for path in glob(RAW_SUBJECT_PATTERN)]

    print('for how many subjects inclusionexclusion_criteria_review does not exist?')
    print(sum(1 for s in subjects if not form_files(s, INCLUSION_FORM)))

    print('for how many subjects sociodemographics does not exist')
    print(sum(1 for s in subjects if not form_files(s, DEMOGRAPHICS_FORM)))

    print('for how many subjects either inclusionexclusion_criteria_review or sociodemographics does not exist?')
    missing_form = 0
    missing_variable = 0
    for s in subjects:
        inclusion_paths = form_files(s, INCLUSION_FORM)
        demographics_paths = form_files(s, DEMOGRAPHICS_FORM)
        if not (inclusion_paths and demographics_paths):
            missing_form += 1
            continue

        # Both forms exist: inspect the first match of each.
        inclusion_df = pd.read_csv(inclusion_paths[0])
        demographics_df = pd.read_csv(demographics_paths[0])
        if lacks_required_variables(inclusion_df, demographics_df):
            missing_variable += 1

    print(missing_form)
    print('for how many subjects at least one of the three variables does not exist?')
    print(missing_variable)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment