Skip to content

Instantly share code, notes, and snippets.

@tashrifbillah
Created August 1, 2023 20:27
Show Gist options
  • Save tashrifbillah/dd4fdbcf4f3fd2ac29c418efc93160bb to your computer and use it in GitHub Desktop.
Save tashrifbillah/dd4fdbcf4f3fd2ac29c418efc93160bb to your computer and use it in GitHub Desktop.
formqc troubleshooting
#!/usr/bin/env python
from glob import glob
from os.path import isfile, basename
import pandas as pd
# Names of the two form exports this script looks for.
INCLUSION_FORM = 'inclusionexclusion_criteria_review'
DEMOGRAPHICS_FORM = 'sociodemographics'

# (frame selector, column) pairs that must be readable from row 0.
REQUIRED_INCLUSION_COLUMNS = ('included_excluded', 'chrcrit_part')
REQUIRED_DEMOGRAPHICS_COLUMNS = ('chrdemo_sexassigned',)


def form_csvs(subject, form):
    """Return the paths matching *subject*'s CSV export of *form*.

    The glob pattern is
    ``<first 2 chars>-<first 9 chars>-form_<form>-day*to*csv`` and is
    relative, so it is resolved against the current working directory.
    NOTE(review): presumably the script is run from the folder that holds
    the per-form CSVs and the two-character prefix is the site code --
    confirm against the data layout.
    """
    return glob(subject[:2] + '-' + subject[:9] + '-form_' + form + '-day*to*csv')


def lacks_required_variable(inclusion_df, demographics_df):
    """Return True if any required variable cannot be read from row 0.

    A variable counts as missing when its column is absent or when the frame
    has no row labelled 0. A value that is present but NaN is NOT missing.

    Every variable is checked independently. The previous implementation
    chained the reads with ``or``, so a NaN in an earlier variable
    short-circuited the expression and hid a missing later column; it also
    relied on a bare ``except`` that treated any error as a missing variable.
    """
    required = [(inclusion_df, c) for c in REQUIRED_INCLUSION_COLUMNS]
    required += [(demographics_df, c) for c in REQUIRED_DEMOGRAPHICS_COLUMNS]
    return any(
        column not in frame.columns or 0 not in frame.index
        for frame, column in required
    )


# One entry per path found under any site's raw/ directory, reduced to its
# final path component.
subjects = glob('/data/predict1/data_from_nda/Pronet/PHOENIX/PROTECTED/*/raw/*')
subjects = [basename(s) for s in subjects]

print('for how many subjects inclusionexclusion_criteria_review does not exist?')
count = sum(1 for s in subjects if not form_csvs(s, INCLUSION_FORM))
print(count)

print('for how many subjects sociodemographics does not exist')
count = sum(1 for s in subjects if not form_csvs(s, DEMOGRAPHICS_FORM))
print(count)

print('for how many subjects either inclusionexclusion_criteria_review or sociodemographics does not exist?')
count = 0   # subjects missing at least one of the two forms
count2 = 0  # subjects with both forms but lacking a required variable
for s in subjects:
    p1 = form_csvs(s, INCLUSION_FORM)
    _p1 = form_csvs(s, DEMOGRAPHICS_FORM)
    if not (p1 and _p1):
        count += 1
        continue

    # both exist: inspect the first match of each form
    df = pd.read_csv(p1[0])
    _df = pd.read_csv(_p1[0])
    if lacks_required_variable(df, _df):
        count2 += 1

print(count)
print('for how many subjects at least one of the three variables does not exist?')
print(count2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment