Skip to content

Instantly share code, notes, and snippets.

@tashrifbillah
Created June 20, 2023 22:14
Show Gist options
  • Save tashrifbillah/8241393de66554f26f85a3909a9c4570 to your computer and use it in GitHub Desktop.
Save tashrifbillah/8241393de66554f26f85a3909a9c4570 to your computer and use it in GitHub Desktop.
Check status of AVL combined QC records and transcripts' existence
# execute it from within /data/predict1/data_from_nda/Prescient/PHOENIX/GENERAL
from os.path import dirname, join as pjoin
import pandas as pd
from glob import glob
# Location of the combined QC records, relative to the PHOENIX/GENERAL root.
QC_PATTERN = '*/processed/*/interviews/open/*_combinedQCRecords.csv'


def _first_day(qc_file):
    """Return the ``day`` value of the first record in *qc_file* as an int.

    Returns ``None`` when the file is empty, has no data rows, has no
    ``day`` column, or when the first ``day`` value cannot be converted to
    an integer (e.g. it is blank/NaN).
    """
    try:
        # Only the first record is consulted, so do not parse the rest.
        df = pd.read_csv(qc_file, nrows=1)
    except pd.errors.EmptyDataError:
        return None

    if df.empty or 'day' not in df.columns:
        return None

    try:
        # pandas may hand back a float (12.0); formatting that with ``:04``
        # would yield '12.0' instead of '0012', so force an integer.
        return int(df['day'].iloc[0])
    except (TypeError, ValueError):
        return None


def check_transcripts(root=''):
    """Report which combined QC records have a matching redacted transcript.

    For every ``*_combinedQCRecords.csv`` found under *root*, the ``day`` of
    its first record is used to look for
    ``transcripts/*_interviewAudioTranscript_open_day<DDDD>_session*_REDACTED.txt``
    next to it. Records with no transcript, with more than one candidate
    transcript, or with no usable ``day`` value are printed; a summary is
    printed at the end.

    Parameters
    ----------
    root : str, optional
        Directory to scan. It is joined onto a glob pattern, so it should
        not contain glob metacharacters. The default (``''``) scans relative
        to the current working directory.

    Returns
    -------
    tuple of (int, int)
        Number of combined QC record files found, and number of those that
        have exactly one matching transcript.
    """
    # Sorted so that the report is reproducible from run to run.
    files = sorted(glob(pjoin(root, QC_PATTERN)))

    matched = 0
    for file in files:
        day = _first_day(file)
        if day is None:
            print(file, 'has no usable day value')
            continue

        template = pjoin(
            dirname(file),
            f'transcripts/*_interviewAudioTranscript_open_day{day:04}_session*_REDACTED.txt',
        )
        transcripts = glob(template)

        if len(transcripts) == 1:
            matched += 1
        elif transcripts:
            # More than one candidate: list them so they can be inspected.
            print(transcripts)
        else:
            print(template, 'does not exist')

    print('Total combinedQCRecords.csv', len(files))
    print('Total transcript files', matched)
    return len(files), matched


if __name__ == '__main__':
    check_transcripts()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment