Skip to content

Instantly share code, notes, and snippets.

@timedreamer
Created September 21, 2022 08:49
Show Gist options
  • Save timedreamer/46953f6e586c916339570001e63a0c30 to your computer and use it in GitHub Desktop.
Save timedreamer/46953f6e586c916339570001e63a0c30 to your computer and use it in GitHub Desktop.
Download sequencing file metadata using `ffq`
# Download sequencing run metadata using ffq.
# Requires the input file `SraRunTable.txt` in the current working directory.
# Author: Ji Huang
# Date: 2022-09-21
import pandas as pd
import subprocess
import os
# Define the output folder that will receive one JSON file per run.
output_folder = "../data/json/"
# exist_ok=True makes re-runs silent when the folder is already there, while
# genuine failures (e.g. permission denied) now stop the script immediately
# instead of being printed and then failing once per download later on.
os.makedirs(output_folder, exist_ok=True)
# Read in the SRA run table to be processed. The file is comma-separated
# despite its .txt extension.
sra_table = pd.read_csv("SraRunTable.txt")

# Blank cells in the "Run" column are read as NaN; drop them because the
# download step builds file names by string concatenation with each entry.
all_runs = list(sra_table["Run"].dropna())

# De-duplicate while keeping the order of first appearance, so the runs are
# processed in the same (table) order on every execution. The result is
# always a list, whether or not duplicates were present.
srr_number = list(dict.fromkeys(all_runs))

# Report whether the table contained repeated SRR numbers.
if len(srr_number) == len(all_runs):
    print("There are no redundant SRR numbers.")
else:
    print("There are redundant SRR numbers.")
print("There are " + str(len(srr_number)), "unique SRR numbers.")
# Download the metadata of every run with ffq, one JSON file per SRR number.
failed_runs = []
for srr in srr_number:
    output_json = os.path.join(output_folder, srr + ".json")
    # Pass the command as an argument list (no shell) so that unusual
    # characters in the path or accession cannot be interpreted by a shell.
    # If the ffq executable is not installed this raises FileNotFoundError,
    # which stops the script early rather than failing once per run.
    ffq_command = ["ffq", "--ftp", "-o", output_json, srr]
    result = subprocess.run(ffq_command)
    # Only report success when ffq actually exited cleanly.
    if result.returncode == 0:
        print(srr + " is done!")
    else:
        failed_runs.append(srr)
        print(srr + " failed (ffq exit code " + str(result.returncode) + ").")

# Summarise failures so they are not lost in ffq's own output.
if failed_runs:
    print(str(len(failed_runs)) + " run(s) failed: " + ", ".join(failed_runs))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment