Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@ivirshup
Created April 25, 2018 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivirshup/131a56175503a2408f814874925c19f9 to your computer and use it in GitHub Desktop.
Save ivirshup/131a56175503a2408f814874925c19f9 to your computer and use it in GitHub Desktop.
from subprocess import run, PIPE
import requests
import pandas as pd
from pathlib import Path
# Base URL that main() prefixes to each remote directory name it downloads.
BASE_URL = "http://imlspenticton.uzh.ch/robinson_lab/conquer/"
def curl(inpth, outpth=None):
    """
    Fetch a URL by running the ``curl`` command-line tool.

    Args:
        inpth (str or Path-like):
            URL to fetch; converted with ``str()`` before being handed to curl.
        outpth (str or Path; default=None):
            File to write the response to (passed as ``-o``). When falsy, no
            ``-o`` is given and curl writes to its standard output.

    Returns:
        subprocess.CompletedProcess: Result of the finished curl process.

    Raises:
        subprocess.CalledProcessError: If curl exits with a non-zero status,
        which (because of ``--fail``) includes HTTP error responses.
    """
    # --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead of
    # saving the server's error page as if it were the requested file, so
    # check=True actually catches failed downloads.
    command = ["curl", "--fail", str(inpth)]
    if outpth:
        command.extend(["-o", str(outpth)])
    return run(command, check=True)
def fetch_table(url):
    """
    Fetch an HTML page and return its first table as a DataFrame.

    Args:
        url (str):
            URL of the page to read.

    Returns:
        pandas.DataFrame: The first table found on the page, parsed with its
        first row as the header, minus the row labelled 0 and the
        "Unnamed: 0" column.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(url)
    # Fail here on 4xx/5xx responses rather than trying to parse an error
    # page as if it were the expected table.
    r.raise_for_status()
    table = pd.read_html(r.content, header=0)[0]
    # NOTE(review): presumably row 0 and the unnamed first column are listing
    # decoration rather than real entries -- confirm against the server's
    # markup.
    return table.drop(0).drop("Unnamed: 0", axis=1)
def download_directory(url, outpth, recurse=False):
    """
    Downloads a basic apache directory of files, like from BASE_URL.

    Args:
        url (str):
            URL of directory to download from. A trailing "/" is added if
            missing.
        outpth (str or Path):
            Directory to place files in. Created, along with any missing
            parent directories, if it does not already exist.
        recurse (bool; default=False):
            Should subdirectories be followed?
    """
    outpth = Path(outpth)
    # parents=True so a nested destination works even when its intermediate
    # directories have not been created yet.
    outpth.mkdir(parents=True, exist_ok=True)
    if not url.endswith("/"):
        url = url + "/"
    table = fetch_table(url)
    names = table["Name"]
    # An entry counts as a directory when its name ends in "/" and its size
    # is "-". na=False keeps the mask strictly boolean for rows that have no
    # name, so it can be safely inverted with ~ below.
    isdir = names.str.endswith("/", na=False) & (table["Size"] == "-")
    # Rows without a name have nothing to download; skip them instead of
    # failing when building the destination path.
    files = names[~isdir].dropna()
    dirs = names[isdir]
    for f in files:
        filepth = outpth.joinpath(f)
        print(f"Retrieving {f} => {filepth}")
        curl(f"{url}{f}", filepth)
    if recurse:
        for d in dirs:
            download_directory(url + d, outpth.joinpath(d), recurse)
def main():
    """
    Download the data-mae, report-multiqc and report-scater directories.

    Each remote directory under BASE_URL is saved into a local directory of
    the same name; only report-multiqc is downloaded recursively.
    """
    # (remote sub-directory, local destination, follow sub-directories?)
    jobs = (
        ("data-mae/", Path("data-mae"), False),
        ("report-multiqc/", Path("report-multiqc"), True),
        ("report-scater/", Path("report-scater"), False),
    )
    for subdir, destination, follow in jobs:
        download_directory(BASE_URL + subdir, destination, follow)


if __name__ == '__main__':
    main()
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
asn1crypto=0.24.0=py36_0
ca-certificates=2018.03.07=0
certifi=2018.4.16=py36_0
cffi=1.11.5=py36h9745a5d_0
chardet=3.0.4=py36h0f667ec_1
cryptography=2.2.2=py36h14c3975_0
icu=58.2=h9c2bf20_1
idna=2.6=py36h82fb2a8_1
intel-openmp=2018.0.0=8
libedit=3.1=heed3624_0
libffi=3.2.1=hd88cf55_4
libgcc-ng=7.2.0=hdf63c60_3
libgfortran-ng=7.2.0=hdf63c60_3
libstdcxx-ng=7.2.0=hdf63c60_3
libxml2=2.9.8=hf84eae3_0
libxslt=1.1.32=h1312cb7_0
lxml=4.2.1=py36h23eabaa_0
mkl=2018.0.2=1
mkl_fft=1.0.1=py36h3010b51_0
mkl_random=1.0.1=py36h629b387_0
ncurses=6.0=h9df7e31_2
numpy=1.14.2=py36hdbf6ddf_1
openssl=1.0.2o=h20670df_0
pandas=0.22.0=py36hf484d3e_0
pip=9.0.3=py36_0
pycparser=2.18=py36hf9f622e_1
pyopenssl=17.5.0=py36h20ba746_0
pysocks=1.6.8=py36_0
python=3.6.5=hc3d631a_0
python-dateutil=2.7.2=py36_0
pytz=2018.4=py36_0
readline=7.0=ha6073c6_4
requests=2.18.4=py36he2e5f8d_1
setuptools=39.0.1=py36_0
six=1.11.0=py36h372c433_1
sqlite=3.23.1=he433501_0
tk=8.6.7=hc745277_3
urllib3=1.22=py36hbe7ace6_0
wheel=0.31.0=py36_0
xz=5.2.3=h5e939de_4
zlib=1.2.11=ha838bed_2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment