Skip to content

Instantly share code, notes, and snippets.

@akshay196
Last active November 21, 2017 16:10
Show Gist options
  • Save akshay196/87060ef3dbad0709a7a66bec2f5070fd to your computer and use it in GitHub Desktop.
Save akshay196/87060ef3dbad0709a7a66bec2f5070fd to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
import sys
from bs4 import BeautifulSoup
import os
# Most recent listing response; parse() rebinds this global to the object
# returned by requests.get(). It is an empty string until parse() has run.
response = ""
# Running counter that download() increments and prints for each file it
# actually fetches; starting at -1 makes the first printed value 0.
count = -1
def parse():
    """Fetch the listing page and return every ``<key>`` element in it.

    The raw response is also stored in the module-level ``response`` variable.

    Returns:
        The result of ``find_all('key')`` on the parsed page.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    global response
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get("http://**.**.com", timeout=30)
    # An error page contains no <key> elements, so the script would silently
    # download nothing; fail loudly instead.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all('key')
def get_ready(name):
    """Create the directory under ``output/`` that will hold ``name``.

    ``name`` is a "/"-separated relative path such as ``"a/b/file.pdf"``.
    Everything up to the last "/" is treated as the directory part. When
    ``name`` has no "/", the bare ``output/`` directory is still created so
    that the caller can always open ``"output/" + name`` for writing.

    Args:
        name: relative file path, using "/" as the separator.
    """
    print("Getting ready to download..")
    # Slice up to and including the last "/"; rfind() returns -1 when there is
    # no "/", which makes the slice empty.
    path = name[:name.rfind("/") + 1]
    try:
        os.makedirs("output/" + path)
    except FileExistsError:
        # Progress messages are only printed for nested directories.
        if path:
            print("Folder exists")
    else:
        if path:
            print("Creating directory " + path)
def check_downloaded(name):
    """Report whether ``name`` is already present under ``output/``.

    Returns:
        True when ``output/<name>`` exists on disk, False otherwise.
    """
    return os.path.exists("output/" + name)
def download(name):
    """Download ``name`` from the server and store it under ``output/``.

    Does nothing except print a notice when the file is already present.
    Otherwise it increments and prints the module-level ``count``, creates
    the target directory, fetches the file and writes it to
    ``"output/" + name``.

    Args:
        name: relative path of the file on the server, using "/" as the
            separator.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    global count
    if check_downloaded(name):
        print("File already exists. Downloading skipped.")
        return
    count = count + 1
    print(count)
    get_ready(name)
    # NOTE(review): name comes from the remote listing; a value containing
    # ".." would be written outside output/ -- confirm the source is trusted.
    url = "http://**.**.com/" + name
    print("Downloading " + name + " ....")
    # Without a timeout, requests waits indefinitely on a stalled server.
    res = requests.get(url, timeout=30)
    if not res.ok:
        # Saving an error page under the PDF's name would make
        # check_downloaded() treat it as done on every later run.
        print("Download failed with HTTP status " + str(res.status_code) + ". Skipping.")
        return
    path = "output/" + name
    with open(path, 'wb') as fp:
        fp.write(res.content)
    print("Download completed.")
if __name__ == '__main__':
    # Walk every <key> element from the listing and fetch the ones whose
    # text names a PDF file.
    for entry in parse():
        text = entry.get_text()
        if text.endswith(".pdf"):
            download(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment