Skip to content

Instantly share code, notes, and snippets.

@drewbitt
Created August 15, 2018 11:06
Show Gist options
  • Save drewbitt/43cab7b6c0792b30a46d65c226a741db to your computer and use it in GitHub Desktop.
Save drewbitt/43cab7b6c0792b30a46d65c226a741db to your computer and use it in GitHub Desktop.
Collect all links on a Madokami page and append them to a list file; use --group to filter links by a regex matched against the link's title text.
#!/usr/bin/python3
"""Scrape all download links from a Madokami page and append them to a list file.

Usage: script.py MADOKAMI_URL [--group REGEX]

Fetches the given Madokami page (HTTP Basic auth), finds every anchor tagged
rel="nofollow", and appends its absolute URL to the file "list". With
--group, only links whose anchor text matches REGEX are kept.
"""
import httplib2
import base64
import re
import argparse
from bs4 import BeautifulSoup

parser = argparse.ArgumentParser()
parser.add_argument("madokami_url", nargs=1)
parser.add_argument("-group", "--group")
args = parser.parse_args()

# Madokami requires HTTP Basic auth; fill in your credentials here.
username = ""
password = ""

h = httplib2.Http()
# Use base64.b64encode: base64.encodestring was deprecated and removed in
# Python 3.9, and it also appended a trailing newline that would corrupt
# the Authorization header value.
credentials = (str(username) + ':' + str(password)).encode("utf-8")
auth = base64.b64encode(credentials).decode("ascii")
# httplib2.Http.request returns (response, content); the page body is the
# second element — that is what BeautifulSoup needs.
response, content = h.request(
    args.madokami_url[0],
    'GET',
    headers={'Authorization': 'Basic ' + auth})
soup = BeautifulSoup(content, 'lxml')

webfull = 'https://manga.madokami.al'
# Context manager guarantees the file is closed even if parsing raises.
with open("list", "a") as f:
    if args.group:
        regexp = re.compile(args.group)
        for tag in soup.find_all(rel="nofollow"):
            # tag.contents[0] is the anchor's visible text node.
            if regexp.search(tag.contents[0]):
                f.write(webfull + tag.get('href') + '\n')
    else:
        for link in soup.find_all(rel="nofollow"):
            f.write(webfull + link.get('href') + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment