Created
May 7, 2018 11:11
-
-
Save SenpaiSilver/2a28437eb29878aca9a15ad84d99877b to your computer and use it in GitHub Desktop.
Scraper script that posts Tomo-chan pages from Dropout to Discord as embeds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import collections | |
import json | |
import re | |
import requests | |
import settings | |
HEADERS = { | |
"User-Agent": "SenpaiSilver <emailgoesherebutitwasremoveforreddit>" | |
} | |
ENDPOINT = "https://public-api.wordpress.com/rest/v1.1/sites/dropoutmanga.wordpress.com/posts/" | |
# Attachment URL pattern; the single capture group is the number after "page-".
# Compiled once at import time. Dots are escaped so they only match a literal
# ".", and both http and https URLs are accepted.
_CHAP_PATTERN = re.compile(
    r"https?://dropoutmanga\.files\.wordpress\.com/\d+/\d+/"
    r"dropout-tomo-chan-wa-onna-no-ko-page-(\d+)\.png",
    re.IGNORECASE,
)


def get_chap_number(link):
    """Extract the number embedded in a Dropout Tomo-chan attachment URL.

    Args:
        link: URL of the attachment image.

    Returns:
        The digits that follow ``page-`` in the URL, as a string, or ``"?"``
        when the URL does not match the expected pattern.
    """
    found = _CHAP_PATTERN.search(link)
    if found is None:
        return "?"
    return found.group(1)
def make_payload(link):
    """Build the JSON body sent to a Discord webhook for one image.

    Args:
        link: URL of the image; it is used as the embed image URL, shown in
            the description, and passed to ``get_chap_number``.

    Returns:
        A dict with one key, ``"embeds"``, holding a single-element list.
        The embed has a ``title``, a ``description`` and an ``image`` entry.
    """
    number = get_chap_number(link)
    image = {"url": link, "height": 250, "width": 167}
    embed = {
        "title": "Tomo-chan wa Onna no Ko! %s" % number,
        "description": "CH %s @ %s" % (number, link),
        "image": image,
    }
    return {"embeds": [embed]}
def post_discord(link):
    """Post the embed for ``link`` to every webhook listed in ``./discord.txt``.

    Each line of the file holds one webhook URL. Anything after a ``#`` on a
    line is dropped, and lines that are empty after that are skipped. Nothing
    happens when the file does not exist. For each webhook the URL is
    printed, followed by `` POSTED`` when the response status is below 400 or
    by the status code otherwise.

    Args:
        link: Image URL handed to ``make_payload``.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    hooks_path = "./discord.txt"
    if not os.path.exists(hooks_path):
        return
    with open(hooks_path) as f:
        lines = f.readlines()

    # The payload depends only on ``link``, so build it once for all webhooks.
    payload = make_payload(link)
    for line in lines:
        # strip() removes spaces and tabs as well as line endings, so
        # "url  # note" and whitespace-only lines never yield a malformed URL.
        hook = line.split('#')[0].strip()
        if not hook:
            continue
        print(hook)
        # Without a timeout, requests waits indefinitely on a stalled server.
        req = requests.post(hook, json=payload, headers=HEADERS, timeout=10)
        if req.status_code < 400:
            print(" POSTED")
        else:
            print(" %d" % (req.status_code))
def get_chapters(chp):
    """Post every attachment newer than the stored ``LastID``, then save it.

    Attachments are handled in ascending numeric ID order. Each one whose ID
    is greater than the ``LastID`` read from the settings is printed and
    passed to ``post_discord``. The highest ID handled is written back as the
    new ``LastID``.

    Args:
        chp: Mapping of attachment ID to attachment dict. Every ID must be
            convertible with ``int()`` and every value must have a ``"guid"``
            entry, which is used as the image URL.
    """
    setngs = settings.Settings()
    last_seen = setngs.settings['LastID']
    newest = last_seen

    # Sort numerically: IDs decoded from JSON are strings, and a plain string
    # sort would put "1000" before "999", causing "999" to be skipped.
    ordered = sorted(chp.items(), key=lambda item: int(item[0]))
    for key, val in ordered:
        attachment_id = int(key)
        # Compare against the value loaded at the start so the result does
        # not depend on the order in which newer IDs are visited.
        if attachment_id <= last_seen:
            continue
        print(" %s: %s" % (key, val['guid']))
        post_discord(val['guid'])
        newest = max(newest, attachment_id)

    setngs.settings['LastID'] = newest
    setngs.write_settings()
# Script entry point: fetch the posts from ENDPOINT and process the
# attachments of every post whose title starts with "Tomo-chan wa Onna no ko"
# (case-insensitive).
if __name__ == "__main__":
    rg = re.compile(r"Tomo-chan wa Onna no ko.*", re.IGNORECASE)
    # Without a timeout, requests waits indefinitely on a stalled server.
    r = requests.get(ENDPOINT, headers=HEADERS, timeout=30)
    print("GET %s %d" % (ENDPOINT, r.status_code))
    # Fail with a clear HTTP error instead of a KeyError or JSON decode error
    # when the API answers with an error response.
    r.raise_for_status()
    for e in r.json()['posts']:
        if rg.match(e['title']):
            print("Processing: %s" % (e['title']))
            get_chapters(e['attachments'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Ignore": [], | |
"LastID": 1721 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
class Settings:
    """Settings dictionary persisted as a JSON file."""

    def __init__(self, settings_file="./settings.json"):
        """Load the settings, creating the file with defaults if it is missing.

        Args:
            settings_file: Path of the JSON file backing the settings.
        """
        # self.file: path of the backing JSON file.
        # self.settings: dict holding the current settings.
        self.file = settings_file
        file_missing = not os.path.exists(self.file)
        if file_missing:
            self.settings = {"LastID": 1437, "Ignore": []}
            self.write_settings()
        # Always reload from disk, including right after writing the defaults.
        self.read_settings()

    def read_settings(self):
        """Replace ``self.settings`` with the JSON content of ``self.file``."""
        with open(self.file) as handle:
            self.settings = json.load(handle)

    def write_settings(self):
        """Write ``self.settings`` to ``self.file`` as indented, key-sorted JSON."""
        with open(self.file, "w+", newline='\n') as handle:
            serialized = json.dumps(self.settings, indent=2, sort_keys=True)
            handle.write(serialized)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment