import logging
import string

import requests

log = logging.getLogger(__name__)


class Fetcher:
    def __init__(self):
        # Base URLs for the One Piece wiki (Fandom MediaWiki) API.
        self._startlink = 'https://onepiece.fandom.com/api.php?format=json&action='
        self._imagestartlink = self._startlink + 'imageserving&wisTitle='
        self._querystartlink = self._startlink + 'query&'
        # Constants is defined elsewhere in the project; it provides translateAlt().
        self.constants = Constants()
    def cleanName(self, name):
        """Ignore all special characters, numbers, whitespace and case."""
        return ''.join(c for c in name.lower() if c in string.ascii_lowercase)
    def get_wiki_pages(self, names):
        pages = []
        for name in names:
            pages.append(self.__fetch_page(name))
        return pages
    def __get_correct_page(self, checked_name, all_pages):
        first_page = None
        log_string = ""
        clean_name = self.cleanName(checked_name)

        # Check for a direct title hit.
        # difflib.get_close_matches[0]
        for nr, page in enumerate(all_pages.values()):
            title = page['title']
            title_clean = self.cleanName(title)
            log_string += title + ","
            if title_clean == clean_name:
                log.info("Found direct match, page nr {}: {}".format(nr + 1, clean_name))
                first_page = page
                break

        # Get the first page whose title contains the name.
        # if not first_page:
        #     pages = all_pages.values()
        #     pages_containing = [page for page in pages if checked_name in page['title'].lower()]
        #     if pages_containing:
        #         first_page = pages_containing[0]
        #         print("bingo")

        # Otherwise fall back to the first entry.
        if not first_page:
            first_page = next(iter(all_pages.values()))

        log.info("Input name: {}\nParsed titles were: {}.\nResult title was: {}".format(
            checked_name, log_string[:-1], first_page["title"]))
        return first_page
    def __fetch_page(self, name):
        # Translate an alternative spelling, or keep the original name.
        # clean_name = self.cleanName(name)
        checked_name = self.constants.translateAlt(self.cleanName(name))
        if checked_name == self.cleanName(name):
            checked_name = name

        # All pages listed alphabetically from "name" onwards, with their URLs.
        # 'Use "gapfilterredir=nonredirects" option instead of "redirects"
        # when using allpages as a generator'  # gaplimit=1
        fetch_json = requests.get(
            self._querystartlink + '&prop=info&inprop=url&generator=allpages&gapfrom='
            + checked_name.title()
        ).json()

        # Pick the best matching page.
        all_pages = fetch_json['query']['pages']
        first_page = self.__get_correct_page(checked_name, all_pages)
        return first_page
    # ASSUMES THAT THE FIRST LINK IS CORRECT - MIGHT BE A REDIRECTION LINK!
    def check_title(self):
        pass
    def fetch_image_url(self, page):
        title = str(page["title"])
        image_json = requests.get(self._imagestartlink + title).json()
        try:
            return image_json["image"]["imageserving"]
        except KeyError:
            log.info("Couldn't parse image url")
            return ""
    def fetch_summary(self):
        # Not implemented yet: only prints a candidate parse URL for a hard-coded page.
        print(self._startlink + '&text=Luffy&parse&summary=')
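

# --- Example usage (not part of the original gist) ---
# A minimal sketch of how Fetcher might be driven, assuming the project's real
# Constants class (with a translateAlt() method) is available. The stub below is
# only a hypothetical stand-in so the sketch runs on its own; the calls hit the
# live wiki API over the network.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    class Constants:  # hypothetical stand-in for the project's real Constants class
        def translateAlt(self, clean_name):
            # No alternative-name translation: return the input unchanged.
            return clean_name

    fetcher = Fetcher()
    pages = fetcher.get_wiki_pages(["Luffy", "Roronoa Zoro"])
    for page in pages:
        print(page["title"], fetcher.fetch_image_url(page))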