Created
April 13, 2019 16:31
-
-
Save Zylvian/f25b63d1be309deb833b1eb64fb33fb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Fetcher:
    """Look up pages and page images on the One Piece Fandom wiki.

    All requests go through the wiki's ``api.php`` endpoint with
    ``format=json``. Page lookups use ``action=query`` with the
    ``allpages`` generator; image lookups use ``action=imageserving``.
    """

    # Seconds to wait for each HTTP request. Without a timeout,
    # ``requests`` may block forever on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self._startlink = 'https://onepiece.fandom.com/api.php?format=json&action='
        self._imagestartlink = self._startlink+'imageserving&wisTitle='
        self._querystartlink = self._startlink+'query&'
        self.constants = Constants()

    def cleanName(self, name):
        """Return ``name`` lowercased with everything but ASCII letters removed.

        Special characters, digits and whitespace are dropped, so two
        spellings that differ only in those (or in case) compare equal.
        """
        return ''.join(c for c in name.lower() if c in string.ascii_lowercase)

    def get_wiki_pages(self, names):
        """Fetch one wiki page dict per entry of ``names``, in the same order."""
        return [self.__fetch_page(name) for name in names]

    def _pages_query_url(self, title):
        """Build the ``allpages`` generator URL listing pages from ``title`` on.

        ``title`` is percent-encoded so characters such as ``&``, ``#`` or
        ``+`` cannot break or truncate the query string.
        """
        from urllib.parse import quote

        return (self._querystartlink
                + 'prop=info&inprop=url&generator=allpages&gapfrom='
                + quote(title))

    def _image_url(self, title):
        """Build the ``imageserving`` URL for ``title`` (percent-encoded)."""
        from urllib.parse import quote

        return self._imagestartlink + quote(title)

    def __get_correct_page(self, checked_name, all_pages):
        """Pick the page from ``all_pages`` that best matches ``checked_name``.

        A page whose cleaned title equals the cleaned ``checked_name`` wins;
        otherwise the first page of ``all_pages`` is returned.

        :param checked_name: the name that was searched for.
        :param all_pages: mapping of page id -> page dict; every page dict
            must have a ``'title'`` key.
        :returns: the selected page dict.
        :raises LookupError: if ``all_pages`` is empty.
        """
        clean_name = self.cleanName(checked_name)
        first_page = None
        seen_titles = []

        # Checks for any direct hits.
        # Possible refinement: difflib.get_close_matches for fuzzy matching,
        # or falling back to the first title that contains checked_name.
        for nr, page in enumerate(all_pages.values()):
            title = page['title']
            seen_titles.append(title)
            if self.cleanName(title) == clean_name:
                log.info("Found direct match, page nr {}: {}".format(nr + 1, clean_name))
                first_page = page
                break

        # No direct hit: fall back to the first entry.
        if first_page is None:
            try:
                first_page = next(iter(all_pages.values()))
            except StopIteration:
                # A bare StopIteration escaping a normal function is easy to
                # swallow silently further up; raise something explicit.
                raise LookupError(
                    "No wiki pages returned for {!r}".format(checked_name)
                ) from None

        # NOTE(review): the source's indentation was lost; this summary is
        # logged for every lookup (direct match or fallback) - confirm intent.
        log.info("Input name: {} \n Parsed titles were: {}.\n Result title was: {}".format(
            checked_name, ",".join(seen_titles), first_page["title"]))
        return first_page

    def __fetch_page(self, name):
        """Query the wiki for ``name`` and return the best matching page dict.

        The cleaned name is passed through ``self.constants.translateAlt``
        (per the original comment it returns a translated name or the same
        name). If it comes back unchanged, the caller's original spelling is
        used for the search instead of the cleaned one.

        :raises KeyError: if the response JSON has no ``['query']['pages']``.
        :raises LookupError: if the returned page mapping is empty.
        """
        clean_name = self.cleanName(name)
        checked_name = self.constants.translateAlt(clean_name)
        if checked_name == clean_name:
            checked_name = name

        # All pages from "name" onwards, and their URLs.
        # API hint: 'Use "gapfilterredir=nonredirects" option instead of
        # "redirects" when using allpages as a generator'  (see also gaplimit=1)
        response = requests.get(self._pages_query_url(checked_name.title()),
                                timeout=self.REQUEST_TIMEOUT)
        all_pages = response.json()['query']['pages']

        return self.__get_correct_page(checked_name, all_pages)

    # ASSUME THAT THE FIRST LINK IS CORRECT - MIGHT BE REDIRECTION LINK!

    def check_title(self):
        """Placeholder; not implemented."""
        pass

    def fetch_image_url(self, page):
        """Return the image URL the wiki serves for ``page``, or ``""``.

        :param page: page dict with a ``'title'`` key.
        :returns: the ``['image']['imageserving']`` value of the response,
            or an empty string when the response lacks it.
        """
        title = str(page["title"])
        image_json = requests.get(self._image_url(title),
                                  timeout=self.REQUEST_TIMEOUT).json()
        try:
            return image_json["image"]["imageserving"]
        except (KeyError, TypeError):
            # TypeError is defensive: it covers an "image" value that is not
            # a dict (e.g. a list). The exact response shape is not confirmed.
            log.info("Couldn't parse image url")
            return ""

    def fetch_summary(self):
        """Placeholder: only prints a hard-coded parse URL; fetches nothing."""
        print(self._startlink+'&text=Luffy&parse&summary=')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment