import logging
import string

import requests

log = logging.getLogger(__name__)


class Fetcher:
    def __init__(self):
        # Base URLs for the One Piece wiki (Fandom MediaWiki) API.
        self._startlink = 'https://onepiece.fandom.com/api.php?format=json&action='
        self._imagestartlink = self._startlink + 'imageserving&wisTitle='
        self._querystartlink = self._startlink + 'query&'
        # Constants is defined elsewhere in the project; it provides translateAlt().
        self.constants = Constants()
    def cleanName(self, name):
        """Ignore all special characters, numbers, whitespace and case."""
        return ''.join(c for c in name.lower() if c in string.ascii_lowercase)
    def get_wiki_pages(self, names):
        pages = []
        for name in names:
            pages.append(self.__fetch_page(name))
        return pages
    def __get_correct_page(self, checked_name, all_pages):
        first_page = None
        log_string = ""
        clean_name = self.cleanName(checked_name)

        # Check for a direct title hit.
        # difflib.get_close_matches[0]
        for nr, page in enumerate(all_pages.values()):
            title = page['title']
            title_clean = self.cleanName(title)
            log_string += title + ","
            if title_clean == clean_name:
                log.info("Found direct match, page nr {}: {}".format(nr + 1, clean_name))
                first_page = page
                break

        # Get the first page whose title contains the name.
        # if not first_page:
        #     pages = all_pages.values()
        #     pages_containing = [page for page in pages if checked_name in page['title'].lower()]
        #     if pages_containing:
        #         first_page = pages_containing[0]
        #         print("bingo")

        # Otherwise fall back to the first entry.
        if not first_page:
            first_page = next(iter(all_pages.values()))

        log.info("Input name: {}\nParsed titles were: {}.\nResult title was: {}".format(
            checked_name, log_string[:-1], first_page["title"]))
        return first_page
    def __fetch_page(self, name):
        # Translate an alternative spelling, or keep the original name.
        # clean_name = self.cleanName(name)
        checked_name = self.constants.translateAlt(self.cleanName(name))
        if checked_name == self.cleanName(name):
            checked_name = name

        # All pages listed alphabetically from "name" onwards, with their URLs.
        # 'Use "gapfilterredir=nonredirects" option instead of "redirects"
        # when using allpages as a generator'  # gaplimit=1
        fetch_json = requests.get(
            self._querystartlink + '&prop=info&inprop=url&generator=allpages&gapfrom='
            + checked_name.title()
        ).json()

        # Pick the best matching page.
        all_pages = fetch_json['query']['pages']
        first_page = self.__get_correct_page(checked_name, all_pages)
        return first_page
    # ASSUMES THAT THE FIRST LINK IS CORRECT - MIGHT BE A REDIRECTION LINK!
    def check_title(self):
        pass
    def fetch_image_url(self, page):
        title = str(page["title"])
        image_json = requests.get(self._imagestartlink + title).json()
        try:
            return image_json["image"]["imageserving"]
        except KeyError:
            log.info("Couldn't parse image url")
            return ""
    def fetch_summary(self):
        # Not implemented yet: only prints a candidate parse URL for a hard-coded page.
        print(self._startlink + '&text=Luffy&parse&summary=')
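

# --- Example usage (not part of the original gist) ---
# A minimal sketch of how Fetcher might be driven, assuming the project's real
# Constants class (with a translateAlt() method) is available. The stub below is
# only a hypothetical stand-in so the sketch runs on its own; the calls hit the
# live wiki API over the network.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    class Constants:  # hypothetical stand-in for the project's real Constants class
        def translateAlt(self, clean_name):
            # No alternative-name translation: return the input unchanged.
            return clean_name

    fetcher = Fetcher()
    pages = fetcher.get_wiki_pages(["Luffy", "Roronoa Zoro"])
    for page in pages:
        print(page["title"], fetcher.fetch_image_url(page))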