Last active
March 31, 2020 14:00
-
-
Save corentinbettiol/9e751abbd649e2b8f7d719bb0596333c to your computer and use it in GitHub Desktop.
First optional check to be implemented. - See https://github.com/kapt-labs/django-check-seo/wiki/Custom-Checks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# optional file, see https://github.com/kapt-labs/django-check-seo/wiki/How-to-add-a-check%3F | |
# Standard Library | |
import glob | |
from os.path import basename, dirname, isfile, join | |
import sys, json | |
from django.conf import settings | |
# Gather every "*.py" path found in the directory that holds this file.
modules = glob.glob(join(dirname(__file__), "*.py"))

# Expose each module through __all__ so it can be imported; the name is the
# file name with its ".py" suffix removed. "__init__.py" itself is left out.
__all__ = []
for module in modules:
    if module.endswith("__init__.py") or not isfile(module):
        continue
    __all__.append(basename(module)[:-3])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard Library | |
import os | |
import importlib | |
# Third party | |
import bs4 | |
from django.utils.translation import gettext as _, pgettext | |
import requests | |
# Local application / specific library imports | |
from django_check_seo.checks import custom_list | |
def importance():
    """Give the execution priority of this check.

    Scripts with a higher importance are executed first.

    Returns:
        int -- Importance of the script.
    """
    priority = 1
    return priority
# Seconds to wait for a link before giving up and reporting it as broken.
_LINK_REQUEST_TIMEOUT = 10

# Status codes accepted as "not broken" (success or redirect).
_ACCEPTED_STATUS_CODES = (200, 301, 302)


def _resolve_internal_url(href, page_url, domain_name):
    """Turn the href of an internal link into an absolute URL that can be requested.

    Arguments:
        href {str} -- Raw value of the link's href attribute.
        page_url {str} -- Full URL of the analysed page.
        domain_name {str} -- Value of the DOMAIN_NAME environment variable.

    Returns:
        str or None -- Absolute http(s) URL, or None when the href cannot be
        fetched over HTTP (empty, fragment-only, mailto:, tel:, javascript:,
        malformed, ...).
    """
    from urllib.parse import urljoin, urlsplit

    href = href.strip()
    # a fragment-only link points to the page being analysed: nothing to fetch
    if not href or href.startswith("#"):
        return None

    try:
        if domain_name in page_url:
            # page of our own site: keep requesting it through
            # "http://" + DOMAIN_NAME, but keep the page path so that
            # path-relative links ("page.html") resolve from the right place
            # NOTE(review): assumes page_url carries a scheme — confirm
            base_url = urljoin(
                "http://" + domain_name + "/", urlsplit(page_url).path
            )
        else:
            # prevent using domain name for loading internal links when
            # testing another website's page
            base_url = page_url

        # urljoin handles absolute, root-relative ("/a"), path-relative ("a")
        # and protocol-relative ("//host/a") hrefs
        absolute_url = urljoin(base_url, href)
        scheme = urlsplit(absolute_url).scheme
    except ValueError:
        # malformed URL (e.g. invalid IPv6 literal): cannot be requested
        return None

    if scheme not in ("http", "https"):
        return None
    return absolute_url


def run(site):
    """Checks that the internal links of the extracted content are not broken.

    Every internal link is requested once; a link is reported as broken when the
    request fails or when the final status code is not 200, 301 or 302.
    The result is appended to site.problems (broken links found) or to
    site.success (no broken link).

    Arguments:
        site {Site} -- Structure containing a good amount of resources from the targeted webpage.

    Raises:
        KeyError -- If the DOMAIN_NAME environment variable is not set.
    """
    from html import escape

    domain_name = os.environ["DOMAIN_NAME"]

    broken_internal = custom_list.CustomList(
        name=_("Found broken internal links"),
        settings=pgettext("masculin", "none"),
        description=_(
            "Neither Google nor users like broken links. Consider setting up redirections rather than deleting content on your site."
        ),
    )

    working_internal = custom_list.CustomList(
        name=_("No broken internal link found"),
        settings=pgettext("masculin", "none"),
        found=pgettext("masculin", "none"),
        description=broken_internal.description,
    )

    # only get links with href
    links = []
    for c in site.content:
        links.extend(c.find_all("a", href=True))

    # internal links = absolute links that contains domain name or relative links
    internal_links_list = [
        link
        for link in links
        if domain_name in link["href"] or not link["href"].startswith("http")
    ]

    # broken internal links
    broken_links = []
    link_text = _("link")
    # each distinct URL is requested only once, even if linked several times
    status_by_url = {}

    for link in internal_links_list:
        # the parsed document is left untouched: the absolute URL is kept in
        # a local variable instead of being written back into link["href"]
        url = _resolve_internal_url(link["href"], site.full_url, domain_name)
        if url is None:
            continue

        if url not in status_by_url:
            try:
                status_by_url[url] = requests.get(
                    url, timeout=_LINK_REQUEST_TIMEOUT
                ).status_code
            except requests.exceptions.RequestException:
                # unreachable host, timeout, too many redirects...: the link
                # does not work, so it is reported instead of crashing the check
                status_by_url[url] = None

        # status is not success or redirect
        if status_by_url[url] not in _ACCEPTED_STATUS_CODES:
            # href comes from the analysed page: escape it before building HTML
            broken_links.append(
                '<a target="_blank" title="broken link" href="'
                + escape(url)
                + '">'
                + link_text
                + "</a>"
            )

    searched_in = [link.text for link in internal_links_list]

    if broken_links:
        broken_internal.found = str(len(broken_links)) + " - " + ", ".join(broken_links)
        broken_internal.searched_in = searched_in
        site.problems.append(broken_internal)
    else:
        working_internal.searched_in = searched_in
        site.success.append(working_internal)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment