Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save priyankajayaswal1/079f8f341c4095494d20 to your computer and use it in GitHub Desktop.
Save priyankajayaswal1/079f8f341c4095494d20 to your computer and use it in GitHub Desktop.
def linkedPagesthroughcontentparsing(self, namespace=namespace, withImageLinks=False):
    """Return the pages this page links to, found by parsing its wikitext.

    The page text is fetched with ``self.get(get_redirect=True)``, stripped
    of language links, category links and disabled parts, and then scanned
    with the ``Rlink`` pattern. Each matched title is normalised, resolved
    against this page when it is a relative subpage link, and turned into a
    ``Page`` on ``self.site``.

    Parameters:
        namespace: not read by this function body; its default is taken
            from the name ``namespace`` in the defining scope.
            NOTE(review): presumably intended as a namespace filter --
            confirm against callers.
        withImageLinks: when false, targets for which ``page.isImage()`` is
            true are left out of the result.

    Returns:
        A list of ``Page`` objects without duplicates, in order of first
        appearance. An empty list if fetching the text raises
        ``pywikibot.SectionError``.

    Raises:
        pywikibot.NoPage, pywikibot.IsRedirectPage: re-raised unchanged if
            fetching the text raises them.
    """
    result = []
    try:
        thistxt = textlib.removeLanguageLinks(self.get(get_redirect=True),
                                              site=self.site)
    except pywikibot.NoPage:
        raise
    except pywikibot.IsRedirectPage:
        raise
    except pywikibot.SectionError:
        return []
    thistxt = textlib.removeCategoryLinks(thistxt, site=self.site)
    # remove HTML comments, pre, nowiki, and includeonly sections
    # from text before processing
    thistxt = textlib.removeDisabledParts(thistxt)

    for match in Rlink.finditer(thistxt):
        title = match.group('title')
        title = title.replace("_", " ").strip(" ")
        if self.namespace() in self.site.family.namespacesWithSubpage:
            # convert relative link to absolute link
            if title.startswith(".."):
                # "../x": drop the last path component of this page's title
                parts = self.title().split('/')
                parts.pop()
                title = u'/'.join(parts) + title[2:]
            elif title.startswith("/"):
                # "/x": subpage of this page
                title = u'%s/%s' % (self.title(), title[1:])
        if title.startswith("#"):
            # this is an internal section link
            continue
        # ``site`` is used as an attribute everywhere else in this function,
        # so it is accessed the same way here rather than being called.
        if self.site.isInterwikiLink(title):
            continue
        try:
            page = Page(self.site, title)
            try:
                # Force the page's string form and hash to be computed; any
                # failure is reported as an invalid link via pywikibot.Error.
                hash(str(page))
            except Exception:
                raise pywikibot.Error(u"Page %s contains invalid link to [[%s]]."
                                      % (self.title(), title))
        except pywikibot.Error:
            # Invalid links are skipped; they are only reported in verbose mode.
            if pywikibot.config.verbose_output:
                pywikibot.output(u"Page %s contains invalid link to [[%s]]."
                                 % (self.title(), title))
            continue
        if not withImageLinks and page.isImage():
            continue
        if page.sectionFreeTitle() and page not in result:
            result.append(page)
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment