Skip to content

Instantly share code, notes, and snippets.

@pqlx
Created October 19, 2018 22:49
Show Gist options
  • Save pqlx/8612d1c5e83f5d4d436691380f0a220b to your computer and use it in GitHub Desktop.
Save pqlx/8612d1c5e83f5d4d436691380f0a220b to your computer and use it in GitHub Desktop.
Project Euler scraper + template generator
from bs4 import BeautifulSoup
import re
import textwrap
class Problem(object):
    """One Project Euler problem scraped from the "show all" listing page.

    Parses the problem number, title, and description out of the single
    ``<div>`` element that wraps one problem on
    https://projecteuler.net/show=all, and can render a solution-file
    template for it.
    """

    def __init__(self, problem_element):
        """Extract number, title, and description from *problem_element*.

        problem_element: a BeautifulSoup Tag for one problem; it must
        contain a ``div.info`` (with the problem link) and a
        ``div.problem_content`` (with the statement text).
        """
        self.number = None       # int: problem number, e.g. 1
        self.title = None        # str: problem title, e.g. "Multiples of 3 and 5"
        self.description = None  # str: problem statement, whitespace-stripped
        self._parse_element(problem_element)

    def _parse_element(self, element):
        """Populate number/title/description from the problem's element."""
        info = element.find("div", {"class": "info"})
        content = element.find('div', {"class": "problem_content"})
        link = info.find('a')
        # href looks like "problem=<n>".  Raw string avoids the invalid
        # "\d" escape warning the original non-raw pattern triggers on
        # modern CPython; the matched text is unchanged.
        self.number = int(re.match(r"problem=(\d+)", link['href']).group(1))
        # The link text reads "Problem <n>: <title>Published on ...";
        # capture everything between the colon and "Published".
        self.title = re.search(
            r"Problem {}: (.+)Published".format(self.number), link.text
        ).group(1)
        self.description = content.text.strip()

    @property
    def normalized_title(self):
        """Title lowercased, spaces -> "_", "/" -> "slash" (dir-name safe)."""
        return self.title \
            .replace(' ', '_') \
            .replace('/', 'slash') \
            .lower()

    def build_template(self):
        """Return the text of a ``solution.py`` skeleton for this problem.

        The template embeds the problem number/title in a header comment
        and the description in the ``solve()`` docstring.
        """
        # Template is written flush-left so dedent has nothing to strip and
        # multi-line descriptions cannot break the common-prefix detection.
        return textwrap.dedent(
            f"""\
from math import *
import gmpy2
######################
# Problem {self.number}: {self.title}
# Answer: (blank)
######################
def solve():
    '''
    {self.description}
    '''
    pass
if __name__ == "__main__":
    print(solve())
""".strip()
        )
if __name__ == "__main__":
    import os
    import requests

    # Fetch the single page listing every problem, then parse it.
    r = requests.get("https://projecteuler.net/show=all").content
    soup = BeautifulSoup(r, 'lxml')
    # The first child div of #content is the page header; the rest each
    # wrap one problem.
    elements = soup.find('div', {"id": "content"}).findAll('div', recursive=False)[1:]
    parsed = [Problem(elem) for elem in elements]
    for problem in parsed:
        # Directory name: "<number>_<normalized title>".
        title = str(problem.number) + '_' + problem.normalized_title
        # Skip problems already generated so the script is re-runnable.
        if os.path.isdir(title):
            continue
        os.mkdir(title)
        # Context manager closes (and flushes) the file even on error —
        # the original open/flush/close leaked the handle on exceptions.
        with open(f'{title}/solution.py', 'w') as handle:
            handle.write(problem.build_template())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment