Skip to content

Instantly share code, notes, and snippets.

@heaven00
Last active October 4, 2020 14:43
Show Gist options
  • Save heaven00/6b700a0652acc5383bcda48ce68fcf87 to your computer and use it in GitHub Desktop.
Save heaven00/6b700a0652acc5383bcda48ce68fcf87 to your computer and use it in GitHub Desktop.
# errors.py
class NotPDFLinkError(Exception):
    """Signal that a link does not end with the ``.pdf`` suffix."""

    def __str__(self) -> str:
        # The text is fixed; constructor arguments do not affect it.
        message = "The Link Provided does not end with type `.pdf`"
        return message
class NotAnURLError(Exception):
    """Signal that the supplied link is not a valid URL."""

    def __str__(self) -> str:
        # The text is fixed; constructor arguments do not affect it.
        message = "The link provided is not a valid URL"
        return message
# pdf.py
@dataclass(frozen=True)
class ValidURL:
    """Immutable wrapper around a URL string.

    ``get_valid_url`` returns instances of this type for strings that pass
    its URL check; the class itself performs no validation.
    """

    # The URL text exactly as it was supplied.
    string: str
@dataclass(frozen=True)
class ValidPDFURL:
    """Immutable wrapper marking a ``ValidURL`` as pointing at a PDF.

    The class is frozen, like ``ValidURL``, for two reasons: the wrapped URL
    cannot be replaced after it has been checked, and instances are hashable.
    A non-frozen dataclass with the default ``eq=True`` is unhashable, which
    would also make any frozen dataclass holding one (such as ``SessionPDF``)
    fail when hashed.
    """

    # The underlying URL that was checked for the ``.pdf`` suffix.
    url: ValidURL
def get_valid_pdf_url(url) -> ValidPDFURL:
    """Validate ``url`` and return it wrapped as a ``ValidPDFURL``.

    Runs the two checks in sequence: ``get_valid_url`` first, then
    ``get_pdf_link`` on its result.

    Args:
        url: The candidate link; expected to be a ``str``.

    Returns:
        The validated PDF URL wrapper.

    Raises:
        NotAnURLError: If ``url`` is a string but not a valid URL.
        NotPDFLinkError: If the URL does not end with ``.pdf``.
        Exception: If ``url`` is not a string (raised by ``get_valid_url``).
    """
    # Failures are reported by raising, never by returning an exception,
    # so the return annotation is the success type only.
    valid_url = get_valid_url(url)
    return get_pdf_link(valid_url)
@singledispatch
def get_pdf_link(url) -> ValidPDFURL:
    """Turn a validated URL into a ``ValidPDFURL`` (single-dispatch entry point).

    This body is the fallback used for any argument type that has no
    registered implementation; it rejects the argument unconditionally.

    Args:
        url: Any object whose type has no registered implementation.

    Raises:
        NotAnURLError: Always.
    """
    # The annotation describes what the generic function yields on success;
    # this fallback itself never returns.
    raise NotAnURLError()
@get_pdf_link.register
def _(url: ValidURL) -> ValidPDFURL:
    """``get_pdf_link`` implementation for ``ValidURL`` arguments.

    Args:
        url: A URL wrapper whose ``string`` is checked for a ``.pdf`` suffix.

    Returns:
        ``url`` wrapped in a ``ValidPDFURL``.

    Raises:
        NotPDFLinkError: If ``url.string`` does not end with ``'.pdf'``.
            The comparison is case-sensitive and looks at the whole string,
            so a trailing query string or fragment also fails the check.
    """
    if url.string.endswith('.pdf'):
        return ValidPDFURL(url)
    raise NotPDFLinkError()
@singledispatch
def get_valid_url(url) -> ValidURL:
    """Validate a link and wrap it as a ``ValidURL`` (single-dispatch entry point).

    This body is the fallback used for any argument type that has no
    registered implementation; it rejects the argument unconditionally.

    Args:
        url: Any object whose type has no registered implementation.

    Raises:
        TypeError: Always, with the message ``'Not A String'``.
    """
    # TypeError is the conventional signal for a wrong argument type and is
    # still an ``Exception``, so existing ``except Exception`` handlers keep
    # working.
    raise TypeError('Not A String')
@get_valid_url.register
def _(url: str) -> ValidURL:
    """``get_valid_url`` implementation for ``str`` arguments.

    A string is accepted only when ``urlparse`` finds a non-empty scheme,
    network location and path. A bare host such as ``https://example.com``
    has an empty path and is therefore rejected.

    Args:
        url: The candidate URL text.

    Returns:
        ``url`` wrapped in a ``ValidURL``.

    Raises:
        NotAnURLError: If any of the three components is missing, or if
            ``urlparse`` itself rejects the string.
    """
    try:
        parts = urlparse(url)
    except ValueError as err:
        # urlparse raises ValueError for some malformed inputs (for example
        # an unbalanced IPv6 bracket); report those as an invalid URL too.
        raise NotAnURLError() from err
    if not (parts.scheme and parts.netloc and parts.path):
        raise NotAnURLError()
    return ValidURL(url)
@dataclass(frozen=True)
class SessionPDF:
    """A PDF document tied to a numbered session held on a given date."""

    # Validated link to the PDF.
    url: ValidPDFURL
    # Session number; used in the generated file name.
    session: int
    # Date of the session; used in the generated file name.
    session_date: datetime

    @property
    def filename(self) -> str:
        """Return ``session_<session>_<DD_MM_YYYY>.pdf`` for this document."""
        # Both values are instance fields and must be read through ``self``;
        # bare names would be looked up as globals and raise NameError.
        date_part = self.session_date.strftime("%d_%m_%Y")
        return f'session_{self.session}_{date_part}.pdf'

    @property
    def link(self) -> str:
        """Return the raw URL string held inside the nested wrappers."""
        return self.url.url.string
def download_pdf(session_pdf: SessionPDF, output_dir: str, *, timeout: float = 30.0) -> str:
    """Download a session PDF into ``output_dir`` and return the saved path.

    Args:
        session_pdf: Supplies the download link and the target file name.
        output_dir: Existing directory the file is written into.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call indefinitely.

    Returns:
        The path of the written file: ``output_dir`` joined with
        ``session_pdf.filename``.

    Raises:
        NotADirectoryError: If ``output_dir`` is not an existing directory.
        requests.HTTPError: If the server answers with an error status.
    """
    if not os.path.isdir(output_dir):
        # NotADirectoryError is an ``Exception`` subclass, so callers that
        # catch ``Exception`` are unaffected; the message is unchanged.
        raise NotADirectoryError('Output Directory is not a directory')

    response = requests.get(session_pdf.link, timeout=timeout)
    # Fail before writing anything: otherwise an HTML error page would be
    # stored on disk under a ``.pdf`` name.
    response.raise_for_status()

    output_path = os.path.join(output_dir, session_pdf.filename)
    with open(output_path, 'wb') as datafile:
        datafile.write(response.content)
    return output_path
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment