Find all .pdf links on a page and check which ones return a non-200 response
$ poetry install
awscli==1.14.45 | |
boto3==1.5.35 | |
botocore==1.8.49 | |
colorama==0.3.7 | |
docutils==0.14 | |
futures==3.2.0 | |
jmespath==0.9.3 | |
numpy==1.14.0 | |
pandas==0.22.0 | |
pip==9.0.1 |
import pkg_resources

# Print every distribution in the current working set as ``name==version``,
# ordered by each distribution's ``key`` attribute.
installed_packages = pkg_resources.working_set
# The sort function must be passed as ``key=``. Passed positionally it is
# taken as ``cmp`` on Python 2 (and called with two arguments) and rejected
# outright on Python 3; both raise TypeError.
for package in sorted(installed_packages, key=lambda dist: dist.key):
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print("{}=={}".format(package.key, package.version))
import difflib | |
def visualize_diff(a, b): | |
seqm = difflib.SequenceMatcher(None, a, b) | |
output= [] | |
for opcode, a0, a1, b0, b1 in seqm.get_opcodes(): | |
if opcode == 'equal': | |
output.append(seqm.a[a0:a1]) | |
elif opcode == 'insert': | |
output.append(green) |
# Write a rich-text diff of each original/edited pair into column index 2.
# Rows are numbered by position rather than by the DataFrame index label, so
# a filtered or re-indexed frame still lands on consecutive worksheet rows.
# Counting starts at 1 to keep the original +1 offset, which skips the first
# worksheet row (presumably the header row written by ``df.to_excel``).
for row_number, (_, row) in enumerate(df.iterrows(), start=1):
    worksheet.write_rich_string(
        row_number, 2, *visualize_diff(row['original'], row['edited'])
    )

# ``ExcelWriter.save()`` was deprecated in pandas 1.5 and removed in 2.0.
# ``close()`` writes the file and is also available on older releases.
excel_writer.close()
# Export ``df`` to the 'to_review' sheet of edited_questions.xlsx through the
# xlsxwriter engine; ``index=False`` leaves the DataFrame index out of the sheet.
sheet_name = 'to_review' | |
excel_writer = pd.ExcelWriter("edited_questions.xlsx", engine='xlsxwriter') | |
df.to_excel(excel_writer, sheet_name=sheet_name, index=False) | |
# Grab the engine's workbook and the worksheet just written so that cell
# formats can be registered and cells written directly.
workbook = excel_writer.book | |
worksheet = excel_writer.sheets[sheet_name] | |
# Font formats: plain green, plain red, and bold green.
green = workbook.add_format({'color': 'green'}) | |
red = workbook.add_format({'color': 'red'}) | |
b_green = workbook.add_format({'color': 'green', 'bold': True}) |
import pandas as pd | |
df = pd.DataFrame([ | |
{ | |
'original': "Can you tell us a bit more abt how scalable your solution is?", | |
'edited': "Can you tell us a bit more about how scalable your solution is?", | |
}, | |
{ | |
'original': "What will our priorities be for the next quarter?", | |
'edited': "What will our priorities be for the next year?", | |
}, |
import xlsxwriter

# Create the output workbook.
workbook = xlsxwriter.Workbook('rich_strings.xlsx')
# The ``write_rich_string`` call that follows uses ``worksheet``, which this
# snippet never created; add the sheet explicitly to avoid a NameError.
worksheet = workbook.add_worksheet()

# Formats applied to individual fragments of the rich string.
bold = workbook.add_format({'bold': True})
italic = workbook.add_format({'italic': True})
worksheet.write_rich_string('A1', | |
'This is ', | |
bold, 'bold', |
import logging | |
import os | |
import json | |
import urllib3 | |
import datetime | |
# Required setting: read from the AIRFLOW_URL environment variable; a missing
# variable raises KeyError when this module is loaded.
AIRFLOW_URL = os.environ['AIRFLOW_URL'] | |
# Hard-coded DAG identifier (presumably the Airflow DAG this module targets --
# confirm against the code that uses it).
DAG_ID = 'my_helpful_dag' | |
# Optional setting: LOG_LEVEL environment variable, defaulting to 'info' and
# normalised to upper case.
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'info').upper() |
import os | |
import site | |
from setuptools.command import easy_install | |
try:
    # Python 3: ``reload`` is no longer a builtin; it lives in importlib.
    from importlib import reload
except ImportError:
    # Python 2: importlib has no ``reload``; the builtin is used instead.
    pass

# Target directory, taken from the required GLUE_INSTALLATION environment
# variable (KeyError if it is not set).
install_path = os.environ['GLUE_INSTALLATION']
# Install torch into that directory at runtime through setuptools' easy_install.
easy_install.main(["--install-dir", install_path, "torch"])
# Re-run site initialisation, presumably so the freshly installed package is
# picked up on the import path before the import below.
reload(site)
import torch
print(torch.__version__)