Find all .pdf links on a page and check which ones return a non-200 response
$ poetry install
import difflib | |
def visualize_diff(a, b): | |
seqm = difflib.SequenceMatcher(None, a, b) | |
output= [] | |
for opcode, a0, a1, b0, b1 in seqm.get_opcodes(): | |
if opcode == 'equal': | |
output.append(seqm.a[a0:a1]) | |
elif opcode == 'insert': | |
output.append(green) |
for i, row in df.iterrows(): | |
worksheet.write_rich_string(i+1, 2, *visualize_diff(row['original'], row['edited'])) | |
excel_writer.save() |