This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cumbrous.elements.earth | |
vaulted.either.host | |
without.exteriour.help | |
creating.derivative.works | |
reward.though.here | |
father.without.cloud | |
project.gutenbergtm.name | |
unfold.celestial.guide | |
fierce.strive.here | |
renowned.alcinous.host |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import sys | |
import requests | |
import whois | |
from nltk import tokenize | |
BOOKFILE = sys.argv[1] | |
OUTPUTFILE = BOOKFILE + '.possible-domains.txt' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"url": "https://www.theonion.com/nasa-panics-after-asteroid-fires-back-1849587289", | |
"status": 200, | |
"content_type": "text/html; charset=utf-8", | |
"title": "NASA Panics After Asteroid Fires Back", | |
"description": "The Onion brings you all of the latest news, stories, photos, videos and more from America's finest news source.", | |
"image": "https://i.kinja-img.com/gawker-media/image/upload/c_fill,f_auto,fl_progressive,g_center,h_675,pg_1,q_80,w_1200/f0c0e17e9ed6acb0e1bb946a9e233c1d.jpg", | |
"publisher": "The Onion", | |
"keywords": [ |
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"id": 1413, | |
"creator": "Ed Summers", | |
"query": [ | |
{ | |
"id": 1413, | |
"searchId": 1413, | |
"created": "2022-10-13T21:40:22.102Z", | |
"value": { | |
"or": [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# This demonstrates an inconsistency in results from the Internet Archive CDX | |
# API when querying by scopeType=domain vs scopeType=prefix. For context see: | |
# | |
# https://inkdroid.org/2022/09/24/pdfs/ | |
# | |
# Note: you'll need to | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
globals()['y'] = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
druids = ['bj330fg0526', 'bp312sd3142', 'bs648dv9357', 'bz893jg7695', 'bz922hc1158', 'cc095kz3027', 'ch908dt6803', 'cp809cz8166', 'cv292vs5727', 'dn752dz0508', 'dy271hk6968', 'fd892fn4310', 'fj109wp2130', 'fn912wb3725', 'fp815hx3553', 'fs415vb1264', 'fv812yp9241', 'fw782ks7983', 'gf100kp6588', 'gj901jn9353', 'hf001pb6273', 'hh929wg3298', 'hn217tx5368', 'hq140wy0905', 'hv642nf7717', 'hv698ks1475', 'hw434pj6642', 'hw645gv7743', 'jb739pj9696', 'jg940ts4575', 'jh597wr5998', 'jz331hr5976', 'kw186hs7975', 'kx196rt8122', 'ky214ft2956', 'ky357nb9554', 'mg249dy7051', 'mk879xr0461', 'mv110pd4781', 'mv300dt6569', 'mx349xb4098', 'mz415jv3453', 'nd087pt9085', 'nk906ht6735', 'nn453zz9250', 'nr015ch1092', 'nv773xq7981', 'pf139tj8228', 'pn628yn6194', 'pq169jd6716', 'px611qw1504', 'qd726vf4177', 'qk039cf4369', 'qw725qm9638', 'qx771bj6775', 'rv306cp2774', 'sd725cc2793', 'sk583gg2589', 'sn506gj4859', 'sq394vr6558', 'sq694nb4696', 'st474bt2800', 'tk364rs5190', 'tw357sy1852', 'tx189sh1771', 't |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# usage: wacz-images.py <wacz_file> | |
# | |
# This program will extract images from the WARC files contained in a WACZ | |
# file and write them to the current working directory using the image's URL | |
# as a file location. | |
# | |
# You will need to `pip install warcio` for it to work. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# print out the url and title of web pages in a WARC file | |
import bs4 | |
import sys | |
from warcio.archiveiterator import ArchiveIterator | |
warc_file = sys.argv[1] | |
records = ArchiveIterator(open(warc_file, 'rb')) |