Ed Summers edsu

## paradise-lost-domains.txt
cumbrous.elements.earth
vaulted.either.host
without.exteriour.help
creating.derivative.works
reward.though.here
father.without.cloud
project.gutenbergtm.name
unfold.celestial.guide
fierce.strive.here
renowned.alcinous.host

## public_domains.py
import string
import sys

import requests
import whois

from nltk import tokenize

BOOKFILE = sys.argv[1]
OUTPUTFILE = BOOKFILE + '.possible-domains.txt'

## webpages.json
[
  {
    "url": "https://www.theonion.com/nasa-panics-after-asteroid-fires-back-1849587289",
    "status": 200,
    "content_type": "text/html; charset=utf-8",
    "title": "NASA Panics After Asteroid Fires Back",
    "description": "The Onion brings you all of the latest news, stories, photos, videos and more from America's finest news source.",
    "image": "https://i.kinja-img.com/gawker-media/image/upload/c_fill,f_auto,fl_progressive,g_center,h_675,pg_1,q_80,w_1200/f0c0e17e9ed6acb0e1bb946a9e233c1d.jpg",
    "publisher": "The Onion",
    "keywords": [

## 1413.json
{
  "id": 1413,
  "creator": "Ed Summers",
  "query": [
    {
      "id": 1413,
      "searchId": 1413,
      "created": "2022-10-13T21:40:22.102Z",
      "value": {
        "or": [

## us_congress_social.py
#!/usr/bin/env python3

# Use https://github.com/unitedstates/ data to get JSON for current members of
# congress including their social media accounts.
#
# Note: you will need to pip install requests and pyyaml

import json
import yaml
import requests

## check.py
#!/usr/bin/env python3

#
# This demonstrates an inconsistency in results from the Internet Archive CDX
# API when querying by scopeType=domain vs scopeType=prefix. For context see:
#
#   https://inkdroid.org/2022/09/24/pdfs/
#
# Note: you'll need to
#

## mymodule.py
globals()['y'] = 1

## check_cdx.py
#!/usr/bin/env python3

import json

druids = ['bj330fg0526', 'bp312sd3142', 'bs648dv9357', 'bz893jg7695', 'bz922hc1158', 'cc095kz3027', 'ch908dt6803', 'cp809cz8166', 'cv292vs5727', 'dn752dz0508', 'dy271hk6968', 'fd892fn4310', 'fj109wp2130', 'fn912wb3725', 'fp815hx3553', 'fs415vb1264', 'fv812yp9241', 'fw782ks7983', 'gf100kp6588', 'gj901jn9353', 'hf001pb6273', 'hh929wg3298', 'hn217tx5368', 'hq140wy0905', 'hv642nf7717', 'hv698ks1475', 'hw434pj6642', 'hw645gv7743', 'jb739pj9696', 'jg940ts4575', 'jh597wr5998', 'jz331hr5976', 'kw186hs7975', 'kx196rt8122', 'ky214ft2956', 'ky357nb9554', 'mg249dy7051', 'mk879xr0461', 'mv110pd4781', 'mv300dt6569', 'mx349xb4098', 'mz415jv3453', 'nd087pt9085', 'nk906ht6735', 'nn453zz9250', 'nr015ch1092', 'nv773xq7981', 'pf139tj8228', 'pn628yn6194', 'pq169jd6716', 'px611qw1504', 'qd726vf4177', 'qk039cf4369', 'qw725qm9638', 'qx771bj6775', 'rv306cp2774', 'sd725cc2793', 'sk583gg2589', 'sn506gj4859', 'sq394vr6558', 'sq694nb4696', 'st474bt2800', 'tk364rs5190', 'tw357sy1852', 'tx189sh1771', 't

## wacz-images.py
#!/usr/bin/env python3

#
# usage: wacz-images.py <wacz_file>
#
# This program will extract images from the WARC files contained in a WACZ
# file and write them to the current working directory using the image's URL
# as a file location.
#
# You will need to `pip install warcio` for it to work.

## titles.py
# print out the url and title of web pages in a WARC file

import bs4
import sys

from warcio.archiveiterator import ArchiveIterator

warc_file = sys.argv[1]
records = ArchiveIterator(open(warc_file, 'rb'))
	cumbrous.elements.earth
	vaulted.either.host
	without.exteriour.help
	creating.derivative.works
	reward.though.here
	father.without.cloud
	project.gutenbergtm.name
	unfold.celestial.guide
	fierce.strive.here
	renowned.alcinous.host
	import string
	import sys

	import requests
	import whois

	from nltk import tokenize

	BOOKFILE = sys.argv[1]
	OUTPUTFILE = BOOKFILE + '.possible-domains.txt'
	[
	{
	"url": "https://www.theonion.com/nasa-panics-after-asteroid-fires-back-1849587289",
	"status": 200,
	"content_type": "text/html; charset=utf-8",
	"title": "NASA Panics After Asteroid Fires Back",
	"description": "The Onion brings you all of the latest news, stories, photos, videos and more from America's finest news source.",
	"image": "https://i.kinja-img.com/gawker-media/image/upload/c_fill,f_auto,fl_progressive,g_center,h_675,pg_1,q_80,w_1200/f0c0e17e9ed6acb0e1bb946a9e233c1d.jpg",
	"publisher": "The Onion",
	"keywords": [
	{
	"id": 1413,
	"creator": "Ed Summers",
	"query": [
	{
	"id": 1413,
	"searchId": 1413,
	"created": "2022-10-13T21:40:22.102Z",
	"value": {
	"or": [
	#!/usr/bin/env python3

	# Use https://github.com/unitedstates/ data to get JSON for current members of
	# congress including their social media accounts.
	#
	# Note: you will need to pip install requests and pyyaml

	import json
	import yaml
	import requests
	#!/usr/bin/env python3

	#
	# This demonstrates an inconsistency in results from the Internet Archive CDX
	# API when querying by scopeType=domain vs scopeType=prefix. For context see:
	#
	# https://inkdroid.org/2022/09/24/pdfs/
	#
	# Note: you'll need to
	#
	#!/usr/bin/env python3

	import json

	druids = ['bj330fg0526', 'bp312sd3142', 'bs648dv9357', 'bz893jg7695', 'bz922hc1158', 'cc095kz3027', 'ch908dt6803', 'cp809cz8166', 'cv292vs5727', 'dn752dz0508', 'dy271hk6968', 'fd892fn4310', 'fj109wp2130', 'fn912wb3725', 'fp815hx3553', 'fs415vb1264', 'fv812yp9241', 'fw782ks7983', 'gf100kp6588', 'gj901jn9353', 'hf001pb6273', 'hh929wg3298', 'hn217tx5368', 'hq140wy0905', 'hv642nf7717', 'hv698ks1475', 'hw434pj6642', 'hw645gv7743', 'jb739pj9696', 'jg940ts4575', 'jh597wr5998', 'jz331hr5976', 'kw186hs7975', 'kx196rt8122', 'ky214ft2956', 'ky357nb9554', 'mg249dy7051', 'mk879xr0461', 'mv110pd4781', 'mv300dt6569', 'mx349xb4098', 'mz415jv3453', 'nd087pt9085', 'nk906ht6735', 'nn453zz9250', 'nr015ch1092', 'nv773xq7981', 'pf139tj8228', 'pn628yn6194', 'pq169jd6716', 'px611qw1504', 'qd726vf4177', 'qk039cf4369', 'qw725qm9638', 'qx771bj6775', 'rv306cp2774', 'sd725cc2793', 'sk583gg2589', 'sn506gj4859', 'sq394vr6558', 'sq694nb4696', 'st474bt2800', 'tk364rs5190', 'tw357sy1852', 'tx189sh1771', 't
	#!/usr/bin/env python3

	#
	# usage: wacz-images.py <wacz_file>
	#
	# This program will extract images from the WARC files contained in a WACZ
	# file and write them to the current working directory using the image's URL
	# as a file location.
	#
	# You will need to `pip install warcio` for it to work.
	# print out the url and title of web pages in a WARC file

	import bs4
	import sys

	from warcio.archiveiterator import ArchiveIterator

	warc_file = sys.argv[1]
	records = ArchiveIterator(open(warc_file, 'rb'))