Skip to content

Instantly share code, notes, and snippets.

@JustAnotherArchivist
Created August 8, 2018 13:22
Show Gist options
  • Save JustAnotherArchivist/b6c15a3dda636c9436f244b2b482720f to your computer and use it in GitHub Desktop.
Save JustAnotherArchivist/b6c15a3dda636c9436f244b2b482720f to your computer and use it in GitHub Desktop.
import asyncio
import dns.query
import itertools
import wpull.errors
import wpull.network.dns
# Keep a handle on the real implementation so the wrapper below can delegate
# to it after dns.query.udp has been replaced.
orig_dns_query_udp = dns.query.udp


def udp(message, *args, **kwargs):
    """Delegate to the original ``dns.query.udp``, dumping the call on ValueError.

    All arguments are forwarded unchanged and the original's return value is
    passed through.  If the original raises ``ValueError``, the positional
    arguments (``message`` first), the keyword arguments and the result of
    ``message.to_wire()`` are printed, then the same exception is re-raised.
    """
    try:
        response = orig_dns_query_udp(message, *args, **kwargs)
    except ValueError:
        positional = (message,) + args
        print(repr(positional), repr(kwargs))
        print(repr(message.to_wire()))
        raise
    return response


# Install the diagnostic wrapper in place of the library function.
dns.query.udp = udp
# Resolver under test: IPv4-only lookups backed by a freshly created cache.
# NOTE(review): timeout=30 is presumably in seconds -- confirm against the
# wpull Resolver documentation.
resolver = wpull.network.dns.Resolver(
    cache=wpull.network.dns.Resolver.new_cache(),
    family=wpull.network.dns.IPFamilyPreference.ipv4_only,
    rotate=None,
    timeout=30,
)

# Force all queries to go to Google's public nameserver by overwriting the
# nameserver list on the resolver's underlying (private) DNS resolver object.
resolver._dns_resolver.nameservers = ['8.8.8.8']
# Top 100 domains from www.solarpaneltalk.com-inf-20180806-141951-3ac34,
# ordered by number of fetches (most frequent first).  Extracted with:
# grep -Po 'Fetching ‘https?://\K[^/]+(?=[/’])' wpull.log | awk '{cnt[$1]+=1} END { for (domain in cnt) { print cnt[domain] " " domain }}' | sort -nr | head -100 | awk '{print "hosts.append('"'"'" $2 "'"'"')"}'
# (that pipeline emits one hosts.append(...) line per domain; the same names
# are written here as a single list literal, in the same order).
hosts = [
    'www.solarpaneltalk.com',
    'static.xx.fbcdn.net',
    'images-na.ssl-images-amazon.com',
    'www.screenlightandgrip.com',
    'www.solarreviews.com',
    'i.ebayimg.com',
    'jonsguide.org',
    'www.facebook.com',
    'www.youtube.com',
    'www.ironridge.com',
    'fonts.gstatic.com',
    'www.amazon.com',
    'energy.sandia.gov',
    'www.solarroofhook.com',
    'en.wikipedia.org',
    'www.homepower.com',
    'www.legis.ga.gov',
    'www.quickmountpv.com',
    'www.solaredge.com',
    'cdn7.bigcommerce.com',
    'www.genstattu.com',
    'www.victronenergy.com',
    'www.outbackpower.com',
    'www.electricgeneratorsdirect.com',
    'www.ebay.com',
    'ir.ebaystatic.com',
    'i.ytimg.com',
    'www.pnas.org',
    'books.google.com.au',
    'www.solar-estimate.org',
    'www.thermotekusa.com',
    'usbattery.com',
    '3ohkdk3zdzcq1dul50oqjvvf-wpengine.netdna-ssl.com',
    'pvoutput.org',
    'www.thisoldhouse.com',
    'www.greenbuildingadvisor.com',
    'www.photonics.com',
    'webosolar.com',
    'fonts.googleapis.com',
    'www.google.com',
    'ssl.gstatic.com',
    'www.fieldlines.com',
    'us.sunpower.com',
    'sepbatteries.com',
    'd114hh0cykhyb0.cloudfront.net',
    'www.caiso.com',
    'i5.walmartimages.com',
    'www.lg.com',
    'www.thesuntrip.com',
    'engineering.stanford.edu',
    'energyandmines.com',
    '2n1s7w3qw84d2ysnx3ia2bct-wpengine.netdna-ssl.com',
    'www.elecdirect.com',
    's2.ibtimes.com',
    'freebeacon.com',
    '41j5tc3akbrn3uezx5av0jj1bgm-wpengine.netdna-ssl.com',
    'm.media-amazon.com',
    'file.scirp.org',
    'hpevs.com',
    'www.sparelys.no',
    'www.energymatters.com.au',
    'img.dxcdn.com',
    'dkasolarcentre.com.au',
    'd4td1un6f2hha.cloudfront.net',
    'blog.caranddriver.com',
    'www.voltaicsystems.com',
    'www.saftbatteries.com',
    'www.wholesalesolar.com',
    'www.ddmotorsystems.com',
    'batteryuniversity.com',
    'www.googletagmanager.com',
    'static.squarespace.com',
    'www.wbdg.org',
    'thesolarstore.com',
    'static-na.payments-amazon.com',
    'enphase.com',
    'assets.alicdn.com',
    'www.solarquotes.com.au',
    'www.batteriesplus.com',
    'indaily.com.au',
    'ajax.googleapis.com',
    'ae01.alicdn.com',
    'www.eevblog.com',
    's12.photobucket.com',
    'www.solarpanelstore.com',
    'www.soldapools.com',
    'www.dropbox.com',
    'hybridautocenter.com',
    'www.exeltech.com',
    'www.bluepacificsolar.com',
    'powerequipment.honda.com',
    'gbbattery.com',
    'www.powerequipmentdirect.com',
    'realgoods.com',
    'c5.rgstatic.net',
    's137.photobucket.com',
    'gcell.com',
    'forum.solar-electric.com',
    'www.seia.org',
    'www.samlexamerica.com',
    # ... and the two hosts that were fetching at the crash, just to be sure.
    'www.aqua-sun-intl.com',
    'www.katadyn.com',
]
async def main(resolver, hosts):
    """Resolve the given hosts one after another, cycling through them endlessly.

    Each host name is announced on stdout and then awaited through
    ``resolver.resolve``; the result is discarded.  A
    ``wpull.errors.NetworkError`` from a lookup is ignored and the loop moves
    on to the next host; any other exception propagates to the caller.  With
    an empty ``hosts`` the coroutine returns immediately.
    """
    endless_hosts = itertools.cycle(hosts)
    for hostname in endless_hosts:
        print('Resolving {}'.format(hostname))
        try:
            await resolver.resolve(hostname)
        except wpull.errors.NetworkError:
            # Failed lookups are deliberately ignored; just keep cycling.
            continue
# Drive main() on the default event loop.  For a non-empty host list main()
# never returns normally, so the run only ends when an exception that main()
# does not catch propagates out of it (or the process is interrupted).
loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main(resolver, hosts))
finally:
    # Close the loop however the run ends so its resources are released
    # instead of being left open until interpreter shutdown.
    loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment