Skip to content

Instantly share code, notes, and snippets.

@eyJhb
Created May 4, 2018 20:29
Show Gist options
  • Save eyJhb/de7bde76b7adf77503f49080221a6fc8 to your computer and use it in GitHub Desktop.
Save eyJhb/de7bde76b7adf77503f49080221a6fc8 to your computer and use it in GitHub Desktop.
DR OnDemand Akamai gather
import requests
import re
import string
import asyncio
from aiohttp import ClientSession
class drdk_extractor(object):
    """Collect the Akamai host URLs referenced by DR (dr.dk) programme cards.

    ``main`` queries the programme-card search endpoint once per character of
    ``string.printable`` and collects every ``https://www.dr.dk/mu/bar/...``
    link found in the replies.  ``run`` then downloads those links
    concurrently and writes the distinct ``http://<host>.akamaihd.net``
    prefixes found in the bodies to ``output.txt``.
    """

    def __init__(self):
        # Search endpoint; the title fragment to look for is appended to it.
        self.url = 'https://www.dr.dk/mu/search/programcard?Title='
        # Raw strings keep the regex escapes out of Python's own string-escape
        # processing (the old '\/' and '\.' forms trigger invalid-escape
        # warnings on current interpreters).  The patterns match the same text.
        self.re_mu = re.compile(r'(https://www\.dr\.dk/mu/bar/[^"]+)')
        self.re_akamai = re.compile(r'(http://[^.]+\.akamaihd\.net)')

    def _search_url(self, fragment):
        """Return the search URL for *fragment*, percent-encoded for a query.

        Without encoding, characters such as ``#``, ``&``, ``+``, ``%`` or
        whitespace change the meaning of the URL instead of being searched for.
        """
        # Local import so the class does not depend on a file-level import
        # that the original file does not have.
        from urllib.parse import quote

        return self.url + quote(fragment, safe='')

    def _find_akamai_hosts(self, bodies):
        """Return the first Akamai host URL of every body that contains one.

        *bodies* is an iterable of response bodies (``bytes`` or ``str``).
        The result keeps input order and may contain duplicates.
        """
        hosts = []
        for body in bodies:
            if isinstance(body, bytes):
                # Search the real text, not the repr() that str(bytes) yields.
                text = body.decode("utf-8", errors="replace")
            else:
                text = str(body)
            match = self.re_akamai.search(text)
            if match:
                hosts.append(match.group(0))
        return hosts

    async def fetch(self, url, session):
        """GET *url* through *session* and return the raw response body."""
        async with session.get(url) as response:
            return await response.read()

    async def run(self, urls):
        """Download *urls* concurrently and record the Akamai hosts they mention.

        Each distinct URL is fetched once.  Every hit is printed, the distinct
        hosts are written one per line (sorted) to ``output.txt``, and their
        count is printed.

        Returns:
            The sorted list of distinct Akamai host URLs that were written.
        """
        # Duplicates would only repeat identical requests; keep first-seen order.
        unique_urls = list(dict.fromkeys(urls))

        # Fetch all responses within one client session so connections are
        # kept alive and reused across requests.
        async with ClientSession() as session:
            tasks = [
                asyncio.ensure_future(self.fetch(url, session))
                for url in unique_urls
            ]
            responses = await asyncio.gather(*tasks)

        found = self._find_akamai_hosts(responses)
        for host in found:
            print("Found - " + host)
        print("-------------------------")

        hosts = sorted(set(found))
        # The context manager closes the file even if a write fails.
        with open("output.txt", "w") as f:
            f.writelines(host + "\n" for host in hosts)
        print(len(hosts))
        return hosts

    def main(self):
        """Search programme cards for every printable character, then run the scan.

        Prints each programme-card link found, the total and distinct link
        counts, and finally drives ``run`` to completion on a fresh event loop.
        """
        urls = []
        for c in string.printable:
            req = requests.get(self._search_url(c))
            for match in self.re_mu.findall(req.text):
                print("Found - " + match)
                urls.append(match)
        print(len(urls))
        print(len(set(urls)))
        print("----------------")

        # asyncio.get_event_loop() with no current loop is deprecated (and an
        # error on the newest interpreters); own the loop explicitly instead.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self.run(urls))
        finally:
            loop.close()
# Script entry point: build the extractor and start the full scrape right away.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so these two
# statements also execute when the file is imported as a module.
x = drdk_extractor()
x.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment