Created
April 21, 2012 23:24
-
-
Save glisha/2440222 to your computer and use it in GitHub Desktop.
скриптите за анализа на домените 2012
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
# -*- coding: utf-8 -*- | |
import couchdbkit | |
import collections | |
# Handle to the local CouchDB database queried by the reports in this script.
database = couchdbkit.Database('http://localhost:5984/marnet')

# How many domains were registered in each year.
# The year is the last '-'-separated field of every row's value.
registrirani_po_godina = collections.Counter(
    row['value'].split('-')[-1]
    for row in database.view('domejn/datum_na_registracija')
)

print("##################################")
print("Kolku se registrirani sekoja godina")
for godina, broj in registrirani_po_godina.items():
    print("%s ; %s" % (godina, broj))
# What kinds of domains are registered.
# The kind is everything after the first label of the row id
# (the part following the first '.').
tip_na_domejni = collections.Counter(
    row['id'].partition('.')[2]
    for row in database.view('domejn/datum_na_registracija')
)

print("##################################")
print("Kolku od sekoj tip se registrirani")
for tip, broj in tip_na_domejni.items():
    print("%s ; %s" % (tip, broj))
# Which DNS servers host the most domains.
# Example of a name server value: ns = "ns1.skopje.gov.mk"
domejni_po_ns = collections.Counter()
for row in database.view('domejn/prvdns', include_docs=True):
    # First element of the document's first DNS entry.
    prv_ns = row['doc']['data']['dns'][0][0]
    # Drop the host label so servers are grouped by their parent domain.
    glaven_domain = prv_ns.partition('.')[2]
    domejni_po_ns[glaven_domain] += 1

print("##################################")
print("DNS serverite kade se hostiraat domejnite")
for ns, broj in domejni_po_ns.most_common(50):
    print("%s ; %s" % (ns, broj))
# Registrants with the most domains (top 20).
registranti = collections.Counter(
    row['value'] for row in database.view('domejn/registrant')
)

print("##################################")
print("Regitranti so najvekje domejni")
for registrant, broj in registranti.most_common(20):
    print("%s ; %s" % (registrant, broj))
# Web servers, normalized to the product name only
# ("Apache/12313" -> "Apache", "Microsoft/IIS" -> "Microsoft"),
# together with the HTTP statuses and IP addresses stored with the headers.
webserver = collections.Counter()
httpstatus = collections.Counter()
ipadresi = collections.Counter()

for row in database.view('domejn/hederi', include_docs=True):
    hederi = row['doc']['hederi']

    # A missing or empty Server header is counted under 'Prazno'.
    server_name = hederi.get('server') or 'Prazno'
    webserver[server_name.partition('/')[0]] += 1

    httpstatus[hederi.get('http-status')] += 1
    ipadresi[hederi.get('ip-address')] += 1

print("##################################")
print("Tip na webserveri")
for server, broj in webserver.items():
    print("%s ; %s" % (server, broj))

print("##################################")
print("HTTP statusi")
for status, broj in httpstatus.items():
    print("%s ; %s" % (status, broj))

print("##################################")
print("IP adresi")
for ip, broj in ipadresi.most_common(50):
    print("%s ; %s" % (ip, broj))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
import couchdbkit | |
import threading | |
import Queue | |
import httplib | |
import socket | |
class ThreadHTTP(threading.Thread):
    """Worker thread that stores HTTP response headers for queued documents.

    Every document taken from ``queue`` must carry the domain name in its
    ``'_id'`` field.  The worker resolves the domain (retrying with a
    ``www.`` prefix), sends a ``HEAD /`` request and saves the response
    headers, plus the resolved IP address and the HTTP status, in the
    document's ``'hederi'`` field via ``database.save_doc``.

    Args:
        queue: queue object the documents are read from (``get``,
            ``qsize`` and ``task_done`` are used).
        database: object whose ``save_doc(doc)`` persists the document.
    """

    # Browser-like headers sent with every HEAD request.
    REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; mk; rv:1.9.0.3) Gecko/2008092416 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'mk,en;q=0.5',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Charset': 'UTF-8,*',
    }

    def __init__(self, queue, database):
        threading.Thread.__init__(self)
        self.queue = queue
        self.database = database

    def run(self):
        """Process documents from the queue forever.

        ``task_done()`` is called exactly once per document taken from the
        queue, no matter how processing ends; a missed call would make a
        ``queue.join()`` on the same queue block forever.
        """
        # Process-wide default timeout (seconds) for sockets created later.
        socket.setdefaulttimeout(5)
        while True:
            doc = self.queue.get()
            try:
                self._process(doc)
            finally:
                self.queue.task_done()

    @staticmethod
    def _resolve(domain):
        """Return ``(hostname, ip)`` for *domain* or ``None`` if unresolvable.

        The bare domain is tried first, then the same name prefixed with
        ``www.``.
        """
        for hostname in (domain, 'www.' + domain):
            try:
                return hostname, socket.gethostbyname(hostname)
            except socket.gaierror:
                continue
        return None

    def _process(self, doc):
        """Fetch the headers for one document and save them."""
        resolved = self._resolve(doc['_id'])
        if resolved is None:
            # Neither the domain nor its www. variant resolves: skip it.
            return
        domain, ip = resolved

        try:
            conn = httplib.HTTPConnection(domain)
            try:
                conn.request('HEAD', '/', headers=self.REQUEST_HEADERS)
                response = conn.getresponse()
                hederi = response.getheaders()
            finally:
                # Always release the socket, also when the request fails.
                conn.close()
            hederi.append(('ip-address', ip))
            hederi.append(('http-status', response.status))
            doc['hederi'] = dict(hederi)
            self.database.save_doc(doc)
        except (socket.error, httplib.HTTPException):
            # Malformed HTTP replies raise HTTPException, not socket.error.
            print("Greska: %s " % domain)

        # Report progress whenever the backlog is a multiple of 100.
        remaining = self.queue.qsize()
        if not remaining % 100:
            print("%s - %s" % (domain, remaining))
# Shared database handle and work queue for the header-fetching workers.
database = couchdbkit.Database('http://localhost:5984/marnet')
queue = Queue.Queue()

# Start 10 daemon workers; daemon threads do not keep the process alive
# once the main thread finishes.
for _ in range(10):
    worker = ThreadHTTP(queue, database)
    worker.daemon = True
    worker.start()

# Queue the full document of every row returned by the view.
for row in database.view('domejn/prvdns', include_docs=True):
    queue.put(row['doc'])

# Block until every queued document has been marked as done.
queue.join()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
import couchdbkit | |
import threading | |
import Queue | |
from dns import resolver | |
from dns.exception import DNSException | |
class ThreadSOA(threading.Thread):
    """Worker thread that stores the SOA record of queued domain documents.

    Every document taken from ``queue`` is expected to hold the domain name
    under ``doc['data']['domain']``.  The SOA record is looked up with a
    per-thread resolver and stored as text in ``doc['soa']`` (``False`` when
    the lookup fails), then the document is saved via ``database.save_doc``.

    Args:
        queue: queue object the documents are read from (``get``,
            ``qsize`` and ``task_done`` are used).
        database: object whose ``save_doc(doc)`` persists the document.
    """

    def __init__(self, queue, database):
        threading.Thread.__init__(self)
        self.queue = queue
        self.database = database
        # One resolver per thread; timeout and lifetime set to 5.
        self.resolver = resolver.Resolver()
        self.resolver.timeout = 5
        self.resolver.lifetime = 5

    def run(self):
        """Process documents from the queue forever.

        ``task_done()`` is called exactly once per document taken from the
        queue, no matter how processing ends; a missed call would make a
        ``queue.join()`` on the same queue block forever.
        """
        while True:
            doc = self.queue.get()
            try:
                self._process(doc)
            finally:
                self.queue.task_done()

    def _process(self, doc):
        """Look up and save the SOA record for one document."""
        try:
            domain = doc['data']['domain']
        except (KeyError, TypeError):
            # Not a domain document (for example a design document, which
            # has no 'data' field): nothing to look up.
            return

        try:
            soa = self.resolver.query(domain, 'SOA')[0].to_text()
        except DNSException:
            soa = False
        doc['soa'] = soa
        self.database.save_doc(doc)

        # Report progress whenever the backlog is a multiple of 1000.
        remaining = self.queue.qsize()
        if not remaining % 1000:
            print("%s - %s" % (domain, remaining))
# Shared database handle and work queue for the SOA lookup workers.
database = couchdbkit.Database('http://localhost:5984/marnet')
queue = Queue.Queue()

# Start 300 daemon workers; daemon threads do not keep the process alive
# once the main thread finishes.
for _ in range(300):
    worker = ThreadSOA(queue, database)
    worker.daemon = True
    worker.start()

# Queue the full document of every row in the database.
for row in database.all_docs(include_docs=True):
    queue.put(row['doc'])

# Block until every queued document has been marked as done.
queue.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment