Skip to content

Instantly share code, notes, and snippets.

@hillar
Last active November 25, 2016 17:20
Show Gist options
  • Save hillar/30d47453cd3db2f10c99 to your computer and use it in GitHub Desktop.
Save hillar/30d47453cd3db2f10c99 to your computer and use it in GitHub Desktop.
Read and parse dnsqr messages from nmsg files created by sie-dns-sensor
import nmsg
import wdns
import re
def _parse_DKIM(data):
    """Parse a DKIM TXT record into a dict of its tag/value pairs.

    The record is split on single spaces.  The first token (expected to be
    the ``v=DKIM...`` version tag) is skipped; every following ``tag=value``
    token is stored under its tag name.  Values are kept verbatim, so a
    trailing ';' separator stays attached to the value.

    Returns None when *data* does not contain ``v=DKIM`` or consists of a
    single token; otherwise a dict (possibly empty).
    """
    if 'v=DKIM' not in data:
        return None
    tokens = data.split(' ')
    if len(tokens) < 2:
        return None
    ret = dict()
    for token in tokens[1:]:
        # Split on the first '=' only: base64 values (e.g. the ``p=`` public
        # key) may end in '=' padding that has to stay part of the value.
        tag, sep, value = token.partition('=')
        if not sep:
            # Empty token produced by consecutive spaces, or a token that is
            # not a tag=value pair; there is nothing to store.
            continue
        ret[tag] = value
    return ret
def _parse_SPF(data):
    """Extract the ip4/ip6/a/mx mechanism arguments from an SPF record.

    Returns None when *data* does not contain ``v=spf``.  Otherwise returns
    a dict that maps 'ipv4', 'ipv6', 'a' and 'mx' to the list of values
    found after ``ip4:``, ``ip6:``, ``a:`` and ``mx:`` respectively; a key
    is present only when at least one value was found.
    """
    if 'v=spf' not in data:
        return None
    ret = dict()
    mechanisms = (('ipv4', 'ip4'), ('ipv6', 'ip6'), ('a', 'a'), ('mx', 'mx'))
    for key, mechanism in mechanisms:
        # The look-behind rejects a match that starts in the middle of another
        # token; without it ``a:`` also matches inside an IPv6 literal such
        # as ``2001:db8:a::1``.  A leading qualifier (+, -, ~, ?) is allowed.
        # The value runs up to the next whitespace or the end of the string,
        # so the last mechanism of the record is not lost.
        pattern = r'(?<![\w:])' + mechanism + r':(\S*)'
        values = re.findall(pattern, data)
        if values:
            ret[key] = values
    return ret
# DNSSEC security algorithm numbers as assigned in the IANA
# "DNS Security Algorithm Numbers" registry.
RSAMD5 = 1
DH = 2
DSA = 3
ECC = 4
RSASHA1 = 5
DSANSEC3SHA1 = 6
RSASHA1NSEC3SHA1 = 7
RSASHA256 = 8
RSASHA512 = 10
ECCGOST = 12
ECDSAP256SHA256 = 13
ECDSAP384SHA384 = 14
ED25519 = 15
ED448 = 16
INDIRECT = 252
PRIVATEDNS = 253
PRIVATEOID = 254

# Algorithm number -> mnemonic, used to make the 'algorithm' rdata slot
# human readable.
ALGORITHM_2_TEXT = {
    RSAMD5: 'RSAMD5',
    DH: 'DH',
    DSA: 'DSA',
    ECC: 'ECC',
    RSASHA1: 'RSASHA1',
    DSANSEC3SHA1: 'DSANSEC3SHA1',
    RSASHA1NSEC3SHA1: 'RSASHA1NSEC3SHA1',
    RSASHA256: 'RSASHA256',
    RSASHA512: 'RSASHA512',
    ECCGOST: 'ECCGOST',
    ECDSAP256SHA256: 'ECDSAP256SHA256',
    ECDSAP384SHA384: 'ECDSAP384SHA384',
    ED25519: 'ED25519',
    ED448: 'ED448',
    INDIRECT: 'INDIRECT',
    PRIVATEDNS: 'PRIVATEDNS',
    PRIVATEOID: 'PRIVATEOID',
}
# Per record type: the names given, in order, to the space-separated fields
# of the rdata string.
RDATA_SLOTS = {
    wdns.TYPE_RRSIG: ['type_covered', 'algorithm', 'labels', 'original_ttl',
                      'expiration', 'inception', 'key_tag', 'signer',
                      'signature'],
    wdns.TYPE_NSEC3: ['algorithm', 'flags', 'iterations', 'salt', 'next', 'types'],
    wdns.TYPE_DNSKEY: ['flags', 'protocol', 'algorithm', 'key'],
    wdns.TYPE_SOA: ['mname', 'rname', 'serial', 'refresh', 'retry', 'expire', 'minimum'],
    wdns.TYPE_MX: ['mail_pref', 'mail_name'],
    wdns.TYPE_SRV: ['priority', 'weight', 'port', 'target'],
}

# Response code constant -> mnemonic.
RCODES = {
    wdns.R_NOERROR: 'NOERROR',
    wdns.R_FORMERR: 'FORMERR',
    wdns.R_SERVFAIL: 'SERVFAIL',
    wdns.R_NXDOMAIN: 'NXDOMAIN',
    wdns.R_NOTIMP: 'NOTIMP',
    wdns.R_REFUSED: 'REFUSED',
    wdns.R_YXDOMAIN: 'YXDOMAIN',
    wdns.R_YXRRSET: 'YXRRSET',
    wdns.R_NXRRSET: 'NXRRSET',
    wdns.R_NOTAUTH: 'NOTAUTH',
    wdns.R_NOTZONE: 'NOTZONE',
    wdns.R_BADVERS: 'BADVERS',
}
def _parse_rdata_str(string, rrtype):
    """Turn the textual form of one rdata into a structured value.

    string -- rdata already rendered as text
    rrtype -- record type constant, compared against the wdns.TYPE_* values

    Returns, depending on the input:
      * the unchanged string when it contains no space, or when a TXT
        record is neither DKIM nor SPF;
      * ``{'type': 'DKIM'|'SPF', 'dkim'|'spf': ..., 'orig': string}`` for a
        TXT record that parses as DKIM or SPF;
      * ``{'spf': ..., 'orig': string}`` for an SPF record, or a dict with
        'rdata_parse_error' when it does not parse;
      * a dict keyed by the slot names in RDATA_SLOTS for the types listed
        there, or a dict with 'rdata_parse_error' when fields are missing;
      * a dict with 'rdata_parse_info' for every other type.
    """
    if " " not in string:
        return string

    if rrtype == wdns.TYPE_TXT:
        dkim = _parse_DKIM(string)
        if dkim is not None:
            return {'type': 'DKIM', 'dkim': dkim, 'orig': string}
        spf = _parse_SPF(string)
        if spf is not None:
            return {'type': 'SPF', 'spf': spf, 'orig': string}
        return string

    if rrtype == wdns.TYPE_SPF:
        spf = _parse_SPF(string)
        if spf is not None:
            return {'spf': spf, 'orig': string}
        return {'rdata_parse_error': 'not SPF', 'orig': string}

    if rrtype not in RDATA_SLOTS:
        return {'rdata_parse_info': 'unknonw type', 'type': rrtype, 'orig': string}

    fields = string.split(" ")
    slots = RDATA_SLOTS[rrtype]
    if len(fields) < len(slots):
        missing = slots[len(fields):]
        return {'rdata_parse_error': 'missing ' + str(len(missing)) + ' slot(s): ' + ','.join(missing),
                'type': rrtype, 'orig': string}

    ret = dict(zip(slots, fields))

    # Fields beyond the last slot: for DNSKEY.key and RRSIG.signature they are
    # pieces of one value split by spaces, so glue them back together; for
    # every other type the last slot becomes a list of all remaining fields.
    if len(fields) > len(slots):
        last = slots[-1]
        if rrtype == wdns.TYPE_DNSKEY or rrtype == wdns.TYPE_RRSIG:
            ret[last] += "".join(fields[len(slots):])
        else:
            ret[last] = fields[len(slots) - 1:]

    # Only DNSKEY and RRSIG carry a DNSSEC security algorithm number.  The
    # NSEC3 'algorithm' slot is a hash algorithm from a different registry
    # (RFC 5155) and is left as found.
    if rrtype == wdns.TYPE_DNSKEY or rrtype == wdns.TYPE_RRSIG:
        try:
            number = int(ret['algorithm'])
        except ValueError:
            # Not numeric: keep the value as it is.
            pass
        else:
            # Numbers missing from the table are kept as the raw string
            # instead of raising KeyError.
            ret['algorithm'] = ALGORITHM_2_TEXT.get(number, ret['algorithm'])
    return ret
def _parse_rdata(rdata):
    """Render one rdata object as text and hand it to _parse_rdata_str.

    Reads the ``data``, ``rrtype`` and ``rrclass`` attributes of *rdata*.
    """
    return _parse_rdata_str(
        wdns.rdata_to_str(rdata.data, rdata.rrtype, rdata.rrclass),
        rdata.rrtype,
    )
def _getRRSETs(section):
    """Convert every rrset of a message section into a plain dict.

    Returns None for an empty section, otherwise a list with one dict per
    rrset holding 'name', 'rrclass', 'rrtype' (all converted to strings),
    'rrttl' and 'rdata' (a list with each rdata run through _parse_rdata).
    """
    if len(section) == 0:
        return None
    converted = []
    for rrset in section:
        converted.append({
            # name, class and type are converted to their string form
            'name': wdns.domain_to_str(rrset.name),
            'rrclass': wdns.rrclass_to_str(rrset.rrclass),
            'rrtype': wdns.rrtype_to_str(rrset.rrtype),
            'rrttl': rrset.rrttl,
            # TODO: what if rrset[class, type] != rdata[class, type]
            'rdata': [_parse_rdata(rdata) for rdata in rrset.rdata],
        })
    return converted
def _getQRRs(section):
    """Convert the entries of a question section into plain dicts.

    Returns None for an empty section, otherwise a list of dicts with the
    string forms of 'name', 'rrclass' and 'rrtype'.
    """
    if len(section) == 0:
        return None
    questions = []
    for entry in section:
        questions.append({
            'name': wdns.domain_to_str(entry.name),
            'rrclass': wdns.rrclass_to_str(entry.rrclass),
            'rrtype': wdns.rrtype_to_str(entry.rrtype),
        })
    return questions
def _getParsedResponse(message):
    """Return the parsed 'response' field of *message*, or None when the
    message has no such field."""
    if 'response' not in message.keys():
        return None
    return wdns.message.parse(message['response'])
def getResponse(message):
    """Build a plain dict describing the response part of *message*.

    Returns None when the message carries no 'response' field.  Otherwise
    the dict holds 'rcode', 'response_ip', 'response_port', the first
    element of 'response_time_sec' / 'response_time_nsec', 'delay' when the
    message has it, and one of 'QUESTION', 'ANSWER', 'AUTHORITY',
    'ADDITIONAL' for every non-empty section of the parsed response.
    """
    parsed = _getParsedResponse(message)
    if parsed is None:
        return None

    response = dict()
    rcode = message['rcode']
    # A code missing from RCODES is reported as its string form instead of
    # raising KeyError, so 'rcode' is always a string.
    response['rcode'] = RCODES.get(rcode, str(rcode))
    response['response_ip'] = message['response_ip']
    response['response_port'] = message['response_port']
    # Only the first timestamp of each array is kept.
    # TODO: convert to a single timestamp?
    response['response_time_nsec'] = message['response_time_nsec'][0]
    response['response_time_sec'] = message['response_time_sec'][0]
    if 'delay' in message.keys():
        response['delay'] = message['delay']

    # The question section holds question entries, not rrsets.
    question = _getQRRs(parsed.sec[wdns.SEC_QUESTION])
    if question is not None:
        response['QUESTION'] = question

    # The remaining sections hold rrsets; empty sections are left out.
    rrset_sections = (
        ('ANSWER', wdns.SEC_ANSWER),
        ('AUTHORITY', wdns.SEC_AUTHORITY),
        ('ADDITIONAL', wdns.SEC_ADDITIONAL),
    )
    for key, index in rrset_sections:
        rrsets = _getRRSETs(parsed.sec[index])
        if rrsets is not None:
            response[key] = rrsets
    # TODO: test that at least one section exists
    return response
def getQuery(message):
    """Build a plain dict describing the query part of *message*.

    Returns None when the message has no 'query' field.
    """
    if 'query' not in message.keys():
        return None
    return {
        # class, type and name are converted to their string form
        'qclass': wdns.rrclass_to_str(message['qclass']),
        'qtype': wdns.rrtype_to_str(message['qtype']),
        'qname': wdns.domain_to_str(message['qname']),
        # only the first timestamp of each array is kept
        # TODO: convert to a single timestamp?
        'query_time_sec': message['query_time_sec'][0],
        'query_time_nsec': message['query_time_nsec'][0],
        # the rest is copied unchanged
        'query_ip': message['query_ip'],
        'response_ip': message['response_ip'],
    }
def getAll(message):
    """Combine 'type', 'id' and the parsed query/response of *message*.

    'query' and 'response' are included only when getQuery / getResponse
    return something other than None.
    """
    combined = {'type': message['type'], 'id': message['id']}
    query = getQuery(message)
    if query != None:
        combined['query'] = query
    response = getResponse(message)
    if response != None:
        combined['response'] = response
    return combined
if __name__ == '__main__':
    # Run as a script: print the parsed form of every message in the file
    # named by the first command-line argument.
    import sys
    import dnsqrreader
    for msg in dnsqrreader.FileReader(sys.argv[1]):
        print(getAll(msg))
import nmsg
import sys
def FileReader(filename):
    """Yield the matching messages read from the nmsg file *filename*.

    A message is yielded only when its ``vid`` is 1, its ``msgtype`` is 9 and
    it has a 'qclass' field (presumably the vendor id and message type of
    dnsqr messages -- confirm against the nmsg documentation).

    Errors are reported on stderr rather than raised: an IOError while
    opening the file yields nothing, and an error while reading ends the
    iteration.
    """
    try:
        _input = nmsg.input.open_file(filename)
    except IOError as err:
        sys.stderr.write("dnsqr FileReader I/O error({0}): {1}\n".format(err.errno, err.strerror))
        return

    while True:
        try:
            message = _input.read()
        except Exception as err:
            # Deliberately best-effort: report the problem and stop reading.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            sys.stderr.write("dnsqr FileReader unexpected error: {0}\n".format(err))
            break
        if not message:
            # A false value from read() is treated as the end of the input.
            break
        if message.vid != 1 or message.msgtype != 9:
            continue
        if 'qclass' not in message.keys():
            continue
        yield message
if __name__ == '__main__':
    # Run as a script: dump every message FileReader yields for the file
    # named by the first command-line argument.
    for msg in FileReader(sys.argv[1]):
        print(msg)
import dnsqrreader
import dnsqrparser
import sys
# Print the parsed form of every message in the file named by the first
# command-line argument.
for msg in dnsqrreader.FileReader(sys.argv[1]):
    print(dnsqrparser.getAll(msg))
@hillar
Copy link
Author

hillar commented Sep 10, 2015

ES doesn't allow the "type" of a property to change, and gives you an error:
MapperParsingException[failed to parse [response.ANSWER.rdata]]; nested: ElasticsearchIllegalArgumentException[unknown property [type]];

so each 'rdata' has to be unique,
TXT as well ;)

            _name = 'rdata.'+_rrset['rrtype']
            _rrset[_name] = list()
            for rdata in rrset.rdata:
                _rd = _parse_rdata(rdata)
                if 'type' in _rd:
                    if _rd['type'] != _rrset['rrtype']:
                        _nn = _name+'.'+_rd['type']
                        if _nn not in _rrset:
                            _rrset[_nn] = list()
                        _rrset[_nn].append(_rd)
                else:
                    _rrset[_name].append(_rd)

so result is

                  "rdata.A"
                  "rdata.AAAA"
                  "rdata.CNAME"
                  "rdata.DNSKEY"
                  "rdata.MX"
                  "rdata.NS"
                  "rdata.NSEC3"
                  "rdata.PTR"
                  "rdata.RRSIG"
                  "rdata.SOA"
                  "rdata.SPF"
                  "rdata.SRV"
                  "rdata.TXT"
                  "rdata.TXT.DKIM"
                  "rdata.TXT.SPF"

@hillar
Copy link
Author

hillar commented Sep 10, 2015

Fields that can be indexed:

“a”
“ipv4”
“k”
“mx”
“p”
“algorithm”
“expiration”
“expire”
“flags”
“inception”
“iterations”
“key”
“key_tag”
“labels”
“mail_name”
“mail_pref”
“minimum”
“mname”
“next”
“orig”
“original_ttl”
“port”
“priority”
“protocol”
“refresh”
“retry”
“rname”
“salt”
“serial”
“signature”
“signer”
“target”
“type”
“type_covered”
“types”
“weight”
“name”
“rrclass”
“rrttl”
“rrtype”
“delay”
“qclass”
“qname”
“qtype”
“query_ip”
“query_time_nsec”
“query_time_sec”
“rcode”
“response_ip”
“response_port”
“response_time_nsec”
“response_time_sec”

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment