Skip to content

Instantly share code, notes, and snippets.

@dimi-kr
Created October 9, 2015 13:53
Show Gist options
  • Save dimi-kr/ac5eff9de4127ea373fa to your computer and use it in GitHub Desktop.
Save dimi-kr/ac5eff9de4127ea373fa to your computer and use it in GitHub Desktop.
#!/usr/bin/python
from xml.etree.ElementTree import iterparse
import sys
import matplotlib.pyplot as plt
from numpy.random import normal
# Path of the XML file that is parsed (twice) with iterparse below.
source_file="structure.rdf.u8"
# Running sum of the depths of all counted topics (numerator of the average).
avg_sum=0
# Number of topics counted so far (denominator of the average).
res_count=0
# Depth of every counted topic, kept so a histogram can be plotted at the end.
depth_list = []
def resolvetag(ns, tag, nsmap):
    """Return *tag* qualified in ElementTree's ``{namespace-uri}tag`` form.

    Args:
        ns: Namespace prefix to look up in *nsmap* (e.g. ``"r"``).
        tag: Local element or attribute name (e.g. ``"id"``).
        nsmap: Mapping of namespace prefix to namespace URI.

    Returns:
        The string ``'{' + uri + '}' + tag``.

    Raises:
        KeyError: If *ns* is not a key of *nsmap*.
    """
    return '{' + nsmap[ns] + '}' + tag
print("Start processing")

# Collect the namespace prefix -> URI declarations that are reported before
# the first element starts.  The file is opened explicitly so that it is
# closed deterministically even though the loop is abandoned early.
nsmap = {}
with open(source_file, "rb") as ns_source:
    for event, elem in iterparse(ns_source, events=["start-ns", "start"]):
        if event != "start-ns":
            # First "start" event reached: stop scanning, only the leading
            # namespace declarations are wanted.
            break
        # For "start-ns" events iterparse yields a (prefix, uri) tuple.
        ns, url = elem
        nsmap[ns] = url

# Single-argument print(...) produces the same output on Python 2 and 3.
print(nsmap)
# Fully-qualified name of the r:id attribute.  It does not change while
# parsing, so it is resolved once instead of on every parser event.
rid_attr = resolvetag("r", "id", nsmap)

for (event, node) in iterparse(source_file, events=['start', 'end']):
    # Value of the r:id attribute, or None when the element has none.
    data = node.attrib.get(rid_attr)
    if event == "start":
        # Count every element whose tag contains "Topic" and that carries a
        # non-empty r:id; its depth is the number of '/'-separated parts.
        if data and "Topic" in node.tag:
            depth = len(data.split('/'))
            depth_list.append(depth)
            avg_sum += depth
            res_count += 1
            # Progress output; three spaces reproduce what the Python 2
            # statement `print avg_sum," ", res_count` emitted.
            print("%s   %s" % (avg_sum, res_count))
    elif event == 'end' and data:
        # The element is complete: drop its children, text and attributes
        # to limit memory use.
        # NOTE(review): the emptied element is still referenced by its
        # parent, so memory still grows slowly on very large inputs.
        node.clear()
if res_count:
    # `//` keeps the truncated integer average that Python 2's `/` produced
    # for ints, and behaves the same on Python 3.
    print("Avarage value: %s" % (avg_sum // res_count))
    print("Float avarage value: %s" % (avg_sum / float(res_count)))
    print("Plotting...")
    # Histogram of topic depths.
    plt.hist(depth_list)
    plt.xlabel("Depth")
    plt.ylabel("Count")
    plt.show()
else:
    # Nothing was counted: avoid the ZeroDivisionError the averages would
    # raise and skip the (empty) plot.
    print("No topics found, nothing to average or plot")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment