Created
January 21, 2011 18:16
-
-
Save psychemedia/790119 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import unicodedata
import urllib
from time import sleep

import networkx as nx
import simplejson
from calais import Calais
# Calais API key. It is read from the CALAIS_KEY environment variable when
# that is set; otherwise replace the placeholder string below with your own
# key. (The placeholder used to be a bare, undefined name, which raised
# NameError as soon as the module was loaded.)
calaisKey = os.environ.get('CALAIS_KEY', 'YOUR_CALAIS_KEY')

# Shared client used by calaisTagIOT() to analyse programme synopses.
calais = Calais(calaisKey, submitter="python-calais ouseful")
def _asciiText(text):
    """Return *text* NFKD-normalised and reduced to plain ASCII.

    Values that ``unicodedata.normalize`` rejects with a TypeError (anything
    that is not a unicode string) are returned unchanged.
    """
    try:
        return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    except TypeError:
        return text


def _ensureDir(path):
    """Create directory *path* if it does not exist yet."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _loadProgrammeData(progID):
    """Return the BBC programme JSON for *progID*, or None if fetching fails.

    A copy cached in ``bbcdata/<progID>.json`` is used when present; otherwise
    the data is downloaded and written to that cache file.
    """
    cachePath = 'bbcdata/' + progID + '.json'
    if os.path.exists(cachePath):
        print('Using cached BBC programme data for %s' % progID)
        with open(cachePath) as cached:
            return simplejson.load(cached)

    print('Fetching BBC programme data for %s' % progID)
    url = 'http://www.bbc.co.uk/programmes/' + progID + '.json'
    # Without the cache directory every download would be reported as a
    # failed fetch, so make sure it exists first.
    _ensureDir('bbcdata')
    try:
        response = urllib.urlopen(url)
        try:
            data = simplejson.load(response)
        finally:
            response.close()
        with open(cachePath, 'w') as cacheFile:
            simplejson.dump(data, cacheFile)
    except (IOError, ValueError):
        # IOError: network or file problem; ValueError: response was not JSON.
        print('failed to get BBC programme data for %s' % progID)
        return None
    return data


def _loadCalaisData(progID, desc):
    """Return the Calais analysis of *desc*, or None if the request fails.

    A copy cached in ``calaisdata/<progID>.json`` is used when present;
    otherwise *desc* is sent to the module-level ``calais`` client and the
    simplified response is written to that cache file.
    """
    cachePath = 'calaisdata/' + progID + '.json'
    if os.path.exists(cachePath):
        print('using cached calais data for %s' % progID)
        with open(cachePath) as cached:
            return simplejson.load(cached)

    print('Fetching Calais data for %s' % progID)
    desc = _asciiText(desc)
    print(desc)
    try:
        result = calais.analyze(desc)
    except Exception:
        # Best effort: the exceptions raised by the client library are not
        # visible here, so any ordinary failure just skips this programme.
        print('failed to get Calais data for %s' % progID)
        return None
    # Pause after each live request before the caller issues the next one.
    sleep(5)

    print("SUMMARY")
    result.print_summary()
    print("\nENTITIES\n")
    result.print_entities()
    print("\nRELATIONS\n")
    result.print_relations()
    print("\nTOPICS\n")
    result.print_topics()

    cData = result.simplified_response
    _ensureDir('calaisdata')
    with open(cachePath, 'w') as cacheFile:
        simplejson.dump(cData, cacheFile)
    return cData


def _addSubjects(progID, graph, programme):
    """Link *progID* to a node for each 'subject' category of *programme*."""
    for item in programme.get('categories') or []:
        if item['type'] != 'subject':
            continue
        key = item['key']
        # Nodes are keyed by the category key, so that is what is tested for
        # membership (on the graph itself rather than on ``graph.node``).
        if key not in graph:
            graph.add_node(key, label=_asciiText(item['title']),
                           type=item['type'])
        graph.add_edge(progID, key)


def _addCalaisEntities(progID, graph, cData):
    """Link *progID* to a node for each 'Facility' entity in *cData*."""
    for item in cData.get('entities', []):
        if item['_type'] != 'Facility':
            continue
        print('%s %s' % (item['_type'], item['name']))
        name = _asciiText(item['name'])
        if name not in graph:
            graph.add_node(name, type=_asciiText(item['_type']), label=name)
        graph.add_edge(progID, name)


def _addCalaisRelations(progID, graph, cData):
    """Add organisation/person nodes for 'PersonCareer' relations in *cData*.

    Each organisation is linked to *progID*; a person, when present, is linked
    to the organisation. Relations without an organisation are skipped.
    """
    for item in cData.get('relations', []):
        if item['_type'] != 'PersonCareer' or 'organization' not in item:
            continue
        organization = item['organization']
        oname = _asciiText(organization['name'])
        if oname not in graph:
            graph.add_node(oname, type=organization['_type'], label=oname)
        print(oname)
        graph.add_edge(progID, oname)
        if 'person' in item:
            person = item['person']
            pname = _asciiText(person['name'])
            graph.add_node(pname, type=person['_type'], label=pname)
            graph.add_edge(pname, oname)


def calaisTagIOT(progID, graph):
    """Annotate *graph* with subjects and Calais entities for one programme.

    Loads the BBC programme data for *progID* (cached under ``bbcdata/``),
    adds its 'subject' categories as nodes, then analyses the programme
    synopsis with Calais (cached under ``calaisdata/``) and adds 'Facility'
    entities and 'PersonCareer' organisations/people. Every added subject,
    facility and organisation is connected to the *progID* node.

    Args:
        progID: BBC programme identifier; also the id of the programme node.
        graph: networkx graph to extend in place.

    Returns:
        The same *graph* object, whether or not any data could be loaded.
    """
    data = _loadProgrammeData(progID)
    if data is None:
        return graph
    print(data)
    programme = data['programme']

    _addSubjects(progID, graph, programme)

    # Prefer the long synopsis and fall back to the short one when the long
    # synopsis is missing or empty.
    desc = programme.get('long_synopsis') or programme.get('short_synopsis')
    if not desc:
        return graph

    cData = _loadCalaisData(progID, desc)
    if cData is None:
        # Return the graph that was passed in (the old code returned the
        # module-level G here).
        return graph

    _addCalaisEntities(progID, graph, cData)
    _addCalaisRelations(progID, graph, cData)
    return graph
# Series whose episode list is downloaded below.
seriesID = 'b006qykl'
episodeID = 'p003k9gm'
title = 'inOurTime'

# Known series IDs:
#   in our time  b006qykl
#   in business  b006s609
#   bottom line  b006sz6t
url = 'http://www.bbc.co.uk/programmes/' + seriesID + '/episodes/player.json'

# Episode listing for the series; processIOTSeries() reads this global.
# The HTTP response is closed explicitly so the connection is not leaked.
response = urllib.urlopen(url)
try:
    data = simplejson.load(response)
finally:
    response.close()

# Graph that accumulates episode, subject and entity nodes.
G = nx.Graph()
def processIOTSeries(G, seriesID):
    """Add every episode in the module-level ``data`` listing to *G*.

    Args:
        G: networkx graph to extend in place.
        seriesID: currently unused; the episodes are taken from the global
            ``data``, which the module loads for its own ``seriesID``.

    Returns:
        The same graph *G*.
    """
    for episode in data['episodes']:
        # Episodes without a pid are skipped rather than raising KeyError.
        progID = episode['programme'].get('pid')
        print(progID)
        if progID is not None:
            processIOTEpisode(G, progID, episode)
    return G
def processIOTEpisode(G, episodeID, episode):
    """Add one episode node to *G* and tag it via calaisTagIOT().

    The node id is *episodeID*; its label is the programme title from
    *episode*, NFKD-normalised and encoded to ASCII with unencodable
    characters dropped. Returns whatever calaisTagIOT() returns.
    """
    rawTitle = episode['programme']['title']
    label = unicodedata.normalize('NFKD', rawTitle).encode('ascii', 'ignore')
    G.add_node(episodeID, type='episode', label=label)
    return calaisTagIOT(episodeID, G)
# Base name (without extension) of the exported graph files.
title = 'iotSeriesTest'
G = processIOTSeries(G, seriesID)
#G=processEpisode(G,episodeID)

# Make sure the output directory exists before any of the writers run.
if not os.path.isdir('graphdata'):
    os.makedirs('graphdata')

# NOTE(review): nx.write_yaml is not available in newer NetworkX releases;
# confirm the installed version still provides it.
print('writing yaml...')
nx.write_yaml(G, 'graphdata/' + title + '.yaml')
print('...ok, done yaml')

# This progress message used to say "writing yaml..." (copy-paste slip).
print('writing gml...')
nx.write_gml(G, 'graphdata/' + title + '.gml')
print('...ok, done gml')

print('writing graphml...')
nx.write_graphml(G, 'graphdata/' + title + '.graphml')
print('...ok, done graphml')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment