Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Created January 21, 2011 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save psychemedia/790119 to your computer and use it in GitHub Desktop.
Save psychemedia/790119 to your computer and use it in GitHub Desktop.
from calais import Calais
import simplejson
import urllib
import networkx as nx
import unicodedata
from time import sleep
import os
# OpenCalais API key, read from the CALAIS_KEY environment variable so the
# secret does not have to be written into the source. Fail early with an
# explicit message when it is missing.
calaisKey = os.environ.get('CALAIS_KEY')
if not calaisKey:
    raise RuntimeError(
        'Set the CALAIS_KEY environment variable to your OpenCalais API key')

# Shared Calais client used by calaisTagIOT().
calais = Calais(calaisKey, submitter="python-calais ouseful")
def _ascii_fold(text):
    """Return *text* NFKD-normalised and encoded to ASCII, dropping the rest.

    ``unicodedata.normalize`` raises ``TypeError`` when handed anything other
    than a unicode string (for example a Python 2 byte string); such values
    are returned unchanged.
    """
    try:
        return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    except TypeError:
        return text


def _read_json(path):
    """Load and return the JSON document stored at *path*."""
    with open(path) as handle:
        return simplejson.load(handle)


def _write_json(payload, path):
    """Best-effort: store *payload* as JSON at *path*, creating its directory.

    A failed cache write is reported but never aborts the run, because the
    payload is still available in memory to the caller.
    """
    try:
        folder = os.path.dirname(path)
        if folder and not os.path.isdir(folder):
            os.makedirs(folder)
        with open(path, 'w') as handle:
            simplejson.dump(payload, handle)
    except (IOError, OSError) as err:
        print('could not write cache file %s: %s' % (path, err))


def calaisTagIOT(progID, graph, entityTypes=('Facility',)):
    """Add the subjects and Calais annotations of one BBC programme to *graph*.

    Programme metadata is read from ``bbcdata/<progID>.json`` when that file
    exists; otherwise it is fetched from
    ``http://www.bbc.co.uk/programmes/<progID>.json`` and cached there. The
    programme synopsis is then analysed with the module-level ``calais``
    client, with the simplified response cached in
    ``calaisdata/<progID>.json``.

    Nodes and edges added:
      * each category of type ``subject`` -> node keyed by the category key,
        linked to *progID*;
      * each Calais entity whose ``_type`` is in *entityTypes* -> node keyed
        by the ASCII-folded entity name, linked to *progID*;
      * each ``PersonCareer`` relation that has an organisation -> person and
        organisation nodes linked to each other, with the organisation also
        linked to *progID*.

    Args:
        progID: BBC programme identifier (pid).
        graph: graph supporting ``add_node``, ``add_edge`` and ``in``
            membership tests (e.g. a ``networkx.Graph``).
        entityTypes: Calais entity ``_type`` values turned into nodes.

    Returns:
        The same *graph* object, also when fetching or analysis fails.
    """
    url = 'http://www.bbc.co.uk/programmes/' + progID + '.json'
    bbc_cache = 'bbcdata/' + progID + '.json'
    if os.path.exists(bbc_cache):
        print('Using cached BBC programme data for %s' % progID)
        data = _read_json(bbc_cache)
    else:
        print('Fetching BBC programme data for %s' % progID)
        try:
            data = simplejson.load(urllib.urlopen(url))
        except (IOError, ValueError):
            # Network failure or a response that is not valid JSON.
            print('failed to get BBC programme data for %s' % progID)
            return graph
        _write_json(data, bbc_cache)
    print(data)

    programme = data['programme']
    for item in programme['categories']:
        if item['type'] != 'subject':
            continue
        # Nodes are keyed by the category key, so that is what must be
        # checked for existence (not the display title).
        key = item['key']
        if key not in graph:
            graph.add_node(key, label=_ascii_fold(item['title']),
                           type=item['type'])
        graph.add_edge(progID, key)

    # Prefer the long synopsis; fall back to the short one when the long one
    # is missing or empty. Without any text there is nothing to analyse.
    desc = programme.get('long_synopsis') or programme.get('short_synopsis')
    if not desc:
        return graph

    calais_cache = 'calaisdata/' + progID + '.json'
    if os.path.exists(calais_cache):
        print('using cached calais data for %s' % progID)
        cData = _read_json(calais_cache)
    else:
        print('Fetching Calais data for %s' % progID)
        desc = _ascii_fold(desc)
        print(desc)
        try:
            result = calais.analyze(desc)
            # Pause after every live request -- presumably to stay within
            # the Calais API rate limit.
            sleep(5)
        except Exception as err:
            # Best-effort boundary around the remote service: report the
            # failure and keep whatever has been added to the graph so far.
            print('Calais analysis failed for %s: %s' % (progID, err))
            return graph
        print("SUMMARY")
        result.print_summary()
        print("\nENTITIES\n")
        result.print_entities()
        print("\nRELATIONS\n")
        result.print_relations()
        print("\nTOPICS\n")
        result.print_topics()
        cData = result.simplified_response
        _write_json(cData, calais_cache)

    for item in cData.get('entities', ()):
        if item['_type'] not in entityTypes:
            continue
        print('%s %s' % (item['_type'], item['name']))
        name = _ascii_fold(item['name'])
        typ = _ascii_fold(item['_type'])
        if name not in graph:
            graph.add_node(name, type=typ, label=name)
        graph.add_edge(progID, name)

    for item in cData.get('relations', ()):
        if item['_type'] != 'PersonCareer' or 'organization' not in item:
            continue
        org = item['organization']
        person = item.get('person')
        # NOTE(review): assumes an unresolved reference would show up as a
        # plain string instead of a dict -- confirm against python-calais.
        # Such relations are skipped rather than crashing the whole run.
        if not isinstance(org, dict) or not isinstance(person, dict):
            continue
        oname = _ascii_fold(org['name'])
        if oname not in graph:
            graph.add_node(oname, type=org['_type'], label=oname)
        print(oname)
        pname = _ascii_fold(person['name'])
        graph.add_node(pname, type=person['_type'], label=pname)
        graph.add_edge(pname, oname)
        graph.add_edge(progID, oname)
    return graph
# BBC identifiers for this run. Other series IDs noted by the author:
#   in our time  b006qykl
#   in business  b006s609
#   bottom line  b006sz6t
# (an example episode pid is b00vrxx0)
seriesID = 'b006qykl'
episodeID = 'p003k9gm'
title = 'inOurTime'

# Episode listing for the chosen series, downloaded once when the module is
# loaded; processIOTSeries() iterates over data['episodes'].
url = 'http://www.bbc.co.uk/programmes/%s/episodes/player.json' % seriesID
data = simplejson.load(urllib.urlopen(url))

# Graph that the processing functions below add nodes and edges to.
G = nx.Graph()
def processIOTSeries(G, seriesID):
    """Feed every episode of the module-level ``data`` listing into *G*.

    Each entry of ``data['episodes']`` that carries a programme pid is passed
    to ``processIOTEpisode``; every pid is echoed to stdout first.

    NOTE: *seriesID* is accepted but not used here -- the episodes come from
    the module-level ``data``, whatever series that was loaded for.

    Returns:
        The graph *G*.
    """
    for entry in data['episodes']:
        pid = entry['programme']['pid']
        print(pid)
        if pid is None:
            continue
        processIOTEpisode(G, pid, entry)
    return G
def processIOTEpisode(G, episodeID, episode):
    """Add one episode node to *G* and tag it via ``calaisTagIOT``.

    Args:
        G: graph to extend.
        episodeID: programme pid, used as the node identifier.
        episode: episode record; its ``['programme']['title']`` becomes the
            node label.

    Returns:
        The graph returned by ``calaisTagIOT``.
    """
    title = episode['programme']['title']
    try:
        title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore')
    except TypeError:
        # normalize() only accepts unicode strings; a title that is already
        # a byte string is used as it is instead of aborting the run.
        pass
    G.add_node(episodeID, type='episode', label=title)
    return calaisTagIOT(episodeID, G)
title = 'iotSeriesTest'
G = processIOTSeries(G, seriesID)

# Make sure the output directory exists so a long run is not lost at the
# very last step.
if not os.path.isdir('graphdata'):
    os.makedirs('graphdata')

# Export the graph once per format. The progress message, the networkx
# writer (nx.write_<fmt>) and the file extension are all derived from the
# same name, so they cannot drift apart.
for fmt in ('yaml', 'gml', 'graphml'):
    print('writing %s...' % fmt)
    getattr(nx, 'write_' + fmt)(G, 'graphdata/' + title + '.' + fmt)
    print('...ok, done %s' % fmt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment