Skip to content

Instantly share code, notes, and snippets.

@elidickinson
Created January 28, 2012 17:20
Show Gist options
  • Save elidickinson/1695094 to your computer and use it in GitHub Desktop.
Save elidickinson/1695094 to your computer and use it in GitHub Desktop.
Shows how to access the libots C library from Python.
from ctypes import *
from ctypeslib.contrib.pythonhdr import *
def quick_summary(data, words=150):
    """Summarize ``data`` with libots in a single call.

    Builds a throwaway ``OTS`` instance, feeds ``data`` to its
    ``parse_string`` method and returns what ``summarize_by_words`` produces.

    Args:
        data: The text to summarize; forwarded to ``OTS.parse_string``.
        words: Word budget forwarded to ``OTS.summarize_by_words``.

    Returns:
        The value returned by ``OTS.summarize_by_words(words)``.
    """
    ots = OTS()
    try:
        ots.parse_string(data)
        return ots.summarize_by_words(words)
    finally:
        # The article is allocated by the C library and nothing else in this
        # helper releases it, so free it here to avoid leaking one article
        # per call.  Resetting the attribute afterwards guarantees the same
        # pointer can never be freed a second time through this instance.
        if ots.article is not None:
            ots.libots.ots_free_article(ots.article)
            ots.article = None
class OTS:
    """Small ctypes wrapper around the libots text summarizer.

    Typical use: call ``parse_string`` or ``parse_file`` to load and grade a
    document, then ``summarize_by_words`` to obtain the summary, and finally
    ``close`` (or use the instance as a context manager) to release the
    native article.
    """

    # File name of the shared library handed to ctypes.
    library_name = 'libots-1.so.0.5.0'
    # ctypes handle of the loaded library; set in __init__.
    libots = None
    # Pointer to the current native article, or None when nothing is loaded.
    article = None
    # Name of the libots dictionary loaded for every new article.
    dict_name = "en"

    def __init__(self):
        """Load libots and declare the prototypes of the functions used."""
        # CDLL loads the library by itself; a separate cdll.LoadLibrary call
        # beforehand is redundant.
        self.libots = CDLL(self.library_name)
        self._declare_prototypes()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _declare_prototypes(self):
        """Tell ctypes the pointer-bearing signatures of the libots calls.

        Without this ctypes assumes every function returns a C ``int`` and
        converts Python integers to C ``int`` arguments, which truncates
        article pointers on 64-bit platforms.
        """
        lib = self.libots
        lib.ots_new_article.restype = c_void_p
        lib.ots_new_article.argtypes = []
        lib.ots_free_article.restype = None
        lib.ots_free_article.argtypes = [c_void_p]
        lib.ots_load_xml_dictionary.argtypes = [c_void_p, c_char_p]
        lib.ots_parse_stream.restype = None
        lib.ots_parse_stream.argtypes = [c_char_p, c_size_t, c_void_p]
        lib.ots_grade_doc.restype = None
        lib.ots_grade_doc.argtypes = [c_void_p]
        lib.ots_highlight_doc_words.restype = None
        lib.ots_highlight_doc_words.argtypes = [c_void_p, c_int]
        lib.ots_get_article_word_count.restype = c_int
        lib.ots_get_article_word_count.argtypes = [c_void_p]
        # NOTE(review): libots.h is believed to declare the length
        # out-parameter as ``size_t *`` -- confirm against the installed
        # header.  c_size_t is never smaller than c_int, so the buffer is
        # large enough either way.
        lib.ots_get_doc_text.restype = c_char_p
        lib.ots_get_doc_text.argtypes = [c_void_p, POINTER(c_size_t)]
        # ots_parse_file is deliberately left without argtypes: its first
        # argument is whatever PyFile_AsFile returns and is passed through
        # untouched; parse_file wraps the article pointer explicitly.

    @staticmethod
    def _to_utf8(data):
        """Return ``data`` UTF-8 encoded if it is text, unchanged otherwise."""
        try:
            text_type = unicode  # Python 2
        except NameError:
            text_type = str  # Python 3
        if isinstance(data, text_type):
            return data.encode('utf-8')
        return data

    def _require_article(self):
        """Raise RuntimeError unless a document has been parsed."""
        if self.article is None:
            raise RuntimeError(
                "no article loaded; call parse_string() or parse_file() first")

    def _start_article(self):
        """Replace the current article with a fresh one and load the dictionary."""
        # Free the previous article first so re-parsing does not leak it.
        self.close()
        article = self.libots.ots_new_article()
        if article is None:
            # c_void_p return values map a NULL pointer to None.
            raise RuntimeError("ots_new_article() returned NULL")
        self.article = article
        self.libots.ots_load_xml_dictionary(
            article, self._to_utf8(self.dict_name))

    def close(self):
        """Free the current native article, if any.  Safe to call repeatedly."""
        if self.article is not None and self.libots is not None:
            self.libots.ots_free_article(self.article)
        self.article = None

    def parse_file(self, fname):
        """Parse and grade the document stored in the file ``fname``.

        Relies on ``PyFile_AsFile`` (from the file's ctypeslib import) to
        hand the open file to libots.
        """
        self._start_article()
        # ``with`` closes the file even if the native call raises.
        with open(fname, 'r') as f:
            self.libots.ots_parse_file(
                PyFile_AsFile(f), c_void_p(self.article))  # 0
        self.libots.ots_grade_doc(self.article)

    def parse_string(self, data):
        """Parse and grade ``data``; text input is UTF-8 encoded first."""
        # Encode before allocating so a bad input cannot strand an article.
        data = self._to_utf8(data)
        self._start_article()
        self.libots.ots_parse_stream(data, len(data), self.article)
        self.libots.ots_grade_doc(self.article)

    def article_word_count(self):
        """Return libots' word count for the parsed article.

        Raises:
            RuntimeError: if nothing has been parsed yet.
        """
        self._require_article()
        return self.libots.ots_get_article_word_count(self.article)

    def summarize_by_words(self, word_count=150):
        """Return the summary, highlighted with a ``word_count`` budget.

        The bytes returned by libots are decoded as UTF-8.

        Raises:
            RuntimeError: if nothing has been parsed yet.
        """
        self._require_article()
        self.libots.ots_highlight_doc_words(self.article, word_count)
        summary = self._get_summary_string()
        if summary is None:
            # A NULL text pointer comes back as None; report it as empty.
            return u''
        return summary.decode('utf-8')

    def _get_summary_string(self):
        """Return the highlighted document text as raw bytes from libots."""
        self._require_article()
        # Receives the text length; only the NUL-terminated string is used.
        out_len = c_size_t()
        return self.libots.ots_get_doc_text(self.article, byref(out_len))
from ctypes import *
from ctypeslib.contrib.pythonhdr import *
# Demo: summarize a text file with libots through ctypes.

# CDLL loads the shared library by itself; no separate cdll.LoadLibrary
# call is needed.
libots = CDLL('libots-1.so.0.5.0')

# Declare the pointer-bearing prototypes.  ctypes otherwise assumes C ``int``
# for return values and integer arguments, which truncates the article
# pointer on 64-bit platforms.
libots.ots_new_article.restype = c_void_p
libots.ots_new_article.argtypes = []
libots.ots_free_article.restype = None
libots.ots_free_article.argtypes = [c_void_p]
libots.ots_load_xml_dictionary.argtypes = [c_void_p, c_char_p]
libots.ots_get_article_word_count.restype = c_int
libots.ots_get_article_word_count.argtypes = [c_void_p]
libots.ots_grade_doc.restype = None
libots.ots_grade_doc.argtypes = [c_void_p]
libots.ots_highlight_doc_words.restype = None
libots.ots_highlight_doc_words.argtypes = [c_void_p, c_int]
# NOTE(review): libots.h is believed to declare the length out-parameter as
# ``size_t *`` -- confirm against the installed header.  c_size_t is never
# smaller than c_int, so the buffer is large enough either way.
libots.ots_get_doc_text.restype = c_char_p
libots.ots_get_doc_text.argtypes = [c_void_p, POINTER(c_size_t)]

# ``with`` and ``try/finally`` release the file and the native article even
# when one of the calls in between raises.
with open('/home/eli/article1.txt', 'r') as f:
    art = libots.ots_new_article()
    try:
        # load dict
        libots.ots_load_xml_dictionary(art, b"en")
        # ots_parse_file has no argtypes declared, so wrap the article
        # pointer explicitly to keep it from being narrowed to a C int.
        libots.ots_parse_file(PyFile_AsFile(f), c_void_p(art))  # 0
        word_count = libots.ots_get_article_word_count(art)
        print("%d words in the article" % word_count)
        libots.ots_grade_doc(art)
        # The author left ``libots.ots_highlight_doc(art, 2)`` disabled here
        # in favour of the word-based call below.
        libots.ots_highlight_doc_words(art, 200)
        bytes_out = c_size_t()
        summary = libots.ots_get_doc_text(art, byref(bytes_out))
        print(summary)
    finally:
        libots.ots_free_article(art)
@elidickinson
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment