Skip to content

Instantly share code, notes, and snippets.

Forked from bigsquirrel/
Last active Aug 29, 2015
What would you like to do?
# filename:
# author: ivanchou
import codecs, os
import xml.etree.ElementTree as ET
# Element tags treated as DBLP publication records by the parse loop.
# NOTE(review): the original literal was cut off after 'book' and never
# closed. The entries on the second line are the remaining record element
# types from the public DBLP DTD -- confirm against the intended list.
paper_tag = ('article', 'inproceedings', 'proceedings', 'book',
             'incollection', 'phdthesis', 'mastersthesis', 'www')
class AllEntities:
    """Mapping-like object whose subscript lookup echoes the key back."""

    def __getitem__(self, key):
        """Return ``key`` unchanged, so a lookup never fails for any name."""
        return key
print ('----------parse begin----------')

# Open the UTF-8 output file in a context manager so it is flushed and closed
# even if parsing raises part-way through the input.
with codecs.open('authors', 'w', 'utf-8') as result:
    # Build a parser whose entity table is an AllEntities instance.
    # NOTE(review): this parser is never handed to ET.iterparse() below, so
    # as written it does not influence the parse -- confirm the intent.
    parser = ET.XMLParser()
    parser.entity = AllEntities()

    for event, article in ET.iterparse('dblp_part.xml'):
        # Write every direct <author> child of the element, '|'-terminated.
        for author in article.findall('author'):
            # An empty <author/> has text None; emit only the separator then.
            result.write((author.text or u'') + u'|')
        if event == 'end' and article.tag in paper_tag:
            # NOTE(review): the body of this branch was missing from the
            # source. Ending the record with a newline is an assumption --
            # confirm against the original script.
            result.write(u'\n')

print ('----------parse end----------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment