Skip to content

Instantly share code, notes, and snippets.

@matteoferla
Created July 2, 2019 06:27
Show Gist options
  • Save matteoferla/4a644fab0735dd41e2c1fb7827bf3e2f to your computer and use it in GitHub Desktop.
Save matteoferla/4a644fab0735dd41e2c1fb7827bf3e2f to your computer and use it in GitHub Desktop.
import wikitextparser as wtp
import re
####### code to convert template to dictionary
def arg_to_val(arg):
    """Return the plain-text value of a template argument.

    Every nested template that has at least one argument is replaced by the
    value of its first argument; for ``nowrap``/``val`` templates the value
    of the first argument whose name contains ``ul`` (the unit) is appended.
    Comments, self-closing tags, paired tags together with their content,
    and ``± <number>`` error terms are then removed, en and em dashes become
    hyphen-minus, and surrounding whitespace is stripped.

    Args:
        arg: Object exposing ``value`` (str) and ``templates`` (iterable of
            objects with ``arguments``, ``normal_name()`` and ``string``),
            e.g. a wikitextparser argument.

    Returns:
        The cleaned-up string.
    """
    val = arg.value
    for t in arg.templates:
        if not t.arguments:
            # Nothing to substitute for an argument-less template; leave its
            # text in place instead of failing on ``arguments[0]``.
            continue
        tval = t.arguments[0].value
        if t.normal_name() in ('nowrap', 'val'):
            # Append the unit when one is given.
            tval += next((a.value for a in t.arguments if 'ul' in a.name), '')
        val = val.replace(t.string, tval)
    # Comments go first so that "<!-- ... -->" is never taken for an opening
    # tag by the paired-tag pattern below; DOTALL covers multi-line comments.
    val = re.sub(r'<!--.*?-->', '', val, flags=re.DOTALL)
    # "[^<>]*" keeps each match inside a single tag, so a self-closing tag
    # late in the string cannot swallow everything from the first "<".
    val = re.sub(r'<[^<>]*/>', '', val)  # remove self-closing tags
    val = re.sub(r'<[^<>]*>.*?</[^<>]*>', '', val)  # remove tags and content
    val = val.replace('\u2013', '-')  # en dash to hyphen-minus
    val = val.replace('\u2014', '-')  # em dash to hyphen-minus
    # Clear the error term for safety, with or without a space after the sign.
    val = re.sub(r'±\s*\d+\.?\d*', '', val)
    return val.strip()
def arg_to_key(arg):
    """Return the argument's name with surrounding whitespace removed."""
    name = arg.name
    return name.strip()
def template_to_dict(template):
    """Build a dict from a template's arguments.

    Keys come from ``arg_to_key`` and values from ``arg_to_val``; when two
    arguments yield the same key, the later one wins.
    """
    converted = {}
    for argument in template.arguments:
        key = arg_to_key(argument)
        converted[key] = arg_to_val(argument)
    return converted
########### example
# NOTE(review): `text` is not defined in this snippet; it presumably holds the
# wikitext source to parse and must be assigned before this runs.
# normal_name() is compared instead of t.name because the latter has a
# trailing space.
starboxes = (
    tpl for tpl in wtp.parse(text).templates
    if tpl.normal_name() == 'Starbox astrometry'
)
for tpl in starboxes:
    print(template_to_dict(tpl))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment