Skip to content

Instantly share code, notes, and snippets.

@xvzf
Created April 17, 2021 21:45
Show Gist options
  • Save xvzf/725013889a0dc84bffd996a616074d5b to your computer and use it in GitHub Desktop.
Save xvzf/725013889a0dc84bffd996a616074d5b to your computer and use it in GitHub Desktop.
import xml.etree.ElementTree as Xet
import pandas as pd
# Column names of the output table, in output order. The list is passed as
# `columns=` when the DataFrame is built, and every name is also used as a
# key of the per-hit row dicts.
cols = [
    # The matched token plus three columns the script always leaves empty.
    "Praeposition", "Kasus", "Genus", "Sonstiges",
    # Assembled quotation (context + match) and its source reference.
    "Zitat", "Quelle",
    # Metadata copied verbatim from the XML entry.
    "Geschlecht", "Alter", "Region", "Vertrautheit", "Art",
    # Value of the entry's `dgd-link` element.
    "DGD",
]
def _child_text(entry, tag):
    """Return the text of the first ``tag`` child of ``entry``, if any.

    Returns ``None`` both when the child element is missing and when it is
    present but has no text, so callers can skip it with a single check.
    """
    node = entry.find(tag)
    return None if node is None else node.text


def _required_text(entry, tag):
    """Return the text of the first ``tag`` child of ``entry``.

    Raises:
        AttributeError: if ``entry`` has no ``tag`` child.
    """
    return entry.find(tag).text


def _build_row(entry):
    """Build the output row (a dict keyed by column name) for one XML entry.

    ``entry`` is one direct child of an export file's root element. The
    quotation is ``left-context`` + ``match`` + " " + ``right-context``;
    either context is simply left out when it is missing or empty.

    Raises:
        AttributeError: if ``match`` or any metadata element is missing.
        TypeError: if the ``match`` element has no text.
    """
    match_text = _required_text(entry, "match")
    left = _child_text(entry, "left-context")
    right = _child_text(entry, "right-context")

    # Explicit None checks instead of a bare `except: pass`, which would
    # also have swallowed KeyboardInterrupt and any unrelated error.
    content = (left or "") + match_text
    if right is not None:
        content = content + " " + right

    return {
        "Praeposition": match_text,
        # Always written empty by this script.
        # NOTE(review): presumably filled in by hand afterwards - confirm.
        "Kasus": "",
        "Genus": "",
        "Sonstiges": "",
        "Zitat": content,
        "Quelle": (
            f"{_required_text(entry, 'transcript-id')} "
            f"{_required_text(entry, 'v_e_datum')} "
            f"{_required_text(entry, 'speaker-id')}"
        ),
        "Geschlecht": _required_text(entry, "v_s_geschlecht"),
        "Alter": _required_text(entry, "v_ses_alter_s"),
        "Region": _required_text(entry, "v_e_region_wiesinger"),
        "Vertrautheit": _required_text(entry, "v_e_se_vertrautheit"),
        "Art": _required_text(entry, "v_e_se_art"),
        "DGD": _required_text(entry, "dgd-link"),
    }


# XML export files to merge, read from the current working directory.
input_files = [
    "DGD-Download_KWIC_ID057C3083-2053-D618-675E-195C949D95F7.xml",
    "DGD-Download_KWIC_ID180EE36C-8CEE-F4DC-4291-E405D77B6FED.xml",
    "DGD-Download_KWIC_ID38633448-31B8-B1F8-C4DA-ED41D52F4F6B.xml",
    "DGD-Download_KWIC_ID6ED5059F-287A-13F4-D937-DD68323ECC80.xml",
    "DGD-Download_KWIC_ID7BA9DF48-8F81-72F2-DCE8-F3FE11D7028A.xml",
    "DGD-Download_KWIC_ID923583CB-8D12-71B7-5C7D-19A0926D0429.xml",
    "DGD-Download_KWIC_IDA9C3D57A-6D61-7AD9-77C3-C0AAADD6DD8E.xml",
    "DGD-Download_KWIC_IDCE4ABF25-D0B3-A260-1ECE-D9BA5A0AC864.xml",
]

rows = []
# Parse each XML file; every direct child of the root becomes one row.
for path in input_files:
    root = Xet.parse(path).getroot()
    rows.extend(_build_row(entry) for entry in root)

# `columns=cols` fixes the column order of the written table.
df = pd.DataFrame(rows, columns=cols)
df.to_csv('output.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment