Last active
November 13, 2018 00:09
-
-
Save MiriamMakhyoun/5ff5dac1ea3d14ae9553d0fcd69a3c31 to your computer and use it in GitHub Desktop.
Step 2: Parsing data from XML into CSV using Element Tree (data from California Independent System Operator's OASIS Platform)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
#Make sure the file is executable if you plan to run it as a module | |
#>>>./parse2.py Data/Raw/ | |
import os | |
import sys | |
from xml.etree import ElementTree | |
import csv | |
directory = sys.argv[1] | |
#for file_name in os.listdir(directory): | |
#if file_name.endswith('.xml'): | |
#full_file = os.path.join(directory, file_name) | |
#print(full_file) | |
#dom = ElementTree.parse(full_file) | |
#for value in dom.findall("./ns:MessagePayload/ns:RTO/ns:REPORT_ITEM/ns:REPORT_DATA/[ns:RESOURCE_NAME='TH_NP15_GEN-APND']/ns:VALUE", {'ns': 'http://www.caiso.com/soa/OASISReport_v1.xsd'}): | |
#print(" " + value.text) | |
ns='{http://www.caiso.com/soa/OASISReport_v1.xsd}' | |
file_index=0 | |
for file_name in os.listdir(directory): | |
if file_name.endswith('.xml'): | |
file_index+=1 | |
full_file = os.path.join(directory, file_name) | |
print(full_file) | |
tree = ElementTree.parse(full_file) | |
root = tree.getroot() | |
print(root.tag) | |
target_filename='target_'+str(file_index)+'.csv' | |
print('Processing file ' + str(file_index)) | |
with open(target_filename, 'w', newline='') as r: | |
writer = csv.writer(r) | |
writer.writerow(['RTO', 'MARKET TYPE', 'DATA ITEM', 'PRICE','LOCATION','START TIME','END TIME']) # WRITING HEADERS | |
#for child in root: | |
#print (child.tag) | |
for message in root.findall(ns+'MessagePayload'): | |
for rto in message.findall(ns+'RTO'): | |
name = rto.find(ns+'name').text | |
for item in rto.findall(ns+'REPORT_ITEM'): | |
for header in item.findall(ns+'REPORT_HEADER'): | |
mkt = header.find(ns+'MKT_TYPE').text | |
for reportdata in item.findall(ns+'REPORT_DATA'): | |
dataitem = reportdata.find(ns+'DATA_ITEM').text | |
price = reportdata.find(ns+'VALUE').text | |
resource = reportdata.find(ns+'RESOURCE_NAME').text | |
starttime = reportdata.find(ns+'INTERVAL_START_GMT').text | |
endtime = reportdata.find(ns+'INTERVAL_END_GMT').text | |
writer.writerow([name,mkt,dataitem,price,resource,starttime,endtime]) |
Got it! Also, I just updated the code to port the XML data into a CSV file!
For update:
with open('data/20180919_20180919_PRC_LMP_DAM_LMP_v1.xml') as fd:
doc = xmltodict.parse(fd.read())
[x['REPORT_DATA'] for x in doc['OASISReport']['MessagePayload']['RTO']['REPORT_ITEM'] if x['REPORT_DATA'][0]['RESOURCE_NAME'] == '0096WD_7_N001']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The shebang line needs to be the first line FYI: https://stackoverflow.com/questions/12910744/why-should-the-shebang-line-always-be-the-first-line