Skip to content

Instantly share code, notes, and snippets.

@MiriamMakhyoun
Last active November 13, 2018 00:09
Show Gist options
  • Save MiriamMakhyoun/5ff5dac1ea3d14ae9553d0fcd69a3c31 to your computer and use it in GitHub Desktop.
Save MiriamMakhyoun/5ff5dac1ea3d14ae9553d0fcd69a3c31 to your computer and use it in GitHub Desktop.
Step 2: Parsing data from XML into CSV using Element Tree (data from California Independent System Operator's OASIS Platform)
#!/usr/bin/env python3
#Make sure the file is executable if you plan to run it as a module
#>>>./parse2.py Data/Raw/
import os
import sys
from xml.etree import ElementTree
import csv
directory = sys.argv[1]
#for file_name in os.listdir(directory):
#if file_name.endswith('.xml'):
#full_file = os.path.join(directory, file_name)
#print(full_file)
#dom = ElementTree.parse(full_file)
#for value in dom.findall("./ns:MessagePayload/ns:RTO/ns:REPORT_ITEM/ns:REPORT_DATA/[ns:RESOURCE_NAME='TH_NP15_GEN-APND']/ns:VALUE", {'ns': 'http://www.caiso.com/soa/OASISReport_v1.xsd'}):
#print(" " + value.text)
ns='{http://www.caiso.com/soa/OASISReport_v1.xsd}'
file_index=0
for file_name in os.listdir(directory):
if file_name.endswith('.xml'):
file_index+=1
full_file = os.path.join(directory, file_name)
print(full_file)
tree = ElementTree.parse(full_file)
root = tree.getroot()
print(root.tag)
target_filename='target_'+str(file_index)+'.csv'
print('Processing file ' + str(file_index))
with open(target_filename, 'w', newline='') as r:
writer = csv.writer(r)
writer.writerow(['RTO', 'MARKET TYPE', 'DATA ITEM', 'PRICE','LOCATION','START TIME','END TIME']) # WRITING HEADERS
#for child in root:
#print (child.tag)
for message in root.findall(ns+'MessagePayload'):
for rto in message.findall(ns+'RTO'):
name = rto.find(ns+'name').text
for item in rto.findall(ns+'REPORT_ITEM'):
for header in item.findall(ns+'REPORT_HEADER'):
mkt = header.find(ns+'MKT_TYPE').text
for reportdata in item.findall(ns+'REPORT_DATA'):
dataitem = reportdata.find(ns+'DATA_ITEM').text
price = reportdata.find(ns+'VALUE').text
resource = reportdata.find(ns+'RESOURCE_NAME').text
starttime = reportdata.find(ns+'INTERVAL_START_GMT').text
endtime = reportdata.find(ns+'INTERVAL_END_GMT').text
writer.writerow([name,mkt,dataitem,price,resource,starttime,endtime])
@MiriamMakhyoun
Copy link
Author

MiriamMakhyoun commented Nov 9, 2018

#!/usr/bin/env python3
import os
import sys
from xml.etree import ElementTree

directory = sys.argv[1]

for file_name in os.listdir(directory):
    if file_name.endswith('.xml'):
        full_file = os.path.join(directory, file_name)
        print(full_file)

        dom = ElementTree.parse(full_file)
        for value in dom.findall("./ns:MessagePayload/ns:RTO/ns:REPORT_ITEM/ns:REPORT_DATA/[ns:RESOURCE_NAME='TH_NP15_GEN-APND']/ns:VALUE", {'ns': 'http://www.caiso.com/soa/OASISReport_v1.xsd'}):
            print("  " + value.text)

@Empact
Copy link

Empact commented Nov 9, 2018

@MiriamMakhyoun
Copy link
Author

Got it! Also, I just updated the code to port the XML data into a CSV file!

@MiriamMakhyoun
Copy link
Author

For update:
with open('data/20180919_20180919_PRC_LMP_DAM_LMP_v1.xml') as fd:
doc = xmltodict.parse(fd.read())

[x['REPORT_DATA'] for x in doc['OASISReport']['MessagePayload']['RTO']['REPORT_ITEM'] if x['REPORT_DATA'][0]['RESOURCE_NAME'] == '0096WD_7_N001']

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment