Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import csv
import glob
import sys

from lxml import etree
# Collect every hit recorded in the '*hip.xml' files, label each one with its
# game number, stadium and batting team (taken from the matching '*data.xml'
# file), and write the result to 'hitsPerGame.csv'.

# Input files.
# glob.glob() makes no promise about ordering, so both lists are sorted: this
# keeps the game numbering reproducible between runs and pairs the i-th game
# file with the i-th hip file.
# NOTE(review): the pairing assumes the two sorted lists line up game for
# game -- confirm against the actual file naming scheme.
hipFiles = sorted(glob.glob('*hip.xml'))
gameFiles = sorted(glob.glob('*data.xml'))
if len(hipFiles) != len(gameFiles):
    # Pairing by position is meaningless when a file is missing or extra.
    raise ValueError(
        'Expected one hip file per game file, found %d hip file(s) and '
        '%d game file(s).' % (len(hipFiles), len(gameFiles))
    )

# XML parser.
parser = etree.XMLParser(ns_clean=True)

# One tuple per hit, in CSV column order:
# (game number, stadium, batting team, x coordinate, y coordinate, description)
data = []
for game_number, (gameFile, hipFile) in enumerate(zip(gameFiles, hipFiles), 1):
    # Game data: playing teams, stadium.
    gameTree = etree.parse(gameFile, parser)
    # Hip data: hit description, x & y coordinates, batting team.
    hipTree = etree.parse(hipFile, parser)

    t_name = gameTree.xpath('//team/@name_brief')
    stadium_names = gameTree.xpath('//stadium/@name')
    # Use the first stadium name; fall back to an empty cell if none is present.
    stadium = stadium_names[0] if stadium_names else ''

    # Build each row from a single <hip> element so that a hit with a missing
    # attribute yields an empty cell instead of shifting every later row.
    for hip in hipTree.xpath('//hip'):
        # A team value of 'H' selects the first <team> entry, anything else
        # the second.
        # NOTE(review): presumably 'H' means the home side and the game file
        # lists it first -- verify against the XML.
        batting_team = t_name[0] if hip.get('team') == 'H' else t_name[1]
        data.append((
            game_number,
            stadium,
            batting_team,
            hip.get('x'),
            hip.get('y'),
            hip.get('des'),
        ))

# Output file. It is opened only after every input has been parsed, so a
# parsing failure does not clobber an existing CSV, and the `with` block
# closes it even if writing fails.
# The csv module needs a binary-mode file on Python 2 and a text-mode file
# opened with newline='' on Python 3.
if sys.version_info[0] >= 3:
    csvFile = open('hitsPerGame.csv', 'w', newline='', encoding='utf-8')
else:
    csvFile = open('hitsPerGame.csv', 'wb')
with csvFile:
    # CSV writer.
    writer = csv.writer(csvFile)
    writer.writerows(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment