atiw003/CSV Parsing In Python and R

## CSV Parsing In Python and R
# Using csv reader
import csv

# A csv.reader is a one-shot iterator, so read the rows into a list once and
# derive everything else from that list; blank lines (empty rows) are skipped.
with open('x.csv', 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    rows = [row for row in csvreader if row]

# First column of every row (named so it does not shadow the builtin ``list``).
first_column = [row[0] for row in rows]

# Using zip
# Transpose rows into columns; list() is needed because zip() is lazy on
# Python 3 and therefore has no len().
columns = list(zip(*rows))
nocolumns = len(columns)


# Using zip
import csv

with open('data.csv') as csvfile:
    data = csv.reader(csvfile)
    # Read the column names from the first line of the file
    # (next(data) works on Python 2.6+ and 3; data.next() is Python 2 only).
    fields = next(data)
    for row in data:
        # Pair each header name with the value in the same position.
        item = dict(zip(fields, row))

#Using with statement and multiple csv
from __future__ import with_statement

import csv

def twiddle_csv(file1, file2):
    """Copy the CSV ``file1`` to ``file2``, rescaling the '90mdist' column.

    The first line of ``file1`` is treated as a header and skipped; every
    following row is read as the three columns ID, transect and 90mdist.
    Each '90mdist' value is parsed as an integer and replaced by
    ``2 * value + 30``; the other columns are copied unchanged.
    ``file2`` is overwritten and starts with the header line.

    Raises ValueError if a '90mdist' cell is not an integer literal.
    """
    fields = ['ID', 'transect', '90mdist']

    def mess_with_record(record):
        # Mutates the row dict in place; DictReader yields strings, hence int().
        record['90mdist'] = 2 * int(record['90mdist']) + 30

    # NOTE: the csv docs recommend mode "wb" (Python 2) or newline='' (Python 3)
    # for the output file; plain "w" is kept so the code runs on both.
    with open(file1, "r") as fin:
        with open(file2, "w") as fout:
            reader = csv.DictReader(fin, fieldnames=fields)
            writer = csv.DictWriter(fout, fieldnames=fields)
            # Emit the header through the writer so it gets the same line
            # terminator and quoting as the data rows.
            writer.writerow(dict(zip(fields, fields)))
            # Skip the input's own header line.  next(reader, None) works on
            # Python 2.6+ and 3 (reader.next() is Python 2 only) and does not
            # raise on an empty input file.
            next(reader, None)
            for record in reader:
                mess_with_record(record)
                writer.writerow(record)

if __name__ == '__main__':
    # Script entry point: transform 'file1' into 'file2'.
    twiddle_csv(file1='file1', file2='file2')


#Using numpy
from numpy import *
# loadtxt parses a whitespace-delimited numeric text table; unpack=True
# transposes the result so that `data` is indexed by column rather than by row.
data = loadtxt('table.dat', unpack=True)

#Using scipy
def read_array(filename, dtype, separator=','):
    """Read a delimited text file into a NumPy record array.

    The file may have an arbitrary number of columns, one per field of
    ``dtype``.  Every value is read as text and the whole column is cast to
    the matching field type at runtime.  Blank lines are skipped.

    Parameters:
        filename:  path of the text file to read.
        dtype:     structured dtype (or anything ``numpy.dtype`` accepts as
                   one) with one field per column, in column order.
        separator: string that separates the values on a line.

    Returns:
        A ``numpy.recarray`` with one record per non-blank line.

    Raises:
        IndexError if a line has more values than ``dtype`` has fields;
        ValueError if a value cannot be converted to its field type.
    """
    # Local import: the alias ``N`` used by the original body is never bound
    # in this file.
    import numpy as np

    dtype = np.dtype(dtype)
    columns = [[] for _ in range(len(dtype))]
    with open(filename, 'r') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            for i, value in enumerate(line.split(separator)):
                columns[i].append(value)
    # np.cast was removed in NumPy 2.0; asarray(...).astype(...) performs the
    # same string -> field-type conversion.
    typed = [np.asarray(column).astype(dtype[i])
             for i, column in enumerate(columns)]
    return np.rec.array(typed, dtype=dtype)

# This can then be called with the corresponding dtype:
# Bind the ``N`` alias used below; the file only does ``from numpy import *``.
import numpy as N

# One (name, type) pair per CSV column, in column order.
mydescr = N.dtype([('column1', 'int32'), ('column2Name', 'uint32'), ('col3', 'uint64'), ('c4', 'float32')])
myrecarray = read_array('file.csv', mydescr)

# In R
# Read in a CSV file whose first line holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
my_table <- read.csv("myfile.csv", header = TRUE)

# Access the column with header "states"
my_table$states

# Access the first column
my_table[, 1]

# Iterate over columns and print the 3rd element of each column
# (for the example data this would give output Arizona, Green, poplar)
for (column in my_table) {
  print(column[3])
}
	# Using csv reader
	import csv

	# A csv.reader is a one-shot iterator, so read the rows into a list once and
	# derive everything else from that list; blank lines (empty rows) are skipped.
	with open('x.csv', 'r') as csvfile:
	    csvreader = csv.reader(csvfile)
	    rows = [row for row in csvreader if row]

	# First column of every row (named so it does not shadow the builtin ``list``).
	first_column = [row[0] for row in rows]

	# Using zip
	# Transpose rows into columns; list() is needed because zip() is lazy on
	# Python 3 and therefore has no len().
	columns = list(zip(*rows))
	nocolumns = len(columns)


	# Using zip
	import csv

	with open('data.csv') as csvfile:
	    data = csv.reader(csvfile)
	    # Read the column names from the first line of the file
	    # (next(data) works on Python 2.6+ and 3; data.next() is Python 2 only).
	    fields = next(data)
	    for row in data:
	        # Pair each header name with the value in the same position.
	        item = dict(zip(fields, row))

	#Using with statement and multiple csv
	from __future__ import with_statement

	import csv

	def twiddle_csv(file1, file2):
	    """Copy the CSV ``file1`` to ``file2``, rescaling the '90mdist' column.

	    The first line of ``file1`` is treated as a header and skipped; every
	    following row is read as the three columns ID, transect and 90mdist.
	    Each '90mdist' value is parsed as an integer and replaced by
	    ``2 * value + 30``; the other columns are copied unchanged.
	    ``file2`` is overwritten and starts with the header line.

	    Raises ValueError if a '90mdist' cell is not an integer literal.
	    """
	    fields = ['ID', 'transect', '90mdist']

	    def mess_with_record(record):
	        # Mutates the row dict in place; DictReader yields strings, hence int().
	        record['90mdist'] = 2 * int(record['90mdist']) + 30

	    # NOTE: the csv docs recommend mode "wb" (Python 2) or newline='' (Python 3)
	    # for the output file; plain "w" is kept so the code runs on both.
	    with open(file1, "r") as fin:
	        with open(file2, "w") as fout:
	            reader = csv.DictReader(fin, fieldnames=fields)
	            writer = csv.DictWriter(fout, fieldnames=fields)
	            # Emit the header through the writer so it gets the same line
	            # terminator and quoting as the data rows.
	            writer.writerow(dict(zip(fields, fields)))
	            # Skip the input's own header line.  next(reader, None) works on
	            # Python 2.6+ and 3 (reader.next() is Python 2 only) and does not
	            # raise on an empty input file.
	            next(reader, None)
	            for record in reader:
	                mess_with_record(record)
	                writer.writerow(record)

	if __name__ == '__main__':
	    # Script entry point; the call must be indented under the guard.
	    twiddle_csv('file1', 'file2')


	#Using numpy
	from numpy import *
	# loadtxt parses a whitespace-delimited numeric text table; unpack=True
	# transposes the result so that `data` is indexed by column rather than by row.
	data = loadtxt('table.dat', unpack=True)

	#Using scipy
	def read_array(filename, dtype, separator=','):
	    """Read a delimited text file into a NumPy record array.

	    The file may have an arbitrary number of columns, one per field of
	    ``dtype``.  Every value is read as text and the whole column is cast to
	    the matching field type at runtime.  Blank lines are skipped.

	    Parameters:
	        filename:  path of the text file to read.
	        dtype:     structured dtype (or anything ``numpy.dtype`` accepts as
	                   one) with one field per column, in column order.
	        separator: string that separates the values on a line.

	    Returns:
	        A ``numpy.recarray`` with one record per non-blank line.

	    Raises:
	        IndexError if a line has more values than ``dtype`` has fields;
	        ValueError if a value cannot be converted to its field type.
	    """
	    # Local import: the alias ``N`` used by the original body is never bound
	    # in this file.
	    import numpy as np

	    dtype = np.dtype(dtype)
	    columns = [[] for _ in range(len(dtype))]
	    with open(filename, 'r') as handle:
	        for line in handle:
	            line = line.strip()
	            if not line:
	                continue
	            for i, value in enumerate(line.split(separator)):
	                columns[i].append(value)
	    # np.cast was removed in NumPy 2.0; asarray(...).astype(...) performs the
	    # same string -> field-type conversion.
	    typed = [np.asarray(column).astype(dtype[i])
	             for i, column in enumerate(columns)]
	    return np.rec.array(typed, dtype=dtype)

	# This can then be called with the corresponding dtype:
	# Bind the ``N`` alias used below; the file only does ``from numpy import *``.
	import numpy as N

	# One (name, type) pair per CSV column, in column order.
	mydescr = N.dtype([('column1', 'int32'), ('column2Name', 'uint32'), ('col3', 'uint64'), ('c4', 'float32')])
	myrecarray = read_array('file.csv', mydescr)

	# In R
	# Read in a CSV file whose first line holds the column names.
	# TRUE is spelled out because T is an ordinary variable that can be reassigned.
	my_table <- read.csv("myfile.csv", header = TRUE)

	# Access the column with header "states"
	my_table$states

	# Access the first column
	my_table[, 1]

	# Iterate over columns and print the 3rd element of each column
	# (for the example data this would give output Arizona, Green, poplar)
	for (column in my_table) {
	  print(column[3])
	}