# mcgill-a/process_csv.py

## process_csv.py
import sys
import csv


def load_data(filename):
    """Read the CSV file at *filename* into the module-level column lists.

    The first four fields of every record are appended to the global lists
    ``a``, ``b``, ``c`` and ``d`` respectively; any further fields are
    ignored.  Records with fewer than four fields are padded with empty
    strings and completely empty records (blank lines) are skipped, so a
    ragged file does not abort the load with an ``IndexError``.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.

    Raises:
        OSError: If the file cannot be opened.
    """
    columns = (a, b, c, d)
    with open(filename, 'r', encoding='UTF-8') as file:
        for row in csv.reader(file):
            # csv.reader yields [] for a blank line; there is nothing to store.
            if not row:
                continue
            # Pad short records so the four column lists stay the same length.
            padded = row + [''] * (len(columns) - len(row))
            # zip() stops after four values, which drops any extra fields.
            for column, value in zip(columns, padded):
                column.append(value)

def replace_newline(arr):
    """Swap every newline character in the strings of *arr* for a space.

    The list is updated in place, and that same list object is returned.
    """
    for position, text in enumerate(arr):
        # Splitting on '\n' and re-joining with ' ' replaces each newline
        # with exactly one space.
        arr[position] = ' '.join(text.split('\n'))
    return arr

# columns: load_data() appends the first four fields of every record to these
a, b, c, d = [], [], [], []

in_filename = 'data/Final-DU-Oxide-EIS.csv'
out_filename = 'data/Final-DU-Oxide-EIS-Output.csv'
load_data(in_filename)

# flatten newlines embedded in the fields so every value is a single line
a = replace_newline(a)
b = replace_newline(b)
c = replace_newline(c)
d = replace_newline(d)


# list of marked indices
idx_remove = []
# Index of the most recent row that carries a comment number.  Continuation
# rows are folded into this row rather than into row i-1: when several
# continuation rows follow each other, row i-1 is itself about to be removed
# and anything appended to it would be lost.
last_numbered = None

for i in range(len(a)):
    if a[i] != '':
        last_numbered = i
        continue
    # if there is no comment number, the comment is continuing from the previous page.
    if last_numbered is None:
        # No numbered row precedes this one, so there is nothing to fold it
        # into.  Keep it as-is instead of letting index -1 wrap around to
        # the last row of the file.
        continue
    # append the fields to the row the comment started on
    b[last_numbered] += ' ' + b[i]
    c[last_numbered] += ' ' + c[i]
    d[last_numbered] += ' ' + d[i]
    # mark the current index to be removed
    idx_remove.append(i)

print("Processed " + str(len(idx_remove)) + " rows")

# remove the continuation rows that were merged above (in place, one pass)
removed = set(idx_remove)
for column in (a, b, c, d):
    column[:] = [
        value for pos, value in enumerate(column) if pos not in removed
    ]


output = [list(record) for record in zip(a, b, c, d)]

# save the data; the with-block guarantees the file is flushed and closed,
# and the output is written as UTF-8 to match the encoding of the input
with open(out_filename, 'w', newline='', encoding='UTF-8') as out_file:
    writer = csv.writer(out_file)
    writer.writerows(output)
	import sys
	import csv


	def load_data(filename):
		"""Read the CSV file at *filename* into the module-level column lists.

		The first four fields of every record are appended to the global lists
		``a``, ``b``, ``c`` and ``d`` respectively; any further fields are
		ignored.  Records with fewer than four fields are padded with empty
		strings and completely empty records (blank lines) are skipped, so a
		ragged file does not abort the load with an ``IndexError``.

		Args:
			filename: Path of the UTF-8 encoded CSV file to read.

		Raises:
			OSError: If the file cannot be opened.
		"""
		columns = (a, b, c, d)
		with open(filename, 'r', encoding='UTF-8') as file:
			for row in csv.reader(file):
				# csv.reader yields [] for a blank line; nothing to store.
				if not row:
					continue
				# Pad short records so the column lists stay the same length.
				padded = row + [''] * (len(columns) - len(row))
				# zip() stops after four values, dropping any extra fields.
				for column, value in zip(columns, padded):
					column.append(value)

	def replace_newline(arr):
		"""Replace every newline character in the strings of *arr* with a space.

		The list is modified in place and the same list object is returned.
		"""
		for i in range(len(arr)):
			arr[i] = arr[i].replace('\n', ' ')
		return arr

	# columns: load_data() appends the first four fields of every record to these
	a, b, c, d = [], [], [], []

	in_filename = 'data/Final-DU-Oxide-EIS.csv'
	out_filename = 'data/Final-DU-Oxide-EIS-Output.csv'
	load_data(in_filename)

	# flatten newlines embedded in the fields so every value is a single line
	a = replace_newline(a)
	b = replace_newline(b)
	c = replace_newline(c)
	d = replace_newline(d)


	# list of marked indices
	idx_remove = []
	# Index of the most recent row that carries a comment number.  Continuation
	# rows are folded into this row rather than into row i-1: when several
	# continuation rows follow each other, row i-1 is itself about to be removed
	# and anything appended to it would be lost.
	last_numbered = None

	for i in range(len(a)):
		if a[i] != '':
			last_numbered = i
			continue
		# if there is no comment number, the comment is continuing from the previous page.
		if last_numbered is None:
			# No numbered row precedes this one, so there is nothing to fold
			# it into.  Keep it as-is instead of letting index -1 wrap around
			# to the last row of the file.
			continue
		# append the fields to the row the comment started on
		b[last_numbered] += ' ' + b[i]
		c[last_numbered] += ' ' + c[i]
		d[last_numbered] += ' ' + d[i]
		# mark the current index to be removed
		idx_remove.append(i)

	print("Processed " + str(len(idx_remove)) + " rows")

	# remove the continuation rows that were merged above (in place, one pass)
	removed = set(idx_remove)
	for column in (a, b, c, d):
		column[:] = [
			value for pos, value in enumerate(column) if pos not in removed
		]


	output = [list(record) for record in zip(a, b, c, d)]

	# save the data; the with-block guarantees the file is flushed and closed,
	# and the output is written as UTF-8 to match the encoding of the input
	with open(out_filename, 'w', newline='', encoding='UTF-8') as out_file:
		writer = csv.writer(out_file)
		writer.writerows(output)