Skip to content

Instantly share code, notes, and snippets.

@mcgill-a
Created May 11, 2020 14:25
Show Gist options
  • Save mcgill-a/4cbe12e5514d37b0b2a975908f5eeddd to your computer and use it in GitHub Desktop.
Save mcgill-a/4cbe12e5514d37b0b2a975908f5eeddd to your computer and use it in GitHub Desktop.
Process data from a CSV file to merge comments that span multiple pages
import sys
import csv
def load_data(filename):
    """Read a CSV file and append its first four columns to a, b, c and d.

    The values are appended to the module-level lists ``a``, ``b``, ``c``
    and ``d`` (one entry per CSV row, in file order); nothing is returned.

    Blank lines are skipped, and rows with fewer than four fields are padded
    with empty strings so the four lists always stay the same length.
    Fields beyond the fourth are ignored.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE: the file is opened with default newline translation, so a
    # '\r\n' inside a quoted field arrives here as '\n'.
    with open(filename, 'r', encoding='UTF-8') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            # Pad short rows so indexing cannot fail part-way through and
            # leave the four columns misaligned.
            first, second, third, fourth = (row + [''] * 4)[:4]
            a.append(first)
            b.append(second)
            c.append(third)
            d.append(fourth)
def replace_newline(arr):
    """Replace every newline character in each string of *arr* with a space.

    The list is modified in place and the same list object is returned.
    """
    for position, text in enumerate(arr):
        arr[position] = text.replace('\n', ' ')
    return arr
# columns: load_data() appends to these module-level lists, one entry per
# CSV row. `a` holds the comment number (empty on continuation rows);
# b, c and d hold the remaining three fields.
a, b, c, d = [], [], [], []

in_filename = 'data/Final-DU-Oxide-EIS.csv'
out_filename = 'data/Final-DU-Oxide-EIS-Output.csv'

load_data(in_filename)

# flatten embedded line breaks so every field is a single line
a = replace_newline(a)
b = replace_newline(b)
c = replace_newline(c)
d = replace_newline(d)

# list of marked indices
idx_remove = []
# index of the most recent row that is being kept; continuation rows are
# merged into it
last_kept = None
for i in range(len(a)):
    # if there is no comment number, the comment is continuing from the
    # previous page.
    if a[i] == '' and last_kept is not None:
        # Append to the last *kept* row rather than to i-1: when a comment
        # spans three or more pages, row i-1 is itself a continuation row
        # that is about to be removed, so text appended to it would be lost.
        b[last_kept] += ' ' + b[i]
        c[last_kept] += ' ' + c[i]
        d[last_kept] += ' ' + d[i]
        # mark the current index to be removed
        idx_remove.append(i)
    else:
        # Either a numbered row, or an un-numbered first row that has
        # nothing before it to merge into (i-1 would wrap around to the
        # last row), so it is kept unchanged.
        last_kept = i

print("Processed " + str(len(idx_remove)) + " rows")

# remove the continuation rows that were marked; walk backwards so the
# indices that are still pending stay valid
for idx in reversed(idx_remove):
    a.pop(idx)
    b.pop(idx)
    c.pop(idx)
    d.pop(idx)

output = [list(row) for row in zip(a, b, c, d)]

# save the data; write UTF-8 to match the input encoding, and use a context
# manager so the file is flushed and closed
with open(out_filename, 'w', newline='', encoding='UTF-8') as out_file:
    writer = csv.writer(out_file)
    writer.writerows(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment