Created
May 11, 2020 14:25
-
-
Save mcgill-a/4cbe12e5514d37b0b2a975908f5eeddd to your computer and use it in GitHub Desktop.
Process data from a CSV file to merge comments that span over multiple pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import csv | |
def load_data(filename): | |
with open(filename, 'r', encoding='UTF-8') as file: | |
reader = csv.reader(file) | |
for row in reader: | |
a.append(row[0]) | |
b.append(row[1]) | |
c.append(row[2]) | |
d.append(row[3]) | |
def replace_newline(arr):
    """Replace every newline in each string of *arr* with a single space.

    Mutates *arr* in place and also returns it, so callers may either
    rely on the side effect or rebind the result.
    """
    for index, text in enumerate(arr):
        arr[index] = text.replace('\n', ' ')
    return arr
# columns (module-level lists; load_data appends parsed CSV fields into them)
a, b, c, d = [], [], [], []

in_filename = 'data/Final-DU-Oxide-EIS.csv'
out_filename = 'data/Final-DU-Oxide-EIS-Output.csv'

load_data(in_filename)

# Normalise embedded newlines so each merged comment stays on one CSV row.
a = replace_newline(a)
b = replace_newline(b)
c = replace_newline(c)
d = replace_newline(d)

# Indices of continuation rows that were merged into their predecessor.
idx_remove = []
for i in range(len(a)):
    # An empty comment number means the comment continues from the
    # previous page, so fold this row's fields into the previous row.
    # The `i > 0` guard fixes a wrap-around bug: a blank first row used
    # to be appended onto index -1 (the LAST row) instead of skipped.
    if a[i] == '' and i > 0:
        b[i - 1] += ' ' + b[i]
        c[i - 1] += ' ' + c[i]
        d[i - 1] += ' ' + d[i]
        # mark the current index to be removed
        idx_remove.append(i)

print("Processed " + str(len(idx_remove)) + " rows")

# Remove the merged continuation rows. Popping in reverse index order
# keeps the remaining (smaller) indices valid, replacing the fragile
# manual counter-offset scheme.
for idx in reversed(idx_remove):
    a.pop(idx)
    b.pop(idx)
    c.pop(idx)
    d.pop(idx)

# Reassemble rows and save the data. The context manager guarantees the
# output file is flushed and closed (the original leaked the handle by
# passing an anonymous open() straight to csv.writer).
output = [[a[i], b[i], c[i], d[i]] for i in range(len(a))]
with open(out_filename, 'w', newline='', encoding='UTF-8') as file:
    writer = csv.writer(file)
    writer.writerows(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment