Last active
June 4, 2021 00:09
-
-
Save vs2961/d3622db320a30f8e7d14daf69d13b2f2 to your computer and use it in GitHub Desktop.
Parses MN.csv and creates two files, after.csv and before.csv, which contain the number of floors and the construction year of each building in the two random samples (buildings from 1950 or later, and buildings from before 1950, respectively).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import random | |
import math | |
def get_data(isAfter1950):
    """Load (floor count, year) pairs from MN.csv for one side of the 1950 split.

    Column 44 holds the floor count (its header cell is "NumFloors") and
    column 60 holds a year. Rows are kept only when the floor count is a
    non-zero number and the year is a non-zero whole number.

    Args:
        isAfter1950: If true, keep rows whose year is >= 1950; otherwise keep
            rows whose year is < 1950.

    Returns:
        A list of ``(floors, year)`` tuples where ``floors`` is a float and
        ``year`` is the original year string from the file.
    """
    floors_col = 44
    year_col = 60
    want_recent = bool(isAfter1950)
    selected = []
    # newline="" is what the csv module expects so that it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open("MN.csv", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # Blank or truncated lines cannot carry both columns; skip them
            # instead of failing with IndexError.
            if len(row) <= year_col:
                continue
            try:
                floors = float(row[floors_col])
            except ValueError:
                # Header cell ("NumFloors") or a blank/non-numeric value.
                continue
            year_text = row[year_col]
            # isdecimal() only accepts characters that int() can parse.
            if floors == 0 or not year_text.isdecimal():
                continue
            year = int(year_text)
            if year == 0:
                continue
            if (year >= 1950) == want_recent:
                selected.append((floors, year_text))
    return selected
def get_sample(x, size=40):
    """Draw a random sample of records and summarise their first field.

    Args:
        x: Indexable collection of records whose element 0 is numeric
            (the callers pass ``(floors, year)`` tuples).
        size: Number of distinct records to draw. Defaults to 40.

    Returns:
        A tuple ``(mean, stdev, indices)`` where ``mean`` is the sample mean
        of element 0, ``stdev`` is the sample standard deviation (n - 1
        denominator) and ``indices`` is the set of chosen positions in ``x``.

    Raises:
        ValueError: If ``size`` is less than 2, or ``x`` holds fewer than
            ``size`` records.
    """
    if size < 2:
        raise ValueError(
            "size must be at least 2 to compute a sample standard deviation"
        )
    # random.sample draws without replacement from every index, including 0,
    # and raises ValueError when the population is too small instead of
    # spinning forever looking for an unused index.
    ind = set(random.sample(range(len(x)), size))

    values = [x[i][0] for i in ind]
    mean = sum(values) / size
    variance = sum((value - mean) ** 2 for value in values) / (size - 1)
    return (mean, math.sqrt(variance), ind)
# Split the buildings at 1950 and draw an independent random sample from each
# group.
x = get_data(True)
y = get_data(False)
m1, s1, i1 = get_sample(x)
m2, s2, i2 = get_sample(y)

# Two-sample test statistic with unpooled variances: the difference of the
# sample means divided by its estimated standard error. The sample sizes are
# taken from the index sets rather than hard-coded.
value = (m1 - m2) / math.sqrt((s1 ** 2 / len(i1)) + (s2 ** 2 / len(i2)))
print(m1, s1)
print(m2, s2)
print(value)

# Persist the sampled rows (floors, year); the context managers close the
# files even if a write fails.
with open("after.csv", "w") as newcsv:
    for i in i1:
        newcsv.write(f"{x[i][0]}, {x[i][1]}\n")
with open("before.csv", "w") as newcsv:
    for i in i2:
        newcsv.write(f"{y[i][0]}, {y[i][1]}\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment