Created
November 28, 2020 01:51
-
-
Save dpiponi/a697f6d21cf49b361a8f04f9e1545183 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import math | |
import matplotlib.pyplot as plt | |
import locale | |
def frac(x):
    """Return ``x`` minus ``floor(x)``, i.e. the fractional part of ``x``.

    Because the floor (not the truncated integer part) is subtracted,
    negative inputs map upwards: ``frac(-0.25) == 0.75``.
    """
    return x - math.floor(x)
def first_digit(x):
    """Return the leading (most significant) decimal digit of a positive number.

    The digit is read from the number's decimal representation instead of
    being computed as ``floor(10 ** frac(log10(x)) + 0.0001)``.  That fudge
    factor rounded mantissas just below a digit boundary upwards, so 199999
    was reported as 2 and 9.99995 as 10.

    Args:
        x: A positive, finite int or float.

    Returns:
        An int between 1 and 9 inclusive.

    Raises:
        ValueError: If ``x`` is zero, negative, NaN or infinite.
    """
    if isinstance(x, int):
        if x <= 0:
            raise ValueError("first_digit() requires a positive number")
        # The decimal string of an int is exact however large the value is.
        return int(str(int(x))[0])

    value = float(x)
    if not (value > 0 and math.isfinite(value)):
        raise ValueError("first_digit() requires a positive, finite number")
    # repr() is the shortest decimal string that round-trips to the same
    # float, so its first non-zero character is the leading digit; this also
    # covers forms such as '0.05' and '1e-05'.
    return int(next(char for char in repr(value) if char in "123456789"))
# https://exoplanetarchive.ipac.caltech.edu
def orbital_periods(filename='PS_2020.11.27_15.14.11.csv'):
    """Yield every non-blank ``pl_orbper`` value of a CSV file as a float.

    Rows whose ``pl_orbper`` cell is blank or missing are skipped.  (The
    previous version stopped at the first blank cell, silently discarding
    every row after it.)

    Args:
        filename: Path of the CSV file; its first row must be the heading
            row.  Defaults to the export this script was written against.

    Yields:
        float: One value per row that has a non-blank ``pl_orbper`` cell.

    Raises:
        KeyError: If the heading row has no ``pl_orbper`` column.
        ValueError: If a non-blank cell cannot be parsed as a float.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        rows = csv.reader(file)
        headings = next(rows, None)
        if headings is None:
            # Empty file: nothing to yield.
            return
        column = {name: position for position, name in enumerate(headings)}['pl_orbper']
        for row in rows:
            if len(row) <= column or not row[column]:
                continue
            yield float(row[column])
# https://www.census.gov/data/tables/time-series/demo/popest/2010s-total-cities-and-towns.html#tables
def town_pops(filename='SUB-IP-EST2019-ANNRES.csv'):
    """Yield the integer found in the second column of each row of a CSV file.

    The first row is treated as a heading row and skipped.  Cells are parsed
    as integers written with ``,`` as the thousands separator (``"2,688"``);
    rows whose cell is blank, missing or not such an integer are skipped.

    The separator is stripped directly rather than through
    ``locale.setlocale``/``locale.atoi``, so the function neither changes
    the process-wide locale nor fails on systems where ``en_US.UTF-8`` is
    not installed.

    Args:
        filename: Path of the CSV file.  Defaults to the table this script
            was written against.

    Yields:
        int: One value per row whose second column parses as an integer.
    """
    column = 1
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        rows = csv.reader(file)
        next(rows, None)  # Skip the heading row.
        for row in rows:
            if len(row) <= column or not row[column]:
                continue
            try:
                population = int(row[column].replace(',', ''))
            except ValueError:
                # Title, heading and footnote rows carry text, not a count.
                continue
            # Yield outside the try block so that closing the generator
            # (GeneratorExit) is never swallowed by the handler.
            yield population
# http://www.genomesize.com
def genome_sizes(filename='genome_size_data_271120_17_16_11.csv'):
    """Yield every ``C-value`` cell of a CSV file that parses as a float.

    Rows whose ``C-value`` cell is blank, missing or not a plain number are
    skipped.  The per-row debugging ``print`` of the previous version has
    been removed.

    Args:
        filename: Path of the CSV file; its first row must be the heading
            row.  Defaults to the export this script was written against.

    Yields:
        float: One value per row with a parseable ``C-value`` cell.

    Raises:
        KeyError: If the heading row has no ``C-value`` column.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        rows = csv.reader(file)
        headings = next(rows, None)
        if headings is None:
            # Empty file: nothing to yield.
            return
        column = {name: position for position, name in enumerate(headings)}['C-value']
        for row in rows:
            try:
                size = float(row[column])
            except (ValueError, IndexError):
                continue
            # Yield outside the try block so that closing the generator
            # (GeneratorExit) is never swallowed by the handler.
            yield size
# https://www.astronexus.com/hyg
def stellar_distances(filename='hygdata_v3.csv'):
    """Yield the usable ``dist`` values of a CSV file as floats.

    A value is yielded only if it is greater than zero (this eliminates
    Sol) and its text does not end in ``0000``; such exact-looking numbers
    are not trusted.  Rows whose ``dist`` cell is missing or not a number
    are skipped.  The per-row debugging ``print`` of the previous version
    has been removed.

    Args:
        filename: Path of the CSV file; its first row must be the heading
            row.  Defaults to the catalogue this script was written against.

    Yields:
        float: One distance per row that passes the filters above.

    Raises:
        KeyError: If the heading row has no ``dist`` column.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        rows = csv.reader(file)
        headings = next(rows, None)
        if headings is None:
            # Empty file: nothing to yield.
            return
        column = {name: position for position, name in enumerate(headings)}['dist']
        for row in rows:
            try:
                text = row[column]
                dist = float(text)
            except (ValueError, IndexError):
                continue
            # dist > 0 eliminates Sol; values ending in '0000' are
            # exact-looking numbers that are not trusted.
            if dist > 0 and not text.endswith('0000'):
                # Yield outside the try block so that closing the generator
                # (GeneratorExit) is never swallowed by the handler.
                yield dist
# periods = town_pops() | |
# # periods = orbital_periods() | |
# first_digits = list(map(first_digit, periods)) | |
# print(first_digits) | |
# n = len(first_digits) | |
def pdf(x):
    """Return the Benford's-law probability that the leading digit is ``x``.

    Computed as ``log10(x + 1) - log10(x)``; the values for ``x`` = 1..9
    sum to 1.

    Raises:
        ValueError: If ``x`` is zero or negative (``math.log10`` rejects it).
    """
    return math.log10(x + 1) - math.log10(x)
# (plot title, generator function) pairs; one subplot is drawn per entry.
datasets = [
    ('Exoplanet Orbital Periods (in days)', orbital_periods),
    ('US Town Populations', town_pops),
    ('Organism genome size (in picograms)', genome_sizes),
    ('Stellar distances from Hipparcos (in parsecs)', stellar_distances),
]

plt.figure(figsize=(20, 12))
# Subplot positions are 1-based, hence start=1.
for i, (name, gen) in enumerate(datasets, start=1):
    plt.subplot(2, 2, i)
    first_digits = [first_digit(value) for value in gen()]
    n = len(first_digits)
    # Observed count of each leading digit.
    plt.hist(first_digits, bins=list(range(0, 11)), width=0.5)
    # Count expected for each digit 1..9: sample size times pdf(digit).
    plt.bar(list(range(1, 10)), [n * pdf(digit) for digit in range(1, 10)],
            color='r', width=0.5)
    plt.title(name + " (N=" + str(n) + ")")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment