Skip to content

Instantly share code, notes, and snippets.

@dpiponi
Created November 28, 2020 01:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpiponi/a697f6d21cf49b361a8f04f9e1545183 to your computer and use it in GitHub Desktop.
Save dpiponi/a697f6d21cf49b361a8f04f9e1545183 to your computer and use it in GitHub Desktop.
import csv
import math
import matplotlib.pyplot as plt
import locale
def frac(x):
    """Return ``x`` minus its floor, i.e. the fractional part of ``x``.

    Because the floor rounds towards negative infinity, a negative input
    gives a non-negative result (``frac(-0.25)`` is ``0.75``).
    """
    whole_part = math.floor(x)
    return x - whole_part
def first_digit(x):
    """Return the leading significant decimal digit (1-9) of a positive number.

    The digit is read from the number's exact decimal text rather than
    computed as ``floor(10 ** frac(log10(x)) + 0.0001)``. That formula
    mis-rounds values lying just below a digit boundary: it reports 2 for
    1999999 and 10 for 999999.

    Args:
        x: A positive ``int`` or a value convertible to a positive finite
            ``float``.

    Returns:
        The first non-zero digit of ``x`` as an ``int`` between 1 and 9.

    Raises:
        ValueError: If ``x`` is zero, negative, NaN or infinite.
    """
    if isinstance(x, int):
        # str() of an int is exact for arbitrarily large values.
        if x <= 0:
            raise ValueError("first_digit() requires a positive number")
        text = str(int(x))
    else:
        value = float(x)
        # ``value > 0`` is False for NaN, so NaN is rejected here as well.
        if not (value > 0 and math.isfinite(value)):
            raise ValueError("first_digit() requires a positive finite number")
        # repr() gives the shortest text that round-trips, e.g. '0.003',
        # '1e-05' or '2e+22'; the mantissa always precedes any exponent.
        text = repr(value)
    for char in text:
        if char in '123456789':
            return int(char)
    # A positive finite number always has a non-zero digit in its text.
    raise ValueError("first_digit() requires a positive number")
# https://exoplanetarchive.ipac.caltech.edu
def orbital_periods():
    """Yield the orbital period (in days) from each row of the exoplanet CSV.

    Reads ``PS_2020.11.27_15.14.11.csv`` relative to the current working
    directory and looks up the ``pl_orbper`` column by its header name.

    Rows whose ``pl_orbper`` cell is blank or absent are skipped, so one
    planet without a measured period does not discard the rest of the file.

    Yields:
        float: The parsed ``pl_orbper`` value of each row that has one.

    Raises:
        KeyError: If the header row has no ``pl_orbper`` column.
        ValueError: If a non-blank cell is not a valid float.
    """
    filename = 'PS_2020.11.27_15.14.11.csv'
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        csvdata = csv.reader(file)
        # An empty file yields an empty header, which surfaces as KeyError.
        headings = next(csvdata, [])
        index = {name: pos for pos, name in enumerate(headings)}['pl_orbper']
        for data in csvdata:
            if len(data) <= index:
                continue  # short or empty row: no period cell at all
            cell = data[index].strip()
            if not cell:
                continue  # period not recorded for this row
            yield float(cell)
# https://www.census.gov/data/tables/time-series/demo/popest/2010s-total-cities-and-towns.html#tables
def town_pops():
    """Yield the integer population found in column 1 of each census row.

    Reads ``SUB-IP-EST2019-ANNRES.csv`` relative to the current working
    directory. The first row is treated as a header and discarded.

    Iteration stops at the first row whose column-1 cell is blank or
    missing. Cells that are present but not integers are skipped.

    Thousands separators are removed by stripping commas directly. This
    avoids switching the process-wide locale to ``en_US.UTF-8``, which
    fails on systems that do not have that locale installed.

    Yields:
        int: The population value of each parsable row.
    """
    filename = 'SUB-IP-EST2019-ANNRES.csv'
    index = 1
    with open(filename, newline='') as file:
        csvdata = csv.reader(file)
        next(csvdata, None)  # discard the header row (tolerates an empty file)
        for data in csvdata:
            if len(data) <= index or not data[index]:
                # Blank population cell: treated as the end of the table.
                return
            try:
                value = int(data[index].replace(',', ''))
            except ValueError:
                # Non-numeric cell: skip this row.
                continue
            # The yield sits outside the try block so that closing the
            # generator is never swallowed by the error handling.
            yield value
# http://www.genomesize.com
def genome_sizes():
    """Yield the genome size (C-value, in picograms) from each CSV row.

    Reads ``genome_size_data_271120_17_16_11.csv`` relative to the current
    working directory and looks up the ``C-value`` column by its header
    name. Rows whose cell is missing or is not a valid float are skipped.

    Yields:
        float: The parsed ``C-value`` of each parsable row.

    Raises:
        KeyError: If the header row has no ``C-value`` column.
    """
    filename = 'genome_size_data_271120_17_16_11.csv'
    with open(filename, newline='') as file:
        csvdata = csv.reader(file)
        # An empty file yields an empty header, which surfaces as KeyError.
        headings = next(csvdata, [])
        index = {name: pos for pos, name in enumerate(headings)}['C-value']
        for data in csvdata:
            try:
                size = float(data[index])
            except (IndexError, ValueError):
                # Short row or unparsable cell: skip it.
                continue
            # The yield sits outside the try block so that closing the
            # generator is never swallowed by the error handling.
            yield size
# https://www.astronexus.com/hyg
def stellar_distances():
    """Yield the stellar distance (in parsecs) from each usable CSV row.

    Reads ``hygdata_v3.csv`` relative to the current working directory and
    looks up the ``dist`` column by its header name.

    A row is skipped when:
      * its ``dist`` cell is missing or is not a valid float;
      * the distance is not greater than zero (this eliminates Sol);
      * the cell text ends in ``'0000'``. Such "exact" numbers are not
        trusted; presumably they are placeholder distances. Confirm this
        against the catalogue documentation.

    Yields:
        float: The parsed ``dist`` value of each accepted row.

    Raises:
        KeyError: If the header row has no ``dist`` column.
    """
    filename = 'hygdata_v3.csv'
    with open(filename, newline='') as file:
        csvdata = csv.reader(file)
        # An empty file yields an empty header, which surfaces as KeyError.
        headings = next(csvdata, [])
        index = {name: pos for pos, name in enumerate(headings)}['dist']
        for data in csvdata:
            try:
                text = data[index]
                dist = float(text)
            except (IndexError, ValueError):
                continue
            if not dist > 0:
                # Eliminate Sol. Written as ``not >`` so NaN is rejected too.
                continue
            if text.endswith('0000'):
                # Not trusting exact numbers.
                continue
            # The yield sits outside the try block so that closing the
            # generator is never swallowed by the error handling.
            yield dist
# periods = town_pops()
# # periods = orbital_periods()
# first_digits = list(map(first_digit, periods))
# print(first_digits)
# n = len(first_digits)
def pdf(x):
    """Return ``log10(x + 1) - log10(x)``.

    For a digit ``x`` from 1 to 9 this is the share of values that
    Benford's law expects to start with that digit.
    """
    log_upper = math.log10(x + 1)
    log_lower = math.log10(x)
    return log_upper - log_lower
# Each entry pairs a plot title with the generator function that supplies
# the values for that plot.
datasets = [
    ('Exoplanet Orbital Periods (in days)', orbital_periods),
    ('US Town Populations', town_pops),
    ('Organism genome size (in picograms)', genome_sizes),
    ('Stellar distances from Hipparcos (in parsecs)', stellar_distances),
]
# Draw one panel per dataset in a 2x2 grid: a histogram of the observed
# leading digits, with red bars showing the counts that pdf() predicts for
# a sample of the same size.
plt.figure(figsize=(20,12))
for i, (name, gen) in enumerate(datasets, start=1):
    plt.subplot(2, 2, i)
    first_digits = [first_digit(value) for value in gen()]
    n = len(first_digits)
    bin_edges = list(range(0, 11))
    plt.hist(first_digits, bins=bin_edges, width = 0.5)
    digits = list(range(1, 10))
    expected_counts = [n * pdf(digit) for digit in digits]
    plt.bar(digits, expected_counts, color='r', width = 0.5)
    plt.title(f"{name} (N={n})")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment