Skip to content

Instantly share code, notes, and snippets.

@lorenzodisidoro
Last active October 22, 2020 09:03
Show Gist options
  • Save lorenzodisidoro/f23d6df6f1ad0845931bb0e9e7ab4ff4 to your computer and use it in GitHub Desktop.
Save lorenzodisidoro/f23d6df6f1ad0845931bb0e9e7ab4ff4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from benfordslaw import benfordslaw
import pandas as pd
import sys
import csv
def build_result(country, p_value, alpha):
    """Build one output row for a country.

    Args:
        country: Column name the data was read from.
        p_value: The 'P' entry of the dict returned by ``bl.fit`` for that
            column (presumably the p-value of the Benford's-law test --
            confirm against the benfordslaw documentation).
        alpha: Significance threshold given on the command line.

    Returns:
        A dict with the keys 'Country', 'P', 'Alpha' and 'Anomaly'.
        'Anomaly' is True when ``p_value <= alpha`` and False otherwise
        (including when the comparison is undefined, e.g. a NaN p-value).
    """
    return {
        'Country': country,
        'P': p_value,
        'Alpha': alpha,
        # bool() turns a possible NumPy boolean into a plain Python bool.
        'Anomaly': bool(p_value <= alpha),
    }


# Guarded so build_result can be imported without running the analysis.
# When the file is executed as a script, every name assigned below is still
# a module-level global (df, bl and results stay available to later code).
if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <csv_path> <alpha>")

    csv_path = sys.argv[1]
    alpha = float(sys.argv[2])
    print("CSV Data: " + csv_path)
    print("alpha: " + str(alpha))

    df = pd.read_csv(csv_path)
    bl = benfordslaw()

    # The first two columns are skipped; every remaining column is treated
    # as one country's series (presumably the leading columns hold
    # date/index data -- verify against the input CSV).
    countries = list(df.columns.values)[2:]

    results = []
    for country in countries:
        X = df[country].values
        result = bl.fit(X)
        results.append(build_result(country, result['P'], alpha))
# Column order of the output file; also the keys expected in every row dict.
csv_columns = ['Country', 'P', 'Alpha', 'Anomaly']
# Output path, relative to the current working directory.
csv_file = "covid_benford_results.csv"


def write_results(csvfile, rows):
    """Write a header line followed by one CSV line per row.

    Args:
        csvfile: A writable text stream. A real file should be opened with
            ``newline=''`` so the csv module controls the line endings.
        rows: Iterable of dicts keyed by the names in ``csv_columns``.
    """
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(rows)


# Guarded so write_results can be imported without touching the filesystem.
if __name__ == "__main__":
    try:
        # newline='' is required by the csv module; without it platforms
        # that translate '\n' emit '\r\r\n' and the file shows blank rows.
        with open(csv_file, 'w', newline='') as csvfile:
            write_results(csvfile, results)
    except IOError as err:
        # Still best-effort (no re-raise), but say what actually failed.
        print("I/O error: " + str(err))
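# To plot the Benford fit for a single country, uncomment the lines below
# (note: this overwrites `results` with the fit output for that country).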
# plot_country = "Italy"
# X = df[plot_country].values
# results = bl.fit(X)
# bl.plot(title=plot_country)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment