Skip to content

Instantly share code, notes, and snippets.

@hadisfr
Created February 24, 2021 20:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hadisfr/5078731c041585df55ea5d41ca86753c to your computer and use it in GitHub Desktop.
Save hadisfr/5078731c041585df55ea5d41ca86753c to your computer and use it in GitHub Desktop.
Bank Refah Iranian DB Plotter and Aggregator - https://refahdb.mcls.gov.ir/fa/sample
#!/usr/bin/env python3
import csv
from collections import defaultdict
from sys import stderr
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
# Numeric CSV columns that are summed per AGGREGATE_KEY group.  The names
# follow three regular patterns (yearly "<prefix><year>" columns, monthly
# "Card98<month>" columns, yearly trip counters) plus three one-off columns,
# so the list is generated in the same order the columns were listed by hand.
FIELDS_TO_AGGREGATE = (
    [
        "%s%d" % (prefix, year)
        for year in (95, 96, 97)
        for prefix in ("Bardasht", "Variz", "MandehAval", "MandehAkhar", "Sood")
    ]
    + ["Card98%02d" % month for month in range(1, 7)]
    + ["Trip_AirNonPilgrimageCount_%d" % year for year in range(95, 99)]
    + ["Cars_Count", "CarPrice_Sum", "Daramad_Total_Rials"]
)

# Aggregated column whose per-group totals main() hands to plot().
FIELD_TO_PLOT = "MandehAval97"

# Step of the np.arange(0, 100, PLOT_PRECISION) grid sampled by plot().
PLOT_PRECISION = 1

# CSV column whose value identifies the group that rows are summed into.
AGGREGATE_KEY = "ParentId"
def read_db():
    """Read the per-person sample CSV and sum it into one row per group.

    Every input row is assigned to the group named by its AGGREGATE_KEY
    column; each column listed in FIELDS_TO_AGGREGATE is parsed with int()
    and added to that group's running total.

    Returns:
        A defaultdict mapping each AGGREGATE_KEY value to a defaultdict(int)
        holding the key itself (under AGGREGATE_KEY) and the summed fields.

    Raises:
        KeyError: if a row lacks AGGREGATE_KEY or one of the summed columns.
        ValueError: if a summed column does not parse as an integer.
    """
    print("reading", file=stderr)
    summed_fields = set(FIELDS_TO_AGGREGATE)
    families = defaultdict(lambda: defaultdict(int))
    # Input is available at http://mashghema.ir/500000FamilySample-990402.rar
    with open("500000FamilySample-990402.csv") as csv_file:
        rows = csv.DictReader(csv_file)
        # total is only a hint for the progress bar.
        for row in tqdm(rows, total=1456232):
            family_id = row[AGGREGATE_KEY]
            family = families[family_id]
            family[AGGREGATE_KEY] = family_id
            for name in summed_fields:
                family[name] += int(row[name])
    return families
def plot(capital):
    """Plot the share of total capital held by the richest x% of entries.

    For every ``x`` in ``np.arange(0, 100, PLOT_PRECISION)`` the curve shows
    which percentage of ``sum(capital)`` belongs to the
    ``int(len(capital) / 100 * x)`` largest values.

    Args:
        capital: sequence of numeric per-group totals (one value per group).

    Notes:
        * The values are sorted once and turned into running totals instead
          of being re-sorted and re-summed for every sampled percentage.
        * When the requested percentage rounds down to zero entries the
          share is 0 (a ``[-0:]`` slice would select the whole list and
          report 100%).
        * When the grand total is 0 (including empty input) every share is
          reported as 0 instead of raising ZeroDivisionError.
    """
    print("plotting", file=stderr)
    plt.figure(figsize=(5, 5))
    percents = np.arange(0, 100, PLOT_PRECISION)

    # top_sums[k] is the sum of the k largest values; top_sums[0] == 0.
    ranked = sorted(capital, reverse=True)
    top_sums = [0]
    for value in ranked:
        top_sums.append(top_sums[-1] + value)
    total = top_sums[-1]

    shares = []
    for x in tqdm(percents):
        top_count = int(len(ranked) / 100 * x)
        shares.append(top_sums[top_count] / total * 100 if total else 0)

    plt.plot(percents, shares)
    plt.xlabel("families")
    # Label the axis with the plotted field rather than the grouping key.
    plt.ylabel("capital (%s)" % FIELD_TO_PLOT)
    plt.grid()
    plt.show()
def write_aggregated_db(aggregated):
    """Write the aggregated rows to ``500000FamilySample-990402-aggregated.csv``.

    Args:
        aggregated: mapping whose values are dict rows containing
            AGGREGATE_KEY and the FIELDS_TO_AGGREGATE columns; the mapping's
            keys are not written separately.

    The header and column order are ``[AGGREGATE_KEY] + FIELDS_TO_AGGREGATE``.
    """
    print("writing", file=stderr)
    # newline="" lets the csv module control line endings itself; without it
    # platforms that translate "\n" on write emit "\r\r\n" row terminators.
    with open("500000FamilySample-990402-aggregated.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=[AGGREGATE_KEY] + FIELDS_TO_AGGREGATE)
        writer.writeheader()
        writer.writerows(tqdm(aggregated.values()))
def main():
    """Aggregate the sample CSV, save the result, and plot one field.

    Reads and groups the input with read_db(), reports the number of groups
    on stdout, writes them with write_aggregated_db(), then passes every
    group's FIELD_TO_PLOT total to plot().
    """
    families = read_db()
    print("%d aggregated rows" % len(families))
    write_aggregated_db(families)
    plot([family[FIELD_TO_PLOT] for family in families.values()])
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment