Skip to content

Instantly share code, notes, and snippets.

@victor-abz
Created October 7, 2021 13:18
Show Gist options
  • Save victor-abz/b54a905d389a98bc1e5ea36ba8413182 to your computer and use it in GitHub Desktop.
Save victor-abz/b54a905d389a98bc1e5ea36ba8413182 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
import pandas as pd
import numpy as np
import faker
import datetime
import random
from six.moves import reduce
from dateutil.relativedelta import relativedelta
import uuid
# Shared Faker instance; Rand() calls fake.date_between() on it to draw each
# loan's creation date.
fake = faker.Faker()
# Loan statuses. Order matters: Rand() pairs this list positionally with its
# probability vector when sampling a status.
status_list = ["ACTIVE", "PENDING", "COMPLETED", "REJECTED"]

# Pool sampled uniformly for the 'lastName' field. Every entry is unique so
# that no name is accidentally drawn twice as often as the others.
# NOTE(review): these look like given names while first_name_list holds family
# names - presumably a family-name-first convention; confirm before swapping.
last_name_list = [
    "Alexandre", "Victor", "John", "Emmanuel", "Peter", "Kanamugire",
    "Joyeuse", "Claire", "Violette", "Henrietter", "Emerthe", "Rachel",
    "Clementine", "Phoebe", "Jean d'amour", "Joseph",
    "Rene", "Julius", "Israel", "Benoit", "Pius", "Anita", "Rosine",
    "Julia", "Daria", "Doreen", "Emery",
    "Esperance", "Patricie", "Nathan", "Samuel",
]

# Pool sampled uniformly for the 'firstName' field; entries are unique for the
# same reason as above.
first_name_list = [
    "Kayonga", "Abizeyimana", "Ooon", "Davis", "Byiringiro", "Kwizera",
    "Uwamahoro", "Masengesho",
    "Mahoro", "Rukundo", "Cyuzuzo", "Keza", "Kaliza", "Mucyo", "Mizero",
    "Gaju",
    "Manzi", "Juru", "Mutesi", "Sheja", "Izabayo", "Tuyisenge", "Tuyizere",
    "Akimana", "Uwimana", "Tuyishime",
    "Iradukunda", "Ntakirutimana", "munzezero", "Mbanzabigwi", "Bapfakurera",
    "Hakizimana",
    "Habanabakize", "Habanabashaka", "Nziyomaze", "Bazindyiki", "Mahirwe",
    "Yehovayire", "Uwamariya",
    "Uwimbabazi", "Dushime", "Shimirwa", "Aganze", "Ganza",
]
# Pool of 217 synthetic employee ids (random UUID4 strings). Rand() draws one
# id from this pool for every loan, so the same id can appear on several loans.
employee_id = [str(uuid.uuid4()) for _ in range(217)]
print(len(employee_id))
# Creation-date windows for generated loans. Rand() picks one window per loan
# (weights 0.05 / 0.21 / 0.74, matched to this list by position) and passes
# its 'start' and 'end' to fake.date_between().
month_list = [
    dict(start=datetime.date(2021, 4, 1), end=datetime.date(2021, 6, 30)),
    dict(start=datetime.date(2021, 7, 1), end=datetime.date(2021, 7, 31)),
    # Open-ended window: the literal string 'today' is handed to
    # fake.date_between() as end_date instead of a fixed date.
    dict(start=datetime.date(2021, 8, 1), end='today'),
]
def Rand(start, end, num):
    """Build ``num`` synthetic loan records.

    Args:
        start: Lower bound (inclusive) for the requested amount.
        end: Upper bound (exclusive) for the requested amount.
        num: Number of records to generate.

    Returns:
        A list of ``num`` dicts, one per loan, with the keys ``firstName``,
        ``lastName``, ``approvedDate``, ``createdAt``, ``dueDate``,
        ``disbursedDate``, ``creditScore``, ``status``, ``requestedAmount``,
        ``approvedAmount``, ``outstandingAmount``, ``paymentFrequency``,
        ``maxLoanAmount``, ``isPaused``, ``interestRate`` and ``employeeId``.

    Relies on the module-level ``month_list``, ``status_list``,
    ``employee_id``, ``first_name_list``, ``last_name_list`` and ``fake``.
    """
    records = []
    for _ in range(num):
        # The random draws below are made in a fixed order so that a seeded
        # run reproduces the same rows.
        amount = np.random.randint(start, end)
        window = np.random.choice(month_list, p=[0.05, 0.21, 0.74])
        status = np.random.choice(status_list, p=[0.66, 0.14, 0.07, 0.13])

        # Only COMPLETED and ACTIVE loans carry approved/outstanding amounts.
        if status == 'COMPLETED':
            approved, outstanding = amount, 0
        elif status == 'ACTIVE':
            approved = amount
            # 30% of the requested amount counts as already repaid.
            outstanding = amount - (amount * 30/100)
        else:
            approved = outstanding = None

        # The loan ceiling is the requested amount plus 6%.
        ceiling = amount + (amount * 6/100)
        created_on = fake.date_between(
            start_date=window['start'], end_date=window['end']
        )
        borrower = np.random.choice(employee_id)

        # PENDING and REJECTED loans never get approval/disbursal/due dates.
        approved_on = disbursed_on = due_on = None
        if status != 'PENDING' and status != 'REJECTED':
            wait_days = np.random.choice(
                [2, 3, 4, 5, 6], p=[0.25, 0.3, 0.25, 0.1, 0.1]
            )
            approved_on = created_on + datetime.timedelta(days=int(wait_days))
            # Disbursal happens on the approval day; repayment is due three
            # months after that.
            disbursed_on = approved_on
            due_on = disbursed_on + relativedelta(months=+3)

        given = np.random.choice(first_name_list)
        family = np.random.choice(last_name_list)
        score = np.random.randint(321, 876)

        record = {
            'firstName': given,
            'lastName': family,
            'approvedDate': approved_on,
            'createdAt': created_on,
            'dueDate': due_on,
            'disbursedDate': disbursed_on,
            "creditScore": score,
            'status': status,
            'requestedAmount': amount,
            'approvedAmount': approved,
            'outstandingAmount': outstanding,
            'paymentFrequency': 'MONTHLY',
            'maxLoanAmount': ceiling,
            'isPaused': 'FALSE',
            'interestRate': 2,
            "employeeId": borrower,
        }
        records.append(record)
    return records
# Size of the generated dataset and the requested-amount range passed to
# Rand() (lower bound inclusive, upper bound exclusive).
loans = 413
minimum_amount = 88000
maximum_amount = 237600

loans_df = pd.DataFrame(
    Rand(start=minimum_amount, end=maximum_amount, num=loans)
)

# Optional exploration, left disabled: total requested amount per creation
# month, plotted.
# loans_df['createdAt'] = pd.DatetimeIndex(loans_df['createdAt']).month_name()
# g= loans_df.resample(rule='M', on='createdAt')
# g = loans_df.groupby(['createdAt'])["requestedAmount"].sum()
# g
# g.plot(x="createdAt", y=["requestedAmount"])

# Write the generated loans to disk without the DataFrame index column.
loans_df.to_csv('loans.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment