Skip to content

Instantly share code, notes, and snippets.

@thoroc
Created April 29, 2022 14:54
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.
Save thoroc/19a8401f3d4724a0255764d6e5317025 to your computer and use it in GitHub Desktop.
Custom Faker Provider
# Based on the following:
# https://www.datainsightonline.com/post/how-to-generate-fake-dataset-with-python-faker-library
# https://deparkes.co.uk/2020/12/28/python-fake-data-with-faker/
from faker.providers import BaseProvider
from faker import Faker
import pandas as pd
from loguru import logger
# Single Faker instance shared by the rest of the script (en_GB locale).
fake = Faker("en_GB")

# Log five samples from each of a few built-in generators as a quick demo.
for _ in range(5):
    order_id = fake.bothify(text="ord-###")
    logger.info("order: %s" % order_id)

for _ in range(5):
    order_date = fake.date_between(start_date="-2y", end_date="today")
    logger.info("time: %s" % order_date)

for _ in range(5):
    customer_name = fake.name()
    logger.info("name: %s" % customer_name)
class MyProvider(BaseProvider):
    """Custom Faker provider offering item categories and item names.

    Adds three generator methods: ``item_category``, ``food`` and ``fruit``.
    Each one returns a random entry from the matching class-level list.
    """

    # The original assigned ``__provider__`` three times in a row
    # ("item_category", "food", "fruit"); in a class body only the last
    # assignment survives, so the effective value is kept here and the two
    # dead stores are dropped.
    __provider__ = "fruit"

    # Pools the generator methods below draw from.
    item_categories = ["food", "fruit"]
    foods = ["rice", "yam", "beans", "spaghetti"]
    fruits = ["orange", "mango", "banana", "apple"]

    def item_category(self) -> str:
        """Return a random entry from ``item_categories``."""
        return self.random_element(self.item_categories)

    def food(self) -> str:
        """Return a random entry from ``foods``."""
        return self.random_element(self.foods)

    def fruit(self) -> str:
        """Return a random entry from ``fruits``."""
        return self.random_element(self.fruits)
# Register the custom provider on the shared generator; after this call the
# script invokes its methods directly on ``fake``.
fake.add_provider(MyProvider)

# Log five random categories to show the custom provider in action.
for _ in range(5):
    category = fake.item_category()
    logger.info("category: %s" % category)
def link_variables() -> dict:
    """Pick a random category and an item name that belongs to it.

    Returns:
        A dict with two keys: ``"Item_Category"`` (the chosen category) and
        ``"Item_Name"`` (a fruit when the category is ``"fruit"``, otherwise
        a food).
    """
    category = fake.item_category()
    # Every category other than "fruit" falls through to the food list.
    if category == "fruit":
        name = fake.fruit()
    else:
        name = fake.food()
    return {"Item_Category": category, "Item_Name": name}
# Log five linked category/item pairs as a demo.
for _ in range(5):
    linked = link_variables()
    logger.info("variables: %s" % linked)
# Build 100 fake order rows. Each row combines independently generated order
# fields with a linked category/item pair from ``link_variables()``.
# The dict display is evaluated left to right, so the Faker calls happen in
# the same order as before (order id, date, name, then the linked pair) and
# the resulting column order is unchanged.
thelist = [
    {
        "Order_ID": fake.bothify(text="ord-###"),
        "Order_Date": fake.date_between(start_date="-2y", end_date="today"),
        "Customer_Name": fake.name(),
        **link_variables(),
    }
    for _ in range(100)
]

# One DataFrame row per generated order; log the first ten as a preview.
dataset_frame = pd.DataFrame(thelist)
logger.info("\n%s" % dataset_frame.head(10))
@Sohail1964140
Copy link

This is very helpful for me

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment