|
from pyspark.sql import SparkSession |
|
import datetime |
|
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType, DoubleType |
|
from pyspark.sql.functions import col, current_timestamp |
|
|
|
def generate_sample_data(batch_id=1): |
|
data = [ |
|
("ORD001", "PROD-1", "CUST-1", "LOC-1", "STORE-1", "DEPT-1", "EMP-1", 5, 500.0, "Electronics", "North", str(datetime.datetime.now()), "HIGH", "ONLINE", "CREDIT", "COMPLETED"), |
|
("ORD002", "PROD-2", "CUST-2", "LOC-2", "STORE-2", "DEPT-2", "EMP-2", 2, 150.0, "Clothing", "South", str(datetime.datetime.now()), "MEDIUM", "IN-STORE", "CASH", "PENDING"), |
|
("ORD003", "PROD-3", "CUST-1", "LOC-3", "STORE-3", "DEPT-3", "EMP-3", 3, 300.0, "Electronics", "North", str(datetime.datetime.now()), "LOW", "MOBILE", "DEBIT", "COMPLETED"), |