!pip install pyyaml
import yaml
from pyspark.dbutils import DBUtils
import pyspark.sql.functions as F
import time
def get_lastest_file(path):
    """Return the most recently modified file under *path* as a tuple.

    Args:
        path: A filesystem URI listable by Databricks ``dbutils.fs.ls``
            (e.g. an ``abfss://`` container root).

    Returns:
        A 2-tuple ``(file_path, modified_at)`` where ``modified_at`` is a
        local-time string formatted as ``"%Y-%m-%d %H:%M:%S"``.

    Raises:
        ValueError: If *path* contains no entries.
    """
    entries = dbutils.fs.ls(path)
    if not entries:
        raise ValueError(f"No files found under {path}")
    # Pick the newest by the raw epoch-millisecond timestamp rather than a
    # formatted local-time string: numeric comparison is immune to the
    # DST fold ambiguity that time.localtime() can introduce.
    newest = max(entries, key=lambda f: f.modificationTime)
    modified_at = time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime(newest.modificationTime / 1000)
    )
    return (newest.path, modified_at)
# --- Storage-account coordinates for the landing container -------------------
env = "prodfix"
workspace_type = "t"
short_code = "dtrans"
# Account name follows the <env>dseus2<workspace_type><short_code>sa01 pattern.
sa = "{0}dseus2{1}{2}sa01".format(env, workspace_type, short_code)
container = 'ds-tdtrans-landing'
path = f"abfss://{container}@{sa}.dfs.core.windows.net/"

# Resolve the newest landed file; [0] is its full abfss:// path.
lastest_file = get_lastest_file(path)
file_path = lastest_file[0]
# Load the SPI extract and expose only the business-facing identifier column
# (source column 'EID' renamed to 'consumer_src_mid').
df = (
    spark.read
    .option("inferSchema", True)
    .option("header", True)
    .csv(file_path)
    .select(F.col('EID').alias('consumer_src_mid'))
)