RachidAZ/misc.py

## misc.py
# Register the storage-account access key in the Spark session configuration.
# Disclaimer: keeping the key in plain text like this is not the best way to
# access your data from a security perspective.
account_key_conf = "fs.azure.account.key.{storage_account_name}.dfs.core.windows.net"
account_key_value = "{storage_key_here}"
spark.conf.set(account_key_conf, account_key_value)


import datetime

# Current local date/time; only its year and month are used below.
now = datetime.datetime.now()

# Glob for the CSV files of the current month: .../raw/data01/<year>/<MM>/*.csv
filePath = (
    'abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/raw/data01/'
    + '{}/{:02d}'.format(now.year, now.month)
    + '/*.csv'
)

# Store the path under the conf key 'f.filePath' so other cells can look it up.
spark.conf.set('f.filePath', filePath)


%sql
-- Expose the CSV files at the configured path as the temporary view V_SomeView.
-- The path placeholder is filled in from the Spark conf entry 'f.filePath'.
-- header "true": the first line of each file supplies the column names.
-- mode "FAILFAST": the read fails on a malformed record instead of skipping it.
CREATE OR REPLACE TEMPORARY VIEW V_SomeView
USING CSV
OPTIONS (path '${f.filePath}', header "true", mode "FAILFAST")


# Persist the output once transformation/cleansing is done.

# Base location of the output; it is empty here, so the save target below
# resolves to the bare name "fact02".
filePath_fact = ''
spark.conf.set('f.filePath_fact', filePath_fact)

df = spark.sql("select * from V_SomeView")

# Write from a single partition, replacing whatever was saved before.
(
    df.repartition(1)
    .write
    .format("com.databricks.spark.csv")
    .mode("overwrite")
    .option("header", "true")
    .save(filePath_fact + "fact02")
)
	# Register the storage-account access key in the Spark session configuration.
	# Disclaimer: keeping the key in plain text like this is not the best way to
	# access your data from a security perspective.
	account_key_conf = "fs.azure.account.key.{storage_account_name}.dfs.core.windows.net"
	account_key_value = "{storage_key_here}"
	spark.conf.set(account_key_conf, account_key_value)


	import datetime

	# Current local date/time; only its year and month are used below.
	now = datetime.datetime.now()

	# Glob for the CSV files of the current month: .../raw/data01/<year>/<MM>/*.csv
	filePath = (
		'abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/raw/data01/'
		+ '{}/{:02d}'.format(now.year, now.month)
		+ '/*.csv'
	)

	# Store the path under the conf key 'f.filePath' so other cells can look it up.
	spark.conf.set('f.filePath', filePath)


	%sql
	-- Expose the CSV files at the configured path as the temporary view V_SomeView.
	-- The path placeholder is filled in from the Spark conf entry 'f.filePath'.
	-- header "true": the first line of each file supplies the column names.
	-- mode "FAILFAST": the read fails on a malformed record instead of skipping it.
	CREATE OR REPLACE TEMPORARY VIEW V_SomeView
	USING CSV
	OPTIONS (path '${f.filePath}', header "true", mode "FAILFAST")


	# Persist the output once transformation/cleansing is done.

	# Base location of the output; it is empty here, so the save target below
	# resolves to the bare name "fact02".
	filePath_fact = ''
	spark.conf.set('f.filePath_fact', filePath_fact)

	df = spark.sql("select * from V_SomeView")

	# Write from a single partition, replacing whatever was saved before.
	(
		df.repartition(1)
		.write
		.format("com.databricks.spark.csv")
		.mode("overwrite")
		.option("header", "true")
		.save(filePath_fact + "fact02")
	)