@amalgjose
Created September 23, 2020 06:34
PySpark program that interacts with Azure Data Lake Storage Gen 2 through the Hadoop FileSystem (HDFS) API. Delete and existence-check operations are demonstrated; the same pattern can be adapted to the other file system operations. For more details, refer to https://amalgjose.com
from pyspark.sql import SparkSession
# Author: Amal G Jose
# Reference: https://amalgjose.com
# prepare spark session
spark = SparkSession.builder.appName('filesystemoperations').getOrCreate()
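# NOTE (assumption, not part of the original gist): the hadoop-azure package,
# which provides the abfs:// scheme, must be on the Spark classpath, e.g. via
# .config('spark.jars.packages', 'org.apache.hadoop:hadoop-azure:<version>')
# when building the session above.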
# spark context
sc = spark.sparkContext
# set ADLS file system URI
sc._jsc.hadoopConfiguration().set('fs.defaultFS', 'abfs://CONTAINER@ACCOUNTNAME.dfs.core.windows.net/')
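# NOTE (assumption, not part of the original gist): authentication to the
# storage account must also be configured; for example, with a shared account
# key (placeholder values shown):
# sc._jsc.hadoopConfiguration().set(
#     'fs.azure.account.key.ACCOUNTNAME.dfs.core.windows.net', '<ACCOUNT_KEY>')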
# Obtain a Hadoop FileSystem handle for the configured default file system
fs = sc._jvm.org.apache.hadoop.fs.FileSystem.get(sc._jsc.hadoopConfiguration())
# Alias the Java Path class for reuse below
Path = sc._jvm.org.apache.hadoop.fs.Path
# Enter the ADLS path
path = "Your/adls/path"
# Delete the file or directory in ADLS; the second argument enables recursive
# deletion of directories. Returns True on success.
deletion_status = fs.delete(Path(path), True)
print("Deletion status -->", deletion_status)
# Verify the deletion: exists() returns True if the path still exists and False otherwise
status = fs.exists(Path(path))
print("Status -->", status)