Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Using Python
import os, zipfile
# Pre-create every directory entry listed in the archive (entries whose name
# ends with '/'). Paths are created relative to the current working directory.
# The context manager closes the archive handle once the scan is done, and
# exist_ok=True keeps a re-run (or an already-present directory) from raising
# FileExistsError.
with zipfile.ZipFile('/databricks/driver/D-Dfiles.zip') as z:
    for f in z.namelist():
        if f.endswith('/'):
            os.makedirs(f, exist_ok=True)
# Reading zipped folder data in Pyspark
import zipfile
import io
def zip_extract(x):
    """Decompress an in-memory zip archive into a ``{member name: bytes}`` dict.

    Args:
        x: An indexable pair whose element at index 1 holds the raw bytes of
            a zip archive. Element 0 is not read.

    Returns:
        A dict mapping every name reported by ``ZipFile.namelist()`` to that
        member's decompressed content as ``bytes``. Directory entries map to
        ``b""``.

    Raises:
        zipfile.BadZipFile: If ``x[1]`` is not a valid zip archive.
    """
    # The context manager releases the archive handle deterministically;
    # ZipFile.read opens and closes each member handle internally.
    with zipfile.ZipFile(io.BytesIO(x[1]), "r") as archive:
        return {name: archive.read(name) for name in archive.namelist()}
# Load the zip archive from DBFS as binary data.
# NOTE(review): `sc` is not defined in this snippet; presumably it is the
# SparkContext supplied by the notebook environment. binaryFiles is expected
# to yield (path, content-bytes) pairs, which matches zip_extract reading x[1]
# (confirm against the PySpark docs).
zips = sc.binaryFiles("dbfs:/mnt/vedant-demo/ONG/data/las_raw/D-Dfiles.zip")
# Apply zip_extract to each element, producing one {member name: bytes} dict
# per archive.
files_data = zips.map(zip_extract)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.