Skip to content

Instantly share code, notes, and snippets.

@sp1thas
Last active July 7, 2020 06:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sp1thas/8a05a4e2710b82e9d8e57d6153a7dd1f to your computer and use it in GitHub Desktop.
Save sp1thas/8a05a4e2710b82e9d8e57d6153a7dd1f to your computer and use it in GitHub Desktop.
Scrapy ImagesPipeline: store images efficiently using a folder tree structure
import os
from scrapy.pipelines.images import ImagesPipeline
class FolderStructureImagePipeline(ImagesPipeline):
    """Store images using a folder tree structure.

    Every path produced by the parent ``ImagesPipeline`` is rewritten so that
    the file ends up inside nested single-character directories taken from
    the leading characters of its own name. This avoids piling every image
    into a single flat directory.

    The ``DEPTH`` class attribute sets the maximum number of directory levels
    that are inserted; override it in a subclass to change the tree depth.
    """

    # Maximum number of single-character directory levels to insert.
    DEPTH = 3

    def tree_path(self, path: str) -> str:
        """Insert a folder tree, derived from the file name, into ``path``.

        I.e.: ``path/to/image.jpg`` -> ``path/to/i/m/a/image.jpg``

        :param path: original image filepath.
        :return: image filepath with the extra folder tree.
        """
        dirname, filename = os.path.split(path)
        # Build the tree from the name without its extension: otherwise a
        # short name such as "ab.jpg" would yield a "." path component and
        # extension letters as directories ("a/b/./ab.jpg").
        stem = os.path.splitext(filename)[0]
        return os.path.join(dirname, *stem[:self.DEPTH], filename)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path of a full-size image, with the folder tree.

        :param request: request of the image being stored.
        :param response: response of the image download, if available.
        :param info: media pipeline spider info, if available.
        :param item: item the image belongs to (passed by Scrapy 2.4+).
        :return: parent pipeline path with the extra folder tree.
        """
        # Forward ``item`` only when it was supplied, so the call still works
        # on Scrapy releases whose ``file_path`` does not accept it.
        extra = {} if item is None else {"item": item}
        return self.tree_path(
            super().file_path(request, response, info, **extra)
        )

    def thumb_path(self, request, thumb_id, response=None, info=None, *,
                   item=None):
        """Return the storage path of a thumbnail, with the folder tree.

        :param request: request of the image being stored.
        :param thumb_id: identifier of the thumbnail size.
        :param response: response of the image download, if available.
        :param info: media pipeline spider info, if available.
        :param item: item the image belongs to, when the caller provides it.
        :return: parent pipeline thumbnail path with the extra folder tree.
        """
        # Same conditional forwarding as in ``file_path``.
        extra = {} if item is None else {"item": item}
        return self.tree_path(
            super().thumb_path(request, thumb_id, response, info, **extra)
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment