Skip to content

Instantly share code, notes, and snippets.

@Natim
Created August 29, 2012 09:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Natim/3508821 to your computer and use it in GitHub Desktop.
Save Natim/3508821 to your computer and use it in GitHub Desktop.
Read a URL and get the file from the cache if possible, or update the cached file.
# -*- coding: utf-8 -*-
import hashlib
import datetime
import settings
import requests
import os
from wsgiref.handlers import format_date_time
# Directory where downloaded files are cached; read from the project's
# ``settings`` module, falling back to '/tmp' when it is not defined there.
INPUT_CACHE_DIR = getattr(settings, 'INPUT_CACHE_DIR', '/tmp')
def get_file_for_url(url):
    """Return an open file object holding the content found at ``url``.

    The response body is cached on disk in ``INPUT_CACHE_DIR`` under a file
    named after the SHA-256 hex digest of the URL. When a cached copy
    exists, its modification time is sent as ``If-Modified-Since`` so the
    server can answer ``304 Not Modified`` instead of resending the body.

    :param url: the URL to fetch (text or bytes).
    :returns: a ``(fd, from_cache)`` tuple. ``fd`` is a binary file object
        positioned at the start of the content; the caller is responsible
        for closing it. ``from_cache`` is ``True`` when the server answered
        304 and the existing cached file is returned untouched, ``False``
        when the cache file was (re)written from the response.
    :raises IOError/OSError: if the cache file cannot be opened or written.
    """
    # hashlib only accepts bytes; encode text URLs explicitly so the same
    # code works for both byte strings and unicode/text strings.
    url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
    hash_id = hashlib.sha256(url_bytes).hexdigest()
    cache_file_path = os.path.join(INPUT_CACHE_DIR, hash_id)

    headers = {}
    if os.path.exists(cache_file_path):
        # Ask the server to skip the body if nothing changed since the
        # cached copy was last written.
        hash_id_m_time = os.path.getmtime(cache_file_path)
        headers = {'If-Modified-Since': format_date_time(hash_id_m_time)}

    req = requests.get(url, headers=headers)

    if req.status_code == 304:
        # Binary mode: the cache stores the raw response bytes.
        return open(cache_file_path, 'rb'), True

    # NOTE(review): any non-304 response, including 4xx/5xx error pages, is
    # written to the cache as before - confirm whether callers rely on this.
    #
    # 'w+b' creates the file on the first fetch and truncates any previous,
    # possibly longer, content ('r+' did neither), while still allowing the
    # caller to read the data back from the returned handle.
    fd = open(cache_file_path, 'w+b')
    try:
        fd.write(req.content)
        fd.seek(0)
    except Exception:
        # Do not leak the handle if the write fails part-way.
        fd.close()
        raise
    return fd, False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment