Skip to content

Instantly share code, notes, and snippets.

@honestbleeps
Created June 22, 2016 18:36
Show Gist options
  • Save honestbleeps/fd515a11db0e7b369a5a98b9f01f8d53 to your computer and use it in GitHub Desktop.
Save honestbleeps/fd515a11db0e7b369a5a98b9f01f8d53 to your computer and use it in GitHub Desktop.
# here's the hot mess I whipped up during our code test!
# obvious things I'd change / improve:
# - no hard-coding of "chunk.*" - use the filename as a prefix
# - no writing to disk - store directly to memcache
# - store the # of chunks and an md5 hash of the file in a meta key or in the first binary chunk, rather than in an info.txt
# to save a roundtrip to memcached on each request
# - write a "getChunkAt" like function, to get chunk #36, for example, in case it has fallen off the cache
from pymemcache.client.base import Client
import glob
class CacheBigFile:
    """Split a large file into fixed-size chunks on disk, then push each
    chunk into memcached keyed by its chunk filename.

    Chunk files are written to the current working directory as
    "chunk.1", "chunk.2", ... alongside an "info.txt" metadata file.
    """

    @staticmethod
    def splitFile(inputFile, chunkSize):
        """Split *inputFile* into pieces of at most *chunkSize* bytes.

        Writes "chunk.N" files (1-based) plus an "info.txt" metadata file
        of the form "<inputFile>,chunk.,<noOfChunks>,<chunkSize>".

        Returns the list of chunk filenames that were written.

        Declared @staticmethod so instance calls no longer pass the
        instance as inputFile (the original had no `self` parameter);
        class-level calls are unchanged.
        """
        # Read the whole file as binary; `with` guarantees the handle is
        # closed (the original leaked handles on any exception).
        with open(inputFile, 'rb') as f:
            data = f.read()

        totalBytes = len(data)

        # Ceiling division for the chunk count. The original used `/`,
        # which is float division on Python 3 and wrote e.g. "3.5" into
        # the metadata file.
        noOfChunks = totalBytes // chunkSize
        if totalBytes % chunkSize:
            noOfChunks += 1

        # Metadata: source name, chunk-name prefix, chunk count, chunk
        # size. The original emitted a stray 'i' after the count
        # ("...3i,...") — a typo, fixed here.
        with open('info.txt', 'w') as f:
            f.write(inputFile + ',' + 'chunk.,' + str(noOfChunks) + ',' + str(chunkSize))

        chunkNames = []
        # Iterate chunk start offsets. The original used
        # range(0, bytes + 1, chunkSize), whose `+ 1` produced one extra
        # EMPTY chunk whenever the file size was an exact multiple of
        # chunkSize; stopping at totalBytes (exclusive) is correct.
        for chunkCount, start in enumerate(range(0, totalBytes, chunkSize), start=1):
            chunkName = "chunk.%s" % chunkCount
            chunkNames.append(chunkName)
            with open(chunkName, 'wb') as f:
                f.write(data[start:start + chunkSize])
        return chunkNames

    def getFiles(self):
        """Return the chunk filenames found in the current directory."""
        return glob.glob("chunk.*")

    def writeCache(self):
        """Store each chunk file's bytes into memcached, keyed by filename.

        Connects to a memcached instance on localhost:11211; requires the
        server to be running.
        """
        client = Client(('localhost', 11211))
        for fileName in self.getFiles():
            # Open in binary mode and pass the file's BYTES. The original
            # opened in text mode (corrupts/fails on binary data) and
            # passed the file object itself to client.set instead of its
            # contents.
            with open(fileName, 'rb') as f:
                client.set(fileName, f.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment