Created
June 22, 2016 18:36
-
-
Save honestbleeps/fd515a11db0e7b369a5a98b9f01f8d53 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Here's the hot mess I whipped up during our code test!
# Obvious things I'd change / improve:
# - no hard-coding of "chunk.*" - use the filename as a prefix
# - no writing to disk - store directly to memcache
# - store the # of chunks and an md5 hash of the file in a meta key or in the first binary chunk, rather than in an info.txt,
#   to save a roundtrip to memcached on each request
# - write a "getChunkAt"-like function, to get chunk #36, for example, in case it has fallen off the cache
from pymemcache.client.base import Client | |
import glob | |
class CacheBigFile:
    """Split a large file into chunk files and load those chunks into memcached.

    Chunk files are written with the relative names ``chunk.1``, ``chunk.2``,
    ... and a one-line ``info.txt`` metadata file is written alongside them,
    so everything lands in the current working directory.
    """

    @staticmethod
    def splitFile(inputFile, chunkSize):
        """Split *inputFile* into ``chunk.N`` files of at most *chunkSize* bytes.

        Also writes ``info.txt`` containing
        ``<inputFile>,chunk.,<number of chunks>,<chunkSize>``.

        Declared as a static method because it uses no instance state; it can
        be called as ``CacheBigFile.splitFile(...)`` or on an instance.

        Args:
            inputFile: Path of the file to split; it is read in binary mode.
            chunkSize: Maximum size of each chunk in bytes; must be a
                positive integer.

        Returns:
            The list of chunk file names that were written, in order.

        Raises:
            ValueError: If *chunkSize* is not positive.
        """
        if chunkSize <= 0:
            raise ValueError("chunkSize must be a positive number of bytes")

        # The whole file is read into memory so it can be sliced below.
        with open(inputFile, 'rb') as f:
            data = f.read()

        size = len(data)
        # Ceiling division with integers only: plain "/" yields a float in
        # Python 3, which would record counts such as "2.0" or "3.5".
        noOfChunks = -(-size // chunkSize)

        # Metadata fields are comma separated; the count is a plain integer
        # (the stray "i" that used to follow it has been dropped).
        with open('info.txt', 'w') as f:
            f.write(inputFile + ',' + 'chunk.,' + str(noOfChunks) + ',' + str(chunkSize))

        chunkNames = []
        # Stop at `size`, not `size + 1`: otherwise an empty trailing chunk is
        # written whenever the file size is an exact multiple of chunkSize.
        for chunkCount, start in enumerate(range(0, size, chunkSize), 1):
            chunkName = "chunk.%s" % chunkCount
            chunkNames.append(chunkName)
            with open(chunkName, 'wb') as f:
                f.write(data[start:start + chunkSize])
        return chunkNames

    def getFiles(self):
        """Return the paths matching ``chunk.*`` in the current directory.

        The order is whatever ``glob.glob`` returns; it is not sorted.
        """
        return glob.glob("chunk.*")

    def writeCache(self):
        """Store every chunk file in memcached, keyed by its file name.

        Connects to memcached on ``localhost:11211`` and closes the
        connection when done, even if a chunk fails to load.
        """
        client = Client(('localhost', 11211))
        try:
            for fileName in self.getFiles():
                # Chunks are raw bytes: read them in binary mode and store the
                # contents, not the file object itself.
                with open(fileName, 'rb') as f:
                    client.set(fileName, f.read())
        finally:
            client.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment