Skip to content

Instantly share code, notes, and snippets.

@kamawanu
Created June 18, 2013 14:21
Show Gist options
  • Save kamawanu/030d78b377d4a620a0c8 to your computer and use it in GitHub Desktop.
Save kamawanu/030d78b377d4a620a0c8 to your computer and use it in GitHub Desktop.
#!python
# -*- coding: utf-8 -*-
import sys, unicodedata, logging, re, os.path
# Directory of the running script (taken from argv[0]); the bundled library
# archive is looked up relative to it.
basepath = os.path.dirname( sys.argv[0] )
# Put the pyExcelerator zip archive on the module search path so that the
# `import pyExcelerator` that follows can be resolved from it.
sys.path.append( os.path.join( basepath, "pyexcelerator-0.6.4.1.zip") ) # http://sourceforge.net/projects/pyexcelerator/
import pyExcelerator
class patchedReader(pyExcelerator.CompoundDoc.Reader):
    """Variant of pyExcelerator's Reader that tolerates a broken sector chain.

    Only ``get_stream_data`` is overridden: errors raised while following
    the chain are swallowed and whatever was collected so far is returned.
    """

    def get_stream_data(self, data, SAT, start_sid, sect_size):
        """Return the stream that starts at sector ``start_sid``.

        The chain is followed through ``SAT`` (presumably the sector
        allocation table -- each entry holds the next sector id) for as
        long as the entry is non-negative.  Consecutive sector ids are
        coalesced into ``(first, last)`` runs and the matching
        ``sect_size``-sized slices of ``data`` are concatenated.

        Parameters:
            data: sliceable buffer holding the sectors back to back.
            SAT: indexable sequence mapping a sector id to its successor.
            start_sid: id of the first sector of the stream.
            sect_size: size of one sector, in the units of ``data``.

        Returns:
            The concatenated stream data.  If the chain is damaged (index
            out of range, cycle, ...) the part read so far is returned
            instead of raising.
        """
        sid = start_sid
        chunks = [(sid, sid)]
        # Sectors already placed in the chain; a repeat means the chain is
        # cyclic and following it further would never terminate.
        visited = set([sid])
        try:
            while SAT[sid] >= 0:
                next_in_chain = SAT[sid]
                if next_in_chain in visited:
                    break
                visited.add(next_in_chain)
                last_chunk_start, last_chunk_finish = chunks[-1]
                # Only an immediate successor extends the current run.  A
                # backward link must start a new run, otherwise the run's
                # end would move before its start and its data be lost.
                if next_in_chain - last_chunk_finish == 1:
                    chunks[-1] = (last_chunk_start, next_in_chain)
                else:
                    chunks.append((next_in_chain, next_in_chain))
                sid = next_in_chain
        except Exception:
            # Best effort: there is nothing sensible to do with a damaged
            # chain except keep what has been collected so far.
            import logging
            logging.debug("sector chain broken at sid %r; using partial stream", sid)
        return ''.join([data[s * sect_size:(f + 1) * sect_size] for s, f in chunks])
# Swap the patched class in for pyExcelerator's own Reader, because the stock
# implementation raises an exception from inside pyExcelerator.
pyExcelerator.CompoundDoc.Reader = patchedReader
def extractxlsbook(filename1):
    """Dump one .xls workbook to standard output.

    Prints a header line with the file name, its size in bytes, its
    modification time (local time, ``%Y%m%d-%H%M%S``) and the MD5 hex
    digest of its contents, then, for every sheet returned by
    ``pyExcelerator.parse_xls``, a banner line with the sheet name followed
    by the output of ``extractxlssheet``.

    Parameters:
        filename1: path of the workbook to process.
    """
    import hashlib
    import os.path
    import time

    xlsbooksheetdicts = pyExcelerator.parse_xls(filename1)  # pyExcelerator/ImportXLS.py

    # Hash the raw bytes: the file is binary, so it must be opened in "rb"
    # mode (text mode alters or truncates the data on some platforms), and
    # the handle is closed deterministically by the with-block.
    digest = hashlib.md5()
    with open(filename1, "rb") as xlsfile:
        for block in iter(lambda: xlsfile.read(65536), b""):
            digest.update(block)

    print("# %s %d bytes modtime=%s md5=%s " % (
        filename1,
        os.path.getsize(filename1),
        time.strftime("%Y%m%d-%H%M%S", time.localtime(os.path.getmtime(filename1))),
        digest.hexdigest()))

    for (sheetName, sheetcellsdict) in xlsbooksheetdicts:
        print("*" * 50 + " " + sheetName.encode("utf-8") + " " + "*" * 50)
        extractxlssheet(sheetName, sheetcellsdict)
def extractxlssheet( sheetName, sheetcellsdict ):
    """Print the cells of one sheet as UTF-8 text, one line per row.

    Cells are visited in sorted ``(row, col)`` order.  Each value is
    converted with ``unicode()`` and placed on its row's line: the first
    cell of a row is indented by ``col`` spaces, later cells are padded
    with spaces up to position ``col`` (always at least one space).  Rows
    missing between two populated rows are printed as empty lines.

    Once a row's line has grown beyond 1000 characters the remaining cells
    of that row are dropped; following rows are still printed.

    Parameters:
        sheetName: name of the sheet (not used here).
        sheetcellsdict: mapping whose keys are ``(row, col)`` tuples and
            whose values are the cell contents.
    """
    lastrow = None
    lastbuffer = None
    for (row, col) in sorted(sheetcellsdict.keys()):
        value = unicode(sheetcellsdict[(row, col)])
        if row == lastrow:
            if len(lastbuffer) > 1000:
                # Truncate only this over-long row; leaving the loop here
                # would silently discard every later row of the sheet.
                continue
            padding = max(col - len(lastbuffer), 1)
            lastbuffer = lastbuffer + " " * padding + value
        else:
            # A new row starts: flush the line of the previous one.
            if lastbuffer is not None:
                print(lastbuffer.encode("utf-8"))
            if lastrow is not None:
                # One empty line per row skipped between lastrow and row.
                for _ in xrange(lastrow, row - 1):
                    print("")
            lastbuffer = " " * col + value
            lastrow = row
    if lastbuffer is not None:
        print(lastbuffer.encode("utf-8"))
# Script driver: every command-line argument is treated as a workbook path
# and dumped in the order given.
for filename1 in sys.argv[1:]:
    extractxlsbook(filename1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment