Skip to content

Instantly share code, notes, and snippets.

@cutewalker
Created October 26, 2012 02:28
Show Gist options
  • Save cutewalker/3956577 to your computer and use it in GitHub Desktop.
Save cutewalker/3956577 to your computer and use it in GitHub Desktop.
Example of using chardet for encoding detection, taken from OpenERP
# -*- coding: utf-8 -*-
# https://github.com/buke/openerp-web-import-chardet/blob/master/web_import_chardet/controllers/__init__.py
##############################################################################
# web_import_chardet
# Auto Detect Import File Encoding & Remove BOM Header In UTF8 File.
# Copyright 2012 wangbuke <wangbuke@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# For Commercial or OEM, you need to contact the author and/or licensor
# and ask for their permission.
##############################################################################
try:
from chardet.universaldetector import UniversalDetector
except ImportError:
import sys,os
sys.path.append (os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'libs', 'chardet-1.1')))
from chardet.universaldetector import UniversalDetector
import simplejson
import csv
import itertools
import operator
import xmlrpclib
try:
# embedded
import openerp.addons.web.common.http as openerpweb
from openerp.addons.web import common
from openerp.addons.web.controllers.main import Import
except ImportError:
# standalone
from web import common
import web.common.http as openerpweb
from web.controllers.main import Import
class ChardetImport(Import):
    """CSV import controller with encoding auto-detection and BOM removal.

    Both exposed endpoints answer with an HTML ``<script>`` snippet that
    calls ``window.top.<jsonp>(<json payload>)``.
    """
    _cp_path = "/web/import2"

    @staticmethod
    def _script_response(jsonp, payload, **dumps_options):
        """Return the ``<script>`` snippet handing *payload* to *jsonp*.

        :param jsonp: name of the function to call on ``window.top``
        :param payload: JSON-serializable object passed to that function
        :param dumps_options: extra keyword arguments for ``simplejson.dumps``
        """
        # NOTE(review): ``jsonp`` comes from the request and is interpolated
        # verbatim; consider validating it as a plain identifier.
        body = simplejson.dumps(payload, **dumps_options)
        # "<" can only occur inside JSON string literals, so the escaped form
        # is equivalent for the script while preventing uploaded content
        # (e.g. "</script>") from terminating the surrounding element.
        body = body.replace('<', '\\u003c')
        return '<script>window.top.%s(%s);</script>' % (jsonp, body)

    @staticmethod
    def _rewrite_utf(csvfile, csvcode):
        """Re-encode a UTF upload in place and return the codec name to use.

        Names containing ``utf-8`` are decoded with ``utf-8-sig`` (dropping a
        leading BOM) and written back as ``utf-8``.  Any other name is
        returned unchanged and the file is left untouched.

        :param csvfile: uploaded file object, read from its current position
        :param csvcode: encoding name
        :raises UnicodeDecodeError: the content does not match the encoding
        :raises LookupError: ``csvcode`` is not a known codec
        """
        if 'utf-8' in csvcode:
            source, target = 'utf-8-sig', 'utf-8'
        elif 'utf-16' in csvcode:
            #FIXME not support utf-16
            source, target = csvcode, 'utf-16'
        elif 'utf-32' in csvcode:
            #FIXME not support utf-32
            source, target = 'utf-32be', 'utf-32'
        else:
            # Nothing to rewrite; in particular an unrecognised "utf*" name
            # must not truncate the upload.
            return csvcode
        # Decode before touching the file so a failure leaves it intact.
        contents = csvfile.read().decode(source).encode(target)
        # truncate() does not move the file position; rewind first so the
        # new content is written at offset 0 instead of after a hole.
        csvfile.seek(0)
        csvfile.truncate(0)
        csvfile.write(contents)
        csvfile.seek(0)
        return target

    @openerpweb.httprequest
    def detect_data(self, req, csvfile, csvsep=',', csvdel='"', csvcode='utf-8', jsonp='callback'):
        """Parse the uploaded CSV and return a preview of its first rows.

        :param req: request object (unused here)
        :param csvfile: uploaded file object
        :param csvsep: field delimiter handed to ``csv.reader``
        :param csvdel: quote character handed to ``csv.reader``
        :param csvcode: encoding name, or ``'auto'`` to detect it with chardet
        :param jsonp: name of the ``window.top`` function to call
        :returns: script snippet whose payload is either
                  ``{'records': <first 10 rows>, 'encoding': <name>}``,
                  ``{'error': {'message': ..., 'preview': ...}}`` when the CSV
                  cannot be parsed, or ``{'message': ...}`` when decoding fails
        """
        def decode_failure():
            # Built lazily so the message names the encoding in use when the
            # failure happens.
            return self._script_response(jsonp, {
                'message': u"Failed to decode CSV file using encoding %s, "
                           u"try switching to a different encoding" % csvcode
            })

        #detect encoding
        if csvcode == 'auto':
            detector = UniversalDetector()
            for line in csvfile:
                detector.feed(line)
                # stop reading as soon as the detector has made up its mind
                if detector.done:
                    break
            detector.close()
            csvfile.seek(0)
            # chardet reports None when it cannot guess (e.g. empty upload);
            # fall back to this method's default encoding instead of crashing.
            csvcode = (detector.result['encoding'] or 'utf-8').lower()
            # gb2312 gbk hz-gb-2312 hz-gb
            # NOTE(review): the source's indentation was lost; this
            # normalisation is assumed to apply to detected names only.
            if csvcode == 'gb2312':
                csvcode = 'gbk'
            if 'hz' in csvcode:
                csvcode = 'hz'

        #remove bom
        try:
            csvcode = self._rewrite_utf(csvfile, csvcode)
        except (UnicodeDecodeError, LookupError):
            return decode_failure()

        try:
            data = list(csv.reader(
                csvfile, quotechar=str(csvdel), delimiter=str(csvsep)))
        except csv.Error as e:
            csvfile.seek(0)
            return self._script_response(jsonp, {'error': {
                'message': 'Error parsing CSV file: %s' % e,
                # decodes each byte to a unicode character, which may or
                # may not be printable, but decoding will succeed.
                # Otherwise simplejson will try to decode the `str` using
                # utf-8, which is very likely to blow up on characters out
                # of the ascii range (in range [128, 256))
                'preview': csvfile.read(200).decode('iso-8859-1')}})

        try:
            return self._script_response(
                jsonp, {'records': data[:10], 'encoding': csvcode},
                encoding=csvcode)
        except Exception:
            # Best effort: any failure while serializing with the chosen
            # encoding (bad bytes, unknown codec name, ...) is reported as a
            # decoding problem, without swallowing SystemExit or
            # KeyboardInterrupt as a bare ``except`` would.
            return decode_failure()

    @openerpweb.httprequest
    def import_data(self, req, model, csvfile, csvsep, csvdel, csvcode, jsonp,
                    meta):
        """Decode the uploaded CSV and import its rows into *model*.

        :param req: request object; ``req.session`` performs the import call
        :param model: name of the model to import into
        :param csvfile: uploaded file object
        :param csvsep: field delimiter handed to ``csv.reader``
        :param csvdel: quote character handed to ``csv.reader``
        :param csvcode: encoding name used to decode every cell
        :param jsonp: name of the ``window.top`` function to call
        :param meta: JSON object with the keys ``skip``, ``indices``,
                     ``fields`` and ``context``
        :returns: script snippet whose payload is ``{'success': True}`` or
                  ``{'error': {'message': ...}}``
        """
        def failure(message):
            return self._script_response(jsonp, {'error': {'message': message}})

        #remove bom
        try:
            csvcode = self._rewrite_utf(csvfile, csvcode)
        except (UnicodeDecodeError, LookupError):
            return failure(
                u"Failed to decode CSV file using encoding %s" % csvcode)

        skip, indices, fields, context = \
            operator.itemgetter('skip', 'indices', 'fields', 'context')(
                simplejson.loads(meta, object_hook=common.nonliterals.non_literal_decoder))

        error = None
        if not (csvdel and len(csvdel) == 1):
            error = u"The CSV delimiter must be a single character"
        # Both lists are required: without indices there is nothing to pick
        # from a row (and itemgetter() would raise), without fields there is
        # nothing to import into.
        if not (indices and fields):
            error = u"You must select at least one field to import"
        if error:
            return failure(error)

        # skip ignored records (@skip parameter)
        # then skip empty lines (not valid csv)
        # nb: should these operations be reverted?
        reader = csv.reader(csvfile, quotechar=str(csvdel), delimiter=str(csvsep))
        rows_to_import = (
            row for row in itertools.islice(reader, skip, None) if row)

        # if only one index, itemgetter will return an atom rather than a tuple
        if len(indices) == 1:
            mapper = lambda row: [row[indices[0]]]
        else:
            mapper = operator.itemgetter(*indices)

        data = None
        try:
            # decode each data row
            data = [
                [record.decode(csvcode) for record in row]
                for row in (mapper(raw) for raw in rows_to_import)
                # don't insert completely empty rows (can happen due to fields
                # filtering in case of e.g. o2m content rows)
                if any(row)
            ]
        except UnicodeDecodeError:
            error = u"Failed to decode CSV file using encoding %s" % csvcode
        except csv.Error as e:
            error = u"Could not process CSV file: %s" % e

        # If the file contains nothing (and no more specific error is
        # already known, which must not be overwritten),
        if error is None and not data:
            error = u"File to import is empty"
        if error:
            return failure(error)

        try:
            (code, record, message, _nope) = req.session.model(model).import_data(
                fields, data, 'init', '', False,
                req.session.eval_context(context))
        except xmlrpclib.Fault as e:
            return self._script_response(jsonp, {'error': {
                "message": u"%s, %s" % (e.faultCode, e.faultString)}})

        if code != -1:
            return self._script_response(jsonp, {'success': True})

        msg = u"Error during import: %s\n\nTrying to import record %r" % (
            message, record)
        return failure(msg)
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment