Skip to content

Instantly share code, notes, and snippets.

@iOS0x00
Last active July 12, 2024 01:53
Show Gist options
  • Save iOS0x00/1e370da2e6ec99ce479a to your computer and use it in GitHub Desktop.
Save iOS0x00/1e370da2e6ec99ce479a to your computer and use it in GitHub Desktop.
2000w数据csv入库修正版,基于网上的版本修正,不要太在意代码风格 :doge:
# -*- coding:utf-8 -*-
import os
import csv
import MySQLdb as mysql
inpath = '.'
uipath = unicode(inpath, "utf-8")
files = os.listdir(uipath) # 存放2000万开房数据的路径
print files
def validate(line):
drop = False
if line[0] == '\xf0\xa6\x8d\x8b\xe5\xa4\xa7\xe7\xb6\xad':
return True
if len(line[2]) > 300:
print 'Desciot illegal', line[2]
drop = True
elif len(line[5]) > 4:
print 'Gender illegal', line[5]
drop = True
elif len(line[20]) > 80:
print 'Tel illegal', line[20]
drop = True
elif len(line[21]) > 80:
print 'Fax illegal', line[21]
drop = True
elif len(line[24]) > 300:
print 'Taste illegal', line[24]
drop = True
# if drop:
# print '---------- ', line
return drop
try:
conn = mysql.connect(host="localhost", user="root", passwd="123456", port=3306)
conn.set_character_set('utf8')
cur = conn.cursor()
cur.execute('set names utf8')
cur.execute('SET CHARACTER SET utf8')
cur.execute('SET character_set_connection=utf8')
sql = "CREATE DATABASE IF NOT EXISTS HOTELDATA DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci"
cur.execute(sql)
cur.execute("USE HOTELDATA")
cur.execute("DROP TABLE IF EXISTS KAIFANG")
sql = ''' CREATE TABLE KAIFANG(
Name varchar(50),
CardNo varchar(30),
Descriot varchar(300),
CtfTp varchar(30),
CtfId varchar(50),
Gender varchar(4),
Birthday varchar(30),
Address varchar(100),
Zip varchar(30),
Dirty varchar(30),
District1 varchar(30),
District2 varchar(30),
District3 varchar(30),
District4 varchar(30),
District5 varchar(30),
District6 varchar(30),
FirstNm varchar(100),
LastNm varchar(100),
Duty varchar(30),
Mobile varchar(100),
Tel varchar(80),
Fax varchar(80),
EMail varchar(100),
Nation varchar(30),
Taste varchar(300),
Education varchar(30),
Company varchar(50),
CTel varchar(100),
CAddress varchar(50),
CZip varchar(30),
Family varchar(30),
Version varchar(30),
id varchar(30)
) ENGINE=InnoDB DEFAULT CHARSET=utf8'''
cur.execute(sql)
for this_file in files: # 循环处理文件
# file = uipath + '\\' + file
print this_file # 输出文件名
if not this_file.endswith('.csv'):
continue
fp = open(this_file, "rU")
lines = csv.reader(fp)
data = []
i = 0
j = 0
for line in lines:
i += 1
if i == 1:
continue # 每个文件的第一行不处理
datalen = len(line)
if datalen != 33:
if datalen == 8:
line.extend([''] * 25)
elif datalen == 26:
line.extend([''] * 7)
else:
print '++++++++ERROR++++++ ', i, datalen, line # 输出长度不对的行
continue
if not validate(line):
data.append(line)
j += 1
# print data[0][0], data[0][7]
# 50w记录先入一次
if j >= 500000:
print '50w record in'
cur.executemany("INSERT INTO KAIFANG values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, %s, %s, %s)", data)
conn.commit()
j = 0
data = []
# 不满50w的入
if j > 0:
print j, 'record in'
cur.executemany("INSERT INTO KAIFANG values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, %s, %s, %s)", data)
conn.commit()
cur.close()
conn.close()
except mysql.Error, e:
print "Error Mysql %d:%s" % (e.args[0], e.args[1])
#!/usr/bin/python
fp = open('a.html', 'r')
data = fp.read()
fp.close()
lines = data.split('\n')
#print len(lines)
print 'INSERT INTO KAIFANG(`Name`, `CtfTp`, `CtfId`, `Birthday`, `Mobile`, `Address`) VALUES'
for l in lines:
a = eval(l)
name = a[0].split()[0].strip() if a[0] else ''
if not name:
continue
try:
id_index = a.index('ID')
id_txt = a[id_index + 1]
address = a[id_index + 4]
except ValueError:
id_txt = ''
except IndexError:
address = ''
birthday = ''
for i in a:
if len(i) == 8:
birthday = i
break
mobile = ''
for i in a:
if len(i) == 11:
mobile = i
break
print "('{name}', 'ID', '{id_txt}', '{birthday}', '{mobile}', '{address}'),".format(name=name,
birthday=birthday, id_txt=id_txt, address=address, mobile=mobile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment