合区脚本 (server merge script)
Usage:
1. Export both old zones' databases to CSV text, one CSV file per table, into two separate folders.
2. Export with encoding 936 (GBK), CRLF as the row separator, double quote (") as the text qualifier, and comma (,) as the field delimiter.
3. Run the script from the command line as hequ.py <old-zone-1 data path> <old-zone-2 data path> <new zone id> (see the example below); if no error is reported, the merged data is saved in the merged-db-<zone id> folder.
4. Create a clean database structure in the new zone's database using the latest schema script, then import the data from the merged-db-<zone id> folder.
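For example, assuming the two export folders are named old-zone1 and old-zone2 (the names here are hypothetical) and the new zone id is 5, the script (written for Python 2) would be run as:

    python hequ.py old-zone1 old-zone2 5

If it finishes without errors, the merged CSV files are in merged-db-5, ready for import.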
#encoding=gbk
__author__ = 'harmy'

import os, sys, shutil, csv, shelve, multiprocessing
import datetime
from multiprocessing import Process

if len(sys.argv) != 4:
    print "Usage: hequ.py <old-zone-1 data path> <old-zone-2 data path> <new zone id>"
    exit(1)
data_path1 = sys.argv[1]
data_path2 = sys.argv[2]
zone_id = int(sys.argv[3])
output_path = "merged-db-%d" % zone_id
def csv_to_cache(filename):
    """Load a CSV export into a shelve cache ('.txt' -> '.cache') and
    return its header fields. Tables whose first column is 'account'
    are keyed by account name, everything else by row index."""
    f = open(filename, "rb")
    # Some exports contain stray NUL bytes; strip them before parsing.
    reader = csv.DictReader((line.replace('\0', '') for line in f))
    db = shelve.open(filename.replace('.txt', '.cache'), flag='n', protocol=2)
    try:
        if reader.fieldnames[0] == "account":
            for row in reader:
                db[row[reader.fieldnames[0]]] = row
        else:
            for idx, row in enumerate(reader):
                db[str(idx)] = row
    except csv.Error, e:
        print 'file %s, line %d: %s' % (filename, reader.line_num, e)
    finally:
        db.close()
        f.close()
    return reader.fieldnames
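
# Usage sketch for csv_to_cache (the path is hypothetical): after
#   csv_to_cache("old-zone1/tbl_gold.txt")
# the rows can be read back from the cache file it creates:
#   cache = shelve.open("old-zone1/tbl_gold.cache", "r")
#   for account, row in cache.iteritems():   # row is the original csv.DictReader dict
#       print account, row["gold"]
#   cache.close()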
def merge_table(table_name, generate_id=False, id_column="id", foreign_key="", id_mapper=[]):
    """Merge one table from both old zones into the output folder.
    With generate_id, rows are assigned fresh sequential ids; the return
    value [id_map1, id_map2] maps each zone's old ids to the new ones so
    that dependent tables can remap their foreign keys via id_mapper."""
    print "{} processing {}...\n".format(multiprocessing.current_process().name, table_name)
    fieldnames = csv_to_cache(os.path.join(data_path1, table_name + ".txt"))
    csv_to_cache(os.path.join(data_path2, table_name + ".txt"))
    # Binary mode so the csv module writes clean CRLF row separators.
    merged_content = csv.DictWriter(open(os.path.join(output_path, table_name + ".txt"), "wb"),
                                    fieldnames=fieldnames, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    merged_content.writeheader()
    new_id = 0
    id_map1 = {}
    result = []
    content1 = shelve.open(os.path.join(data_path1, table_name + ".cache"), 'r')
    # Zone 2's cache is opened writable: tbl_gold rows that get merged into a
    # zone-1 row are deleted from it so they are not written a second time.
    content2 = shelve.open(os.path.join(data_path2, table_name + ".cache"), 'w')
    for key, row in content1.iteritems():
        if generate_id:
            new_id += 1
            id_map1[row[id_column]] = new_id
            row[id_column] = new_id
        if "zoneid" in row:
            row["zoneid"] = zone_id
        if foreign_key in row and row[foreign_key] != "0":
            if row[foreign_key] in id_mapper[0]:
                row[foreign_key] = id_mapper[0][row[foreign_key]]
            else:
                print "Error: %s not found in the foreign-key id map" % row[foreign_key]
                row[foreign_key] = 0
        # Special case: an account with gold in both zones gets its balances summed.
        if table_name == "tbl_gold":
            if key in content2:
                row["gold"] = int(row["gold"]) + int(content2[key]["gold"])
                row["paygold"] = int(row["paygold"]) + int(content2[key]["paygold"])
                del content2[key]
        merged_content.writerow(row)
    result.append(id_map1)
    id_map2 = {}
    for key, row in content2.iteritems():
        if generate_id:
            new_id += 1
            id_map2[row[id_column]] = new_id
            row[id_column] = new_id
        if "zoneid" in row:
            row["zoneid"] = zone_id
        if foreign_key in row and row[foreign_key] != "0":
            if row[foreign_key] in id_mapper[1]:
                row[foreign_key] = id_mapper[1][row[foreign_key]]
            else:
                print "Error: %s not found in the foreign-key id map" % row[foreign_key]
                row[foreign_key] = 0
        merged_content.writerow(row)
    result.append(id_map2)
    content1.close()
    content2.close()
    return result
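
# How the returned id maps chain between tables, as an illustrative sketch
# (mirrors the first two calls in main() below):
#   maps = merge_table("tbl_character_item", True)   # -> [zone1 old->new, zone2 old->new]
#   merge_table("tbl_system_mailbox", True, "id", "attachment", maps)
# A mailbox row whose attachment column held an old item id is rewritten to the
# item's new id, so attachments still resolve after the merge.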
def main():
    if not os.path.exists(data_path1) or not os.path.exists(data_path2):
        print "Data path does not exist!"
        exit(1)
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.mkdir(output_path)
    # First check that both zones are on the same data version.
    content1 = csv.DictReader(open(os.path.join(data_path1, "tbl_version.txt"), "r"))
    versionid1 = max(int(row["versionid"]) for row in content1)
    content2 = csv.DictReader(open(os.path.join(data_path2, "tbl_version.txt"), "r"))
    versionid2 = max(int(row["versionid"]) for row in content2)
    if versionid1 != versionid2:
        print "The zones are on data versions %d and %d; upgrade them to the same version before merging!" % (versionid1, versionid2)
        exit(1)
    start_time = datetime.datetime.now()
    # tbl_character_item goes first: its old->new id maps are needed to remap
    # the item ids that tbl_system_mailbox stores in its attachment column.
    id_mapper = merge_table("tbl_character_item", True)
    merge_table("tbl_system_mailbox", True, "id", "attachment", id_mapper)
    # The remaining tables have no cross-table id dependencies, so merge them in parallel.
    merge_args = [
        ("tbl_auction", True),
        ("tbl_bidder_info",),
        ("tbl_character_task",),
        ("tbl_character_misc",),
        ("tbl_character_info",),
        ("tbl_gold",),
        ("tbl_gold_get", True),
        ("tbl_gold_unuse", True),
        ("tbl_guild",),
        ("tbl_guild_member",),
        ("tbl_guild_request",),
        ("tbl_character_autoplay",),
        ("tbl_friend",),
        ("tbl_character_magic",)
    ]
    workers = [Process(target=merge_table, args=arg) for arg in merge_args]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    print "Merge succeeded! It took %s; the merged data is in directory %s" % (datetime.datetime.now() - start_time, output_path)

if __name__ == "__main__":
    main()
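
# A post-merge sanity check, as an optional sketch (assuming tbl_gold is keyed by
# an "account" column, as the cache logic above suggests; zone id 5 is hypothetical):
#   rows = list(csv.DictReader(open("merged-db-5/tbl_gold.txt", "rb")))
#   assert len(rows) == len(set(r["account"] for r in rows)), "duplicate accounts"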