Skip to content

Instantly share code, notes, and snippets.

@k4200
Created June 18, 2011 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save k4200/1033145 to your computer and use it in GitHub Desktop.
Save k4200/1033145 to your computer and use it in GitHub Desktop.
Script to create a CSV file for en-ja term dictionary
# -*- coding: utf-8 -*-
# This is my first python script. Whoo hoo!
# I needed to create a CSV out of Java language resource
# (.properties) files; each line consists of the key, the
# English text, and the Japanese text.
import sys
import glob
import re
import csv
def is_en_file(fn):
    """Return True when *fn* is not a Japanese resource file.

    A Japanese resource file is one whose name ends with the literal
    suffix ``_ja.properties`` (the same pattern the ``*_ja.properties``
    glob selects).  A plain suffix comparison is used instead of a regex
    so that the ``.`` is not treated as a wildcard and text appearing in
    the middle of a name cannot trigger a false match.

    Args:
        fn: file name or path to classify.

    Returns:
        bool: False for ``*_ja.properties`` names, True for anything else.
    """
    return not fn.endswith('_ja.properties')
# Matches one "key=value" line: group 1 is the text before the first '=',
# group 2 is the rest of the line.
reline = re.compile(r'(.*?)=(.*)')
# Japanese resources carry the "_ja" suffix; every other .properties file
# in the current directory is treated as English.
ja_files = glob.glob('*_ja.properties')
en_files = [fn for fn in glob.glob('*.properties') if is_en_file(fn)]
# http://stackoverflow.com/questions/267436/how-do-i-treat-an-ascii-string-as-unicode-and-unescape-the-escaped-characters-in
def encode_val_ja(str):
    """Turn ``\\uXXXX`` escapes in a raw value into CP932-encoded bytes.

    Args:
        str: raw value as read from a .properties file, either a byte
            string or a text string.  (The name shadows the builtin; it
            is kept so existing callers are unaffected.)

    Returns:
        The unescaped value encoded as CP932 bytes.

    Raises:
        UnicodeEncodeError: if a character cannot be represented in the
            intermediate or the target encoding.
        UnicodeDecodeError: if the value contains a malformed escape.
    """
    raw = str
    if not isinstance(raw, bytes):
        # Only byte strings have .decode() on Python 3.  .properties files
        # are ISO-8859-1 by specification, which is also how the
        # 'unicode-escape' codec reads non-escape bytes, so this is lossless.
        raw = raw.encode('latin-1')
    # Swap 'cp932' for 'utf-8' here to emit UTF-8 output instead.
    return raw.decode('unicode-escape').encode('cp932')
# key -> {lang: encoded value}; filled in by add_to_dic().
dic = {}


def add_to_dic(files, lang):
    """Merge the key/value pairs found in *files* into ``dic`` under *lang*.

    Each line is matched against the module-level ``reline`` pattern; the
    value is passed through ``encode_val_ja`` before being stored at
    ``dic[key][lang]``.  When a key occurs more than once for the same
    language, the last occurrence wins.

    Args:
        files: iterable of paths to .properties files.
        lang: label to store the values under (e.g. 'ja' or 'en').
    """
    for fn in files:
        # The context manager closes the file even if a value fails to
        # encode part-way through.
        with open(fn, 'r') as f:
            for line in f:
                # In .properties files a line whose first non-blank
                # character is '#' or '!' is a comment, even if it
                # happens to contain '='.
                if line.lstrip().startswith(('#', '!')):
                    continue
                # Drop the line terminator so a '\r' from CRLF files does
                # not end up inside the value.
                m = reline.search(line.rstrip('\r\n'))
                if not m:
                    continue
                # Whitespace around the separator is not part of the key
                # or the value; trailing whitespace of the value is kept.
                key = m.group(1).strip()
                val = m.group(2).lstrip()
                dic.setdefault(key, {})[lang] = encode_val_ja(val)
# Load the Japanese files first, then the English ones; each call stores
# its values at dic[key][lang].
add_to_dic(ja_files, 'ja')
add_to_dic(en_files, 'en')

# One CSV row per key: key, English text, Japanese text.  A language with
# no entry for the key yields None, which csv writes as an empty field.
writer = csv.writer(sys.stdout, lineterminator="\n")
# Iterate in sorted key order so repeated runs over the same input produce
# identical output instead of depending on dict ordering.
for k in sorted(dic):
    v = dic[k]
    writer.writerow([k, v.get('en'), v.get('ja')])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment