Skip to content

Instantly share code, notes, and snippets.

@ltaoist
Last active December 19, 2015 11:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ltaoist/5946822 to your computer and use it in GitHub Desktop.
Save ltaoist/5946822 to your computer and use it in GitHub Desktop.
根据transifex的api上面的resources,根据返回的user信息统计每个用户的贡献条数,根据翻译者的翻译原文总数排序后输出。 第一行没有翻译者名字的为未翻译版本。count是翻译条目数量,sstr是原文长度,dstr是翻译后的长度。
#!/usr/bin/python2
# -*- coding: utf-8 -*-
'''
any.py
~~~~~~
Pull all resource files of a transifex project and analyse them.
The data is pulled using multiprocessing, and each resource is
cached in a generated .pk (pickle) file.
usage:
$ mkdir workdir
$ mv any.py workdir
$ cd workdir
$ # Modify the conf arguments (PROJECT, LANG_CODE, USERNAME,
$ # PASSWORD, THNUM) near the top of this file
$ python any.py
$ rm *.pk # remove all temporarily generated .pk files
'''
from os.path import walk
import multiprocessing
import pickle
import base64
import urllib2
import urllib
import json
import pickle
# Configuration read by worker() and by the __main__ block below.
PROJECT = "opensuse-manuals" # Slug of the transifex project to pull
LANG_CODE = "zh_CN" # Language code of the translation to analyse
USERNAME = "" # Your transifex username (required)
PASSWORD = "" # Your transifex password (required)
THNUM = 10 # Number of worker processes in the pool
class TxcClient:
    '''A simple wrapper of the transifex API.

    Every request goes to ``prefix + url`` and carries an HTTP basic
    ``Authorization`` header built from the given credentials.
    '''

    def __init__(self, prefix, username, password):
        '''Remember the URL prefix and precompute the Authorization header.

        prefix   -- string prepended to every url passed to get()
        username -- account name used for HTTP basic authentication
        password -- password used for HTTP basic authentication
        '''
        self._prefix = prefix
        self._username = username
        self._password = password
        credentials = '%s:%s' % (username, password)
        if not isinstance(credentials, bytes):
            # Text credentials have to be encoded before base64 encoding.
            credentials = credentials.encode('utf-8')
        # b64encode() never inserts line breaks, so no newline stripping
        # is needed (unlike the deprecated encodestring()).
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode('ascii')
        self._authstr = 'Basic %s' % token

    def get(self, url, **kwargs):
        '''Fetch ``prefix + url`` and return the decoded JSON document.

        Keyword arguments, if any, are url-encoded and appended to the
        URL as its query string.  Errors raised while opening the URL or
        decoding the JSON body propagate to the caller.
        '''
        request = urllib2.Request(self._build_url(url, kwargs))
        request.add_header("Authorization", self._authstr)
        response = urllib2.urlopen(request)
        try:
            return json.load(response)
        finally:
            # Always release the connection, even if decoding fails.
            response.close()

    def _build_url(self, url, params):
        '''Return ``prefix + url`` with `params` appended as a query string.'''
        full_url = self._prefix + url
        if params:
            # The previous code called the non-existent Request.add(),
            # which raised AttributeError; for a GET the parameters
            # belong in the query string.
            separator = '&' if '?' in full_url else '?'
            full_url += separator + urllib.urlencode(params)
        return full_url
def worker(project, lang_code):
    '''
    Pull the translation strings of every resource of `project` for
    `lang_code` from transifex and pickle the raw data into a
    `db_<res_slug>.pk` file in the current directory.

    A resource whose .pk file already exists is skipped, so several
    workers can run side by side, and old .pk files have to be removed
    to pull fresh data.  If pulling a resource fails, its partial .pk
    file is removed again and the error is re-raised.
    '''
    # Local imports: the module itself only imports os.path.walk.
    import errno
    import os

    txc = TxcClient("https://www.transifex.com/api/2/",
                    USERNAME, PASSWORD)
    # O_BINARY only exists (and matters) on Windows.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    for resource in txc.get("project/%s/resources/" % project):
        slug = resource['slug']
        path = 'db_%s.pk' % slug
        try:
            # O_EXCL makes "create only if missing" atomic, so exactly
            # one worker claims each resource.
            fd = os.open(path, flags)
        except OSError as err:
            if err.errno == errno.EEXIST:
                continue  # already pulled, or claimed by another worker
            raise
        try:
            with os.fdopen(fd, 'wb') as out:
                data = txc.get(
                    "project/%s/resource/%s/translation/%s/strings/?details" %
                    (project, slug, lang_code))
                pickle.dump(data, out)
        except BaseException:
            # Do not leave an empty/partial file behind: it would block
            # later pulls and break the report.
            os.remove(path)
            raise
        print('Done... %s' % slug)
def analysis():
    '''Gen the report from the .pk files.

    Loads every `*.pk` file found under the current directory
    (recursively), sums per `user` the number of entries (`count`), the
    length of the source strings (`sstr`) and the length of the
    translations (`dstr`), and prints one row per user, sorted by
    `sstr` in descending order.
    '''
    import os  # local import: the module itself only imports os.path.walk

    entries = []
    for dirname, _subdirs, names in os.walk('.'):
        for name in names:
            if not name.endswith('.pk'):
                continue
            # Join with dirname so files in subdirectories resolve, and
            # read in binary mode because pickles are binary data.
            # NOTE: pickle is only safe for files this script wrote itself.
            with open(os.path.join(dirname, name), 'rb') as pk_file:
                entries.extend(pickle.load(pk_file))

    stats = {}
    for entry in entries:
        totals = stats.setdefault(
            entry['user'], {'count': 0, 'dstr': 0, 'sstr': 0})
        totals['count'] += 1
        totals['sstr'] += len(entry['source_string'])
        totals['dstr'] += len(entry['translation'])

    # Largest amount of source text first.
    ranked = sorted(stats.items(), key=lambda item: -item[1]['sstr'])
    row = '%-20s %8s %8s %8s'
    print(row % ('author', 'count', 'sstr', 'dstr'))
    print('\n'.join([
        row % (author, totals['count'], totals['sstr'], totals['dstr'])
        for author, totals in ranked]))
if __name__ == '__main__':
    # Both credentials are required, so bail out when either is missing
    # (the previous `and` only caught the case where both were empty).
    if not USERNAME or not PASSWORD:
        print('Please modify the conf arguments in line 26~30 in this file')
        print('And execute this file again:')
        print(' $ python any.py')
        exit()
    print('Start pulling ...')
    pool = multiprocessing.Pool(processes=THNUM)
    # Every process runs the same worker; the .pk files they create
    # keep them from pulling the same resource twice.
    pending = [pool.apply_async(worker, (PROJECT, LANG_CODE))
               for _ in range(THNUM)]
    pool.close()
    pool.join()
    # apply_async() keeps exceptions inside the result object; report
    # them instead of dropping them silently.
    for result in pending:
        try:
            result.get()
        except Exception as err:
            print('Worker failed: %s' % err)
    print('Pull finish.')
    analysis()
@ltaoist
Copy link
Author

ltaoist commented Jul 8, 2013

一行使用法:

mkdir workdir;cd workdir;wget https://gist.github.com/LTaoist/5946822/raw/5d1fddf876f029f7b996f808eb3fc2f7d75c9c2a/any.py;nano any.py;python any.py | tee any.txt

中途会打开nano,到第26行填入你在transifex的帐号密码然后关闭即可。

PROJECT = "opensuse-manuals"  # The project slug
LANG_CODE = "zh_CN"           # The lang_code
USERNAME = "" # Your username in transifex, require
PASSWORD = ""          # Your password, require
THNUM = 10      # Using how many process

如果要更新,一行写法:

rm *.pk ; python any.py | tee any.txt

使用后删除用到的 workdir。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment