Last active
December 19, 2015 11:19
-
-
Save ltaoist/5946822 to your computer and use it in GitHub Desktop.
根据transifex的api上面的resources,根据返回的user信息统计每个用户的贡献条数,根据翻译者的翻译原文总数排序后输出。 第一行没有翻译者名字的为未翻译版本。count是翻译条目数量,sstr是原文长度,dstr是翻译后的长度。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# -*- coding: utf-8 -*- | |
'''
any.py
~~~~~~
Pull every resource of a Transifex project and analyse the translations.

The resources are downloaded by several processes in parallel
(multiprocessing). Each resource is cached in a pickle file named
db_<resource_slug>.pk so that the remote data is not fetched again.

usage:
    $ mkdir workdir
    $ mv any.py workdir
    $ cd workdir
    $ # Set PROJECT, LANG_CODE, USERNAME, PASSWORD and THNUM near the top of this file
    $ python any.py
    $ rm *.pk  # remove all temporary generated .pk files
'''
from os.path import walk | |
import multiprocessing | |
import pickle | |
import base64 | |
import urllib2 | |
import urllib | |
import json | |
import pickle | |
PROJECT = "opensuse-manuals" # Transifex project slug whose resources are pulled
LANG_CODE = "zh_CN" # Language code of the translation to analyse
USERNAME = "" # Your Transifex username (required)
PASSWORD = "" # Your Transifex password (required)
THNUM = 10 # Number of worker processes used for pulling
class TxcClient(object):
    '''A simple wrapper of the Transifex API using HTTP Basic authentication.

    prefix   -- base URL prepended to every path given to `get`.
    username -- account name used to build the Authorization header.
    password -- account password used to build the Authorization header.
    '''

    def __init__(self, prefix, username, password):
        self._prefix = prefix
        self._username = username
        self._password = password
        credentials = '%s:%s' % (username, password)
        # Text (unicode) credentials have to become bytes before they can be
        # base64-encoded; byte strings are passed through untouched.
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        # b64encode never inserts line breaks, so no newline stripping is
        # needed afterwards.
        token = base64.b64encode(credentials)
        # Keep the header value a native string even where bytes is not str.
        if not isinstance(token, str):
            token = token.decode('ascii')
        self._authstr = 'Basic %s' % token

    def get(self, url, **kwargs):
        '''Request `prefix + url` and return the decoded JSON response.

        Keyword arguments, if any, are URL-encoded and appended to the query
        string of the request. Errors raised by urllib2.urlopen or json.load
        propagate to the caller.
        '''
        target = self._prefix + url
        if kwargs:
            # The path may already carry a query part (e.g. ".../?details").
            separator = '&' if '?' in target else '?'
            target = target + separator + urllib.urlencode(kwargs)
        request = urllib2.Request(target)
        request.add_header("Authorization", self._authstr)
        response = urllib2.urlopen(request)
        try:
            return json.load(response)
        finally:
            # Release the connection even when the body is not valid JSON.
            response.close()
def worker(project, lang_code):
    '''Pull the translation strings of every resource and cache them on disk.

    For each resource slug of `project`, the strings of `lang_code` are
    fetched and pickled into `db_<slug>.pk` in the current directory.

    A resource whose .pk file already exists is skipped, so old .pk files
    must be removed by hand to get fresh data. Creating the file is also how
    a worker claims a resource: every worker walks the same resource list,
    and only the one that creates the file downloads it.

    If the download or the dump fails, the partially written file is removed
    (so the resource can be pulled again) and the error is re-raised.
    '''
    # Function-scope imports: the module only imports `walk` from os.path.
    import errno
    import os

    txc = TxcClient("https://www.transifex.com/api/2/",
                    USERNAME, PASSWORD)
    # O_EXCL makes "create only if missing" a single atomic step, so two
    # workers can never both claim the same resource.
    claim_flags = (os.O_WRONLY | os.O_CREAT | os.O_EXCL |
                   getattr(os, 'O_BINARY', 0))
    resources = txc.get("project/%s/resources/" % project)
    for resource in resources:
        slug = resource['slug']
        path = 'db_%s.pk' % slug
        try:
            fd = os.open(path, claim_flags)
        except OSError as err:
            if err.errno == errno.EEXIST:
                # Already pulled, or claimed by another worker.
                continue
            raise
        try:
            with os.fdopen(fd, 'wb') as out:
                data = txc.get(
                    "project/%s/resource/%s/translation/%s/strings/?details" %
                    (project, slug, lang_code))
                pickle.dump(data, out)
        except BaseException:
            # Do not leave an empty or truncated cache file behind: later
            # runs would skip it and the analysis could not load it.
            os.remove(path)
            raise
        print('Done... %s' % slug)
def analysis():
    '''Print a per-author report built from the cached .pk files.

    Every file ending in `.pk` below the current directory is unpickled and
    its entries are merged. For each distinct `user` value the report shows:

    count -- number of entries attributed to that user
    sstr  -- total length of the `source_string` values
    dstr  -- total length of the `translation` values

    Rows are printed in descending order of `sstr`.
    '''
    # Function-scope import: the module only imports `walk` from os.path.
    import os

    records = []
    for dirname, _subdirs, filenames in os.walk('.'):
        for filename in filenames:
            if not filename.endswith('.pk'):
                continue
            # The path must include the directory being visited, otherwise a
            # file found in a sub-directory is looked up in the wrong place.
            # NOTE: unpickling can execute arbitrary code; only run this in a
            # directory whose .pk files were produced by this script.
            with open(os.path.join(dirname, filename), 'rb') as cache:
                records.extend(pickle.load(cache))

    stats = {}
    for record in records:
        entry = stats.setdefault(record['user'],
                                 {'count': 0, 'dstr': 0, 'sstr': 0})
        entry['count'] += 1
        entry['sstr'] += len(record['source_string'])
        entry['dstr'] += len(record['translation'])

    ranked = sorted(stats.items(), key=lambda item: -item[1]['sstr'])
    row = '%-20s %8s %8s %8s'
    print(row % ('author', 'count', 'sstr', 'dstr'))
    print('\n'.join([row % (name, totals['count'], totals['sstr'],
                            totals['dstr'])
                     for name, totals in ranked]))
if __name__ == '__main__':
    # Both credentials are needed to authenticate, so refuse to start when
    # either one is still empty.
    if not USERNAME or not PASSWORD:
        print('Please modify the conf arguments in line 26~30 in this file')
        print('And execute this file again:')
        print(' $ python any.py')
        # Non-zero status so that calling shells can detect the failure.
        raise SystemExit(1)
    print('Start pulling ...')
    pool = multiprocessing.Pool(processes=THNUM)
    # Every worker walks the whole resource list; keep the result handles so
    # that failures inside the workers can be reported below.
    pending = [pool.apply_async(worker, (PROJECT, LANG_CODE))
               for _ in range(THNUM)]
    pool.close()
    pool.join()
    for result in pending:
        try:
            result.get()
        except Exception as exc:
            # Report and carry on: the analysis still covers whatever was
            # pulled successfully.
            print('Worker failed: %r' % (exc,))
    print('Pull finish.')
    analysis()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
一行使用法:
中途会打开nano,到第26行填入你在transifex的帐号密码然后关闭即可。
如果要更新,一行写法:
使用后删除用到的 workdir。