kokukuma/gist:b52eca26a8d018425552 Secret

## gistfile1.py
import os
import re
import itertools
#import subprocess
#from urlparse import urlparse
from bzrlib import commit
from bzrlib.branch import Branch
from bzrlib.workingtree import WorkingTree
from bzrlib.commands import Command,register_command


# Plugin metadata.
version_info = (0, 0, 1, 'dev', 1)
__author__ = 'karino'
__date__ = '2012/01/23'

# Tuning knobs read by cmd_suggest.
# Stop walking the history once this many revisions have been examined.
search_limit = 350
# Stop walking the history once more than this many file matches were found.
capture_limit = 10
# Minimum cosine similarity a past commit needs to take part in the prefix vote.
cos_limit = 0.4
# Only the `top` best-ranked commit messages take part in the prefix vote.
top = 5
# When true, revisions whose id does not contain the configured e-mail are skipped.
only_my_commit = False
# Words that are never proposed as a prefix.
ignore_message = ['commit_message', 'fix']


class cmd_suggest(Command):
    """Suggest a commit-message prefix from similar past commits, then commit.

    The files modified in the working tree are compared with the files
    touched by earlier revisions. Every revision becomes a 0/1 vector over
    the union of file paths, the vectors are ranked by cosine similarity to
    the current change set, and the most frequent word in the best-ranked
    commit messages is offered as a prefix for the new commit message.

    NOTE: this is Python 2 code (bzrlib, raw_input). Every print is written
    as a single parenthesised string so it prints exactly what the Python 2
    print statement would while remaining valid syntax on Python 3.
    """
    takes_args = ["revision?"]

    def run(self, revision=None):
        """Interactively build a commit message and optionally commit.

        :param revision: accepted because of ``takes_args`` but not used.
        :return: None
        """
        branch = Branch.open_containing(os.getcwd())[0]

        change_file = self.get_changes_from_status()
        if not change_file:
            print('no change file')
            return

        print('')
        print('changed files ')
        for path in change_file:
            print(' + ' + path)
        print('')

        print('Loading commit logs...')
        dataset = self.get_log_dataset(branch, change_file)
        # Key 0 is a sentinel entry for the working tree itself; revision
        # ids are strings, so it cannot collide with a real revision.
        dataset[0] = [change_file, 'commit_message']

        data = self.make_matrix(dataset)

        prefix = ''
        if len(data) <= 1:
            print('')
            print('no nearly commit')
        else:
            current = data[0]
            msgdata = []
            for key, vector in data.items():
                # Skip the sentinel by key. The former substring test on
                # the message also dropped real commits that happened to
                # mention "commit_message".
                if key == 0:
                    continue
                score = self.cos(current, vector)
                if score > 0:
                    msgdata.append([score, dataset[key][1]])

            # Most similar commits first.
            msgdata.sort(reverse=True)

            for entry in msgdata:
                print(' +  %s  :  %s' % (round(entry[0], 3), entry[1]))

            prefix = self.get_prefix(cos_limit, msgdata)
            print('')
            print('recommended prefix : ' + prefix)

        print('')
        msg = self.commit_input('commit message detail > ')
        # Join with exactly one space, and add nothing when no prefix was
        # found, so the committed text matches the preview below.
        if prefix:
            with_prefix = prefix + ' ' + msg
        else:
            with_prefix = msg
        print('')
        print('............... commit message : ' + with_prefix)
        print('')

        print('Select action')
        print(' + j: set prefix and commit')
        print(' + k: no prefix commit')
        print(' + l: no commit')
        act = self.user_input('............... j/k/l > ')
        print('')

        if act == 'set_prefix_commit':
            chosen = with_prefix
        elif act == 'no_prefix_commit':
            chosen = msg
        else:
            print('no commit ')
            print('')
            return

        # Never commit a blank message, whichever action was picked.
        chosen = chosen.strip()
        if not chosen:
            print('no commit message')
            return

        print('exe commit  ' + chosen)
        self.exe_commit(chosen)
        print('')

    def get_log_dataset(self, branch, change_file):
        """Collect past revisions that touched any currently changed file.

        The history is walked newest first. The walk stops after
        ``search_limit`` revisions have been examined or once more than
        ``capture_limit`` file matches have been counted.

        :param branch: branch whose history and repository are read.
        :param change_file: paths modified in the working tree.
        :return: dict mapping revision id to
            ``[added_and_modified_paths, commit_message]``.
        """
        dataset = {}

        username = None
        if only_my_commit:
            # Resolve the identity once instead of once per revision.
            username = self.get_username(branch)
            if username is None:
                # Without an e-mail no revision can be attributed to the
                # user; the old code raised TypeError on `None in id`.
                return dataset

        examined = 0
        captured = 0
        for target_id in reversed(branch.revision_history()):
            if only_my_commit and username not in target_id:
                continue

            # Check the limits before fetching the delta, which is the
            # slow part of the loop.
            examined += 1
            if examined > search_limit or captured > capture_limit:
                break

            changes = branch.repository.get_revision_delta(target_id)
            files = [entry[0] for entry in
                     itertools.chain(changes.added, changes.modified)]

            # One hit per changed file found in this revision, as before.
            hits = sum(1 for path in change_file if path in files)
            if hits:
                captured += hits
                message = branch.repository.get_revision(target_id).message
                dataset[target_id] = [files, message]
        return dataset

    def get_username(self, branch):
        """Return the text between ``<`` and ``>`` in the configured user name.

        :param branch: branch whose configuration is read.
        :return: the captured text, or None when there is no ``<...>`` part.
        """
        username = branch.get_config().username()
        found = re.search(r'<(.+)>', username)
        return found.group(1) if found else None

    def get_prefix(self, cos_limit, msgdata):
        """Return the most frequent word among the best matching messages.

        Only the first ``top`` entries of ``msgdata`` whose score is at
        least ``cos_limit`` are considered. Words listed in
        ``ignore_message`` never count. Ties are broken in favour of the
        word that sorts last, as the original implementation did.

        :param cos_limit: minimum similarity for a message to be used.
        :param msgdata: list of ``[score, message]`` sorted best first.
        :return: the winning word, or '' when no word qualifies.
        """
        ignored = set(ignore_message)
        counts = {}
        for entry in msgdata[:top]:
            if entry[0] < cos_limit:
                continue
            # split() drops empty tokens, so repeated blanks can no longer
            # make the empty string win the vote. Ignored words are
            # filtered here rather than removed from a list that is being
            # iterated, which used to skip adjacent ignored words.
            for word in entry[1].split():
                if word not in ignored:
                    counts[word] = counts.get(word, 0) + 1

        if not counts:
            return ''
        return max((count, word) for word, count in counts.items())[1]

    def cos(self, v1, v2):
        """Return the cosine similarity of two numeric vectors.

        :param v1: first vector.
        :param v2: second vector, expected to have the same length.
        :return: the similarity as a float; 0.0 when either vector has zero
            length, where the original raised ZeroDivisionError.
        """
        import math

        inner_product = sum(a * b for a, b in zip(v1, v2))
        v1_length = math.sqrt(sum(a * a for a in v1))
        v2_length = math.sqrt(sum(b * b for b in v2))

        denominator = v1_length * v2_length
        if denominator == 0:
            return 0.0
        return inner_product / denominator

    def get_keyword(self, dataset):
        """Return every distinct file path in ``dataset``, in first-seen order.

        :param dataset: dict whose values are ``[paths, message]``.
        :return: list of unique paths.
        """
        keyword = []
        seen = set()
        for entry in dataset.values():
            for path in entry[0]:
                if path not in seen:
                    seen.add(path)
                    keyword.append(path)
        return keyword

    def make_vecter(self, keyword, data):
        """Return a 0/1 vector marking which ``keyword`` paths ``data`` touches.

        :param keyword: ordered list of all known paths.
        :param data: one dataset entry, ``[paths, message]``.
        :return: list of 0/1 with one item per keyword.
        """
        touched = data[0]
        return [1 if path in touched else 0 for path in keyword]

    def make_matrix(self, dataset):
        """Return a dict mapping each dataset key to its 0/1 path vector.

        :param dataset: dict whose values are ``[paths, message]``.
        :return: dict with the same keys and list-of-int values.
        """
        keyword = self.get_keyword(dataset)
        return dict((key, self.make_vecter(keyword, entry))
                    for key, entry in dataset.items())

    def commit_input(self, prompt):
        """Read one line from the user and decode it as UTF-8.

        :param prompt: text shown to the user.
        :return: the entered text as unicode.
        """
        return raw_input(prompt).decode('utf-8')

    def user_input(self, prompt):
        """Ask which action to take and translate the key into an action name.

        Surrounding whitespace and letter case are ignored.

        :param prompt: text shown to the user.
        :return: 'set_prefix_commit' for j, 'no_prefix_commit' for k,
            'no_commit' for anything else.
        """
        actions = {'j': 'set_prefix_commit', 'k': 'no_prefix_commit'}
        return actions.get(raw_input(prompt).strip().lower(), 'no_commit')

    def exe_commit(self, msg):
        """Commit the working tree containing the current directory.

        :param msg: commit message.
        :return: whatever ``Commit.commit`` returns.
        """
        wt = WorkingTree.open_containing(os.getcwd())[0]
        return commit.Commit().commit(message=msg, working_tree=wt)

    def get_changes_from_status(self):
        """Return the UTF-8 encoded paths of files modified in the working tree.

        Only the ``modified`` part of the delta is reported; added files
        are not included.

        :return: list of byte-string paths.
        """
        wt = WorkingTree.open_containing(os.getcwd())[0]
        changes = wt.changes_from(wt.basis_tree(), want_unversioned=True)
        return [mod[0].encode('utf-8') for mod in changes.modified]

# Register cmd_suggest with bzrlib's command registry
# (presumably exposing it as `bzr suggest` -- confirm against bzrlib naming rules).
register_command(cmd_suggest)
	import os
	import re
	import itertools
	#import subprocess
	#from urlparse import urlparse
	from bzrlib import commit
	from bzrlib.branch import Branch
	from bzrlib.workingtree import WorkingTree
	from bzrlib.commands import Command,register_command


	# Plugin metadata.
	version_info = (0, 0, 1, 'dev', 1)
	__author__ = 'karino'
	__date__ = '2012/01/23'

	# Tuning knobs read by cmd_suggest.
	# Stop walking the history once this many revisions have been examined.
	search_limit = 350
	# Stop walking the history once more than this many file matches were found.
	capture_limit = 10
	# Minimum cosine similarity a past commit needs to take part in the prefix vote.
	cos_limit = 0.4
	# Only the `top` best-ranked commit messages take part in the prefix vote.
	top = 5
	# When true, revisions whose id does not contain the configured e-mail are skipped.
	only_my_commit = False
	# Words that are never proposed as a prefix.
	ignore_message = ['commit_message', 'fix']


	class cmd_suggest(Command):
	    """Suggest a commit-message prefix from similar past commits, then commit.

	    The files modified in the working tree are compared with the files
	    touched by earlier revisions. Every revision becomes a 0/1 vector over
	    the union of file paths, the vectors are ranked by cosine similarity to
	    the current change set, and the most frequent word in the best-ranked
	    commit messages is offered as a prefix for the new commit message.

	    NOTE: this is Python 2 code (bzrlib, raw_input). Every print is written
	    as a single parenthesised string so it prints exactly what the Python 2
	    print statement would while remaining valid syntax on Python 3.
	    """
	    takes_args = ["revision?"]

	    def run(self, revision=None):
	        """Interactively build a commit message and optionally commit.

	        :param revision: accepted because of ``takes_args`` but not used.
	        :return: None
	        """
	        branch = Branch.open_containing(os.getcwd())[0]

	        change_file = self.get_changes_from_status()
	        if not change_file:
	            print('no change file')
	            return

	        print('')
	        print('changed files ')
	        for path in change_file:
	            print(' + ' + path)
	        print('')

	        print('Loading commit logs...')
	        dataset = self.get_log_dataset(branch, change_file)
	        # Key 0 is a sentinel entry for the working tree itself; revision
	        # ids are strings, so it cannot collide with a real revision.
	        dataset[0] = [change_file, 'commit_message']

	        data = self.make_matrix(dataset)

	        prefix = ''
	        if len(data) <= 1:
	            print('')
	            print('no nearly commit')
	        else:
	            current = data[0]
	            msgdata = []
	            for key, vector in data.items():
	                # Skip the sentinel by key. The former substring test on
	                # the message also dropped real commits that happened to
	                # mention "commit_message".
	                if key == 0:
	                    continue
	                score = self.cos(current, vector)
	                if score > 0:
	                    msgdata.append([score, dataset[key][1]])

	            # Most similar commits first.
	            msgdata.sort(reverse=True)

	            for entry in msgdata:
	                print(' +  %s  :  %s' % (round(entry[0], 3), entry[1]))

	            prefix = self.get_prefix(cos_limit, msgdata)
	            print('')
	            print('recommended prefix : ' + prefix)

	        print('')
	        msg = self.commit_input('commit message detail > ')
	        # Join with exactly one space, and add nothing when no prefix was
	        # found, so the committed text matches the preview below.
	        if prefix:
	            with_prefix = prefix + ' ' + msg
	        else:
	            with_prefix = msg
	        print('')
	        print('............... commit message : ' + with_prefix)
	        print('')

	        print('Select action')
	        print(' + j: set prefix and commit')
	        print(' + k: no prefix commit')
	        print(' + l: no commit')
	        act = self.user_input('............... j/k/l > ')
	        print('')

	        if act == 'set_prefix_commit':
	            chosen = with_prefix
	        elif act == 'no_prefix_commit':
	            chosen = msg
	        else:
	            print('no commit ')
	            print('')
	            return

	        # Never commit a blank message, whichever action was picked.
	        chosen = chosen.strip()
	        if not chosen:
	            print('no commit message')
	            return

	        print('exe commit  ' + chosen)
	        self.exe_commit(chosen)
	        print('')

	    def get_log_dataset(self, branch, change_file):
	        """Collect past revisions that touched any currently changed file.

	        The history is walked newest first. The walk stops after
	        ``search_limit`` revisions have been examined or once more than
	        ``capture_limit`` file matches have been counted.

	        :param branch: branch whose history and repository are read.
	        :param change_file: paths modified in the working tree.
	        :return: dict mapping revision id to
	            ``[added_and_modified_paths, commit_message]``.
	        """
	        dataset = {}

	        username = None
	        if only_my_commit:
	            # Resolve the identity once instead of once per revision.
	            username = self.get_username(branch)
	            if username is None:
	                # Without an e-mail no revision can be attributed to the
	                # user; the old code raised TypeError on `None in id`.
	                return dataset

	        examined = 0
	        captured = 0
	        for target_id in reversed(branch.revision_history()):
	            if only_my_commit and username not in target_id:
	                continue

	            # Check the limits before fetching the delta, which is the
	            # slow part of the loop.
	            examined += 1
	            if examined > search_limit or captured > capture_limit:
	                break

	            changes = branch.repository.get_revision_delta(target_id)
	            files = [entry[0] for entry in
	                     itertools.chain(changes.added, changes.modified)]

	            # One hit per changed file found in this revision, as before.
	            hits = sum(1 for path in change_file if path in files)
	            if hits:
	                captured += hits
	                message = branch.repository.get_revision(target_id).message
	                dataset[target_id] = [files, message]
	        return dataset

	    def get_username(self, branch):
	        """Return the text between ``<`` and ``>`` in the configured user name.

	        :param branch: branch whose configuration is read.
	        :return: the captured text, or None when there is no ``<...>`` part.
	        """
	        username = branch.get_config().username()
	        found = re.search(r'<(.+)>', username)
	        return found.group(1) if found else None

	    def get_prefix(self, cos_limit, msgdata):
	        """Return the most frequent word among the best matching messages.

	        Only the first ``top`` entries of ``msgdata`` whose score is at
	        least ``cos_limit`` are considered. Words listed in
	        ``ignore_message`` never count. Ties are broken in favour of the
	        word that sorts last, as the original implementation did.

	        :param cos_limit: minimum similarity for a message to be used.
	        :param msgdata: list of ``[score, message]`` sorted best first.
	        :return: the winning word, or '' when no word qualifies.
	        """
	        ignored = set(ignore_message)
	        counts = {}
	        for entry in msgdata[:top]:
	            if entry[0] < cos_limit:
	                continue
	            # split() drops empty tokens, so repeated blanks can no longer
	            # make the empty string win the vote. Ignored words are
	            # filtered here rather than removed from a list that is being
	            # iterated, which used to skip adjacent ignored words.
	            for word in entry[1].split():
	                if word not in ignored:
	                    counts[word] = counts.get(word, 0) + 1

	        if not counts:
	            return ''
	        return max((count, word) for word, count in counts.items())[1]

	    def cos(self, v1, v2):
	        """Return the cosine similarity of two numeric vectors.

	        :param v1: first vector.
	        :param v2: second vector, expected to have the same length.
	        :return: the similarity as a float; 0.0 when either vector has zero
	            length, where the original raised ZeroDivisionError.
	        """
	        import math

	        inner_product = sum(a * b for a, b in zip(v1, v2))
	        v1_length = math.sqrt(sum(a * a for a in v1))
	        v2_length = math.sqrt(sum(b * b for b in v2))

	        denominator = v1_length * v2_length
	        if denominator == 0:
	            return 0.0
	        return inner_product / denominator

	    def get_keyword(self, dataset):
	        """Return every distinct file path in ``dataset``, in first-seen order.

	        :param dataset: dict whose values are ``[paths, message]``.
	        :return: list of unique paths.
	        """
	        keyword = []
	        seen = set()
	        for entry in dataset.values():
	            for path in entry[0]:
	                if path not in seen:
	                    seen.add(path)
	                    keyword.append(path)
	        return keyword

	    def make_vecter(self, keyword, data):
	        """Return a 0/1 vector marking which ``keyword`` paths ``data`` touches.

	        :param keyword: ordered list of all known paths.
	        :param data: one dataset entry, ``[paths, message]``.
	        :return: list of 0/1 with one item per keyword.
	        """
	        touched = data[0]
	        return [1 if path in touched else 0 for path in keyword]

	    def make_matrix(self, dataset):
	        """Return a dict mapping each dataset key to its 0/1 path vector.

	        :param dataset: dict whose values are ``[paths, message]``.
	        :return: dict with the same keys and list-of-int values.
	        """
	        keyword = self.get_keyword(dataset)
	        return dict((key, self.make_vecter(keyword, entry))
	                    for key, entry in dataset.items())

	    def commit_input(self, prompt):
	        """Read one line from the user and decode it as UTF-8.

	        :param prompt: text shown to the user.
	        :return: the entered text as unicode.
	        """
	        return raw_input(prompt).decode('utf-8')

	    def user_input(self, prompt):
	        """Ask which action to take and translate the key into an action name.

	        Surrounding whitespace and letter case are ignored.

	        :param prompt: text shown to the user.
	        :return: 'set_prefix_commit' for j, 'no_prefix_commit' for k,
	            'no_commit' for anything else.
	        """
	        actions = {'j': 'set_prefix_commit', 'k': 'no_prefix_commit'}
	        return actions.get(raw_input(prompt).strip().lower(), 'no_commit')

	    def exe_commit(self, msg):
	        """Commit the working tree containing the current directory.

	        :param msg: commit message.
	        :return: whatever ``Commit.commit`` returns.
	        """
	        wt = WorkingTree.open_containing(os.getcwd())[0]
	        return commit.Commit().commit(message=msg, working_tree=wt)

	    def get_changes_from_status(self):
	        """Return the UTF-8 encoded paths of files modified in the working tree.

	        Only the ``modified`` part of the delta is reported; added files
	        are not included.

	        :return: list of byte-string paths.
	        """
	        wt = WorkingTree.open_containing(os.getcwd())[0]
	        changes = wt.changes_from(wt.basis_tree(), want_unversioned=True)
	        return [mod[0].encode('utf-8') for mod in changes.modified]

	# Register cmd_suggest with bzrlib's command registry
	# (presumably exposing it as `bzr suggest` -- confirm against bzrlib naming rules).
	register_command(cmd_suggest)