ibigbug/clean_google_contacts.py

## clean_google_contacts.py
#!/usr/bin/env python


def is_cn_char(c):
    """
    Tell whether the single character *c* is a Chinese character.

    A character counts as Chinese when its code point lies in the
    inclusive range U+4E00..U+9FA5.
    """
    code_point = ord(c)
    lowest, highest = 0x4e00, 0x9fa5
    return lowest <= code_point and code_point <= highest


class MainHandler(object):
    """
    *Use it at your own risk*

    Clean up a Google Contacts CSV export:

    * contacts whose ``Name`` column is empty are dropped;
    * when ``Given Name`` or ``Family Name`` is empty and the full name
      starts with a Chinese character, the full name is split into a
      one-character family name and the remaining given name.

    You may change the code according to your own habits.
    """

    def __init__(self, google_csv='google.csv', encoding='utf-16',
                 target='./cleaned_google.csv'):
        """
        google_csv: path of the source file exported from Google
        encoding: text encoding used for both the source and the target
        target: path of the output file
        """
        self.google_csv = google_csv
        self.encoding = encoding
        self.target = target

    @staticmethod
    def _column(fields, name):
        """Return the index of column *name* in *fields*, or None."""
        try:
            return fields.index(name)
        except ValueError:
            return None

    @staticmethod
    def _cell(row, index):
        """Return ``row[index]``, or None when the column does not exist."""
        return None if index is None else row[index]

    def reader(self):
        """
        Open the source file and read its header record.

        Sets ``raw_data`` (the open source file, positioned just after the
        header) and ``field_list`` (the header's column names; empty when
        the file is empty).  The file is left open on purpose so that
        ``writer`` can consume the remaining records; ``writer`` closes it.

        Returns self so calls can be chained.
        """
        import csv

        # newline='' lets the csv module deal with line breaks itself,
        # including the ones embedded in quoted fields.
        self.raw_data = open(self.google_csv, encoding=self.encoding,
                             newline='')
        try:
            self.field_list = next(csv.reader(self.raw_data), [])
        except Exception:
            # Do not leak the handle when the header cannot be decoded
            # or parsed.
            self.raw_data.close()
            raise

        return self

    def writer(self):
        """
        Write the cleaned contacts to ``target`` and print a summary.

        Must be called after ``reader``.  Records are parsed with the csv
        module, so quoted fields containing commas or line breaks survive
        intact.  Records shorter than the header are padded with empty
        fields.  The source file opened by ``reader`` is always closed
        before this method returns or raises.

        Returns self so calls can be chained.
        """
        import csv
        import os

        source = self.raw_data
        fields = self.field_list
        width = len(fields)
        name_col = self._column(fields, 'Name')
        given_col = self._column(fields, 'Given Name')
        family_col = self._column(fields, 'Family Name')

        total_count = 0
        result_count = 0
        no_name = 0
        no_whole_name = 0

        try:
            with open(self.target, 'w', encoding=self.encoding,
                      newline='') as target:
                # Keep the platform's line ending for record terminators,
                # as the previous text-mode writer did.
                out = csv.writer(target, lineterminator=os.linesep)
                if fields:
                    out.writerow(fields)

                for row in csv.reader(source):
                    total_count += 1
                    # Pad short records so every header column has a cell
                    # (a negative count yields an empty list: no-op).
                    row.extend([''] * (width - len(row)))

                    fullname = self._cell(row, name_col)
                    if fullname == '':
                        no_name += 1
                        continue

                    given_name = self._cell(row, given_col)
                    family_name = self._cell(row, family_col)
                    # ``fullname`` is None when there is no 'Name' column;
                    # then there is nothing to derive the parts from.
                    if fullname and (given_name == '' or family_name == ''):
                        no_whole_name += 1
                        first_char = fullname[0]
                        # Only Chinese names are split: one-character
                        # family name first, the rest is the given name.
                        if is_cn_char(first_char):
                            if given_col is not None:
                                row[given_col] = fullname[1:]
                            if family_col is not None:
                                row[family_col] = first_char

                    out.writerow(row)
                    result_count += 1
        finally:
            source.close()

        # Summary (in Chinese): total contacts, contacts without a name,
        # contacts with an incomplete name, contacts kept after cleaning.
        result = """
        共有{total_count}名联系人
        其中无名无姓者{no_name}人
        姓名格式不正确者{no_whole_name}人
        处理后幸存{result_count}人
        """.format(
            total_count=total_count,
            no_name=no_name,
            no_whole_name=no_whole_name,
            result_count=result_count,
        )
        print(result)

        return self

    def run(self):
        """Read the source file and write the cleaned target file."""
        self.reader().writer()


def main(argv=None):
    """
    Command-line entry point.

    Parses the ``--from``, ``--to`` and ``--encoding`` options and runs a
    ``MainHandler`` built from them.

    argv: list of argument strings to parse; ``None`` (the default) makes
          argparse read ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        # Implicit concatenation keeps source indentation out of the text.
        description=(
            "This tool will format your google contacts' Family Name "
            "and Given Name via their full name so that they will "
            "display more beautifully in your mobile phone."
        ),
    )
    parser.add_argument(
        '-f', '--from',
        dest='google_csv',
        help='The source file export from google',
        default='google.csv',
    )
    parser.add_argument(
        '-t', '--to',
        dest='target',
        help='The file to store the output',
        default='cleaned_google.csv',
    )
    parser.add_argument(
        '-e', '--encoding',
        dest='encoding',
        help='The file encoding',
        default='utf-16',
    )

    args = parser.parse_args(argv)

    m = MainHandler(args.google_csv, args.encoding, args.target)
    m.run()


if __name__ == '__main__':
    main()
	#!/usr/bin/env python


	def is_cn_char(c):
	    """
	    Tell whether the single character *c* is a Chinese character.

	    A character counts as Chinese when its code point lies in the
	    inclusive range U+4E00..U+9FA5.
	    """
	    return 0x4e00 <= ord(c) <= 0x9fa5


	class MainHandler(object):
	    """
	    Use it at your own risk

	    Clean up a Google Contacts CSV export:

	    * contacts whose ``Name`` column is empty are dropped;
	    * when ``Given Name`` or ``Family Name`` is empty and the full name
	      starts with a Chinese character, the full name is split into a
	      one-character family name and the remaining given name.

	    You may change the code according to your own habits.
	    """

	    def __init__(self, google_csv='google.csv', encoding='utf-16',
	                 target='./cleaned_google.csv'):
	        """
	        google_csv: path of the source file exported from Google
	        encoding: text encoding used for both the source and the target
	        target: path of the output file
	        """
	        self.google_csv = google_csv
	        self.encoding = encoding
	        self.target = target

	    @staticmethod
	    def _column(fields, name):
	        """Return the index of column *name* in *fields*, or None."""
	        try:
	            return fields.index(name)
	        except ValueError:
	            return None

	    @staticmethod
	    def _cell(row, index):
	        """Return ``row[index]``, or None when the column does not exist."""
	        return None if index is None else row[index]

	    def reader(self):
	        """
	        Open the source file and read its header record.

	        Sets ``raw_data`` (the open source file, positioned just after the
	        header) and ``field_list`` (the header's column names; empty when
	        the file is empty).  The file is left open on purpose so that
	        ``writer`` can consume the remaining records; ``writer`` closes it.

	        Returns self so calls can be chained.
	        """
	        import csv

	        # newline='' lets the csv module deal with line breaks itself,
	        # including the ones embedded in quoted fields.
	        self.raw_data = open(self.google_csv, encoding=self.encoding,
	                             newline='')
	        try:
	            self.field_list = next(csv.reader(self.raw_data), [])
	        except Exception:
	            # Do not leak the handle when the header cannot be decoded
	            # or parsed.
	            self.raw_data.close()
	            raise

	        return self

	    def writer(self):
	        """
	        Write the cleaned contacts to ``target`` and print a summary.

	        Must be called after ``reader``.  Records are parsed with the csv
	        module, so quoted fields containing commas or line breaks survive
	        intact.  Records shorter than the header are padded with empty
	        fields.  The source file opened by ``reader`` is always closed
	        before this method returns or raises.

	        Returns self so calls can be chained.
	        """
	        import csv
	        import os

	        source = self.raw_data
	        fields = self.field_list
	        width = len(fields)
	        name_col = self._column(fields, 'Name')
	        given_col = self._column(fields, 'Given Name')
	        family_col = self._column(fields, 'Family Name')

	        total_count = 0
	        result_count = 0
	        no_name = 0
	        no_whole_name = 0

	        try:
	            with open(self.target, 'w', encoding=self.encoding,
	                      newline='') as target:
	                # Keep the platform's line ending for record terminators,
	                # as the previous text-mode writer did.
	                out = csv.writer(target, lineterminator=os.linesep)
	                if fields:
	                    out.writerow(fields)

	                for row in csv.reader(source):
	                    total_count += 1
	                    # Pad short records so every header column has a cell
	                    # (a negative count yields an empty list: no-op).
	                    row.extend([''] * (width - len(row)))

	                    fullname = self._cell(row, name_col)
	                    if fullname == '':
	                        no_name += 1
	                        continue

	                    given_name = self._cell(row, given_col)
	                    family_name = self._cell(row, family_col)
	                    # ``fullname`` is None when there is no 'Name' column;
	                    # then there is nothing to derive the parts from.
	                    if fullname and (given_name == '' or family_name == ''):
	                        no_whole_name += 1
	                        first_char = fullname[0]
	                        # Only Chinese names are split: one-character
	                        # family name first, the rest is the given name.
	                        if is_cn_char(first_char):
	                            if given_col is not None:
	                                row[given_col] = fullname[1:]
	                            if family_col is not None:
	                                row[family_col] = first_char

	                    out.writerow(row)
	                    result_count += 1
	        finally:
	            source.close()

	        # Summary (in Chinese): total contacts, contacts without a name,
	        # contacts with an incomplete name, contacts kept after cleaning.
	        result = """
	共有{total_count}名联系人
	其中无名无姓者{no_name}人
	姓名格式不正确者{no_whole_name}人
	处理后幸存{result_count}人
	""".format(
	            total_count=total_count,
	            no_name=no_name,
	            no_whole_name=no_whole_name,
	            result_count=result_count,
	        )
	        print(result)

	        return self

	    def run(self):
	        """Read the source file and write the cleaned target file."""
	        self.reader().writer()


	def main(argv=None):
	    """
	    Command-line entry point.

	    Parses the ``--from``, ``--to`` and ``--encoding`` options and runs a
	    ``MainHandler`` built from them.

	    argv: list of argument strings to parse; ``None`` (the default) makes
	          argparse read ``sys.argv[1:]``.
	    """
	    import argparse

	    parser = argparse.ArgumentParser(
	        # Implicit concatenation keeps source indentation out of the text.
	        description=(
	            "This tool will format your google contacts' Family Name "
	            "and Given Name via their full name so that they will "
	            "display more beautifully in your mobile phone."
	        ),
	    )
	    parser.add_argument(
	        '-f', '--from',
	        dest='google_csv',
	        help='The source file export from google',
	        default='google.csv',
	    )
	    parser.add_argument(
	        '-t', '--to',
	        dest='target',
	        help='The file to store the output',
	        default='cleaned_google.csv',
	    )
	    parser.add_argument(
	        '-e', '--encoding',
	        dest='encoding',
	        help='The file encoding',
	        default='utf-16',
	    )

	    args = parser.parse_args(argv)

	    m = MainHandler(args.google_csv, args.encoding, args.target)
	    m.run()


	if __name__ == '__main__':
	    main()