Skip to content

Instantly share code, notes, and snippets.

@logc
Last active November 16, 2015 11:49
Show Gist options
  • Save logc/8c5ba934e4f4b2f32fa5 to your computer and use it in GitHub Desktop.
Save logc/8c5ba934e4f4b2f32fa5 to your computer and use it in GitHub Desktop.
"""
Convert: file of exported Gmail contacts -> XLSX file
Converts the contents of a CSV file holding exported Gmail contacts into an
XLSX spreadsheet that keeps only the relevant information of the contacts.
"""
import argparse
import codecs
import os
import os.path
import pandas as pd
# Extension appended to the input's base name to build the output file name.
OUTFILE_EXT = '.xlsx'

# Columns of the input that are written to the spreadsheet, in this order.
RELEVANT_FIELDS = [
    'First Name',
    'Last Name',
    'Job Title',
    'Company',
    'Categories',
    'Mobile Phone',
    'Home Address',
    'E-mail Address',
]
def parse_commandline(argv=None):
    """Parses the command line

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            what callers that pass nothing get.

    Returns:
        The ``argparse.Namespace`` built by the parser; its ``filename``
        attribute holds the single positional argument.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filename',
        help='file with the exported Gmail contacts to convert')
    # Passing argv through lets callers (and tests) supply arguments
    # explicitly instead of depending on the interpreter's sys.argv.
    return parser.parse_args(argv)
def ensure_codecs(args, tmpfilename):
    """
    Ensures that the file we are reading the DataFrame from is encoded as
    utf-8

    The file named by ``args.filename`` is decoded as latin1 and its whole
    contents are written, encoded as utf-8, to ``tmpfilename``.
    """
    # Both handles are managed by ``with`` so neither stays open if reading
    # or writing raises. The input is fully read and closed before the
    # output is opened.
    with codecs.open(args.filename, 'r', 'latin1') as infile:
        contents = infile.read()
    with codecs.open(tmpfilename, 'w', 'utf-8') as tmpfile:
        tmpfile.write(contents)
def filename_without_extension(filename):
    """
    Strips the extension from a filename and returns what is left, e.g.
    'input.csv' becomes 'input'. Any leading directory part is kept.
    """
    stem, _extension = os.path.splitext(filename)
    return stem
def main():
    """Main processing

    Parses the command line, re-encodes the input file into a temporary
    utf-8 copy, loads that copy with pandas and writes the columns listed in
    RELEVANT_FIELDS to an Excel file named after the input file with its
    extension replaced by OUTFILE_EXT. The temporary copy is always removed.
    """
    # Local import: only this function needs it.
    import tempfile

    args = parse_commandline()
    # Use a unique temporary file rather than a fixed name in the working
    # directory, so an existing file of that name (possibly the input
    # itself) is never overwritten and then deleted.
    handle, tmpfilename = tempfile.mkstemp(suffix='.encoded.csv')
    os.close(handle)
    try:
        ensure_codecs(args, tmpfilename)
        dataframe = pd.read_csv(tmpfilename, index_col=False, header=0)
        outfile = filename_without_extension(args.filename) + OUTFILE_EXT
        # The ``encoding`` keyword of to_excel is not passed: pandas
        # documents it as never used, and pandas 2.0 removed it.
        # pylint: disable=no-member
        dataframe.to_excel(outfile, columns=RELEVANT_FIELDS)
        # pylint: enable=no-member
    finally:
        # Clean up the temporary copy even when reading or writing fails.
        os.remove(tmpfilename)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment