tanmaykm/merge_monthly.py

## merge_monthly.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Mon Oct  1 10:28:26 2012

Companion merge script for Android "My Archives" application.
App: https://play.google.com/store/apps/details?id=com.meeteoric.myarchives&hl=en
Instructions: http://sidekick.windforwings.com/2012/10/python-script-to-merge-my-archives-call.html

@author: tan
"""

import sys, glob, csv, os, shutil, httplib2, pprint, subprocess

from apiclient.discovery import build
from apiclient.http import MediaFileUpload
from oauth2client.client import Credentials
from oauth2client.client import OAuth2WebServerFlow

class GDriveMgr:
    """Utility functions for google drive.

    Constructing an instance obtains credentials (from CREDS_FILE when it
    exists, otherwise through the interactive flow in auth()) and builds a
    Drive v2 service object. The remaining methods download, upload and
    trash files below the 'My_Android_Archives' folder.
    """
    # Holds the JSON text produced by credentials.to_json() (not a pickle,
    # despite the file name).
    CREDS_FILE = 'creds.pickle'
    # Credentials for Google Drive App
    CLIENT_ID = '<place your client id here>'
    CLIENT_SECRET = '<place your client secret here>'

    # We need full access for the time being
    # as our app needs to work on files already existing and
    # drive api is still not granular enough for requesting access only to a folder
    OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'

    # This redirect URI is to be used by installed apps
    REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

    # Key looked up in a file resource's 'exportLinks' to get the ODS URL
    ODS_MIME_TYPE = 'application/x-vnd.oasis.opendocument.spreadsheet'

    # Id of the 'My_Android_Archives' folder; filled in by download()
    MY_ANDROID_ARCHIVES = None

    drive_service = None
    credentials = None

    def __init__(self):
        """Load saved credentials or run auth(), then build the service."""
        if os.path.exists(self.CREDS_FILE):
            # The context manager guarantees the handle is closed.
            with open(self.CREDS_FILE, "r") as credsfile:
                credentials = Credentials.new_from_json(credsfile.read())
        else:
            credentials = self.auth()
        self.credentials = credentials
        self.make_drive_service(credentials)

    def auth(self):
        """Run the interactive OAuth flow and persist the result.

        Prints the authorisation URL, reads the verification code from
        stdin, writes the credentials as JSON to CREDS_FILE and returns them.
        """
        flow = OAuth2WebServerFlow(self.CLIENT_ID, self.CLIENT_SECRET, self.OAUTH_SCOPE, self.REDIRECT_URI)
        authorize_url = flow.step1_get_authorize_url()
        print('Go to the following link in your browser: ' + authorize_url)
        code = raw_input('Enter verification code: ').strip()
        credentials = flow.step2_exchange(code)

        # Close (and therefore flush) the file before returning.
        with open(self.CREDS_FILE, "w") as credsfile:
            credsfile.write(credentials.to_json())

        return credentials

    def make_drive_service(self, credentials):
        """Build the Drive v2 service over an HTTP client authorised by credentials."""
        http = credentials.authorize(httplib2.Http())
        self.drive_service = build('drive', 'v2', http=http)

    def file_id(self, f_obj):
        """Return the 'id' entry of a file resource dict."""
        return f_obj['id']

    def file_name(self, f_obj):
        """Return the 'title' entry of a file resource dict."""
        return f_obj['title']

    def file_dnld_url(self, f_obj):
        """Return the ODS export URL of a file resource, or None if absent.

        Returning None (instead of raising KeyError) lets download() report
        "No download URL!" for files that have no such export link.
        """
        return f_obj.get('exportLinks', {}).get(self.ODS_MIME_TYPE)

    def _list_items(self, query):
        """Run one files().list() call with query and return its 'items'."""
        # NOTE(review): only a single list() response is read; if the API
        # paginates results, later pages are ignored -- confirm.
        return self.drive_service.files().list(q=query).execute()['items']

    def _month_query(self, year, mon):
        """Build the search string for one month's calls/sms/contacts files."""
        suffix = year + '_' + mon
        titles = ' or '.join(
            "title contains '" + kind + '_' + suffix + "'"
            for kind in ('calls', 'sms', 'contacts'))
        return ("'" + self.MY_ANDROID_ARCHIVES + "' in parents"
                + ' and trashed = false'
                + ' and (' + titles + ')')

    def download(self, year, mon):
        """Download the month's archive files as '<title>.ods' into the cwd.

        year, mon: strings spliced into the title search
        ('calls_<year>_<mon>', 'sms_...', 'contacts_...').

        Returns a list of dicts with keys 'dnld_url', 'file_name_only' and
        'file_id' (one per downloaded file), or None as soon as any file
        cannot be downloaded. Calls sys.exit() when the
        'My_Android_Archives' folder is not found.
        """
        print("Downloading from GDrive...")
        folders = self._list_items("title = 'My_Android_Archives' and trashed = false")
        if not folders:
            print("No archives found")
            sys.exit()

        self.MY_ANDROID_ARCHIVES = self.file_id(folders[0])

        file_items = self._list_items(self._month_query(year, mon))
        print("To download " + str(len(file_items)) + " files")

        files_downloaded = []
        for one_file in file_items:
            dnld_url = self.file_dnld_url(one_file)
            file_name_only = self.file_name(one_file)
            print("\t" + file_name_only + ".ods ...")
            if not dnld_url:
                print("Could not download " + file_name_only + ". No download URL!")
                return None

            resp, content = self.drive_service._http.request(dnld_url)
            if resp.status != 200:
                print("Could not download " + file_name_only + ". HTTP Error!")
                return None

            with open(file_name_only + ".ods", "wb") as dnld_file:
                dnld_file.write(content)
            files_downloaded.append({
                'dnld_url': dnld_url,
                'file_name_only': file_name_only,
                'file_id': self.file_id(one_file),
            })

        return files_downloaded

    def upload(self, file_names):
        """Upload each named CSV file into the archives folder.

        The title and description are the file name without its extension.
        Uses the folder id stored by download() as the parent.
        """
        print("Uploading " + str(len(file_names)) + " files...")
        for file_name in file_names:
            print("\t" + file_name)
            media_body = MediaFileUpload(file_name, mimetype='text/csv', resumable=True)
            file_name_without_extn = os.path.splitext(file_name)[0]
            body = {
                'title': file_name_without_extn,
                'description': file_name_without_extn,
                'mimeType': 'text/csv',
                'parents': [{'id': self.MY_ANDROID_ARCHIVES}],
            }
            uploaded = self.drive_service.files().insert(body=body, media_body=media_body, convert=True).execute()
            print("\tUploaded file " + file_name + " to Google Drive. Id: " + uploaded['id'])

    def trash(self, files_downloaded):
        """Trash every file described in the list returned by download()."""
        print("Trashing " + str(len(files_downloaded)) + " files...")
        for file_dict in files_downloaded:
            print("\t" + file_dict['file_name_only'])
            self.trash_single_file(file_dict['file_id'])

    def trash_single_file(self, file_id):
        """Move the file with the given id to the Drive trash."""
        self.drive_service.files().trash(fileId=file_id).execute()


class MyArchivesMerger:
    """Utility functions for merging my archives files.

    convert_files() turns downloaded .ods files into .csv and sorts them
    into calls/sms/contacts lists; process_files() then merges the calls and
    sms files and keeps only the newest contacts file. Names of the
    resulting files are collected in CREATED_FILES.
    """
    # Class-level defaults are kept for compatibility; every instance gets
    # its own lists in __init__ so state is never shared between instances.
    CONVERTED_FILES = []
    CALLS_FILES = []
    SMS_FILES = []
    CONTACT_FILES = []
    CREATED_FILES = []

    def __init__(self):
        """Give the instance its own, empty bookkeeping lists."""
        self._reset()

    def _reset(self):
        """Rebind all bookkeeping lists to fresh empty lists."""
        self.CONVERTED_FILES = []
        self.CALLS_FILES = []
        self.SMS_FILES = []
        self.CONTACT_FILES = []
        self.CREATED_FILES = []

    def convert_files(self, files):
        """Convert '<name>.ods' files to CSV with ./unoconv.py and classify them.

        files: list of dicts carrying a 'file_name_only' key (as returned by
        GDriveMgr.download()). Each .ods file that exists after conversion is
        removed and '<name>.csv' is recorded in CONVERTED_FILES and, by name
        prefix, in CALLS_FILES / SMS_FILES / CONTACT_FILES (each sorted).

        Raises subprocess.CalledProcessError if the converter fails.
        """
        self._reset()

        if not files:
            return

        base_names = [file_dict['file_name_only'] for file_dict in files]

        print("Converting to csv...")
        # An argument list (no shell) keeps titles with spaces or shell
        # metacharacters from being split or interpreted.
        subprocess.check_call(
            ["./unoconv.py", "-f", "csv"] + [name + ".ods" for name in base_names])

        for name in base_names:
            if not os.path.exists(name + ".ods"):
                continue
            os.remove(name + ".ods")
            csv_name = name + ".csv"
            self.CONVERTED_FILES.append(csv_name)
            if name.startswith("calls"):
                self.CALLS_FILES.append(csv_name)
            elif name.startswith("sms"):
                self.SMS_FILES.append(csv_name)
            elif name.startswith("contacts"):
                self.CONTACT_FILES.append(csv_name)

        self.CALLS_FILES.sort()
        self.SMS_FILES.sort()
        self.CONTACT_FILES.sort()

    def merge_files(self, files):
        """Concatenate CSV files into one file in the current directory.

        The output is named '<first without extension>_to<last from its
        first underscore on>'. The first row of every file after the first
        is skipped (repeated header). Input files are removed once the
        output has been written, and the output name is appended to
        CREATED_FILES. Does nothing for an empty list.
        """
        if not files:
            return

        begin_file = os.path.basename(files[0])
        end_file = os.path.basename(files[-1])
        out_file = os.path.splitext(begin_file)[0] + "_to" + end_file[end_file.find("_"):]

        with open(out_file, "w") as csvoutfile:
            csvwriter = csv.writer(csvoutfile)
            for idx, in_name in enumerate(files):
                print("\t" + in_name)
                with open(in_name, "r") as csvinfile:
                    csvreader = csv.reader(csvinfile)
                    if idx > 0:
                        # Drop the header row repeated in later files.
                        next(csvreader, None)
                    csvwriter.writerows(csvreader)

        # Delete inputs only after the merged file is complete and closed,
        # so a failure part-way through does not lose data.
        for in_name in files:
            os.remove(in_name)
        self.CREATED_FILES.append(out_file)

    def keep_only_latest(self, files):
        """Keep the last file of the list and delete all the others.

        The kept file name is appended to CREATED_FILES. Does nothing for an
        empty list.
        """
        if not files:
            return

        cp_src = files[-1]
        print("\t Keeping " + cp_src)
        self.CREATED_FILES.append(cp_src)

        for stale in files[:-1]:
            os.remove(stale)

    def process_files(self):
        """Merge the call and sms files and keep only the latest contacts file."""
        print("Processing " + str(len(self.CONVERTED_FILES)) + " files...")
        print("Processing " + str(len(self.CALLS_FILES)) + " call files...")
        self.merge_files(self.CALLS_FILES)
        print("Processing " + str(len(self.SMS_FILES)) + " sms files...")
        self.merge_files(self.SMS_FILES)
        print("Processing " + str(len(self.CONTACT_FILES)) + " contact files...")
        self.keep_only_latest(self.CONTACT_FILES)


"""
BEGIN MAIN
"""

# Script entry: merge one month's archives.
# Steps: download the month's files, convert and merge them locally, upload
# the merged results, trash the downloaded originals on Drive, then delete
# the local merged files.
if len(sys.argv) < 3:
    print("Usage: python merge_monthly.py YYYY MM")
    # A usage error is a failure; exit non-zero so callers can detect it.
    sys.exit(1)

YYYY = sys.argv[1]
MM = sys.argv[2]

print("Merging " + YYYY + " " + MM)

gdrive_mgr = GDriveMgr()
files_downloaded = gdrive_mgr.download(YYYY, MM)

# download() returns None on error and an empty list when nothing matched;
# both cases fall through to the message below.
if files_downloaded:
    archives_mgr = MyArchivesMerger()
    archives_mgr.convert_files(files_downloaded)
    archives_mgr.process_files()
    gdrive_mgr.upload(archives_mgr.CREATED_FILES)
    # Originals are trashed only after the upload call has returned.
    gdrive_mgr.trash(files_downloaded)
    for c_file in archives_mgr.CREATED_FILES:
        os.remove(c_file)
else:
    print("Error downloading files or no files to process.")


"""
END MAIN
"""
	#!/usr/bin/env python
	# -- coding: utf-8 --
	"""
	Created on Mon Oct 1 10:28:26 2012

	Companion merge script for Android "My Archives" application.
	App: https://play.google.com/store/apps/details?id=com.meeteoric.myarchives&hl=en
	Instructions: http://sidekick.windforwings.com/2012/10/python-script-to-merge-my-archives-call.html

	@author: tan
	"""

	import sys, glob, csv, os, shutil, httplib2, pprint, subprocess

	from apiclient.discovery import build
	from apiclient.http import MediaFileUpload
	from oauth2client.client import Credentials
	from oauth2client.client import OAuth2WebServerFlow

	class GDriveMgr:
	    """Utility functions for google drive.

	    Constructing an instance obtains credentials (from CREDS_FILE when it
	    exists, otherwise through the interactive flow in auth()) and builds a
	    Drive v2 service object. The remaining methods download, upload and
	    trash files below the 'My_Android_Archives' folder.
	    """
	    # Holds the JSON text produced by credentials.to_json() (not a pickle,
	    # despite the file name).
	    CREDS_FILE = 'creds.pickle'
	    # Credentials for Google Drive App
	    CLIENT_ID = '<place your client id here>'
	    CLIENT_SECRET = '<place your client secret here>'

	    # We need full access for the time being
	    # as our app needs to work on files already existing and
	    # drive api is still not granular enough for requesting access only to a folder
	    OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'

	    # This redirect URI is to be used by installed apps
	    REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

	    # Key looked up in a file resource's 'exportLinks' to get the ODS URL
	    ODS_MIME_TYPE = 'application/x-vnd.oasis.opendocument.spreadsheet'

	    # Id of the 'My_Android_Archives' folder; filled in by download()
	    MY_ANDROID_ARCHIVES = None

	    drive_service = None
	    credentials = None

	    def __init__(self):
	        """Load saved credentials or run auth(), then build the service."""
	        if os.path.exists(self.CREDS_FILE):
	            # The context manager guarantees the handle is closed.
	            with open(self.CREDS_FILE, "r") as credsfile:
	                credentials = Credentials.new_from_json(credsfile.read())
	        else:
	            credentials = self.auth()
	        self.credentials = credentials
	        self.make_drive_service(credentials)

	    def auth(self):
	        """Run the interactive OAuth flow and persist the result.

	        Prints the authorisation URL, reads the verification code from
	        stdin, writes the credentials as JSON to CREDS_FILE and returns them.
	        """
	        flow = OAuth2WebServerFlow(self.CLIENT_ID, self.CLIENT_SECRET, self.OAUTH_SCOPE, self.REDIRECT_URI)
	        authorize_url = flow.step1_get_authorize_url()
	        print('Go to the following link in your browser: ' + authorize_url)
	        code = raw_input('Enter verification code: ').strip()
	        credentials = flow.step2_exchange(code)

	        # Close (and therefore flush) the file before returning.
	        with open(self.CREDS_FILE, "w") as credsfile:
	            credsfile.write(credentials.to_json())

	        return credentials

	    def make_drive_service(self, credentials):
	        """Build the Drive v2 service over an HTTP client authorised by credentials."""
	        http = credentials.authorize(httplib2.Http())
	        self.drive_service = build('drive', 'v2', http=http)

	    def file_id(self, f_obj):
	        """Return the 'id' entry of a file resource dict."""
	        return f_obj['id']

	    def file_name(self, f_obj):
	        """Return the 'title' entry of a file resource dict."""
	        return f_obj['title']

	    def file_dnld_url(self, f_obj):
	        """Return the ODS export URL of a file resource, or None if absent.

	        Returning None (instead of raising KeyError) lets download() report
	        "No download URL!" for files that have no such export link.
	        """
	        return f_obj.get('exportLinks', {}).get(self.ODS_MIME_TYPE)

	    def _list_items(self, query):
	        """Run one files().list() call with query and return its 'items'."""
	        # NOTE(review): only a single list() response is read; if the API
	        # paginates results, later pages are ignored -- confirm.
	        return self.drive_service.files().list(q=query).execute()['items']

	    def _month_query(self, year, mon):
	        """Build the search string for one month's calls/sms/contacts files."""
	        suffix = year + '_' + mon
	        titles = ' or '.join(
	            "title contains '" + kind + '_' + suffix + "'"
	            for kind in ('calls', 'sms', 'contacts'))
	        return ("'" + self.MY_ANDROID_ARCHIVES + "' in parents"
	                + ' and trashed = false'
	                + ' and (' + titles + ')')

	    def download(self, year, mon):
	        """Download the month's archive files as '<title>.ods' into the cwd.

	        year, mon: strings spliced into the title search
	        ('calls_<year>_<mon>', 'sms_...', 'contacts_...').

	        Returns a list of dicts with keys 'dnld_url', 'file_name_only' and
	        'file_id' (one per downloaded file), or None as soon as any file
	        cannot be downloaded. Calls sys.exit() when the
	        'My_Android_Archives' folder is not found.
	        """
	        print("Downloading from GDrive...")
	        folders = self._list_items("title = 'My_Android_Archives' and trashed = false")
	        if not folders:
	            print("No archives found")
	            sys.exit()

	        self.MY_ANDROID_ARCHIVES = self.file_id(folders[0])

	        file_items = self._list_items(self._month_query(year, mon))
	        print("To download " + str(len(file_items)) + " files")

	        files_downloaded = []
	        for one_file in file_items:
	            dnld_url = self.file_dnld_url(one_file)
	            file_name_only = self.file_name(one_file)
	            print("\t" + file_name_only + ".ods ...")
	            if not dnld_url:
	                print("Could not download " + file_name_only + ". No download URL!")
	                return None

	            resp, content = self.drive_service._http.request(dnld_url)
	            if resp.status != 200:
	                print("Could not download " + file_name_only + ". HTTP Error!")
	                return None

	            with open(file_name_only + ".ods", "wb") as dnld_file:
	                dnld_file.write(content)
	            files_downloaded.append({
	                'dnld_url': dnld_url,
	                'file_name_only': file_name_only,
	                'file_id': self.file_id(one_file),
	            })

	        return files_downloaded

	    def upload(self, file_names):
	        """Upload each named CSV file into the archives folder.

	        The title and description are the file name without its extension.
	        Uses the folder id stored by download() as the parent.
	        """
	        print("Uploading " + str(len(file_names)) + " files...")
	        for file_name in file_names:
	            print("\t" + file_name)
	            media_body = MediaFileUpload(file_name, mimetype='text/csv', resumable=True)
	            file_name_without_extn = os.path.splitext(file_name)[0]
	            body = {
	                'title': file_name_without_extn,
	                'description': file_name_without_extn,
	                'mimeType': 'text/csv',
	                'parents': [{'id': self.MY_ANDROID_ARCHIVES}],
	            }
	            uploaded = self.drive_service.files().insert(body=body, media_body=media_body, convert=True).execute()
	            print("\tUploaded file " + file_name + " to Google Drive. Id: " + uploaded['id'])

	    def trash(self, files_downloaded):
	        """Trash every file described in the list returned by download()."""
	        print("Trashing " + str(len(files_downloaded)) + " files...")
	        for file_dict in files_downloaded:
	            print("\t" + file_dict['file_name_only'])
	            self.trash_single_file(file_dict['file_id'])

	    def trash_single_file(self, file_id):
	        """Move the file with the given id to the Drive trash."""
	        self.drive_service.files().trash(fileId=file_id).execute()



	class MyArchivesMerger:
	    """Utility functions for merging my archives files.

	    convert_files() turns downloaded .ods files into .csv and sorts them
	    into calls/sms/contacts lists; process_files() then merges the calls and
	    sms files and keeps only the newest contacts file. Names of the
	    resulting files are collected in CREATED_FILES.
	    """
	    # Class-level defaults are kept for compatibility; every instance gets
	    # its own lists in __init__ so state is never shared between instances.
	    CONVERTED_FILES = []
	    CALLS_FILES = []
	    SMS_FILES = []
	    CONTACT_FILES = []
	    CREATED_FILES = []

	    def __init__(self):
	        """Give the instance its own, empty bookkeeping lists."""
	        self._reset()

	    def _reset(self):
	        """Rebind all bookkeeping lists to fresh empty lists."""
	        self.CONVERTED_FILES = []
	        self.CALLS_FILES = []
	        self.SMS_FILES = []
	        self.CONTACT_FILES = []
	        self.CREATED_FILES = []

	    def convert_files(self, files):
	        """Convert '<name>.ods' files to CSV with ./unoconv.py and classify them.

	        files: list of dicts carrying a 'file_name_only' key (as returned by
	        GDriveMgr.download()). Each .ods file that exists after conversion is
	        removed and '<name>.csv' is recorded in CONVERTED_FILES and, by name
	        prefix, in CALLS_FILES / SMS_FILES / CONTACT_FILES (each sorted).

	        Raises subprocess.CalledProcessError if the converter fails.
	        """
	        self._reset()

	        if not files:
	            return

	        base_names = [file_dict['file_name_only'] for file_dict in files]

	        print("Converting to csv...")
	        # An argument list (no shell) keeps titles with spaces or shell
	        # metacharacters from being split or interpreted.
	        subprocess.check_call(
	            ["./unoconv.py", "-f", "csv"] + [name + ".ods" for name in base_names])

	        for name in base_names:
	            if not os.path.exists(name + ".ods"):
	                continue
	            os.remove(name + ".ods")
	            csv_name = name + ".csv"
	            self.CONVERTED_FILES.append(csv_name)
	            if name.startswith("calls"):
	                self.CALLS_FILES.append(csv_name)
	            elif name.startswith("sms"):
	                self.SMS_FILES.append(csv_name)
	            elif name.startswith("contacts"):
	                self.CONTACT_FILES.append(csv_name)

	        self.CALLS_FILES.sort()
	        self.SMS_FILES.sort()
	        self.CONTACT_FILES.sort()

	    def merge_files(self, files):
	        """Concatenate CSV files into one file in the current directory.

	        The output is named '<first without extension>_to<last from its
	        first underscore on>'. The first row of every file after the first
	        is skipped (repeated header). Input files are removed once the
	        output has been written, and the output name is appended to
	        CREATED_FILES. Does nothing for an empty list.
	        """
	        if not files:
	            return

	        begin_file = os.path.basename(files[0])
	        end_file = os.path.basename(files[-1])
	        out_file = os.path.splitext(begin_file)[0] + "_to" + end_file[end_file.find("_"):]

	        with open(out_file, "w") as csvoutfile:
	            csvwriter = csv.writer(csvoutfile)
	            for idx, in_name in enumerate(files):
	                print("\t" + in_name)
	                with open(in_name, "r") as csvinfile:
	                    csvreader = csv.reader(csvinfile)
	                    if idx > 0:
	                        # Drop the header row repeated in later files.
	                        next(csvreader, None)
	                    csvwriter.writerows(csvreader)

	        # Delete inputs only after the merged file is complete and closed,
	        # so a failure part-way through does not lose data.
	        for in_name in files:
	            os.remove(in_name)
	        self.CREATED_FILES.append(out_file)

	    def keep_only_latest(self, files):
	        """Keep the last file of the list and delete all the others.

	        The kept file name is appended to CREATED_FILES. Does nothing for an
	        empty list.
	        """
	        if not files:
	            return

	        cp_src = files[-1]
	        print("\t Keeping " + cp_src)
	        self.CREATED_FILES.append(cp_src)

	        for stale in files[:-1]:
	            os.remove(stale)

	    def process_files(self):
	        """Merge the call and sms files and keep only the latest contacts file."""
	        print("Processing " + str(len(self.CONVERTED_FILES)) + " files...")
	        print("Processing " + str(len(self.CALLS_FILES)) + " call files...")
	        self.merge_files(self.CALLS_FILES)
	        print("Processing " + str(len(self.SMS_FILES)) + " sms files...")
	        self.merge_files(self.SMS_FILES)
	        print("Processing " + str(len(self.CONTACT_FILES)) + " contact files...")
	        self.keep_only_latest(self.CONTACT_FILES)


	"""
	BEGIN MAIN
	"""

	# Script entry: merge one month's archives.
	# Steps: download the month's files, convert and merge them locally, upload
	# the merged results, trash the downloaded originals on Drive, then delete
	# the local merged files.
	if len(sys.argv) < 3:
	    print("Usage: python merge_monthly.py YYYY MM")
	    # A usage error is a failure; exit non-zero so callers can detect it.
	    sys.exit(1)

	YYYY = sys.argv[1]
	MM = sys.argv[2]

	print("Merging " + YYYY + " " + MM)

	gdrive_mgr = GDriveMgr()
	files_downloaded = gdrive_mgr.download(YYYY, MM)

	# download() returns None on error and an empty list when nothing matched;
	# both cases fall through to the message below.
	if files_downloaded:
	    archives_mgr = MyArchivesMerger()
	    archives_mgr.convert_files(files_downloaded)
	    archives_mgr.process_files()
	    gdrive_mgr.upload(archives_mgr.CREATED_FILES)
	    # Originals are trashed only after the upload call has returned.
	    gdrive_mgr.trash(files_downloaded)
	    for c_file in archives_mgr.CREATED_FILES:
	        os.remove(c_file)
	else:
	    print("Error downloading files or no files to process.")


	"""
	END MAIN
	"""