Created
October 17, 2012 12:02
-
-
Save tanmaykm/3905178 to your computer and use it in GitHub Desktop.
Python script to merge "My Archive" call and SMS logs in Google Drive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 1 10:28:26 2012

Companion merge script for the Android "My Archives" application.

App: https://play.google.com/store/apps/details?id=com.meeteoric.myarchives&hl=en
Instructions: http://sidekick.windforwings.com/2012/10/python-script-to-merge-my-archives-call.html

@author: tan
"""
import sys, glob, csv, os, shutil, httplib2, pprint, subprocess | |
from apiclient.discovery import build | |
from apiclient.http import MediaFileUpload | |
from oauth2client.client import Credentials | |
from oauth2client.client import OAuth2WebServerFlow | |
class GDriveMgr:
    """Utility functions for google drive.

    Wraps the Google Drive (API v2) calls used by the merge script:
    OAuth2 authorisation, finding and downloading the monthly archive
    spreadsheets, uploading the merged CSV files and trashing the
    originals.
    """

    # File in the working directory caching the OAuth2 credentials.
    # The content is JSON (credentials.to_json()), despite the extension.
    CREDS_FILE = 'creds.pickle'

    # Credentials for Google Drive App
    CLIENT_ID = '<place your client id here>'
    CLIENT_SECRET = '<place your client secret here>'

    # We need full access for the time being
    # as our app needs to work on files already existing and
    # drive api is still not granular enough for requesting access only
    # to a folder
    OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'

    # This redirect URI is to be used by installed apps
    REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

    # Key looked up in a file's 'exportLinks' to get the .ods download URL.
    EXPORT_MIME_TYPE = 'application/x-vnd.oasis.opendocument.spreadsheet'

    # Drive id of the 'My_Android_Archives' folder; set by download().
    MY_ANDROID_ARCHIVES = None
    drive_service = None
    credentials = None

    def __init__(self):
        """Load cached credentials (or run the OAuth flow) and build the
        Drive service object."""
        if os.path.exists(self.CREDS_FILE):
            with open(self.CREDS_FILE, "r") as credsfile:
                credentials = Credentials.new_from_json(credsfile.read())
        else:
            credentials = self.auth()
        self.credentials = credentials
        self.make_drive_service(credentials)

    def auth(self):
        """Run the interactive OAuth2 flow and cache the result.

        Prints the authorisation URL, reads the verification code from
        stdin, writes the credentials as JSON to CREDS_FILE and returns
        them.
        """
        flow = OAuth2WebServerFlow(self.CLIENT_ID, self.CLIENT_SECRET,
                                   self.OAUTH_SCOPE, self.REDIRECT_URI)
        authorize_url = flow.step1_get_authorize_url()
        print('Go to the following link in your browser: ' + authorize_url)
        # raw_input only exists on Python 2; fall back to input elsewhere.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        code = read_line('Enter verification code: ').strip()
        credentials = flow.step2_exchange(code)
        # 'with' guarantees the credentials are flushed and the handle closed.
        with open(self.CREDS_FILE, "w") as credsfile:
            credsfile.write(credentials.to_json())
        return credentials

    def make_drive_service(self, credentials):
        """Build the Drive v2 service on an HTTP client authorised with
        *credentials* and store it in self.drive_service."""
        http = credentials.authorize(httplib2.Http())
        self.drive_service = build('drive', 'v2', http=http)

    def file_id(self, f_obj):
        """Return the 'id' entry of a Drive file resource dict."""
        return f_obj['id']

    def file_name(self, f_obj):
        """Return the 'title' entry of a Drive file resource dict."""
        return f_obj['title']

    def file_dnld_url(self, f_obj):
        """Return the .ods export URL of a Drive file resource dict.

        Returns None when the resource has no 'exportLinks' entry or no
        link for EXPORT_MIME_TYPE, so that download() can report
        "No download URL!" instead of failing with a KeyError.
        """
        export_links = f_obj.get('exportLinks') or {}
        return export_links.get(self.EXPORT_MIME_TYPE)

    def _list_files(self, query):
        """Return every file resource matching *query*.

        Keeps requesting pages while the response carries a
        'nextPageToken', so results beyond the first page are not lost.
        """
        items = []
        params = {'q': query}
        while True:
            response = self.drive_service.files().list(**params).execute()
            items.extend(response.get('items', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return items
            params['pageToken'] = page_token

    def download(self, year, mon):
        """Download the calls/sms/contacts archives of one month as .ods.

        year and mon are strings; they are matched against file titles
        as 'calls_<year>_<mon>', 'sms_<year>_<mon>' and
        'contacts_<year>_<mon>' inside the 'My_Android_Archives' folder.
        Each match is written to '<title>.ods' in the working directory.

        Returns a list of dicts with the keys 'dnld_url',
        'file_name_only' and 'file_id' (one per downloaded file), or
        None as soon as one file cannot be downloaded. Calls sys.exit()
        when the archive folder itself is not found.
        """
        print("Downloading from GDrive...")
        folders = self._list_files(
            "title = 'My_Android_Archives' and trashed = false")
        if not folders:
            print("No archives found")
            sys.exit()
        self.MY_ANDROID_ARCHIVES = self.file_id(folders[0])

        period = year + '_' + mon
        title_clauses = " or ".join(
            "title contains '" + prefix + "_" + period + "'"
            for prefix in ("calls", "sms", "contacts"))
        search_str = ("'" + self.MY_ANDROID_ARCHIVES + "' in parents"
                      + " and trashed = false"
                      + " and (" + title_clauses + ")")

        file_items = self._list_files(search_str)
        print("To download " + str(len(file_items)) + " files")

        files_downloaded = []
        for one_file in file_items:
            dnld_url = self.file_dnld_url(one_file)
            file_name_only = self.file_name(one_file)
            print("\t" + file_name_only + ".ods ...")
            if not dnld_url:
                print("Could not download " + file_name_only + ". No download URL!")
                return None
            # Reuse the authorised HTTP client held by the service object.
            resp, content = self.drive_service._http.request(dnld_url)
            if resp.status != 200:
                print("Could not download " + file_name_only + ". HTTP Error!")
                return None
            with open(file_name_only + ".ods", "wb") as dnld_file:
                dnld_file.write(content)
            files_downloaded.append({
                'dnld_url': dnld_url,
                'file_name_only': file_name_only,
                'file_id': self.file_id(one_file),
            })
        return files_downloaded

    def upload(self, file_names):
        """Upload each CSV in *file_names* into the archive folder.

        The Drive title and description are the file name without its
        extension; the insert is requested with convert=True. The parent
        folder id is self.MY_ANDROID_ARCHIVES, which download() sets.
        """
        print("Uploading " + str(len(file_names)) + " files...")
        for file_name in file_names:
            print("\t" + file_name)
            media_body = MediaFileUpload(file_name, mimetype='text/csv', resumable=True)
            # splitext drops only a real extension, unlike a blind [:-4].
            file_name_without_extn = os.path.splitext(file_name)[0]
            body = {
                'title': file_name_without_extn,
                'description': file_name_without_extn,
                'mimeType': 'text/csv',
                'parents': [{'id': self.MY_ANDROID_ARCHIVES}],
            }
            uploaded = self.drive_service.files().insert(
                body=body, media_body=media_body, convert=True).execute()
            print("\tUploaded file " + file_name + " to Google Drive. Id: " + uploaded['id'])

    def trash(self, files_downloaded):
        """Move every file described in *files_downloaded* (as returned
        by download()) to the Drive trash."""
        print("Trashing " + str(len(files_downloaded)) + " files...")
        for file_dict in files_downloaded:
            print("\t" + file_dict['file_name_only'])
            self.trash_single_file(file_dict['file_id'])

    def trash_single_file(self, file_id):
        """Move the Drive file with id *file_id* to the trash."""
        self.drive_service.files().trash(fileId=file_id).execute()
class MyArchivesMerger:
    """Utility functions for merging my archives files.

    Converts downloaded .ods archives to CSV, merges the call and SMS
    logs into one CSV each and keeps only the newest contacts file.
    Files are read from and written to the working directory.
    """

    # Class-level defaults kept for backward compatibility. Each instance
    # gets its own lists in __init__, so appending through one instance
    # never leaks into another instance or into the class.
    CONVERTED_FILES = []
    CALLS_FILES = []
    SMS_FILES = []
    CONTACT_FILES = []
    CREATED_FILES = []

    def __init__(self):
        """Start with empty, instance-private file lists."""
        self._reset()

    def _reset(self):
        """Rebind all bookkeeping lists to fresh empty lists."""
        self.CONVERTED_FILES = []
        self.CALLS_FILES = []
        self.SMS_FILES = []
        self.CONTACT_FILES = []
        self.CREATED_FILES = []

    def _open_csv(self, path, mode):
        """Open *path* the way the csv module expects.

        mode is "r" or "w". Python 3 needs text mode with newline='',
        Python 2 needs binary mode; otherwise line endings can be
        translated on some platforms.
        """
        if sys.version_info[0] >= 3:
            return open(path, mode, newline='')
        return open(path, mode + 'b')

    def convert_files(self, files):
        """Convert the downloaded .ods files to CSV and classify them.

        files is a list of dicts with a 'file_name_only' key. All
        bookkeeping lists are reset first. './unoconv.py -f csv' is run
        once over every '<file_name_only>.ods'; afterwards the .ods
        files are deleted and the '<file_name_only>.csv' names are
        recorded in CONVERTED_FILES and, by name prefix, in the sorted
        CALLS_FILES / SMS_FILES / CONTACT_FILES lists.

        Raises subprocess.CalledProcessError when the converter fails.
        """
        self._reset()
        if not files:
            return

        ods_files = [file_dict['file_name_only'] + ".ods" for file_dict in files]
        print("Converting to csv...")
        # Argument list without a shell: file names come from Drive titles
        # and must not be split on spaces or interpreted by a shell.
        subprocess.check_call(["./unoconv.py", "-f", "csv"] + ods_files)

        for file_dict in files:
            file_name_only = file_dict['file_name_only']
            ods_name = file_name_only + ".ods"
            csv_name = file_name_only + ".csv"
            if os.path.exists(ods_name):
                os.remove(ods_name)
            self.CONVERTED_FILES.append(csv_name)
            if file_name_only.startswith("calls"):
                self.CALLS_FILES.append(csv_name)
            elif file_name_only.startswith("sms"):
                self.SMS_FILES.append(csv_name)
            elif file_name_only.startswith("contacts"):
                self.CONTACT_FILES.append(csv_name)

        self.CALLS_FILES.sort()
        self.SMS_FILES.sort()
        self.CONTACT_FILES.sort()

    def merge_files(self, files):
        """Concatenate the CSV files in *files* into one output file.

        The output is written to the working directory and named from
        the first and last input, e.g. 'calls_2012_10_01.csv' and
        'calls_2012_10_31.csv' give
        'calls_2012_10_01_to_2012_10_31.csv'. The first row of every
        file except the first is skipped (treated as a repeated header).
        The inputs are deleted once the output has been written and
        closed, and the output name is appended to CREATED_FILES.
        Does nothing for an empty list.
        """
        if not files:
            return

        begin_file = os.path.basename(files[0])
        end_file = os.path.basename(files[-1])
        underscore = end_file.find("_")
        if underscore >= 0:
            end_suffix = end_file[underscore:]
        else:
            # No underscore to split on: keep the whole name rather than
            # slicing from index -1.
            end_suffix = "_" + end_file
        out_file = os.path.splitext(begin_file)[0] + "_to" + end_suffix

        with self._open_csv(out_file, "w") as csvoutfile:
            csvwriter = csv.writer(csvoutfile)
            for idx, in_file in enumerate(files):
                print("\t" + in_file)
                with self._open_csv(in_file, "r") as csvinfile:
                    csvreader = csv.reader(csvinfile)
                    if idx > 0:
                        # Drop the repeated header row of follow-up files.
                        next(csvreader, None)
                    csvwriter.writerows(csvreader)

        # Delete the inputs only after the merged file is safely closed.
        for in_file in files:
            os.remove(in_file)
        self.CREATED_FILES.append(out_file)

    def keep_only_latest(self, files):
        """Keep the last file in *files* and delete all the others.

        The kept file name is appended to CREATED_FILES. Does nothing
        for an empty list.
        """
        if not files:
            return
        latest = files[-1]
        print("\t Keeping " + latest)
        self.CREATED_FILES.append(latest)
        for stale in files[:-1]:
            os.remove(stale)

    def process_files(self):
        """Merge the call and SMS files and keep the newest contacts
        file, using the lists filled in by convert_files()."""
        print("Processing " + str(len(self.CONVERTED_FILES)) + " files...")
        print("Processing " + str(len(self.CALLS_FILES)) + " call files...")
        self.merge_files(self.CALLS_FILES)
        print("Processing " + str(len(self.SMS_FILES)) + " sms files...")
        self.merge_files(self.SMS_FILES)
        print("Processing " + str(len(self.CONTACT_FILES)) + " contact files...")
        self.keep_only_latest(self.CONTACT_FILES)
""" | |
BEGIN MAIN | |
""" | |
def main(argv=None):
    """Merge one month of "My Archives" files stored in Google Drive.

    argv defaults to sys.argv and must hold the year and month as
    argv[1] and argv[2]. The month's archives are downloaded, converted
    and merged; the merged files are uploaded, the originals are moved
    to the Drive trash and the local merged files are deleted.

    Exits with status 1 when the year or month argument is missing, so
    that calling shells can detect the usage error.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Usage: python merge_monthly.py YYYY MM")
        sys.exit(1)

    year = argv[1]
    month = argv[2]
    print("Merging " + year + " " + month)

    gdrive_mgr = GDriveMgr()
    files_downloaded = gdrive_mgr.download(year, month)
    if not files_downloaded:
        # Covers both a failed download (None) and an empty month ([]).
        print("Error downloading files or no files to process.")
        return

    archives_mgr = MyArchivesMerger()
    archives_mgr.convert_files(files_downloaded)
    archives_mgr.process_files()
    gdrive_mgr.upload(archives_mgr.CREATED_FILES)
    gdrive_mgr.trash(files_downloaded)
    # The merged files now live in Drive; drop the local copies.
    for c_file in archives_mgr.CREATED_FILES:
        os.remove(c_file)


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
""" | |
END MAIN | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment