Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@tanmaykm
Created October 17, 2012 12:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tanmaykm/3905178 to your computer and use it in GitHub Desktop.
Save tanmaykm/3905178 to your computer and use it in GitHub Desktop.
Python script to merge "My Archives" call and SMS logs in Google Drive
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 1 10:28:26 2012
Companion merge script for Android "My Archives" application.
App: https://play.google.com/store/apps/details?id=com.meeteoric.myarchives&hl=en
Instructions: http://sidekick.windforwings.com/2012/10/python-script-to-merge-my-archives-call.html
@author: tan
"""
import sys, glob, csv, os, shutil, httplib2, pprint, subprocess
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
from oauth2client.client import Credentials
from oauth2client.client import OAuth2WebServerFlow
class GDriveMgr:
    """Utility functions for Google Drive.

    Wraps a Drive v2 service object and offers the three operations the
    merge script needs: download a month's archive spreadsheets, upload the
    merged CSV files, and trash the originals.
    """

    # File (relative to the working directory) where OAuth credentials are
    # cached as JSON between runs.
    CREDS_FILE = 'creds.pickle'

    # Credentials for Google Drive App
    CLIENT_ID = '<place your client id here>'
    CLIENT_SECRET = '<place your client secret here>'

    # We need full access for the time being, as our app needs to work on
    # files that already exist and the Drive API is still not granular
    # enough for requesting access only to a folder.
    OAUTH_SCOPE = 'https://www.googleapis.com/auth/drive'

    # This redirect URI is to be used by installed apps
    REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'

    # Key into a file's 'exportLinks' used to pick the download URL.
    ODS_MIME_TYPE = 'application/x-vnd.oasis.opendocument.spreadsheet'

    # Id of the 'My_Android_Archives' folder; filled in by download().
    MY_ANDROID_ARCHIVES = None
    drive_service = None
    credentials = None

    def __init__(self):
        """Load cached credentials (or run the OAuth flow) and build the service."""
        if os.path.exists(self.CREDS_FILE):
            # 'with' guarantees the handle is closed even if parsing fails.
            with open(self.CREDS_FILE, "r") as credsfile:
                credentials = Credentials.new_from_json(credsfile.read())
        else:
            credentials = self.auth()
        self.credentials = credentials
        self.make_drive_service(credentials)

    def auth(self):
        """Run the interactive OAuth flow and cache the result in CREDS_FILE.

        Prints the authorization URL, reads the verification code from
        stdin, and returns the exchanged credentials object.
        """
        flow = OAuth2WebServerFlow(self.CLIENT_ID, self.CLIENT_SECRET,
                                   self.OAUTH_SCOPE, self.REDIRECT_URI)
        authorize_url = flow.step1_get_authorize_url()
        print('Go to the following link in your browser: ' + authorize_url)
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        code = read_line('Enter verification code: ').strip()
        credentials = flow.step2_exchange(code)
        # Close (and therefore flush) the cache file before returning.
        with open(self.CREDS_FILE, "w") as credsfile:
            credsfile.write(credentials.to_json())
        return credentials

    def make_drive_service(self, credentials):
        """Build the Drive v2 service over an HTTP client authorized by *credentials*."""
        http = credentials.authorize(httplib2.Http())
        self.drive_service = build('drive', 'v2', http=http)

    def file_id(self, f_obj):
        """Return the 'id' entry of a Drive file resource dict."""
        return f_obj['id']

    def file_name(self, f_obj):
        """Return the 'title' entry of a Drive file resource dict."""
        return f_obj['title']

    def file_dnld_url(self, f_obj):
        """Return the ODS export link of *f_obj*, or None when it has none.

        Returning None (rather than raising KeyError) is what lets
        download() report "No download URL!" for such files.
        """
        export_links = f_obj.get('exportLinks') or {}
        return export_links.get(self.ODS_MIME_TYPE)

    def _month_query(self, folder_id, year, mon):
        """Build the Drive search string for one month's calls/sms/contacts files."""
        title_clauses = ' or '.join(
            'title contains \'' + prefix + '_' + year + '_' + mon + '\''
            for prefix in ('calls', 'sms', 'contacts'))
        return ('\'' + folder_id + '\' in parents'
                + ' and trashed = false'
                + ' and (' + title_clauses + ')')

    def download(self, year, mon):
        """Download the archive files of the given month as .ods files.

        Looks up the 'My_Android_Archives' folder (exiting the process when
        it is not found), remembers its id in MY_ANDROID_ARCHIVES, then
        saves every matching file as '<title>.ods' in the working directory.

        Returns a list of dicts with keys 'dnld_url', 'file_name_only' and
        'file_id', one per downloaded file, or None as soon as any file
        cannot be downloaded.
        """
        print("Downloading from GDrive...")
        folder_query = 'title = \'My_Android_Archives\' and trashed = false'
        folders = self.drive_service.files().list(q=folder_query).execute()['items']
        if not folders:
            print("No archives found")
            sys.exit()
        self.MY_ANDROID_ARCHIVES = self.file_id(folders[0])

        # NOTE(review): only the first page of results is read; presumably a
        # month never exceeds the API's page size - confirm.
        month_query = self._month_query(self.MY_ANDROID_ARCHIVES, year, mon)
        file_items = self.drive_service.files().list(q=month_query).execute()['items']
        print("To download " + str(len(file_items)) + " files")

        files_downloaded = []
        for one_file in file_items:
            dnld_url = self.file_dnld_url(one_file)
            file_name_only = self.file_name(one_file)
            print("\t" + file_name_only + ".ods ...")
            if not dnld_url:
                print("Could not download " + file_name_only + ". No download URL!")
                return None
            resp, content = self.drive_service._http.request(dnld_url)
            if resp.status != 200:
                print("Could not download " + file_name_only + ". HTTP Error!")
                return None
            with open(file_name_only + ".ods", "wb") as dnld_file:
                dnld_file.write(content)
            files_downloaded.append({
                'dnld_url': dnld_url,
                'file_name_only': file_name_only,
                'file_id': self.file_id(one_file),
            })
        return files_downloaded

    def upload(self, file_names):
        """Upload each CSV in *file_names* into the archives folder.

        The Drive title and description are the file name without its
        extension; the upload asks Drive to convert the CSV.
        """
        print("Uploading " + str(len(file_names)) + " files...")
        for file_name in file_names:
            print("\t" + file_name)
            media_body = MediaFileUpload(file_name, mimetype='text/csv', resumable=True)
            file_name_without_extn = os.path.splitext(file_name)[0]
            body = {
                'title': file_name_without_extn,
                'description': file_name_without_extn,
                'mimeType': 'text/csv',
                'parents': [{'id': self.MY_ANDROID_ARCHIVES}],
            }
            uploaded = self.drive_service.files().insert(
                body=body, media_body=media_body, convert=True).execute()
            print("\tUploaded file " + file_name + " to Google Drive. Id: " + uploaded['id'])

    def trash(self, files_downloaded):
        """Trash every file described by the dicts returned from download()."""
        print("Trashing " + str(len(files_downloaded)) + " files...")
        for file_dict in files_downloaded:
            print("\t" + file_dict['file_name_only'])
            self.trash_single_file(file_dict['file_id'])

    def trash_single_file(self, file_id):
        """Move the Drive file with the given id to the trash."""
        self.drive_service.files().trash(fileId=file_id).execute()
class MyArchivesMerger:
    """Utility functions for merging my archives files.

    Tracks the CSV files produced by convert_files(), grouped by kind, and
    the merged output files listed in CREATED_FILES.
    """

    # Class-level defaults kept for backward compatibility; __init__ gives
    # every instance its own lists so state is never shared between mergers.
    CONVERTED_FILES = []
    CALLS_FILES = []
    SMS_FILES = []
    CONTACT_FILES = []
    CREATED_FILES = []

    def __init__(self):
        self._reset()

    def _reset(self):
        """Start from empty per-instance bookkeeping lists."""
        self.CONVERTED_FILES = []
        self.CALLS_FILES = []
        self.SMS_FILES = []
        self.CONTACT_FILES = []
        self.CREATED_FILES = []

    def convert_files(self, files):
        """Convert the downloaded .ods files to .csv and classify them.

        *files* is a list of dicts carrying a 'file_name_only' entry (the
        name without extension). Runs ./unoconv.py once for all files,
        deletes the .ods inputs, and fills CONVERTED_FILES plus the sorted
        CALLS_FILES / SMS_FILES / CONTACT_FILES lists by name prefix.

        Raises subprocess.CalledProcessError when the converter fails.
        """
        self._reset()
        if not files:
            return

        base_names = [file_dict['file_name_only'] for file_dict in files]
        print("Converting to csv...")
        # Argument list instead of a shell string: names containing spaces
        # or shell metacharacters are passed through verbatim.
        subprocess.check_call(
            ["./unoconv.py", "-f", "csv"] + [name + ".ods" for name in base_names])

        for name in base_names:
            if os.path.exists(name + ".ods"):
                os.remove(name + ".ods")
            csv_name = name + ".csv"
            self.CONVERTED_FILES.append(csv_name)
            if name.startswith("calls"):
                self.CALLS_FILES.append(csv_name)
            elif name.startswith("sms"):
                self.SMS_FILES.append(csv_name)
            elif name.startswith("contacts"):
                self.CONTACT_FILES.append(csv_name)

        self.CALLS_FILES.sort()
        self.SMS_FILES.sort()
        self.CONTACT_FILES.sort()

    def merge_files(self, files):
        """Concatenate the CSV *files* into one file and delete the inputs.

        The first row of every file after the first is skipped (treated as
        a repeated header). The output is written to the working directory
        and named '<first name without extension>_to<last name from its
        first underscore>'; it is appended to CREATED_FILES.
        """
        if not files:
            return

        begin_file = os.path.basename(files[0])
        end_file = os.path.basename(files[-1])
        out_file = begin_file[:-4] + "_to" + end_file[end_file.find("_"):]

        with open(out_file, "w") as csvoutfile:
            csvwriter = csv.writer(csvoutfile)
            for idx, in_name in enumerate(files):
                print("\t" + in_name)
                with open(in_name, "r") as csvinfile:
                    csvreader = csv.reader(csvinfile)
                    if idx > 0:
                        next(csvreader, None)  # drop the repeated header row
                    csvwriter.writerows(csvreader)

        # Inputs are removed only after the merged file is completely
        # written, so a failure midway never loses data.
        for in_name in files:
            os.remove(in_name)
        self.CREATED_FILES.append(out_file)

    def keep_only_latest(self, files):
        """Keep the last entry of *files* (recorded in CREATED_FILES); delete the rest."""
        if not files:
            return
        latest = files[-1]
        print("\t Keeping " + latest)
        self.CREATED_FILES.append(latest)
        for stale in files[:-1]:
            os.remove(stale)

    def process_files(self):
        """Merge call and SMS files; keep only the newest contacts file."""
        print("Processing " + str(len(self.CONVERTED_FILES)) + " files...")
        print("Processing " + str(len(self.CALLS_FILES)) + " call files...")
        self.merge_files(self.CALLS_FILES)
        print("Processing " + str(len(self.SMS_FILES)) + " sms files...")
        self.merge_files(self.SMS_FILES)
        print("Processing " + str(len(self.CONTACT_FILES)) + " contact files...")
        self.keep_only_latest(self.CONTACT_FILES)
"""
BEGIN MAIN
"""
def main(argv=None):
    """Download, merge, re-upload and clean up one month of archives.

    *argv* defaults to sys.argv and must hold the year and month as its
    second and third entries. Returns the process exit status: 2 for a
    usage error, 1 when download() reported a failure (returned None),
    0 otherwise - including when there was simply nothing to process.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Usage: python merge_monthly.py YYYY MM")
        return 2

    year, month = argv[1], argv[2]
    print("Merging " + year + " " + month)

    gdrive_mgr = GDriveMgr()
    files_downloaded = gdrive_mgr.download(year, month)
    if not files_downloaded:
        print("Error downloading files or no files to process.")
        return 1 if files_downloaded is None else 0

    archives_mgr = MyArchivesMerger()
    archives_mgr.convert_files(files_downloaded)
    archives_mgr.process_files()
    gdrive_mgr.upload(archives_mgr.CREATED_FILES)
    # Originals are trashed only after the merged files were uploaded.
    gdrive_mgr.trash(files_downloaded)
    for c_file in archives_mgr.CREATED_FILES:
        os.remove(c_file)
    return 0


# Guarded so that importing this module no longer contacts Google Drive.
if __name__ == "__main__":
    sys.exit(main())
"""
END MAIN
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment