Skip to content

Instantly share code, notes, and snippets.

@cmungall
Created June 12, 2019 01:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save cmungall/ebf0a592829af4ef72c494861a0817f4 to your computer and use it in GitHub Desktop.
Save cmungall/ebf0a592829af4ef72c494861a0817f4 to your computer and use it in GitHub Desktop.
Generate an HTML index of a synced Google Drive folder. For me, this is faster to search and navigate than the clunky, slow Google web interface. Pretty hacky code, but it works for me.
#!/usr/bin/env python3
import os
import re
import logging
import click
# Names that must never be indexed.
excludes = {'single_files', 'Icon', '.svn', '.git'}

# One unit of indentation for the generated output (originally for markdown).
SPC = ' '

# Pulls the https URL out of the JSON content of a gdoc index file.
pat = re.compile(r'"url": "(https:\S+)",')
@click.command(help=
"""
generate index of a synced google drive folder.
First you need to sync your google drive to disk using "Backup and Sync from Google"
This will create a folder like "~/Google Drive"
This script will crawl that folder and make an HTML index that (YMMV) is
faster to search and navigate than the web interface.
"""
)
@click.option('-t', '--outformat', default='html', help='md or html')
@click.option('-d', '--dir', default= "/Users/cjm/Google Drive", help='location of synced gdrive folder')
def main(outformat, dir):
    """Walk ``dir`` and print a nested index of its folders and files to stdout.

    Args:
        outformat: ``'html'`` emits nested ``<ul>``/``<li>`` markup; any other
            value emits a markdown bullet list.
        dir: Root of the synced folder to crawl.

    Each folder links to a Google Drive search for its name. ``.gdoc`` and
    ``.gsheet`` files link to the URL found on the first line of the file;
    every other file (or a stub without a URL) links to its local
    ``file://`` path.
    """
    # Imported locally: the module-level name ``html`` is a helper function in
    # this file, so only the needed functions are pulled in here.
    from html import escape
    from urllib.parse import quote

    rootpath = dir.split(os.sep)
    len_rp = len(rootpath)
    is_html = outformat == 'html'
    # Depth of the deepest <ul> currently open; -1 means none is open yet.
    last_lp = -1
    for root, subdirs, files in os.walk(dir):
        # Prune excluded directories in place so os.walk never descends into
        # them (e.g. avoids crawling the whole of a .git directory).
        # NOTE(review): stripping CR/LF presumably targets macOS "Icon\r"
        # entries; the original strip set contained an invalid escape.
        subdirs[:] = [d for d in subdirs if d.strip('\r\n') not in excludes]
        path = root.split(os.sep)
        # Still needed for the start directory itself, whose own path
        # components may contain an excluded name.
        if any(x.strip('\r\n') in excludes for x in path):
            continue
        pathstr = "/".join(path)
        # Anchor id: the path relative to the root, reduced to alphanumerics.
        rlink = re.sub('[^0-9a-zA-Z]+', '', pathstr.replace(dir, ""))
        lp = len(path) - len_rp
        bn = os.path.basename(root)
        # Percent-encode the folder name so spaces, '&', '#', ... survive in
        # the query string.
        url = 'https://drive.google.com/drive/u/0/search?q={}'.format(quote(bn))
        # ugly logic for switching between HTML and Markdown.
        # Original idea was to support MD and use pandoc, but pandoc too slow
        if is_html:
            # Open or close lists until the nesting matches this folder's depth.
            while last_lp < lp:
                last_lp += 1
                open_ul(last_lp)
            while last_lp > lp:
                close_ul(last_lp)
                last_lp -= 1
            print('{}<span><li><a name="{}"/><a href="{}">{}</a>[<a href="file://{}/">local</a>] <a href="#{}">[*]</a> <a href="#{}-files">--></a></li></span>\n'.format(
                (lp+1) * SPC, rlink, escape(url), escape(bn), escape(pathstr), rlink, rlink))
            open_ul(lp+1)
        else:
            print('{} * [{}]({})'.format(lp * SPC, bn, url))
        for file in files:
            url = None
            # Leading dots matter: a bare 'gsheet' suffix would also match
            # unrelated names that merely end in those letters.
            if file.endswith(('.gdoc', '.gsheet')):
                with open(os.path.join(root, file), encoding='utf-8', errors='replace') as s:
                    # Only the first line is searched for the URL.
                    line = s.readline()
                m = pat.search(line)
                if m:
                    url = m.group(1)
                else:
                    logging.error("No URL in: {}".format(line))
            if not url:
                url = 'file://{}/{}'.format(pathstr, file)
            if is_html:
                print('{}<li><a href="{}">{}</a></li>\n'.format((lp+2) * SPC, escape(url), escape(file)))
            else:
                print('{}* [{}]({})'.format((lp+1) * SPC, file, url))
        if is_html:
            print('<a name="{}-files"/>'.format(rlink))
            close_ul(lp+1)
        last_lp = lp
    if is_html:
        # Close every list that is still open so the emitted HTML is balanced.
        while last_lp >= 0:
            close_ul(last_lp)
            last_lp -= 1
# Hand-rolled tag printers; no HTML library is used.
def open_ul(depth):
    """Print an opening ``<ul>`` tag at nesting level ``depth``."""
    html(el='ul', depth=depth)
def close_ul(depth):
    """Print a closing ``</ul>`` tag at nesting level ``depth``."""
    html(el='/ul', depth=depth)
def html(el, depth):
    """Print the tag ``<el>`` preceded by ``depth`` repetitions of ``SPC``."""
    indent = depth * SPC
    print('{indent}<{tag}>'.format(indent=indent, tag=el))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment