Skip to content

Instantly share code, notes, and snippets.

@dchud
Created October 13, 2010 19:11
Show Gist options
Save dchud/624671 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import grp
import mimetypes
from optparse import OptionParser
import os
from pprint import pprint
import pwd
from stat import *
import sys
import time
import solr
# Base URL of the Solr instance that receives the file metadata documents.
SOLR_URL = 'http://127.0.0.1:8983/solr'
# Module-wide connection object, constructed once at import time; handle_file()
# uses it to add documents and both handle_file() and main() use it to commit.
solr_conn = solr.SolrConnection(SOLR_URL)
def utc_time(secs):
    """Render a POSIX timestamp as a UTC date-time string.

    secs -- seconds since the epoch, in any form time.gmtime() accepts.

    Returns text shaped like '1970-01-01T00:00:00Z'.
    """
    utc_struct = time.gmtime(secs)
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    return time.strftime(stamp_format, utc_struct)
# Module-level flag that handle_file() declares global and updates as part of
# its periodic-commit bookkeeping; it starts out False.
needs_commit = False
# Minimum number of seconds between the periodic commits issued by handle_file.
COMMIT_INTERVAL_SECS = 10

# time.time() of the most recent periodic commit (or of the first add);
# None until handle_file has indexed its first document.
_last_commit_time = None


def _account_name(lookup, numeric_id):
    """Return the name lookup(numeric_id) reports, or str(numeric_id).

    pwd.getpwuid and grp.getgrgid raise KeyError when the id has no entry
    (for example a file owned by a deleted account); falling back to the
    number keeps one orphaned file from aborting the whole crawl.
    """
    try:
        return lookup(numeric_id)[0]
    except KeyError:
        return str(numeric_id)


def handle_file(path, filename, verbose):
    """Index one file's metadata in Solr and return the document that was sent.

    path     -- directory containing the file.
    filename -- name of the file inside path.
    verbose  -- when true, print the directory being handled and the
                document before it is added.

    The file is examined with os.lstat (symlinks are not followed).  The
    returned dict has the keys fullname, path, filename, base, extension,
    accessdt, editdt, createdt, perms, size, owner, group and mimetype.
    mimetype is None when mimetypes cannot guess a type from the name.

    A commit is issued once at least COMMIT_INTERVAL_SECS seconds have
    passed since the previous one; the module-level needs_commit flag is
    True while added documents are still uncommitted.

    Raises OSError if the file cannot be stat'ed.
    """
    global needs_commit, _last_commit_time
    full_name = os.path.join(path, filename)
    if verbose:
        print('HANDLE: %s' % (path,))
    st = os.lstat(full_name)
    root, ext = os.path.splitext(filename)
    d = {}
    # Use the os.path.join result so a trailing separator on path does not
    # produce a doubled slash in the indexed name.
    d['fullname'] = full_name
    d['path'] = path
    d['filename'] = filename
    d['base'] = root
    d['extension'] = ext[1:]  # skip the dot!
    d['accessdt'] = utc_time(st.st_atime)
    d['editdt'] = utc_time(st.st_mtime)
    # NOTE: st_ctime is the inode-change time on Unix, not a creation time.
    d['createdt'] = utc_time(st.st_ctime)
    d['perms'] = st.st_mode  # FIXME: raw st_mode, includes file-type bits
    d['size'] = st.st_size
    d['owner'] = _account_name(pwd.getpwuid, st.st_uid)
    d['group'] = _account_name(grp.getgrgid, st.st_gid)
    d['mimetype'] = mimetypes.guess_type(full_name)[0]
    if verbose:
        pprint(d)
    solr_conn.add(**d)
    needs_commit = True
    # Commit by elapsed time.  Testing "current second % 10 == 0" only fires
    # when a call happens to land on such a second, and then fires over and
    # over for as long as that second lasts.
    now = time.time()
    if _last_commit_time is None:
        _last_commit_time = now
    elif now - _last_commit_time >= COMMIT_INTERVAL_SECS:
        print('\n\n\tCOMMIT\n')
        solr_conn.commit()
        needs_commit = False
        _last_commit_time = now
    return d
def walk(path, verbose=False):
    """Pass every file found under path to handle_file.

    path    -- directory tree to crawl (relative or absolute).
    verbose -- when true, print the starting path and each file and
               directory name as it is encountered; also forwarded to
               handle_file.

    Each file is handled exactly once, with the directory that os.walk
    reports for it.
    """
    if verbose:
        print(path)
    # os.walk already descends into every subdirectory and yields each
    # directory's own path as `root` (which already starts with `path`), so
    # re-joining with `path` or recursing here would mangle relative paths
    # and index nested files repeatedly.
    for root, dirs, files in os.walk(path, topdown=True):
        for filename in files:
            if verbose:
                print('file: %s' % (filename,))
            handle_file(root, filename, verbose)
        if verbose:
            for dirname in dirs:
                print('dir: %s' % (dirname,))
def main():
    """Command-line entry point: crawl each PATH argument, then commit.

    Options:
      -v, --verbose   show verbose status while crawling.

    Exits through parser.error (usage message on stderr, status 2) when no
    PATH is given, since the usage line requires at least one.  Otherwise
    walks every path, issues a final Solr commit and exits via sys.exit().
    """
    usage = 'usage: %prog [options] PATH1 [PATH2 PATH3 ...]'
    parser = OptionParser(usage=usage)
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', default=False,
                      help='show verbose status')
    options, args = parser.parse_args()
    if not args:
        parser.error('at least one PATH is required')
    for path in args:
        walk(path, options.verbose)
    # Flush whatever handle_file added since its last periodic commit.
    solr_conn.commit()
    sys.exit()
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment