Skip to content

Instantly share code, notes, and snippets.

@dopuskh3
Created October 5, 2009 10:35
Show Gist options
  • Save dopuskh3/202042 to your computer and use it in GitHub Desktop.
Save dopuskh3/202042 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2.5
# vim:set fileencoding=utf-8
import os
import sys
import tagpy
from lucene import \
Document, IndexSearcher, FSDirectory, MultiFieldQueryParser, QueryParser, StandardAnalyzer, IndexWriter, \
StringReader, IndexReader, MoreLikeThis, Term, TermQuery, BooleanQuery,BooleanClause , Field, initVM, CLASSPATH
def indexFilesFromPath(dir, idxPath):
initVM(CLASSPATH)
writer = IndexWriter(idxPath, StandardAnalyzer(), True)
count = 1
for root, dirs, files in os.walk(dir):
for file in files:
#print "- "+str(root)+" "+file
sfx = file[-3:]
if sfx.lower() not in [ "mp3","mp4", "ogg", "flac" ]:
continue
filepath = os.path.join(root, file)
try:
doc = Document()
f = tagpy.FileRef(filepath)
title = unicode(f.tag().title)
artist = unicode(f.tag().artist)
album = unicode(f.tag().album)
genre = unicode(f.tag().genre)
path = unicode(filepath)
doc.add(Field("title", title, Field.Store.YES, Field.Index.TOKENIZED))
doc.add(Field("artist", artist, Field.Store.YES, Field.Index.TOKENIZED))
doc.add(Field("album",album, Field.Store.YES, Field.Index.TOKENIZED))
doc.add(Field("genre",genre, Field.Store.YES, Field.Index.TOKENIZED))
doc.add(Field("all", album + u" " + artist + u" " + title, Field.Store.YES, Field.Index.TOKENIZED))
doc.add(Field("path", path, Field.Store.YES, Field.Index.UN_TOKENIZED))
print "\r %5d files indexed"%count, # (%s/%s/%s)..."%(count, f.tag().artist, f.tag().album. f.tag().title)
count+=1
writer.addDocument(doc)
except Exception, e:
#print str(e)
# print "+"
continue
print "Done %d"%count
writer.optimize()
writer.close()
def search( searchRequest, idxPath):
initVM(CLASSPATH)
fsDir = FSDirectory.getDirectory(idxPath, False)
searcher = IndexSearcher(fsDir)
language = StandardAnalyzer()
queryp = QueryParser('all', language)
query = queryp.parse(searchRequest) # "title", language)
hits = searcher.search(query)
print "# Found %d hits for %s"%(len(hits), searchRequest)
for i in range(0, hits.length()):
doc = hits.doc(i)
#print u"# %s - %s - %s"%(doc.getField('artist'), doc.getField('album'), doc.getField('title'))
print u"%s"%unicode(doc.getField('path').stringValue())
if __name__ == "__main__":
    # Command-line entry point:
    #   <script> index  <music-dir> <index-dir>
    #   <script> search <query>     <index-dir>
    # Validate up front so a missing argument or an unknown command yields
    # a usage message and a non-zero exit status instead of a bare
    # IndexError or a silent no-op.
    if len(sys.argv) < 4 or sys.argv[1] not in ("index", "search"):
        sys.stderr.write(
            "usage: %s index <music-dir> <index-dir>\n"
            "       %s search <query> <index-dir>\n" % (sys.argv[0], sys.argv[0]))
        sys.exit(2)
    if sys.argv[1] == "index":
        indexFilesFromPath(sys.argv[2], sys.argv[3])
    else:
        search(sys.argv[2], sys.argv[3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment