Skip to content

Instantly share code, notes, and snippets.

@ael-code
Created June 3, 2016 13:42
Show Gist options
  • Save ael-code/98eed9d336ddafc504768bba03348d5f to your computer and use it in GitHub Desktop.
Save ael-code/98eed9d336ddafc504768bba03348d5f to your computer and use it in GitHub Desktop.
import json
import string
import sys
import re, os
import logging
from archivant import Archivant
# Log everything at DEBUG by default, but cap the "elasticsearch" and
# "urllib3" loggers at WARNING.
logging.basicConfig(level=logging.DEBUG)
for _capped_logger in ("elasticsearch", "urllib3"):
    logging.getLogger(_capped_logger).setLevel(logging.WARNING)

# Logger used for this script's own messages.
log = logging.getLogger("importer")
# Settings passed to Archivant when the archive handle is created.
defaults = dict(
    FSDB_PATH="/mnt/data/libreantDB/fsdb/",
    ES_HOSTS=None,
    ES_INDEXNAME="propirata_v1",
)

# Alternative settings pointing at a temporary store and a "test" index.
test = dict(
    FSDB_PATH="/tmp/fsdb/",
    ES_HOSTS=None,
    ES_INDEXNAME="test",
)
# Archive handle used by the duplicate check and the insert loop below.
# It is configured with `defaults`; the `test` settings are not used here.
ar = Archivant(defaults)
# Directory to import: the first command-line argument, or the current
# working directory when no argument was given.
try:
    dir_path = sys.argv[1]
except IndexError:
    dir_path = os.getcwd()


def read_from_path(dir_path=dir_path):
    """Walk *dir_path* and yield ``(metadata, attachments)`` for each book file.

    Only files whose extension (the text after the last dot, compared
    case-sensitively) is ``pdf``, ``epub`` or ``doc`` are yielded. Files whose
    name starts with a dot are ignored, and files without any dot are skipped
    with a debug message.

    The default for *dir_path* is the module-level ``dir_path`` as it was when
    this function was defined.

    Each yielded ``metadata`` dict contains:

    * ``tagN`` -- one entry per directory level between *dir_path* and the
      file; numbering starts at 1 for the first level below *dir_path*.
    * ``title`` -- the file name without its extension.
    * ``original_path`` -- the file's path relative to *dir_path*, always
      starting with ``/``.
    * ``_language`` -- always ``"it"``.

    ``attachments`` is a one-element list holding ``{'file': <path on disk>}``.
    """
    # Strip every trailing slash (not just one) so the prefix removed below
    # always has the same shape and `original_path` / tag numbering stay
    # consistent however the directory was spelled.
    dir_path = dir_path.rstrip("/")
    prefix_len = len(dir_path)
    for path, _dirnames, files in os.walk(dir_path):
        # Path of the current directory relative to dir_path: "" for the
        # root itself, "/sub/dir" for anything below it.
        rel_dir = path[prefix_len:]
        for _file in files:
            if _file.startswith('.'):
                continue
            # str.rsplit instead of string.rsplit: the module-level function
            # no longer exists in Python 3.
            splitted = _file.rsplit('.', 1)
            if len(splitted) != 2:
                log.debug("skipping: '{}'".format(os.path.join(path, _file)))
                continue
            filename, ext = splitted
            if ext not in ("pdf", "epub", "doc"):
                continue
            metadata = {}
            # rel_dir starts with "/", so index 0 is always the empty string
            # and real directory names are numbered from 1.
            for i, tag in enumerate(rel_dir.split("/")):
                if tag != '':
                    metadata["tag" + str(i)] = tag
            metadata['title'] = filename
            metadata['original_path'] = rel_dir + "/" + _file
            metadata['_language'] = "it"
            yield metadata, [{'file': os.path.join(path, _file)}]
def already_exists(metadata):
    """Return True when the archive already holds a volume with this path.

    Searches the archive for ``metadata['original_path']`` as an exact quoted
    phrase on the ``original_path`` field and reports whether the search
    returned at least one hit.
    """
    # Escape backslashes and double quotes so a path containing them cannot
    # close the quoted phrase early and corrupt the query.
    # NOTE(review): assumes user_search() takes Lucene query-string syntax,
    # where a backslash escapes the next character -- confirm against archivant.
    escaped_path = metadata['original_path'].replace('\\', '\\\\').replace('"', '\\"')
    res = ar._db.user_search('original_path:"{}"'.format(escaped_path))
    return res['hits']['total'] > 0
# Insert every book found by read_from_path() into the archive, skipping
# those whose original_path is already present.
for metadata, attachments in read_from_path():
    if already_exists(metadata):
        log.debug("Skipping already existing file: '{}'".format(metadata['original_path']))
        continue
    # Logged only after the duplicate check, so skipped files are no longer
    # reported as being added.
    log.debug("Adding file: '{}'".format(metadata['original_path']))
    ar.insert_volume(metadata, attachments)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment