Skip to content

Instantly share code, notes, and snippets.

@TomAugspurger
Created February 13, 2014 15:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TomAugspurger/8976751 to your computer and use it in GitHub Desktop.
Save TomAugspurger/8976751 to your computer and use it in GitHub Desktop.
script to organize paper repository
import os
import pathlib
import re
import shutil
import subprocess
import sys
class Parser(object):
    """File a paper, or a directory of papers, into a per-author repository.

    File names are expected to look like
    ``"Author1, Author2 & Author3 (2010) - Title.pdf"``: a list of authors,
    a parenthesised year, then the title.  Each paper is copied into the
    first author's directory under ``repo``; every other author's directory
    gets a symlink to that copy, and (by default) the original file is
    replaced by a symlink to it as well.

    Attributes set by ``__init__``: ``repo``, ``path``, ``exists``,
    ``is_full``, ``is_dir``, ``is_file`` and ``added`` (the list of source
    paths that were successfully filed by ``run``).
    """

    # A space, then a parenthesised run of up to four digits/hyphens,
    # e.g. " (2010)".  Separates the author list from the title.
    _YEAR = re.compile(r' \([\d-]{0,4}\)')
    # Separators between author names.  ",? and " is listed first so that
    # "A, B, and C" does not leave a stray "and" glued to the last author,
    # and the trailing space keeps it from matching inside a name.
    _AUTHOR_SEP = re.compile(r',? and | & |, ')

    def __init__(self, path,
                 repo=pathlib.PosixPath('/Users/tom/Economics/Papers')):
        """Validate ``path`` and record what kind of thing it is.

        Parameters
        ----------
        path : str or pathlib.PosixPath
            A paper, or a directory containing papers.
        repo : str or pathlib.PosixPath
            Root directory holding one sub-directory per author.

        Raises
        ------
        OSError
            If ``path`` does not exist.
        """
        self.repo = self.path_parse(repo)
        self.path = self.path_parse(path)
        self.exists = self.check_existance(self.path)
        # Use the coerced path so that a plain string argument works too.
        self.is_full = self.check_full(self.path)
        self.check_type(self.path)
        self.added = []

    def path_parse(self, path):
        """Ensure a common point of entry to the functions.

        Returns ``path`` unchanged when it already is a
        ``pathlib.PosixPath``; otherwise wraps it in one.
        """
        if isinstance(path, pathlib.PosixPath):
            return path
        return pathlib.PosixPath(path)

    def check_existance(self, path):
        """Return True if ``path`` exists, otherwise raise ``OSError``."""
        if not path.exists():
            raise OSError('The supplied path does not exist.')
        return True

    def check_type(self, path):
        """Set ``self.is_dir`` / ``self.is_file`` according to ``path``."""
        self.is_dir = path.is_dir()
        self.is_file = not self.is_dir

    def check_full(self, path):
        """Return whether the parent's text occurs inside the path's text.

        NOTE(review): judging by the attribute name this is presumably
        meant to detect a "full" path.  As written it is true for any path
        with a directory component, and for a bare name only when that name
        contains a ".".  The test is kept as-is so existing invocations keep
        working; only the return value is normalised to a bool.
        """
        path = self.path_parse(path)
        return path.parent.as_posix() in path.as_posix()

    def parser(self, f):
        """Find the authors and paper name encoded in a file name.

        ``f`` is a full path.  On success stores ``self.authors`` and
        ``self.paper`` and returns ``(authors, paper)``.  When the name has
        no year marker or no authors, prints a "Missed on" message and
        returns None.  ``self.file_name`` is set in both cases.
        """
        f = self.path_parse(f)
        file_name = f.name
        self.file_name = file_name

        # Split on the first year marker only, so that a title which itself
        # contains "(2008)" is still accepted.
        pieces = self._YEAR.split(file_name, maxsplit=1)
        if len(pieces) == 2:
            all_authors, paper = pieces
            # Drop the " - " that separates the year from the title.
            paper = paper.lstrip(' - ')
            authors = [name.strip('& ')
                       for name in self._AUTHOR_SEP.split(all_authors)]
            authors = [name for name in authors if name]
            if authors:
                self.authors, self.paper = authors, paper
                return (authors, paper)

        print('Missed on {}'.format(file_name))
        return None

    def make_dir(self, authors):
        """Create one directory per author under the repository root.

        Directories that already exist are left alone.  Any other failure
        (for instance a missing repository root) is raised as ``OSError``
        rather than ignored, because the copy that follows could not
        succeed either.
        """
        for author in authors:
            (self.repo / author).mkdir(exist_ok=True)

    @staticmethod
    def _symlink(target, link):
        """Create ``link`` pointing at ``target``; keep an existing link."""
        try:
            os.symlink(str(target), str(link))
        except FileExistsError:
            pass

    def copy_and_link(self, authors, f, replace=True):
        """Copy ``f`` to the first author's directory and link the rest.

        Parameters
        ----------
        authors : list of str
            Author names; the first one owns the real copy.
        f : pathlib.PosixPath
            The paper to file.
        replace : bool
            When true, delete ``f`` after a successful copy and put a
            symlink to the repository copy in its place.

        Raises
        ------
        OSError
            If the copy, the removal of ``f`` or the replacement link
            fails.  ``f`` is never removed unless the copy succeeded.
        """
        if not authors:
            return
        file_name = f.name
        target = self.repo / authors[0] / file_name

        # shutil.copy raises on failure, unlike a shelled-out "cp" whose
        # exit status was never inspected.
        shutil.copy(str(f), str(target))

        for author in authors[1:]:
            self._symlink(target, self.repo / author / file_name)

        if replace:
            f.unlink()
            self._symlink(target, f)

    def main(self, f):
        """Parse, create directories for, and file a single paper.

        Returns True when the paper was filed and False when its name
        could not be parsed.
        """
        parsed = self.parser(f)
        if parsed is None:
            return False
        authors, _paper = parsed
        self.make_dir(authors)
        self.copy_and_link(authors, f)
        return True

    def run(self):
        """File ``self.path`` (or every eligible entry in it) and report.

        In directory mode, hidden entries and symlinks are skipped, and a
        failure on one entry is reported without stopping the others.
        Finally prints the name of every path that was filed.
        """
        if self.exists and self.is_full:
            if self.is_dir:
                # Snapshot the listing: filing an entry replaces it with a
                # symlink, which changes the directory being walked.
                for f in sorted(self.path.iterdir()):
                    if f.name.startswith('.') or f.is_symlink():
                        continue
                    try:
                        if self.main(f):
                            self.added.append(f)
                    except Exception:
                        print('Failed on %s' % str(f))
            else:
                if self.main(self.path):
                    self.added.append(self.path)
        for item in self.added:
            print(item.parts[-1])
if __name__ == "__main__":
    # Usage: script PATH [REPO]
    #   PATH is a paper or a directory of papers;
    #   REPO is the repository root and falls back to the default below.
    if len(sys.argv) < 2:
        sys.exit('usage: {} PATH [REPO]'.format(sys.argv[0]))
    p = pathlib.PosixPath(sys.argv[1])
    # Check the argument count explicitly instead of catching every
    # exception raised while building the path.
    if len(sys.argv) > 2:
        repo = pathlib.PosixPath(sys.argv[2])
    else:
        repo = pathlib.PosixPath('/Users/tom/Economics/Papers')
    print(p)
    obj = Parser(p, repo)
    obj.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment