Last active
February 10, 2016 20:54
-
-
Save mpacer/02326b51a7e0009c2c04 to your computer and use it in GitHub Desktop.
Prose clock: an extensible framework for regularly clocking how much prose (in tex) you've written + some other stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import datetime
import os
import os.path
import re
from collections import Counter
from difflib import SequenceMatcher

import pandas as pd
def map_feat_to_vals(features, vals, f=None, words=None, contents=None):
    """Pair each feature name with its value and return the mapping.

    Args:
        features: Feature (column) names, in order.
        vals: Values for those features, in the same order.
        f: Unused; accepted so existing call sites keep working.
        words: Unused; accepted so existing call sites keep working.
        contents: Unused; accepted so existing call sites keep working.

    Returns:
        A dict mapping ``features[i]`` to ``vals[i]``.

    Raises:
        ValueError: If ``features`` and ``vals`` differ in length. ``zip``
            on its own would silently drop the unmatched tail.
    """
    features = list(features)
    vals = list(vals)
    if len(features) != len(vals):
        raise ValueError(
            "expected %d values (one per feature), got %d"
            % (len(features), len(vals))
        )
    return dict(zip(features, vals))
def test_pandas_io_check(pd1, pd2):
    """Report whether two pandas objects are equal according to ``equals``.

    Used to confirm that a table written to disk and read back matches the
    table that was written.

    Args:
        pd1: The object whose ``equals`` method is called.
        pd2: The object it is compared against.

    Returns:
        The result of ``pd1.equals(pd2)``.
    """
    return pd1.equals(pd2)


# Despite the ``test_`` prefix this is a runtime helper that takes arguments,
# not a pytest test; opt it out of collection so pytest does not try to
# resolve ``pd1``/``pd2`` as fixtures.
test_pandas_io_check.__test__ = False
def test_valid_dict_encoding(change_table, bag_o_words,
                             sorted_word_set="unique_words",
                             sorted_word_counts="bag_o_words_vals"):
    """Check that the newest row's word/count columns rebuild ``bag_o_words``.

    The last entry of ``change_table`` stores the word counts as two parallel
    sequences; zipping them back together must reproduce the original dict.

    Args:
        change_table: Sequence of row mappings; only the last one is read.
        bag_o_words: The word -> count dict the row was built from.
        sorted_word_set: Key of the row entry holding the words.
        sorted_word_counts: Key of the row entry holding the counts.

    Returns:
        True if the rebuilt dict equals ``bag_o_words``, else False.

    Raises:
        IndexError: If ``change_table`` is empty.
        KeyError: If the last row lacks either key.
    """
    latest_row = change_table[-1]
    rebuilt = dict(zip(latest_row[sorted_word_set],
                       latest_row[sorted_word_counts]))
    return rebuilt == bag_o_words


# Despite the ``test_`` prefix this is a runtime helper that takes arguments,
# not a pytest test; opt it out of collection.
test_valid_dict_encoding.__test__ = False
# Matches any directory path containing ".ipynb" (presumably Jupyter
# checkpoint folders); such directories are not searched for .tex files.
blah = re.compile(r'\.ipynb.*$')


def find_tex_files(root, skip_pattern):
    """Collect the paths of all ``.tex`` files below ``root``.

    The tree is walked recursively with symbolic links followed.

    Args:
        root: Directory at which the walk starts.
        skip_pattern: Compiled regex; a directory whose path it matches
            (via ``search``) contributes no files.

    Returns:
        A list of file paths, each formed by joining the directory path
        reported by ``os.walk`` with the file name.
    """
    tex_paths = []
    for dirname, _subdirs, filenames in os.walk(root, followlinks=True):
        if skip_pattern.search(dirname):
            continue
        tex_paths.extend(
            os.path.join(dirname, name)
            for name in filenames
            if name.endswith(".tex")
        )
    return tex_paths


# Every .tex file under the current working directory.
f_path_list = find_tex_files(os.getcwd(), blah)
# One timestamp shared by every row recorded during this run.
time = datetime.datetime.now()
# Rows (feature -> value dicts) for files that changed since the last run.
change_table = []
# Column names of the stored table, in storage order.
features = [
    'f_name',
    'time',
    'num_words',
    'num_unique_words',
    'unique_words',
    'bag_o_words_vals',
    'file_str',
    'word_list_words',
    'file_path',
]
db_name = 'db.pkl'


def load_or_create_db(path, columns):
    """Load the pickled history table, creating an empty one if it is missing.

    Only a missing file triggers creation. Any other read failure propagates,
    so a history file that exists but cannot be read is never overwritten
    with an empty table.

    Args:
        path: Location of the pickle file.
        columns: Column names for a newly created, empty table.

    Returns:
        A ``(table, existed)`` tuple: the DataFrame, and whether it was
        loaded from an existing file (True) or freshly created (False).
    """
    try:
        return pd.read_pickle(path), True
    except FileNotFoundError:
        empty = pd.DataFrame(columns=columns)
        empty.to_pickle(path)
        return empty, False


db, flag_not_empty = load_or_create_db(db_name, features)
def _count_words(contents):
    """Split ``contents`` into words and count each distinct word.

    Splitting happens on single spaces only, after double quotes are removed
    and spaces are placed around tabs and after newlines; empty tokens are
    discarded.

    Args:
        contents: Full text of one file.

    Returns:
        A ``(words, bag_o_words)`` tuple: the sorted list of all tokens, and
        a dict mapping each distinct token to its number of occurrences in
        that list, with keys in sorted order.
    """
    tokens = (contents.replace(" \n", "\n ")
                      .replace("\t", " \t ")
                      .replace('"', "")
                      .split(" "))
    words = sorted(token for token in tokens if token)
    # Counter counts whole tokens; str.count on the raw text would also
    # count substrings (e.g. "a" inside "and").
    return words, dict(Counter(words))


for f in f_path_list:
    # NOTE(review): no encoding is given, so the platform default applies;
    # confirm that suits the .tex files being tracked.
    with open(f, 'r') as fh:
        contents = fh.read()

    if flag_not_empty:
        # Match on the full path: files that share a base name in different
        # directories must not be compared against each other.
        previous = db[db['file_path'] == f]
        if not previous.empty:
            last_saved = previous.sort_values(by='time')['file_str'].iloc[-1]
            if contents == last_saved:
                # Unchanged since the most recent stored version.
                continue

    words, bag_o_words = _count_words(contents)
    unique_words = sorted(bag_o_words)
    # Order must match `features`.
    vals_for_feats = [
        os.path.basename(f),
        time,
        len(words),
        len(unique_words),
        unique_words,
        [bag_o_words[word] for word in unique_words],
        contents,
        words,
        f,
    ]
    change_table.append(
        map_feat_to_vals(features, vals_for_feats, f, words, contents)
    )
if change_table:
    # Build the new rows with columns in the order given by `features`.
    df = pd.DataFrame(change_table)[features]

    # Before touching the real database, confirm that the new rows survive a
    # save/load round trip in the storage format. Any other format must pass
    # test_pandas_io_check as well; CSV and JSON were tried and did not.
    test_invertibility = 'test_db_file.pkl'
    df.to_pickle(test_invertibility)
    try:
        d2 = pd.read_pickle(test_invertibility)
    finally:
        # Remove the scratch file even when reading it back fails.
        os.remove(test_invertibility)
    # Raised explicitly (not via `assert`) so the check still runs under
    # `python -O`.
    if not test_pandas_io_check(df, d2):
        raise AssertionError(
            "new rows changed after a pickle round trip; database not updated"
        )

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    db = pd.concat([db, df], ignore_index=True)
    db.to_pickle(db_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use /bin/bash to run commands, overriding the default set by cron.
SHELL=/bin/bash
# Change PATH to be able to access `sysctl` and other utilities.
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
# Run proseclock on minute 42 of every hour.
# Replace `~/your/proseclock/path/here/` with the path to the directory that you put proseclock in.
# Replace `~/your/executable/python3` with the path to `python3`; try `which python3` to get this path.
# For me that path was `/usr/local/bin/python3`.
42 * * * * cd ~/your/proseclock/path/here/ && ~/your/executable/python3 proseclock.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment