Skip to content

Instantly share code, notes, and snippets.

@conchoecia
Created October 4, 2016 22:58
Show Gist options
  • Save conchoecia/d7f1d5c811965172d03c452ad7ef8321 to your computer and use it in GitHub Desktop.
Save conchoecia/d7f1d5c811965172d03c452ad7ef8321 to your computer and use it in GitHub Desktop.
Parses out a lab notebook in md format into subproject files for easy tracking.
#!/usr/bin/env python3
# script: make_notebooks.py
# author: darrin t schultz
# date : 20161004
# make_notebooks.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# make_notebooks.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with make_notebooks.py. If not, see <http://www.gnu.org/licenses/>.
# This script will compile a project notebook for each project based
# on a list of input files.
"""
This script parses out a lab notebook in md format with date headings and
subprojects. This is targeted at github-flavored markdown.
- The lab notebook must be in the following format, where the line
"## Project List:" is followed by "### projectname" for every project that your
notebook contains.
- You must have a file called 'notebooklist.txt' that has either the full paths
or filenames for every notebook markdown file. For example, for the example
notebook we have called 'exampleNotebook.md', the 'notebooklist.txt' file
looks like this:
```
exampleNotebook.md
```
- Here is 'exampleNotebook.md'
```
# filename: exampleNotebook.md (doesn't matter what is here)
## Project List:
### projectA
### projectB
## (whatever you want here) 20160915
### projectA
- Today I worked on ProjectA
- I made a lot of progress
- It was great
### projectB
- I didn't do anything today. Oh well.
## (whatever you want here) 20160920
- This text should be ignored
### projectB
- I took 4 days off of work and didn't get anything done.
- All of my cultures dried up.
```
- After running this script in the directory where you would like the compiled
notebooks, you will have a `compiled_notebooks/projectA.md` and
`compiled_notebooks/projectB.md` file.
************ How this script works *******************
1. Get a list of all the tracked files in the current git repo.
2. Get a list of all the files to look at to compile the notebook.
3. Get a list of all the project names from all the notebooks. Keyword in
notebooks is: "## Project List:"
4. Make an object for a project
5. Loop through all the dates and look for a "###" with a project name
- Take the project name and add that date and entry to the dictionary for
that project's object
6. When adding an entry to each dict, check for media references and add those
to git if they aren't tracked. (Planned; not implemented yet.)
7. For each project, if a referenced file isn't tracked by the git project,
add it. (Planned; not implemented yet.)
important names:
<tracked> is all of the files that are tracked in the git repo currently
<notebooks> is all the notebook files to compile from
"""
import os
import sys
import subprocess
import datetime
def get_project_list(filename):
    """Return the project names declared in a notebook's project list.

    The list starts on the line after "## Project List:" and ends at the
    next line whose first token is "##" (or at end of file). Within the
    list, the second whitespace-separated token of every non-blank line is
    taken as a project name, e.g. "### projectA" yields "projectA".

    Args:
        filename: path of the markdown notebook to scan.

    Returns:
        A list of project names in file order. The list is empty when the
        notebook has no "## Project List:" line.
    """
    projectNames = []
    with open(filename, "r") as f:
        collecting = False
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines never start, end, or extend the list.
                continue
            if collecting:
                if tokens[0] == "##":
                    # The next level-2 heading closes the project list.
                    break
                if len(tokens) > 1:
                    # Lines such as a bare "###" carry no name; skip them
                    # instead of failing on the missing token.
                    projectNames.append(tokens[1])
            elif line.strip() == "## Project List:":
                collecting = True
    # Always return a list, including when the project list runs to the end
    # of the file, so callers can concatenate the result safely.
    return projectNames
class notebook:
    """Holds the dated entries that belong to a single project.

    Attributes:
        name: the project name this notebook was created for.
        entries: dictionary where key = entry date and val = entry text.
        date: the day this object was created, formatted as YYYYMMDD.
    """

    def __init__(self, name):
        """Create an empty notebook for the project called *name*."""
        self.name = name
        self.entries = {}
        # NOTE: this instance attribute shadows the date() method below, so
        # after construction `obj.date` is the YYYYMMDD string, not a
        # callable. Kept as-is for compatibility with existing users.
        self.date = self.date()

    def date(self):
        """Return today's date formatted as YYYYMMDD."""
        return datetime.date.today().strftime('%Y%m%d')

    def add_entry(self, date, entry):
        """Store *entry* under *date*.

        An entry already stored for the same date is replaced.
        """
        self.entries[date] = entry

    def get_entries(self):
        """Return the dictionary that maps entry dates to entry text."""
        return self.entries

    def print_entries(self):
        """Print the whole entries dictionary to standard output."""
        print(self.entries)
def _tracked_files():
    """Return the paths git reports as tracked at HEAD, or [] on failure."""
    try:
        result = subprocess.run(
            ["git", "ls-tree", "--full-tree", "-r", "--name-only", "HEAD"],
            stdout=subprocess.PIPE)
    except OSError:
        # git could not be started at all.
        return []
    if result.returncode != 0:
        # git exited with an error, e.g. when not run inside a repository.
        return []
    return result.stdout.decode("utf-8", errors="replace").splitlines()


def _read_notebook_list(listFile):
    """Return the stripped, non-blank lines of *listFile* (one path each)."""
    with open(listFile, "r") as f:
        return [line.strip() for line in f if line.strip()]


def _store_entry(projectObjects, project, date, text):
    """Record *text* for *project* on *date*, keeping any earlier text."""
    if not (project and text.strip()):
        return
    book = projectObjects[project]
    earlier = book.get_entries().get(date, "")
    if earlier and not earlier.endswith("\n"):
        earlier += "\n"
    # Append rather than replace, so a project that shows up twice for one
    # date (same notebook or another one) keeps all of its text.
    book.add_entry(date, earlier + text)


def _collect_entries(notebookPath, projectObjects):
    """Scan one notebook file and store every project entry found in it."""
    with open(notebookPath, "r") as f:
        started = False      # becomes True at the first dated "##" heading
        date = ""
        currentProject = ""
        pending = []         # lines of the entry being accumulated
        for line in f:
            # Strip only the right side: trailing blanks must not hide the
            # date or project name, while an indented line keeps its empty
            # first token and so is never mistaken for a heading.
            tokens = line.rstrip().split(" ")
            marker, last = tokens[0], tokens[-1]
            if marker == "##" and len(last) == 8 and last.isdecimal():
                _store_entry(projectObjects, currentProject, date,
                             "".join(pending))
                pending = []
                date = int(last)
                started = True
                # Text between a date heading and its first project heading
                # belongs to no project.
                currentProject = ""
            elif marker == "###" and started and last in projectObjects:
                _store_entry(projectObjects, currentProject, date,
                             "".join(pending))
                pending = []
                currentProject = last
            elif started and currentProject:
                # Everything else, including "#" lines and headings that do
                # not name a listed project, is content of the open entry.
                pending.append(line)
        _store_entry(projectObjects, currentProject, date, "".join(pending))


def _write_compiled(projectNames, projectObjects, noteDir):
    """Write one "<project>.md" file into *noteDir* per non-empty project."""
    if not os.path.exists(noteDir):
        print("Making the directory for compiled notebooks:\n - {}".format(noteDir))
        os.makedirs(noteDir)
    print("\nMaking the compiled notebook:")
    for name in projectNames:
        entries = projectObjects[name].get_entries()
        if not entries:
            continue
        newName = "{}.md".format(name)
        print(" - {}".format(os.path.basename(newName)))
        newFile = os.path.join(noteDir, newName)
        with open(newFile, "w") as f:
            for date in sorted(entries):
                print("\n## {}".format(date), file=f)
                print(entries[date], file=f)


def main():
    """Compile per-project notebooks from the files in 'notebooklist.txt'.

    Reads the notebook paths from 'notebooklist.txt' in the current
    directory, gathers the project names each notebook declares, collects
    the dated entries for every project, and writes one markdown file per
    project that has entries into the 'compiled_notebooks' directory.

    Returns:
        None, which the script entry point hands to sys.exit().
    """
    # 1. get a list of all the tracked files in the current git repo.
    # The list is not consumed by anything below yet.
    tracked = _tracked_files()
    # 2. get a list of all the files to look at to compile the notebook.
    notebooks = _read_notebook_list("notebooklist.txt")
    print("Compiling from these notebooks:")
    for each in notebooks:
        print(" - {}".format(each))
    print()
    # 3. get all of the projects from all the files
    projectNames = []
    for each in notebooks:
        # `or []` tolerates a lookup that yields nothing; the membership
        # test keeps a project shared by several notebooks listed once.
        for name in get_project_list(each) or []:
            if name not in projectNames:
                projectNames.append(name)
    print("Looking for these projects:")
    for each in projectNames:
        print(" - {}".format(each))
    print()
    # 4. Make an object for each project
    projectObjects = {name: notebook(name) for name in projectNames}
    # 5. Loop through all the files, keeping track of the date, and add the
    #    text under each "### <project>" heading to that project.
    for each in notebooks:
        _collect_entries(each, projectObjects)
    # This compiles notebooks for things that have entries
    _write_compiled(projectNames, projectObjects, "compiled_notebooks")
# Run the compiler only when this file is executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment