Rebecca Sutton Koeser rlskoeser

## count_pgptxtfiles.sh
#!/bin/bash

# based on https://blog.benoitblanchon.fr/git-file-count-vs-time/

OUTPUT=stats.csv

# create output file with a CSV header
# echo "date;transcription_count;transcribed_documents;translation_count;translated_documents" > $OUTPUT
echo "date,transcriptions,transcribed_documents,translations,translated_documents" > $OUTPUT

## django_logentries_export.py
import csv
from django.contrib.admin.models import LogEntry, ADDITION, CHANGE, DELETION

# convert action codes to labels
action_label = {ADDITION: 'addition', CHANGE: 'change', DELETION: "deletion"}

with open('/tmp/django-logentries.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['action_time', 'user', 'content_type', 'object_id', 'change_message', 'action_flag'])
    for log in LogEntry.objects.all():

## genizabibliography_sources_ris2csv.py
# pip install pandas rispy
import pandas as pd
import rispy

# download RIS file: https://www.repository.cam.ac.uk/handle/1810/256117

# parse RIS file into entries
with open('genizahbibliography20160203.txt') as bibfile:
    entries = rispy.load(bibfile)

## gvision_ocr.py
#!/usr/bin/env python

# pip install google-cloud-vision
# Follow quick start instructions to set up access to the API and authentication
# https://pypi.org/project/google-cloud-vision/

import glob
import io
import os

## dhqa_data.py
#!/usr/bin/env python

'''
Script to parse data from the DH Q&A archive.

Install python dependencies:

    pip install beautifulsoup4 feedparser

Clone the DH Q&A archive repository:

## sonify-derrida-references.py
#!/usr/bin/env python

# Based on sample script from https://programminghistorian.org/en/lessons/sonification#miditime

# currently written for python2; miditime only goes up to python 3.4
# pip install miditime

import csv
from collections import defaultdict

## cdh-scrape.py
#!/usr/bin/env python

# Script to scrape all links from a site, compile counts of each link, status
# codes of access and output the results as a CSV
#
# There's no reason this couldn't be restructured in an OOP style, but I
# left it as plain functions because that can be easier for multitasking.
#
# Requirements:
# requests, bs4

## asana-post-commit.py
#!/usr/bin/env python

# git post-commit hook for linking git commits to asana tasks
# (inspired / adapted in part from https://github.com/Darunada/git-asana-post-commit-hook)
#
# Tested with Python 2.7 and Python 3.5
#
# INSTALLATION
# - Copy this script to .git/hooks/post-commit in your local repository
#   (be sure the script is executable)

## zotero-rdf-tags.py
#! /usr/bin/env python

# simple script to pull data from a Zotero library RDF export
# and generate a CSV file with identifier, type of item, title,
# date, and the number of tags
#
# Only supports book and bookSection item types, all other items are ignored
#
# The CSV file will be generated with the same base name as the
# RDF input file.

## fed4client.py
#!/usr/bin/env python

# fedora 4 test client
# NOTE: this is experimental / spike code!!
# (written to get a sense of the fedora 4 LDP API)
import requests
import rdflib


DC = rdflib.Namespace('http://purl.org/dc/elements/1.1/')
	#!/bin/bash

	# based on https://blog.benoitblanchon.fr/git-file-count-vs-time/

	OUTPUT=stats.csv

	# create output file with a CSV header
	# echo "date;transcription_count;transcribed_documents;translation_count;translated_documents" > $OUTPUT
	echo "date,transcriptions,transcribed_documents,translations,translated_documents" > $OUTPUT
	import csv
	from django.contrib.admin.models import LogEntry, ADDITION, CHANGE, DELETION

	# convert action codes to labels
	action_label = {ADDITION: 'addition', CHANGE: 'change', DELETION: "deletion"}

	with open('/tmp/django-logentries.csv', 'w') as csvfile:
	writer = csv.writer(csvfile)
	writer.writerow(['action_time', 'user', 'content_type', 'object_id', 'change_message', 'action_flag'])
	for log in LogEntry.objects.all():
	# pip install pandas rispy
	import pandas as pd
	import rispy

	# download RIS file: https://www.repository.cam.ac.uk/handle/1810/256117

	# parse RIS file into entries
	with open('genizahbibliography20160203.txt') as bibfile:
	entries = rispy.load(bibfile)
	#!/usr/bin/env python

	# pip install google-cloud-vision
	# Follow quick start instructions to set up access to the API and authentication
	# https://pypi.org/project/google-cloud-vision/

	import glob
	import io
	import os
	#!/usr/bin/env python

	'''
	Script to parse data from the DH Q&A archive.

	Install python dependencies:

	pip install beautifulsoup4 feedparser

	Clone DH Q&A archive repository:
	#!/usr/bin/env python

	# Based on sample script from https://programminghistorian.org/en/lessons/sonification#miditime

	# currently written for python2; miditime only goes up to python 3.4
	# pip install miditime

	import csv
	from collections import defaultdict
	#!/usr/bin/env python

	# Script to scrape all links from a site, compile counts of each link, status
	# codes of access and output the results as a CSV
	#
	# There's no reason this couldn't be restructured in an OOP style, but I
	# left it as plain functions because that can be easier for multitasking.
	#
	# Requirements:
	# requests, bs4
	#!/usr/bin/env python

	# git post-commit hook for linking git commits to asana tasks
	# (inspired / adapted in part from https://github.com/Darunada/git-asana-post-commit-hook)
	#
	# Tested with Python 2.7 and Python 3.5
	#
	# INSTALLATION
	# - Copy this script to .git/hooks/post-commit in your local repository
	# (be sure the script is executable)
	#! /usr/bin/env python

	# simple script to pull data from a Zotero library RDF export
	# and generate a CSV file with identifier, type of item, title,
	# date, and the number of tags
	#
	# Only supports book and bookSection item types, all other items are ignored
	#
	# The CSV file will be generated with the same base name as the
	# RDF input file.
	#!/usr/bin/env python

	# fedora 4 test client
	# NOTE: this is experimental / spike code!!
	# (written to get a sense of the fedora 4 LDP API)
	import requests
	import rdflib


	DC = rdflib.Namespace('http://purl.org/dc/elements/1.1/')