View isobuster-report-demo.py
#! /usr/bin/env python
"""Script that demonstrates custom DFXML reports in IsoBuster"""
import os
import subprocess as sub
def launchSubProcess(args):
"""Launch subprocess and return exit code, stdout and stderr"""
try:
# Execute command line; stdout + stderr redirected to objects
View checkHttpstatus.sh
#!/bin/bash
# Check HTTP status for list of URLs
#
# Uses curl: https://curl.haxx.se/
# Display usage message if command line does not contain expected
# number of arguments
if [ "$#" -ne 2 ] ; then
echo "Usage: checkHttpStatus.sh fileIn fileOut" >&2
View extractURLs.sh
#!/bin/bash
# Extract all URLs from a document.
#
# Steps:
#
# 1. Use Apache Tika to extract text to plain text document
# 2. Use xurls to extract URLs from Tika output (and use sort to remove duplicates)
#
# Dependencies:
View delpher-favorieten.pdf.txt
%PDF-1.3
3 0 obj
<</Type /Page
/Parent 1 0 R
/Resources 2 0 R
/MediaBox [0 0 595.28 841.89]
/Contents 4 0 R
>>
endobj
4 0 obj
View mets_emptyfiles.xml
<mets:mets xmlns:ebucore="urn:ebu:metadata-schema:ebuCore_2017" xmlns:isolyzer="https://github.com/KBNLresearch/isolyzer" xmlns:mets="http://www.loc.gov/METS/" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mods/v3 https://www.loc.gov/standards/mods/v3/mods-3-4.xsd http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd">
<mets:dmdSec ID="dmdSec_1">
<mets:mdWrap MDTYPE="MODS" MDTYPEVERSION="3.4">
<mets:xmlData>
<mods:mods>
<mods:titleInfo>
<mods:title>Thea Beckman</mods:title>
</mods:titleInfo>
<mods:name>
<mods:namePart>Binnendijk, Dik</mods:namePart>
View toJP2.sh
#!/bin/bash
# Convert uncompressed TIFF images to JP2, using KB specs for lossless
# preservation masters and lossy access copies using Kakadu.
#
# Script automatically chooses the appropriate bitrate values depending
# on the number of samples per pixel (works for both RGB and grayscale
# images, provided that the number of bits per sample equals 8)
#
# Dependencies:
View perf-openjpeg-lossless.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 10 should actually have 5 columns, instead of 1.
fileIn,time_21,time_22,time_23,time_kdu
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000080_moreCompression.JP2,11.036751527,3.482625629,3.425432101,2.361188220
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000064_moreCompression.JP2,10.779915135,3.466235106,3.380231731,2.376212069
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000082_moreCompression.JP2,10.788571724,3.448439201,3.391688197,2.819778484
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000069_moreCompression.JP2,10.922965130,3.516191343,3.387377817,2.329715358
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000076_moreCompression.JP2,10.937612953,3.448598046,3.382380951,2.333200692
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000075_moreCompression.JP2,10.406887702,3.604703379,3.423830778,2.194799838
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000081_moreCompression.JP2,10.768077311,3.433976151,3.379309657,2.245751607
/home/johan/testOpenJPEG/jp2Lossless/IMAGE000060_moreCompression.JP2,10.714907748,3.405195912,3.309173947,2.260180601
/home/johan/testOpenJPEG/jp2Lossless/IMA
View perf-openjpeg-JP2sDaniel.csv
fileIn time_21 time_22 time_23 time_kdu
/home/johan/testOpenJPEG/jp2sDaniel/3281G028358.jp2 44.598101392 11.195378686 11.529406741 8.029106229
/home/johan/testOpenJPEG/jp2sDaniel/2619731579.jp2 .961620779 .288206453 .288807712 .201174597
View perf-openjpeg-compiledfromsource.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 10 should actually have 5 columns, instead of 1.
fileIn,time_21,time_22,time_23,time_kdu
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000082_moreCompression.jp2,5.410303975,3.575379289,2.313317871,1.233227022
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000079_moreCompression.jp2,6.034543479,3.802899472,2.521215140,.908657665
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000063_moreCompression.jp2,6.105316587,3.914514886,2.697573012,1.017977377
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000067_moreCompression.jp2,5.186735259,3.617373786,2.309958361,.806755249
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000076_moreCompression.jp2,5.702024388,3.694388503,2.358431098,.825980657
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000068_moreCompression.jp2,5.564530207,3.715652266,2.349179815,1.317579486
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000075_moreCompression.jp2,5.635532457,3.669438702,2.469565982,1.093434089
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000077_moreCompression.jp2,6.075032068,3.665625404,2.364993336,1.091026821
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000073_moreCompres
View kdu-threads-1.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 10 should actually have 5 columns, instead of 1.
fileIn,time_21,time_22,time_23,time_kdu
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000082_moreCompression.jp2,5.423803107,4.981237919,4.730796714,1.996674996
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000079_moreCompression.jp2,5.846709741,5.148845304,5.025870829,2.191312568
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000063_moreCompression.jp2,6.136973310,5.420557560,5.178961369,2.638147991
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000067_moreCompression.jp2,5.190737260,4.807502718,4.588716888,2.138407845
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000076_moreCompression.jp2,5.181907813,4.793821055,4.560945289,1.933658029
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000068_moreCompression.jp2,5.621030691,5.072809764,4.836144228,2.237490161
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000075_moreCompression.jp2,5.287440855,4.759237733,4.675085429,2.288701165
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000077_moreCompression.jp2,5.430498697,5.011486640,4.795154760,2.135530343
/home/johan/testOpenJPEG/jp2LossyKB/IMAGE000073_moreComp