Ben O'Steen benosteen

## Dockerfile.slim
# Build stage named `base`, on Ubuntu 16.04.
# `AS` is upper-cased to match `FROM` (BuildKit FromAsCasing check).
FROM ubuntu:16.04 AS base
USER root

## Setting default environment variables
ENV WEB_ROOT=/web_root
# Root project folder
# Kept in its own ENV instruction: ${WEB_ROOT} is only substituted from values
# set by an earlier instruction, not from one set in the same instruction.
ENV ARCHES_ROOT=${WEB_ROOT}/arches
ENV WHEELS=/wheels
# Any non-empty value makes Python write stdout/stderr unbuffered.
ENV PYTHONUNBUFFERED=1

## Dockerfile.slim
# Build stage named `base`, on Ubuntu 16.04.
# `AS` is upper-cased to match `FROM` (BuildKit FromAsCasing check).
FROM ubuntu:16.04 AS base
USER root

## Setting default environment variables
ENV WEB_ROOT=/web_root
ENV DOCKER_DIR=/docker
# Root project folder
# Kept in its own ENV instruction: ${WEB_ROOT} is only substituted from values
# set by an earlier instruction, not from one set in the same instruction.
ENV ARCHES_ROOT=${WEB_ROOT}/arches
ENV WHEELS=/wheels
# Any non-empty value makes Python write stdout/stderr unbuffered.
ENV PYTHONUNBUFFERED=1

## resource_export_tests.py
    def test_jsonld_concept_match_no_label(self):
        dt = self.DT.get_instance("concept")
        # from the thesaurus that should be loaded into Arches,
        # the following concept value should have a key of 43d75450-7282-4754-af63-02e13032b73a
        jf = {
                    "@id": "http://localhost:8000/concepts/86be632e-0dad-4d88-b5da-3d65875d6239",
                    "@type": [
                      "http://www.cidoc-crm.org/cidoc-crm/E55_Type"
                    ]
                  }

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                benosteen
                / README.md
            
            
              Last active
              August 30, 2018 21:17
            
              
                This injects a PyCallGraph middleware into the normal flow of the Arches project. 
              
          
    When any resource is requested with a "?graph=true" parameter, the middleware starts pycallgraph tracking, stops it once the request has been processed, and then exports a .png of the call graph to CALLGRAPH_DIR (or "ROOT_DIR/callgraphs" if CALLGRAPH_DIR is not set).
It adds the requisite dependencies to the Dockerfile:
apt-get install graphviz
pip install pycallgraph

  
## arches container log (from docker-compose .... up -d)
Full command: run_arches
Command: run_arches
Testing if database server is up...
Database server is up
Testing if Elasticsearch is up...
Elasticsearch is up
Checking if database dev exists...
Database dev does not exists yet, starting setup...
Current work directory: /web_root/arches

## Rundown of creating a core developer environment.
(fork arches repo and clone your fork locally)

$ cd arches
$ cp docker-compose.yml docker-compose-local.yml

(edit it in the manner described at the bottom of https://github.com/archesproject/arches/blob/master/docker/Readme.md#arches-core-development)

$ docker-compose -f docker-compose-local.yml run --entrypoint /bin/bash arches

(It will pull images and build the arches image, and then put you into a bash shell on the arches container)

## convert_to_srt.py
from xml.etree import ElementTree as ET
import re

def format_t(time_string, frac_sep = ".", output_frag_sep = "."):
  h = m = secs = ms = 0
  frags = time_string.split(":")
  spart = frags[-1].split(frac_sep)
  if len(spart) == 1:
    s = int(spart[0])
  elif len(spart) == 2:

## formatting.py
Some equivalent string formatting things to give you an idea of the basics

"I love {favourite} pie, but hate {hated} ones.".format(favourite = "apple", hated = "pear")

fruit = {"favourite": "apple", "hated": "pear"}
"I love {favourite} pie, but hate {hated} ones.".format(**fruit)

fruit = {"favourite": "apple", "hated": "pear"}
"I love {favourite} pie, but hate {hated} ones.".format(**fruit)

## get_images.py
# 'doc' is ALTO XML, parsed into an etree xml obj.

def get_illustration_coords(doc, component="PrintSpace"):
  page = doc.find("Layout/Page")
  illustrations = doc.findall('Layout/Page/{0}/ComposedBlock[@TYPE="Illustration"]/GraphicalElement'.format(component))
  pageh, pagew = int(page.attrib['HEIGHT']), int(page.attrib['WIDTH'])
  images = []
  for img in illustrations:
    x,y = map(int, [img.attrib['HPOS'], img.attrib['VPOS']])
    h,w = map(int, [img.attrib['HEIGHT'], img.attrib['WIDTH']])

## altototxt.sh
# Convert every ALTO XML file found under OCR/ to plain text with
# alto2txt.xslt, writing text/<relative path>.txt so the output tree
# mirrors the layout of OCR/.
#
# The -name pattern is quoted so the shell cannot expand it against the
# current directory before find sees it. Paths are read one per line and
# quoted everywhere, so names containing spaces are handled correctly.
find OCR -name '*xml' | while IFS= read -r xmlfile
do
  # Path of the input relative to OCR/, reused for the output location.
  relpath=${xmlfile#OCR/}
  echo "Processing $(basename "$xmlfile")"
  mkdir -p "text/$(dirname "$relpath")"
  xsltproc --output "text/${relpath}.txt" alto2txt.xslt "$xmlfile"
done
	# Build stage named `base`, on Ubuntu 16.04.
	# `AS` is upper-cased to match `FROM` (BuildKit FromAsCasing check).
	FROM ubuntu:16.04 AS base
	USER root

	## Setting default environment variables
	ENV WEB_ROOT=/web_root
	# Root project folder
	# Kept in its own ENV instruction: ${WEB_ROOT} is only substituted from values
	# set by an earlier instruction, not from one set in the same instruction.
	ENV ARCHES_ROOT=${WEB_ROOT}/arches
	ENV WHEELS=/wheels
	# Any non-empty value makes Python write stdout/stderr unbuffered.
	ENV PYTHONUNBUFFERED=1
	def test_jsonld_concept_match_no_label(self):
	dt = self.DT.get_instance("concept")
	# from the thesaurus that should be loaded into Arches,
	# the following concept value should have a key of 43d75450-7282-4754-af63-02e13032b73a
	jf = {
	"@id": "http://localhost:8000/concepts/86be632e-0dad-4d88-b5da-3d65875d6239",
	"@type": [
	"http://www.cidoc-crm.org/cidoc-crm/E55_Type"
	]
	}
	Full command: run_arches
	Command: run_arches
	Testing if database server is up...
	Database server is up
	Testing if Elasticsearch is up...
	Elasticsearch is up
	Checking if database dev exists...
	Database dev does not exists yet, starting setup...
	Current work directory: /web_root/arches
	(fork arches repo and clone your fork locally)

	$ cd arches
	$ cp docker-compose.yml docker-compose-local.yml

	(edit it in the manner described at the bottom of https://github.com/archesproject/arches/blob/master/docker/Readme.md#arches-core-development)

	$ docker-compose -f docker-compose-local.yml run --entrypoint /bin/bash arches

	(It will pull images and build the arches image, and then put you into a bash shell on the arches container)
	from xml.etree import ElementTree as ET
	import re

	def format_t(time_string, frac_sep = ".", output_frag_sep = "."):
	h = m = secs = ms = 0
	frags = time_string.split(":")
	spart = frags[-1].split(frac_sep)
	if len(spart) == 1:
	s = int(spart[0])
	elif len(spart) == 2:
	Some equivalent string formatting things to give you an idea of the basics

	"I love {favourite} pie, but hate {hated} ones.".format(favourite = "apple", hated = "pear")

	fruit = {"favourite": "apple", "hated": "pear"}
	"I love {favourite} pie, but hate {hated} ones.".format(**fruit)

	fruit = {"favourite": "apple", "hated": "pear"}
	"I love {favourite} pie, but hate {hated} ones.".format(**fruit)
	# 'doc' is ALTO XML, parsed into an etree xml obj.

	def get_illustration_coords(doc, component="PrintSpace"):
	page = doc.find("Layout/Page")
	illustrations = doc.findall('Layout/Page/{0}/ComposedBlock[@TYPE="Illustration"]/GraphicalElement'.format(component))
	pageh, pagew = int(page.attrib['HEIGHT']), int(page.attrib['WIDTH'])
	images = []
	for img in illustrations:
	x,y = map(int, [img.attrib['HPOS'], img.attrib['VPOS']])
	h,w = map(int, [img.attrib['HEIGHT'], img.attrib['WIDTH']])
	# Convert every ALTO XML file found under OCR/ to plain text with
	# alto2txt.xslt, writing text/<relative path>.txt so the output tree
	# mirrors the layout of OCR/.
	#
	# The -name pattern is quoted so the shell cannot expand it against the
	# current directory before find sees it. Paths are read one per line and
	# quoted everywhere, so names containing spaces are handled correctly.
	find OCR -name '*xml' | while IFS= read -r xmlfile
	do
		# Path of the input relative to OCR/, reused for the output location.
		relpath=${xmlfile#OCR/}
		echo "Processing $(basename "$xmlfile")"
		mkdir -p "text/$(dirname "$relpath")"
		xsltproc --output "text/${relpath}.txt" alto2txt.xslt "$xmlfile"
	done