GitHub gists by Daniel Pett (portableant)
@portableant
portableant / transcription.json
Created March 15, 2017 10:44
An example of audio transcription
[
  {
    "info": {
      "transcription": "far longer than the execution. What had actually happened in the first World War, of course this is pre-, at the time, was pre-air raid nobody bothered much to safeguard the collections",
      "comments": ""
    },
    "task_id": 71400,
    "created": "2017-03-14T16:25:25.441232",
    "finish_time": "2017-03-14T16:30:18.816810",
    "project_id": 372,
@portableant
portableant / faceDetectionPython3.py
Created April 1, 2017 21:58
Python3 face detector
#!/usr/bin/env python3
from SPARQLWrapper import SPARQLWrapper, JSON
import urllib.request, urllib.parse, urllib.error
import os
from PIL import Image
import subprocess
import cv2
import argparse
import time
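The preview stops at the imports. Below is a minimal sketch, reusing the argparse and cv2 imports above, of the usual OpenCV Haar-cascade detection step; the cascade path, CLI arguments and output filename are illustrative assumptions, not the gist's actual code (the SPARQLWrapper, PIL and subprocess imports suggest the real script also fetches its images from a SPARQL endpoint, which is not reconstructed here).
# Hedged sketch: detect faces in a single image with an OpenCV Haar cascade.
parser = argparse.ArgumentParser(description="Detect faces in an image")
parser.add_argument("image", help="path to the input image")
parser.add_argument("--cascade",
                    default=cv2.data.haarcascades + "haarcascade_frontalface_default.xml",
                    help="Haar cascade XML file to use")
args = parser.parse_args()

img = cv2.imread(args.image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = cv2.CascadeClassifier(args.cascade).detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("detected.jpg", img)
print("%d face(s) found" % len(faces))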
@portableant
portableant / findit.py
Last active April 1, 2017 22:54 — forked from paregorios/findit.py
How to find all Pleiades URIs that correspond to a Trismegistos place URI
# read in the json and get the graph of places
import json
import urllib.request, urllib.parse, urllib.error
import gzip
import os
latest = "http://atlantides.org/downloads/pleiades/json/pleiades-places-latest.json.gz"
fn = os.path.join(os.getcwd(), os.path.basename(latest))
urllib.request.urlretrieve(latest, fn)
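The preview stops after the download. A sketch of the matching step, reusing the gzip, json and fn names defined above; the key names "@graph", "references", "accessURI" and "uri" are assumptions about the shape of the Pleiades places export rather than code from the gist.
# Hedged continuation: map Trismegistos place URIs to Pleiades URIs
# by scanning each place's references in the downloaded dump.
with gzip.open(fn, "rt", encoding="utf-8") as f:
    places = json.load(f).get("@graph", [])

tm_to_pleiades = {}
for place in places:
    for ref in place.get("references", []):
        access = ref.get("accessURI", "")
        if "trismegistos.org/place/" in access:
            tm_to_pleiades.setdefault(access, []).append(place.get("uri"))

print(len(tm_to_pleiades), "Trismegistos place URIs matched")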
# Fetch inscribed seal matrices from the PAS database and page through the results
library(jsonlite)
url <- 'https://finds.org.uk/database/search/results/q/objectType%3A%22SEAL+MATRIX%22+inscription%3A%2A/format/json'
json <- fromJSON(url)
total <- json$meta$totalResults
results <- json$meta$resultsPerPage
pagination <- ceiling(total/results)
keeps <- c("id","old_findID","broadperiod", "inscription", "institution", "creator", "fourFigureLat", "fourFigureLon")
data <- json$results
data <- data[,(names(data) %in% keeps)]
for (i in seq(from=2, to=pagination, by=1)){
  # assumed continuation: the preview stops inside this loop; fetch each further page and append it
  page <- fromJSON(paste0(url, '/page/', i))$results
  data <- rbind(data, page[,(names(page) %in% keeps)])
}
write.csv(data, file='sealMatrices.csv', row.names=FALSE)  # output filename is illustrative
@portableant
portableant / pybossa
Created April 10, 2018 23:18
Config files for pybossa EC2 instance
server {
    listen 80;
    server_name crowdsourced.micropasts.org;
    large_client_header_buffers 4 32k;
    real_ip_header X-Forwarded-For;
    # change that to your pybossa directory
    root /var/www/pybossa;
    client_max_body_size 20M;
    # assumed completion (preview truncated here): hand requests to the PyBossa uWSGI socket
    location / { try_files $uri @pybossa; }
    location @pybossa {
        include uwsgi_params;
        uwsgi_pass unix:/tmp/pybossa.sock;  # socket path is an assumption
    }
}
@portableant
portableant / esExtract.txt
Last active May 24, 2018 12:09
Extract data from CIIM
The CIIM uses an outdated version of Elasticsearch (2.3.5).
Install elasticdump on Windows (PowerShell):
$ npm install elasticdump@2.4.2 -g
Run the dump for the mappings:
$ elasticdump --input=http://{IP or URL}/es/ --output mapping.json --type=mapping
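For completeness, the same extraction can be sketched in Python against the Elasticsearch 2.x scroll API rather than elasticdump; this is a hedged alternative, with the base URL placeholder reused from the commands above and an assumed index name.
# Hedged alternative: page through an Elasticsearch 2.x index over HTTP
# using the scroll API. Base URL and index name are placeholders/assumptions.
import json
import requests

base = "http://{IP or URL}/es"   # same placeholder as in the elasticdump commands
index = "ciim"                   # assumed index name

resp = requests.post(base + "/" + index + "/_search?scroll=1m",
                     json={"size": 500, "query": {"match_all": {}}}).json()

with open("data.json", "w", encoding="utf-8") as out:
    while resp["hits"]["hits"]:
        for hit in resp["hits"]["hits"]:
            out.write(json.dumps(hit) + "\n")
        resp = requests.post(base + "/_search/scroll",
                             json={"scroll": "1m", "scroll_id": resp["_scroll_id"]}).json()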
@portableant
portableant / extractExif.R
Created May 21, 2018 10:19
Extract exif tags using R and write to CSV
setwd("{directory}")
install.packages("devtools")
devtools::install_github("paleolimbot/exifr")
library(exifr)
image_files <- list.files(pattern = "*.jpg")
data <- as.data.frame(read_exif(image_files, tags = c("filename", "headline", "Description", "Keywords", "Title", "Copyright Notice")))
data$Keywords <- sapply(data$Keywords, paste, collapse=",")
write.csv(data, file='metadata.csv',row.names=FALSE, na="")
@portableant
portableant / scrapeCyprusImages.R
Created May 22, 2018 09:50
Download images from Fitzwilliam COL
setwd("/Users/danielpett/Documents/research/fitzwilliam/")
csv <- "cyprus.csv"
data <- read.csv(csv, header=T, na.strings=c("","NA"))
images <- data[!is.na(data$multimedia.0.processed.original.location),]
uris <- images$admin.uri
urlList <- as.character(uris)
print(urlList)
for(a in urlList){
] page <- a %>% read_html()
files <- page %>% html_nodes("img") %>% html_attr("src")
@portableant
portableant / pasImageScrape.R
Last active June 18, 2018 23:16
Obtain PAS images in folders by object type
#' A script for getting images from PAS
#' Please do improve
#' My R skills are poor.
# Set your working directory
setwd("/Users/danielpett/Documents/research/electricarchaeo")
# Use the following libraries
library(jsonlite)
library(RCurl)