Skip to content

Instantly share code, notes, and snippets.

@felixlohmeier
Last active April 12, 2022 05:27
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d to your computer and use it in GitHub Desktop.
Save felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d to your computer and use it in GitHub Desktop.
How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts

How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts

tested on Fedora 32 with bash 5.0.17 and curl 7.69.1

Quick start

  1. Clone this gist
git clone https://gist.github.com/d76bd27fbc4b8ab6d683822cdf61f81d.git bash-refine
  2. Execute all supplied examples for a quick demo
cd bash-refine
./templates.sh

Build your own workflow

  1. Copy minimal pre-structured script to a new file
cp minimal.sh myworkflow.sh
  2. Use the templates in templates.sh to develop your workflow
#!/bin/bash
# bash-refine v1.3.4: bash-refine.sh, Felix Lohmeier, 2020-11-02
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/
# TODO: support for macOS
# ================================== CONFIG ================================== #
# every setting can be overridden with an environment variable (REFINE_*);
# otherwise a default relative to the directory of this file is used
endpoint="${REFINE_ENDPOINT:-http://localhost:3333}"
memory="${REFINE_MEMORY:-1400M}"
csrf="${REFINE_CSRF:-true}"
date="$(date +%Y%m%d_%H%M%S)"
# directory of this file; dirname (unlike ${BASH_SOURCE%/*}) yields "." when
# the path contains no slash, e.g. "source bash-refine.sh" or "bash script.sh"
refine_basedir="$(dirname -- "${BASH_SOURCE[0]}")"
# working directory: REFINE_WORKDIR must exist (readlink -e), else a new
# timestamped directory below output/ is chosen (created later by init)
workdir="$(readlink -e "${REFINE_WORKDIR:-}")"
if [[ -z "${workdir}" ]]; then
  workdir="$(readlink -m "${refine_basedir}/output/${date}")"
fi
# log file: only the parent directory of REFINE_LOGFILE must exist (readlink -f)
logfile="$(readlink -f "${REFINE_LOGFILE:-}")"
if [[ -z "${logfile}" ]]; then
  logfile="$(readlink -m "${refine_basedir}/log/${date}.log")"
fi
# path to the jq executable (downloaded by requirements if it does not exist)
jq="$(readlink -e "${REFINE_JQ:-}")"
if [[ -z "${jq}" ]]; then
  jq="$(readlink -m "${refine_basedir}/lib/jq")"
fi
# path to the OpenRefine start script (downloaded by requirements if missing)
refine="$(readlink -e "${REFINE_REFINE:-}")"
if [[ -z "${refine}" ]]; then
  refine="$(readlink -m "${refine_basedir}/lib/openrefine/refine")"
fi
declare -A checkpoints # associative array for stats
declare -A pids # associative array for monitoring background jobs
declare -A projects # associative array for OpenRefine projects
# =============================== REQUIREMENTS =============================== #
function requirements {
  # Check that java and cURL are available and download jq and OpenRefine to
  # the configured paths if they do not exist yet.
  # Globals: jq, refine (read)
  # Exits with status 1 if a dependency is missing or a download fails.
  local refine_dir archive
  # check existence of java and cURL
  if [[ -z "$(command -v java 2> /dev/null)" ]]; then
    echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
      "https://openjdk.java.net/install/"
    exit 1
  fi
  if [[ -z "$(command -v curl 2> /dev/null)" ]]; then
    echo 1>&2 "ERROR: This shell script requires cURL" \
      "https://curl.haxx.se/download.html"
    exit 1
  fi
  # download jq and OpenRefine if necessary
  if [[ -z "$(readlink -e "${jq}")" ]]; then
    echo "Download jq..."
    mkdir -p "$(dirname "${jq}")"
    # jq 1.4 has much faster startup time than 1.5 and 1.6
    # --fail: never store an HTTP error page as the jq executable
    if ! curl -fL --output "${jq}" \
      "https://github.com/stedolan/jq/releases/download/jq-1.4/jq-linux-x86_64"
    then
      rm -f "${jq}"
      echo 1>&2 "ERROR: download of jq failed!"
      exit 1
    fi
    chmod +x "${jq}"; echo
  fi
  if [[ -z "$(readlink -e "${refine}")" ]]; then
    echo "Download OpenRefine..."
    refine_dir="$(dirname "${refine}")"
    mkdir -p "${refine_dir}"
    # download to a temporary file instead of a fixed name in the current dir
    if ! archive="$(mktemp)"; then
      echo 1>&2 "ERROR: creating temporary file failed!"
      exit 1
    fi
    if ! curl -fL --output "${archive}" \
      "https://github.com/OpenRefine/OpenRefine/releases/download/3.4/openrefine-linux-3.4.tar.gz"
    then
      rm -f "${archive}"
      echo 1>&2 "ERROR: download of OpenRefine failed!"
      exit 1
    fi
    echo "Install OpenRefine in subdirectory ${refine_dir}..."
    if ! tar -xzf "${archive}" -C "${refine_dir}" --strip 1 --totals; then
      rm -f "${archive}"
      echo 1>&2 "ERROR: unpacking OpenRefine failed!"
      exit 1
    fi
    rm -f "${archive}"
    # do not try to open OpenRefine in browser
    sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
      "${refine_dir}"/refine.ini
    # set min java heap space to allocated memory
    # (single quotes are intended: the $ signs are literal text in the file)
    sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' \
      "${refine_dir}"/refine
    # set autosave period from 5 minutes to 25 hours
    sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
      "${refine_dir}"/refine.ini
    echo
  fi
}
# ============================== OPENREFINE API ============================== #
function refine_start {
  # Launch the OpenRefine server in the background and wait until it responds.
  # Globals: refine, memory, endpoint, workdir (read); pid_server (written)
  # Calls error if the endpoint does not return a page containing
  # "OpenRefine" within 30 seconds.
  echo "start OpenRefine server..."
  local dir
  dir="$(readlink -e "${workdir}")"
  # quote the executable so that installation paths with spaces work;
  # the port is everything after the last colon of the endpoint
  "${refine}" -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
  pid_server=${!}
  # hand the endpoint over as positional parameter instead of splicing it
  # into the command string that the child shell has to parse
  timeout 30s bash -c \
    'until curl -s "$1" | cat | grep -q -o "OpenRefine"; do sleep 1; done' \
    _ "${endpoint}" \
    || error "starting OpenRefine server failed!"
}
function refine_stats {
  # report start time, elapsed time, memory share, cpu share and resident
  # set size of the server process stored in pid_server
  local columns="start,etime,%mem,%cpu,rss"
  ps -o "${columns}" -p "${pid_server}"
}
function refine_kill {
  # Kill the OpenRefine server and delete its temporary files in workdir.
  # Globals: pid_server, workdir (read)
  # kill OpenRefine immediately; SIGKILL (kill -9) prevents saving projects
  if [[ -n "${pid_server:-}" ]]; then
    { kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
  fi
  # skip the cleanup if workdir is empty: cd with an empty argument may
  # succeed without changing directory, so rm would hit the caller's cwd
  [[ -n "${workdir:-}" ]] || return 0
  # delete temporary OpenRefine projects
  (cd "${workdir}" && rm -rf ./*.project* && rm -f workspace.json)
}
function refine_check {
  # search the log file for "exception" or "error" (case-insensitive);
  # matching lines are printed and the run is aborted via error,
  # otherwise a success message is logged
  if ! grep -i -e 'exception' -e 'error' "${logfile}"; then
    log "checked log file, all good!"
  else
    error "log contains warnings!"
  fi
}
function refine_stop {
  # print the server load, kill the server and scan the log for problems
  printf '%s\n' "stop OpenRefine server and print server load..."
  refine_stats
  printf '\n'
  refine_kill
  printf '%s\n' "check log for any warnings..."
  refine_check
}
function refine_csrf {
  # print the query string "?csrf_token=<token>" for use in request URLs
  # (CSRF tokens were introduced in OpenRefine 3.3);
  # prints nothing unless the csrf setting is "true"
  [[ "${csrf}" = true ]] || return 0
  local reply token
  reply=$(curl -fs "${endpoint}/command/core/get-csrf-token")
  if [[ "${reply}" == '{"token":"'* ]]; then
    # the token is the fourth field when splitting at double quotes
    token="$(cut -d '"' -f 4 <<< "${reply}")"
    echo "?csrf_token=${token}"
  else
    error "getting CSRF token failed!"
  fi
}
function refine_store {
  # Store the project id of a finished import in the array projects.
  # Arguments: $1 - project name (key in projects)
  #            $2 - file containing the redirect url returned by the import;
  #                 the id is the text after the first "=" of that url
  # The file is deleted once an id with 13 characters has been read.
  if [[ $# != 2 ]]; then
    error "invalid arguments supplied to import function!"
  else
    projects[$1]=$(cut -d '=' -f 2 "$2")
  fi
  if [[ "${#projects[$1]}" = 13 ]]; then
    rm "$2"
  else
    error "returned project id is not valid!"
  fi
  # check if project contains at least one row (may be skipped to gain ~40ms)
  local total
  total=$(
    curl -fs --get \
      --data project="${projects[$1]}" \
      --data limit=0 \
      "${endpoint}/command/core/get-rows" \
      | tr "," "\n" | grep total | cut -d ":" -f 2
  )
  if [[ "${total}" = "0" ]]; then
    error "imported project contains 0 rows!"
  fi
}
# ============================ SCRIPT ENVIRONMENT ============================ #
function log {
  # print a status message ($1) prefixed with the current time in milliseconds
  local now
  now="$(date +%H:%M:%S.%3N)"
  printf '%s [ client] %s\n' "${now}" "$1"
}
function error {
  # print the error message ($1) to stderr, kill the server and all child
  # processes of the main shell, then terminate with exit status 1
  printf 'ERROR: %s\n' "$1" >&2
  refine_kill
  pkill -P $$
  exit 1
}
function monitor {
  # remember the pid of the most recent background job under the name $1
  local job_name="$1"
  pids["${job_name}"]="${!}"
}
function monitoring {
  # Wait for all background jobs registered with monitor, remove finished
  # jobs from the array pids and check the log for errors afterwards.
  # Globals: pids (read, modified), projects (read)
  # Calls error (naming the job and its project id) if a job failed.
  local name
  for name in "${!pids[@]}"; do
    if wait "${pids[$name]}"; then
      # single quotes keep the subscript away from pathname expansion;
      # unset expands $name itself
      unset 'pids[$name]'
    else
      error "${name} (${projects[$name]}) failed!"
    fi
  done
  refine_check
}
function checkpoint {
  # store the current timestamp (seconds.milliseconds) under the name $1 in
  # the array checkpoints and print a numbered headline framed by "=" signs
  local title="$1" bar left right
  checkpoints["${title}"]=$(date +%s.%3N)
  # bar of 40 "=" signs; printf cuts it to the required length below
  printf -v bar '%40s' ''
  bar="${bar// /=}"
  left=$(( (80 - 2 - ${#title}) / 2 ))
  right=$(( (80 - 1 - ${#title}) / 2 ))
  printf '%.*s %s %.*s\n' \
    "${left}" "${bar}" \
    "${#checkpoints[@]}. ${title}" \
    "${right}" "${bar}"
}
function checkpoint_stats {
  # print starting time and run time of every step recorded by checkpoint,
  # followed by the total run time; the last step ends at the current time
  local name idx elapsed started
  local -a names stamps
  echo "starting time and run time (hh:mm:ss) of each step..."
  # checkpoint names ordered by their timestamp
  readarray -t names < <(
    for name in "${!checkpoints[@]}"; do
      printf '%s:::%s\n' "${checkpoints[$name]}" "${name}"
    done | sort | awk -F::: '{print $2}')
  # matching timestamps without milliseconds, plus the current time as end
  stamps=()
  for name in "${names[@]}"; do
    stamps+=("${checkpoints[$name]%.*}")
  done
  stamps+=("$(date +%s)")
  # one line per step: name, number, starting time, duration
  for (( idx = 0; idx < ${#names[@]}; idx++ )); do
    elapsed=$(( stamps[idx + 1] - stamps[idx] ))
    started="$(date -d "@${stamps[idx]}")"
    printf "%35s %s %s %s\n" "${names[idx]}" "($(( idx + 1 )))" \
      "${started}" \
      "($(date -d "@${elapsed}" -u +%H:%M:%S))"
  done
  # total run time from first checkpoint until now
  elapsed=$(( stamps[${#names[@]}] - stamps[0] ))
  printf "%80s\n%80s\n" "----------" "($(date -d "@${elapsed}" -u +%H:%M:%S))"
}
function count_output {
  # list line count and byte size of every file in workdir
  printf '%s\n' "files (number of lines / size in bytes) in ${workdir}..."
  (cd "${workdir}" && wc -l -c ./*)
}
function init {
  # prepare the script environment:
  # check requirements and download software if necessary
  requirements
  # abort via error when the script receives one of these signals
  local sig
  for sig in HUP INT QUIT TERM; do
    trap 'error "script interrupted!"' "${sig}"
  done
  # create working directory and directory of the log file
  mkdir -p "${workdir}"
  mkdir -p "$(dirname "${logfile}")"
  # from now on copy stdout and stderr of the script to the log file
  exec > >(tee -i -a "${logfile}") 2>&1
}
#!/bin/bash
# bash-refine v1.3.4: minimal.sh, Felix Lohmeier, 2020-11-02
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/
# =============================== ENVIRONMENT ================================ #
# directory of this script; dirname (unlike ${BASH_SOURCE%/*}) also works when
# the script is started without a slash in its path, e.g. "bash minimal.sh"
script_dir="$(dirname -- "${BASH_SOURCE[0]}")"
# load configuration and functions
source "${script_dir}/bash-refine.sh" || exit 1
# make script executable from another directory
cd "${script_dir}/" || exit 1
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
refine_start; echo
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# <-- insert snippet from templates.sh here -->
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# <-- insert snippet from templates.sh here -->
# ================================== EXPORT ================================== #
checkpoint "Export"; echo
# <-- insert snippet from templates.sh here -->
# ================================== FINISH ================================== #
checkpoint "Finish"; echo
# stop the server, then print run times and a summary of the output files
refine_stop; echo
checkpoint_stats; echo
count_output
#!/bin/bash
# bash-refine v1.3.4: templates.sh, Felix Lohmeier, 2020-11-04
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/
# TODO: example for setting metadata
# TODO: example for engine config (facets)
# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
# The following code shows several options for import, transform and export
# use the templates to write your own scripts or execute this file for a demo
# =============================== ENVIRONMENT ================================ #
# directory of this script; dirname (unlike ${BASH_SOURCE%/*}) also works when
# the script is started without a slash in its path, e.g. "bash templates.sh"
script_dir="$(dirname -- "${BASH_SOURCE[0]}")"
# source the main script
source "${script_dir}/bash-refine.sh" || exit 1
# make script executable from another directory
cd "${script_dir}/" || exit 1
### override default config?
#endpoint="http://localhost:3333"
#memory="1400M" # increase to available RAM
#csrf=true # set to false for OpenRefine < 3.3
#date="$(date +%Y%m%d_%H%M%S)"
#workdir="${script_dir}/output/${date}"
#logfile="${script_dir}/log/${date}.log"
#jq="${script_dir}/lib/jq" # path to executable
#refine="${script_dir}/lib/openrefine/refine" # path to executable
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
# start OpenRefine server
refine_start; echo
# ============================= MOCKUP TEST DATA ============================= #
# create the input files used by the import and transform examples below;
# the heredoc delimiters are quoted ("DATA"), so bash writes the content
# verbatim without expanding $ or \
mkdir -p input
# first sample table: header a,b,c and three rows, the last one consisting of
# characters that are special to the shell
cat << "DATA" > "input/example1.csv"
a,b,c
1,2,3
0,0,0
$,\,'
DATA
# second sample table with the same kind of values in a different order
# NOTE(review): this file is imported with separator "\t" further below -
# confirm that the columns are separated by tab characters
cat << "DATA" > "input/example2.tsv"
a b c
' \ $
0 0 0
3 2 1
DATA
# operations history with a single operation: add the column
# "apply-from-file" based on column "b", replacing '2' with 'TEST'
cat << "DATA" > "input/example-operations-history.json"
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "apply-from-file",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": "grel:value.replace('2','TEST')",
"onError": "set-to-blank"
}
]
DATA
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# declare input
# key = project name, value = path to the input file (empty for heredoc);
# refine_store replaces the value with the project id after the import
projects["from heredoc"]=""
projects["csv file example"]="input/example1.csv"
projects["tsv file example"]="input/example2.tsv"
projects["another csv example"]="input/example1.csv"
projects["yet another csv example"]="input/example1.csv"
# --------------------------- IMPORT FROM HEREDOC ---------------------------- #
# quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
# project id will be stored in ${projects[from heredoc]}
# curl reads the data from stdin (@-) and writes the redirect url, which
# contains the id of the new project, to the file ${workdir}/${p}.id
p="from heredoc"
f="" # optional filename, will be stored in OpenRefine project metadata
echo "import heredoc..."
if curl -fs --write-out "%{redirect_url}\n" \
--form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
--form project-name="${p}" \
--form format="text/line-based/*sv" \
--form options='{
"encoding": "UTF-8",
"separator": " "
}' \
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
> "${workdir}/${p}.id" \
<< "DATA"
a b c
1 2 3
0 0 0
$ \ '
DATA
then
log "imported heredoc as ${p}"
else
error "import of ${p} failed!"
fi
# read the project id from the .id file and check that rows were imported
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
echo
# ---------------------------- IMPORT FROM FILE ------------------------------ #
# upload the file declared for the project; the redirect url returned by
# OpenRefine is written to an .id file and evaluated by refine_store
# project id will be stored in ${projects[tsv file example]}
p="tsv file example"
echo "import file ${projects[$p]} ..."
if ! curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
"encoding": "UTF-8",
"separator": "\t"
}' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workdir}/${p}.id"
then
  error "import of ${projects[$p]} failed!"
else
  log "imported ${projects[$p]} as ${p}"
fi
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
echo
# -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #
# project ids will be stored in ${projects[another csv example]} etc.
# every upload runs in a background subshell; monitor remembers its pid and
# monitoring waits for all uploads before the project ids are stored
ps=( "csv file example" "another csv example" "yet another csv example" )
echo "import files" \
  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
for p in "${ps[@]}"; do
  (if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${projects[$p]}" \
    --form project-name="${p}" \
    --form format="text/line-based/*sv" \
    --form options='{
"encoding": "UTF-8",
"separator": ","
}' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${workdir}/${p}.id"
  then
    log "imported ${projects[$p]} as ${p}"
  else
    error "import of ${projects[$p]} failed!"
  fi) &
  monitor "${p}"
done
monitoring
for p in "${ps[@]}"; do
  refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
done
echo
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #
# send the content of a JSON file (url-encoded) as parameter "operations"
p="csv file example"
f="input/example-operations-history.json"
echo "apply ${f} to ${p}..."
if ! curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@"${f}" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
  error "transform ${p} (${projects[$p]}) failed!"
else
  log "transformed ${p} (${projects[$p]})"
fi
echo
# ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #
# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
# curl reads the operations from stdin (operations@-) and url-encodes them;
# the response of the server is discarded
p="csv file example"
echo "add column apply-from-heredoc to ${p}..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "apply-from-heredoc",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": "grel:value.replace('2','TEST')",
"onError": "set-to-blank"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #
# unquoted heredocs with variable and multi-line expression (requires jq)
# \ must be used to quote the characters \, $, and `.
p="csv file example"
replace='TEST'
column="apply with variables"
echo "add column ${column} to ${p}..."
# read returns non-zero here because the heredoc contains no NUL delimiter;
# the variable is filled nevertheless
read -r -d '' expression << EXPRESSION
grel:value.replace(
'2',
'${replace}'
)
EXPRESSION
# jq -s -R turns the multi-line expression into one JSON string;
# the path to jq is quoted so that locations with spaces work
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< JSON
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "${column}",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": $(echo "${expression}" | "${jq}" -s -R '.'),
"onError": "set-to-blank"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL) ------ #
# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
# each request runs in a background subshell; monitor remembers its pid and
# monitoring waits until all requests are finished
ps=( "another csv example" "yet another csv example" )
echo "add column apply-from-heredoc to" "${ps[@]}" "..."
for p in "${ps[@]}"; do
(if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "apply-from-heredoc",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": "grel:value.replace('2','TEST')",
"onError": "set-to-blank"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi) &
monitor "${p}"
done
monitoring
echo
# ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #
# unquoted heredoc (JSON) with variables and multiplied (requires jq)
# \ must be used to quote the characters \, $, and `.
p="csv file example"
columns=( "apply-from-file" "apply-from-heredoc" )
echo "delete columns" "${columns[@]}" "in ${p}..."
# append one JSON array with a single column-removal operation per column
# to a temporary file in workdir
for column in "${columns[@]}"; do
cat << JSON >> "${workdir}/${p}.tmp"
[
{
"op": "core/column-removal",
"columnName": "${column}"
}
]
JSON
done
# jq -s add merges the arrays into one array of operations, which curl reads
# from stdin; the temporary file is deleted only after a successful request
if "${jq}" -s add "${workdir}/${p}.tmp" | curl -fs \
--data project="${projects[$p]}" \
--data-urlencode operations@- \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
log "transformed ${p} (${projects[$p]})"
rm "${workdir}/${p}.tmp"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== EXPORT ================================== #
checkpoint "Export"; echo
# ----------------------------- EXPORT TO STDOUT ----------------------------- #
# print all rows of the project in the chosen format to stdout
p="csv file example"
format="tsv"
echo "export ${p} in ${format} format..."
# use the format variable in the request, so that changing it above takes
# effect (as in the other export examples)
if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows"
then
  log "exported ${p} (${projects[$p]})"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------ EXPORT TO FILE ------------------------------ #
# write all rows of the project to ${workdir}/<project name>.<format>
p="csv file example"
format="csv"
echo "export ${p} to ${format} file..."
if ! curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workdir}/${p}.${format}"
then
  error "export of ${p} (${projects[$p]}) failed!"
else
  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
fi
echo
# ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #
# export with a row template; prefix, suffix and separator are sent as
# multi-line strings and wrap the rendered rows into a JSON array
p="csv file example"
format="json"
echo "export ${p} to ${format} file using template..."
# quoted heredoc ("TEMPLATE") will not be expanded by bash;
# IFS= keeps leading and trailing whitespace of the template
IFS= read -r -d '' template << "TEMPLATE"
{
"a": {{cells['a'].value.jsonize()}},
"b": {{cells['b'].value.jsonize()}},
"c": {{cells['c'].value.jsonize()}}
}
TEMPLATE
# head -c -2 removes the last two bytes (the newlines added by the heredoc
# and by echo) before curl reads the template from stdin
if echo "${template}" | head -c -2 | curl -fs \
--data project="${projects[$p]}" \
--data format="template" \
--data prefix="[
" \
--data suffix="
]" \
--data separator=",
" \
--data engine='{"facets":[],"mode":"row-based"}' \
--data-urlencode template@- \
"${endpoint}/command/core/export-rows" \
> "${workdir}/${p}.${format}"
then
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
else
error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #
# one background subshell per project; monitor remembers the pids and
# monitoring waits until all exports are finished
ps=( "another csv example" "yet another csv example" )
format="tsv"
echo "export" "${ps[@]}" "to ${format} files..."
for p in "${ps[@]}"; do
  (
    if ! curl -fs \
      --data project="${projects[$p]}" \
      --data format="${format}" \
      --data engine='{"facets":[],"mode":"row-based"}' \
      "${endpoint}/command/core/export-rows" \
      > "${workdir}/${p}.${format}"
    then
      error "export of ${p} (${projects[$p]}) failed!"
    else
      log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
    fi
  ) &
  monitor "${p}"
done
monitoring
echo
# ------------------------------ EXPORT PROJECT ------------------------------ #
# save the response of the export-project command as archive file in workdir
p="csv file example"
format="openrefine.tar.gz"
echo "export ${p} to ${format} file..."
if ! curl -fs \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/export-project" \
  > "${workdir}/${p}.${format}"
then
  error "export of ${p} (${projects[$p]}) failed!"
else
  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
fi
echo
# ================================ UTILITIES ================================= #
checkpoint "Utilities"; echo
# NOTE: the pipelines in this section run in a subshell with pipefail enabled;
# without it "if" only sees the exit status of the last command (jq, cut),
# so a failed curl request would never reach the error branch
# ------------------------------ LIST PROJECTS ------------------------------- #
# get all project metadata and reshape json to print a list (requires jq)
echo "list projects..."
if (
  set -o pipefail
  curl -fs --get \
    "${endpoint}/command/core/get-all-project-metadata" \
    | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
)
then
  : #log "printed list of projects"
else
  error "getting list of projects failed!"
fi
echo
# ------------------------------- GET METADATA ------------------------------- #
# get project metadata and reshape json to include project id (requires jq)
p="csv file example"
echo "metadata for ${p}..."
if (
  set -o pipefail
  curl -fs --get \
    --data project="${projects[$p]}" \
    "${endpoint}/command/core/get-project-metadata" \
    | "${jq}" "{ id: ${projects[$p]} } + ."
)
then
  : #log "printed metadata of ${p} (${projects[$p]})"
else
  error "getting metadata of ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------ GET ROW COUNT ------------------------------- #
# get total number of rows
p="csv file example"
echo "total number of rows in ${p}..."
if (
  set -o pipefail
  curl -fs --get \
    --data project="${projects[$p]}" \
    --data limit=0 \
    "${endpoint}/command/core/get-rows" \
    | tr "," "\n" | grep total | cut -d ":" -f 2
)
then
  : #log "printed row count of ${p} (${projects[$p]})"
else
  error "getting row count of ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------- GET COLUMNS -------------------------------- #
# get column names from project model (requires jq)
p="csv file example"
echo "column names of ${p}..."
if (
  set -o pipefail
  curl -fs --get \
    --data project="${projects[$p]}" \
    "${endpoint}/command/core/get-models" \
    | "${jq}" -r '.columnModel | .columns[] | .name'
)
then
  : #log "printed column names of ${p} (${projects[$p]})"
else
  error "getting column names of ${p} (${projects[$p]}) failed!"
fi
echo
# -------------------------- GET OPERATIONS HISTORY -------------------------- #
# get operations history and reshape json to make it applicable (requires jq)
p="csv file example"
f="${workdir}/${p}_history.json"
echo "history of operations for ${p}..."
if (
  set -o pipefail
  curl -fs --get \
    --data project="${projects[$p]}" \
    "${endpoint}/command/core/get-operations" \
    | "${jq}" '[ .entries[] | .operation ]'
) > "${f}"
then
  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
else
  error "getting ops history of ${p} (${projects[$p]}) failed!"
fi
echo
# ---------------------------- GET IMPORT HISTORY ---------------------------- #
# get project metadata and filter import options history (requires jq)
p="csv file example"
echo "history of import for ${p}..."
if (
  set -o pipefail
  curl -fs --get \
    --data project="${projects[$p]}" \
    "${endpoint}/command/core/get-project-metadata" \
    | "${jq}" ".importOptionMetadata[0]"
)
then
  : #log "printed import history of ${p} (${projects[$p]})"
else
  error "getting import history of ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------ DELETE PROJECT ------------------------------ #
# delete a project (rarely needed for batch processing)
p="yet another csv example"
echo "delete project ${p}..."
if ! curl -fs \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
then
  error "deletion of ${p} (${projects[$p]}) failed!"
else
  log "deleted ${p} (${projects[$p]})"
fi
echo
# ================================== FINISH ================================== #
checkpoint "Finish"
echo
# stop OpenRefine server
refine_stop
echo
# calculate run time based on checkpoints
checkpoint_stats
echo
# word count on all files in workdir
count_output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment