Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Command Line Script to bulk load XLSX into ES
unzip -Uo 10k_records.xlsx xl/worksheets/sheet1.xml xl/sharedStrings.xml -d proc
cat proc/xl/sharedStrings.xml \
| sed -e 's/[^[:print:][:blank:][:space:]]/[?]/g' -e 's/|/-delim-/g' \
| mawk 'BEGIN {RS = "</t></si><si><t>|<si><t>|</t></si></sst>|</t></si><si><t xml:space=\"preserve\">"
FS = "999ABCDE"
OFS = "|"}
{ print NR - 2, $1 }' \
| LANG=C sort -f -t "|" -k1 \
| grep -v "<sst xmlns=\|<?xml version" \
> keys
cat proc/xl/worksheets/sheet1.xml \
| mawk 'BEGIN {RS = "<c r=\""
FS = "><v>|</v></c>|\" s=\""
}
{
col = substr($1,1,1)
row = substr($1,2)
if($3 == "")
print $3, $1, col, row > "garbage"
else
if($2 ~ /.*t=\"s\".*/)
printf "%s|%s|%s\n", $3, row, col > "strings"
else
if(col ~ /[ABCEF]/)
printf "%s|%s|%s\n", row, col, $3 > "nonstrings"
else
if(col ~ /[DGHI]/)
printf "%s|%s|%.6g\n", row, col, $3 > "nonstrings"
else print $3, $1, col, row > "garbage"
}
END {
close("garbage")
close("nonstrings")
close("strings")
}'
sort nonstrings > nssort
LANG=C sort -f -t "|" -k1 strings \
| LANG=C join -i -e 'missing' -t '|' -1 1 -2 1 - keys \
| cut -d'|' -f2,3,4 \
| sort \
| sort -m - nssort \
> finout
sort -g finout -k1,1n \
| tee finsort \
| cut -d'|' -f1 \
| sed '1d' \
> sortout
paste -d'|' sortout finsort \
| mawk '{FS = "|"
if($1 != $2)
printf "%s|%s|%s\n-EOR-\n", $2, $3, $4
else
printf "%s|%s|%s\n", $2, $3, $4
}' \
| tr '\n' '|' \
| sed -e 's/-EOR-/\n/g' \
| mawk '{FS = "|"
if(NR > 2 && $0 ~ /[^|.$]/)
printf "{ \"index\" : { \"_index\" : \"cerberus\", \"_type\" : \"results\"}}\n {\"upload_record_id\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\",\"%s\":\"%s\"}\n", $2,$3,$4,$6,$7,$9,$10,$12,$13,$15,$16,$18,$19,$21,$22,$24,$25,$27,$28
next
}' \
| sed -e 's/,\"\":\"\"//g' \
| curl -XPUT 'http://localhost:9200/_bulk' -o bulk_out --data-binary @-
rm -r proc finout nssort garbage nonstrings strings keys finsort sortout
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.