Skip to content

Instantly share code, notes, and snippets.

@werdan
Last active June 2, 2016 11:49
Show Gist options
  • Save werdan/894d65d9a47beae8f9a3b8c3bcf42f86 to your computer and use it in GitHub Desktop.
Save werdan/894d65d9a47beae8f9a3b8c3bcf42f86 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# EMEX price-list import.
#
# Downloads supplier price archives from emexonline.com, converts each
# price file to UTF-8 CSV, applies the retail price formula, joins in
# brand / supplier / known-SKU mappings, and produces:
#   all.csv         - every processed line (matched and unmatched), for analytics
#   all_matched.csv - only lines whose SKU matched a part already in our DB
#
# Expects sku.lst, brands.lst and suppliers.lst to be present in the
# working directory (see the format comments next to each sort below).
set -euo pipefail

cd /var/www/shared/emex || exit 1

# Start from a clean slate; -f so missing files are not an error.
rm -f all.csv
rm -f -- *.rar

# NOTE: the braces are *shell* brace expansion — they expand to 13 URLs
# before wget runs, so they must stay unquoted. A partial download
# failure is tolerated (original behavior): we process whatever arrived.
wget ftp://emexonline.com/megaprice/{EMIR,EMIJ,EMIS,EMIN,EMIT,EUSA,EMIZ,EMIL,EURU,FAST,KORA,KOSA,OPTA}.rar \
  || echo "warning: some price archives failed to download" >&2

find . -name '*.rar' -exec unrar x -o+ {} \;

# Pre-sort the mapping files so they can be used with join(1).
# join requires its inputs sorted under the SAME collation, so we pin
# LC_ALL=C (byte order, reproducible) for every sort and join below.
## SKU,ID,BRAND,PN
## ABS_0001Q,2731821,A.B.S.,0001Q
LC_ALL=C sort sku.lst -o sku.lst
## EMEX_BRAND_CODE,BOODMO_BRAND_NAME,BOODMO_BRAND_ID
## !!,ARMSTER,4478
LC_ALL=C sort brands.lst -o brands.lst
## https://gist.github.com/werdan/20606e3f02837f83e1b7d9ac407e443a
LC_ALL=C sort suppliers.lst -o suppliers.lst

# NUL-delimited read loop instead of `for f in $(find …)` — safe for any
# file name; process substitution keeps the loop in the current shell.
while IFS= read -r -d '' f; do
  file=$(basename "$f")
  file=${file%%.*}
  echo "Processing $file.txt ..."

  # Replace tabs with commas and transcode WINDOWS-1251 -> UTF-8.
  sed 's/\t/,/g' "$file.txt" | iconv -f WINDOWS-1251 -t UTF-8 > "$file.tmp"

  # Skip the header (NR>1), keep fields 1-2, apply the retail price
  # formula (price * 1.4 markup + 50, times 67 RUB rate) rounded down.
  awk -F, -v OFS=',' 'NR>1 {$3=($3*1.4+50)*67; print $1,$2,int($3)}' "$file.tmp" > "$file.csv"

  # Join brand codes against brands.lst to pick up the brand name.
  LC_ALL=C join -a1 -j1 -t, -o2.2,1.2,1.3 <(LC_ALL=C sort "$file.csv") brands.lst > "$file.csv2"

  # Prefix each line with the supplier code (the file's base name).
  # Replaces the original nl|sed|sed chain with a single substitution.
  sed "s/^/$file,/" "$file.csv2" > "$file.csv"

  # Join the supplier code against suppliers.lst to pick up supplier id.
  LC_ALL=C join -a1 -j1 -t, -o1.2,1.3,1.4,2.2 <(LC_ALL=C sort "$file.csv") suppliers.lst > "$file.csv2"

  # Build SKU = BRAND_PN (non-alphanumerics stripped, uppercased),
  # then append qty=1 and enabled=1.
  awk -F, -v OFS=',' '{gsub(/[^[:alnum:]]/,"",$2); print toupper($1)"_"toupper($2),$3,$2,$1,$4,"1","1"}' "$file.csv2" > "$file.csv3"
  rm -f "$file.csv"

  # Join against the SKUs currently in our DB (sku.lst) to resolve part ids.
  LC_ALL=C join -a1 -j1 -t, -o1.5,2.2,1.4,1.3,1.2,1.6,1.7 <(LC_ALL=C sort "$file.csv3") sku.lst > "$file.csv"

  rm -f "$file".{tmp,txt,csv2,csv3}

  # Accumulate matched AND not-matched lines — needed for analytics.
  # Line format: supplier_id,part_id,brand_code,number,price,qty,enabled
  cat "$file.csv" >> all.csv
done < <(find . -name '*.txt' -type f -print0)

# Keep only lines with a resolved part id ($2 > 0). This runs ONCE after
# the loop; the original rebuilt all_matched.csv from the whole of
# all.csv on every iteration (quadratic work, identical final result).
awk -F, '$2>0' all.csv > all_matched.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment