Skip to content

Instantly share code, notes, and snippets.

@deoxxa
Created June 7, 2016 10:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deoxxa/21252a74c25870d4c4225d7869c3f28a to your computer and use it in GitHub Desktop.
Save deoxxa/21252a74c25870d4c4225d7869c3f28a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Convert a raw store CSV (path in argv[1]) to the importer's store
format on stdout.

Input columns used (inferred from the code -- confirm against the data):
0 = display name (may be empty), 2 = chain/fallback name, 4 = store
identifier, 5/6/7 = city/state/postcode, 8/9 = two location fields.
"""
import csv
import hashlib
import sys


def store_row(row):
    """Map one input row to one output row for the /stores importer."""
    # "City, ST ZIP" -- column meanings are inferred; TODO confirm.
    location = "{}, {} {}".format(row[5], row[6], row[7])
    # Stable external id from chain name + store identifier.  hashlib
    # requires bytes on Python 3, hence the explicit encode.
    ext_id = hashlib.md5("{}|{}".format(row[2], row[4]).encode("utf-8")).hexdigest()
    # Prefer the explicit name in column 0, falling back to column 2.
    # (The original used the obscure tuple-index idiom
    # `(row[2], row[0])[bool(row[0])]` for the same choice.)
    name = row[0] if row[0] else row[2]
    return [row[2], ext_id, name, row[4], location, row[9], row[8], '']


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: csv wants a text-mode file opened with newline='' (the
    # original's 'rb' mode only worked on Python 2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            writer.writerow(store_row(row))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit the Walgreens category tree (parent + child rows, deduplicated)
from the scrape CSV named in argv[1], as importer CSV on stdout."""
import csv
import hashlib  # unused here; kept from the original script
import re
import sys


def slugify(s):
    """Lower-case *s* and reduce it to a single hyphen-separated slug.

    Spaces, hyphens, dots and slashes become separators; every other
    non-word character is dropped; runs of separators collapse to one.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def category_rows(rows, chain='Walgreens'):
    """Yield [chain, parent-slug-or-'', slug, label] rows, each slug once.

    Column 1 of each input row is the parent category label, column 2 the
    child label (inferred from the code -- confirm against the data).
    """
    seen = set()
    for row in rows:
        # Hoist the slugs: the original recomputed slugify up to six
        # times per row.
        parent = slugify(row[1])
        child = parent + '/' + slugify(row[2])
        if parent not in seen:
            yield [chain, '', parent, row[1]]
            seen.add(parent)
        if child not in seen:
            yield [chain, parent, child, row[2]]
            seen.add(child)


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for out in category_rows(csv.reader(csvfile)):
            writer.writerow(out)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit importer price rows from the Coles scrape CSV named in argv[1].

Example row from the original script's comment:
    Coles,10576,bread-bakery/packaged-bread-bakery/kosher-bakery,67561,3.99
NOTE(review): that sample has only 5 columns but the code reads row[7];
the real input presumably has more columns -- confirm against the data.
"""
import csv
import hashlib  # unused here; kept from the original script
import re
import sys


def slugify(s):
    """Lower-case *s* into a hyphen-separated slug.

    Unused in this script; kept from the original for consistency with
    its siblings.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def price_row(row):
    """Reorder one input row into the importer's 3-column price format.

    Column meanings are inferred from position only -- TODO confirm.
    """
    return [row[7], row[0], row[6]]


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            writer.writerow(price_row(row))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit importer price rows from the scrape CSV named in argv[1]
(identical in behavior to the Coles variant of this script)."""
import csv
import hashlib  # unused here; kept from the original script
import re
import sys


def slugify(s):
    """Lower-case *s* into a hyphen-separated slug.

    Unused in this script; kept from the original for consistency with
    its siblings.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def price_row(row):
    """Reorder one input row into the importer's 3-column price format.

    Column meanings are inferred from position only -- TODO confirm.
    """
    return [row[7], row[0], row[6]]


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            writer.writerow(price_row(row))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit a deduplicated category tree for the chain named in argv[2] from
the scrape CSV in argv[1] (parent label in column 3, child in column 4)."""
import csv
import hashlib  # unused here; kept from the original script
import re
import sys


def slugify(s):
    """Lower-case *s* and reduce it to a single hyphen-separated slug.

    Spaces, hyphens, dots and slashes become separators; every other
    non-word character is dropped; runs of separators collapse to one.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def category_rows(rows, chain):
    """Yield [chain, parent-slug-or-'', slug, label] rows, each slug once.

    Column 3 of each input row is the parent category label, column 4 the
    child label (inferred from the code -- confirm against the data).
    """
    seen = set()
    for row in rows:
        # Hoist the slugs: the original recomputed slugify up to six
        # times per row.
        parent = slugify(row[3])
        child = parent + '/' + slugify(row[4])
        if parent not in seen:
            yield [chain, '', parent, row[3]]
            seen.add(parent)
        if child not in seen:
            yield [chain, parent, child, row[4]]
            seen.add(child)


def main():
    writer = csv.writer(sys.stdout)
    chain = sys.argv[2]
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for out in category_rows(csv.reader(csvfile), chain):
            writer.writerow(out)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit Whole Foods Market price rows for the importer from the scrape
CSV named in argv[1] (category labels in columns 3/4, item code in 5,
price string in 6 -- inferred, confirm against the data)."""
import csv
import hashlib
import re
import sys


def slugify(s):
    """Lower-case *s* and reduce it to a single hyphen-separated slug.

    Spaces, hyphens, dots and slashes become separators; every other
    non-word character is dropped; runs of separators collapse to one.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def product_row(row):
    """Map one input row to the importer's 5-column price-row format."""
    # Stable product id derived from the item code; hashlib requires
    # bytes on Python 3, hence the explicit encode.
    product_id = hashlib.md5(('Whole Foods Market##' + row[5]).encode('utf-8')).hexdigest()
    catalogue_id = 'whole_foods/1'
    return ['Whole Foods Market', catalogue_id,
            slugify(row[3]) + '/' + slugify(row[4]), product_id, row[6]]


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            writer.writerow(product_row(row))


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Synthesize price rows for the chain named in argv[2] from the Whole
Foods product CSV in argv[1].

Each chain gets deterministically perturbed prices (a 0-10% markup keyed
on an md5 of chain + product id) and carries only the pseudo-random
subset of products whose key digest sorts below 'b' (~11/16 of them).
"""
import csv
import hashlib
import re
import sys


def slugify(s):
    """Lower-case *s* and reduce it to a single hyphen-separated slug.

    Spaces, hyphens, dots and slashes become separators; every other
    non-word character is dropped; runs of separators collapse to one.
    """
    s = s.lower()
    for c in [' ', '-', '.', '/']:
        s = s.replace(c, '_')
    # Raw strings: bare '\W' / '\s' are deprecated escape sequences.
    s = re.sub(r'\W', '', s)
    s = s.replace('_', ' ')
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    s = s.replace(' ', '-')
    return s


def price_row(row, chain):
    """Return the importer row for *row*, or None when this product is
    excluded for *chain*.  Assumes header rows were already skipped."""
    # Product ids stay keyed to the Whole Foods catalogue regardless of
    # which chain we synthesize for; encode for Python 3's bytes-only md5.
    product_id = hashlib.md5(('Whole Foods Market##' + row[5]).encode('utf-8')).hexdigest()
    # One digest drives both the markup and the keep/drop decision
    # (the original computed it twice per row).
    key_digest = hashlib.md5((chain + '##' + product_id).encode('utf-8')).hexdigest()
    # First digest byte (0-255) floor-divided into a 0-10 percent markup.
    # '//' preserves the original Python 2 integer-division semantics.
    adjust_by = int(key_digest[:2], 16) // 25
    price = float(row[6][1:])  # strip the leading '$'
    price = '$%.2f' % (price + price / 100 * adjust_by)
    if key_digest >= 'b':
        return None  # this chain does not carry the product
    catalogue_id = slugify(chain) + '/1'
    return [chain, catalogue_id, slugify(row[3]) + '/' + slugify(row[4]),
            product_id, price]


def main():
    writer = csv.writer(sys.stdout)
    chain = sys.argv[2]
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            if row[0] == 'zipcode':  # skip (possibly repeated) header rows
                continue
            out = price_row(row, chain)
            if out is not None:
                writer.writerow(out)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Emit [product_id, item_code, name] rows from the Whole Foods scrape
CSV named in argv[1] (item code in column 5, display name presumably in
column 9 -- confirm against the data)."""
import csv
import hashlib
import re  # unused here; kept from the original script
import sys


def product_row(row):
    """Map one input row to the importer's 3-column product format."""
    # Same stable product-id scheme as the sibling scripts; hashlib
    # requires bytes on Python 3, hence the explicit encode.
    product_id = hashlib.md5(('Whole Foods Market##' + row[5]).encode('utf-8')).hexdigest()
    return [product_id, row[5], row[9]]


def main():
    writer = csv.writer(sys.stdout)
    # Python 3: text mode with newline='' (the original's 'rb' was Py2).
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            writer.writerow(product_row(row))


if __name__ == "__main__":
    main()
#!/bin/sh
# Push the generated CSVs to the importer API. Each dataset's curl line is
# toggled by (un)commenting it; only bel-air_stores is active right now.
set -e
# IMPORT_URL="http://127.0.0.1:3001/api/v1/import"
IMPORT_URL="http://web-staging-api-lb-401381424.us-east-1.elb.amazonaws.com/api/v1/import"
# echo "importing chains"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @chains.csv ${IMPORT_URL}/chains -o /dev/null
# echo "importing categories"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @../../exp/go-scrapers/coles/categories.csv ${IMPORT_URL}/categories -o /dev/null
# echo "importing products"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @../../exp/go-scrapers/coles/products.csv ${IMPORT_URL}/products -o /dev/null
# echo "importing prices"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @../../exp/go-scrapers/coles/prices.csv ${IMPORT_URL}/prices -o /dev/null
# echo "importing stores"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @stores.csv ${IMPORT_URL}/stores -o /dev/null
# echo "importing stores_us"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @stores_us.csv ${IMPORT_URL}/stores -o /dev/null
# echo "importing whole_foods_market_categories"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @whole_foods_market_categories.csv ${IMPORT_URL}/categories -o /dev/null
# echo "importing whole_foods_market_products"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @whole_foods_market_products.csv ${IMPORT_URL}/products -o /dev/null
# echo "importing whole_foods_market_prices"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @whole_foods_market_prices.csv ${IMPORT_URL}/prices -o /dev/null
# echo "importing whole_foods_market_stores"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @whole_foods_market_stores.csv ${IMPORT_URL}/stores -o /dev/null
# echo "importing walgreens_categories"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @walgreens_categories.csv ${IMPORT_URL}/categories -o /dev/null
# echo "importing walgreens_prices"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @walgreens_prices.csv ${IMPORT_URL}/prices -o /dev/null
# echo "importing walgreens_stores"
# time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @walgreens_stores.csv ${IMPORT_URL}/stores -o /dev/null
echo "importing bel-air_stores"
# Quote the expansion (shellcheck SC2086); response headers/body are left
# on stdout deliberately (no -o /dev/null) so the import result is visible.
time curl -s -D - -H 'maintenance-key: sozo-maintenance' --data-binary @bel-air_stores.csv "${IMPORT_URL}/stores"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment