VeraCart Product Catalog Exporter
# VeraCart Product Catalog Exporter
#
# Exports the full category/product catalog to categories.csv and
# products.csv, and mirrors all referenced category/product images locally.
#
# Luke Macken <lewk@csh.rit.edu>
# License: GPLv3+
# NOTE: This is a Python 2 script; it relies on urllib/urllib2 and BeautifulSoup 3.
import os
import csv
from urllib import urlretrieve
from urllib2 import urlopen
from xml.dom.minidom import parseString
from BeautifulSoup import BeautifulSoup
APP_NAME = ""   # your VeraCart application name
API_TOKEN = ""  # your VeraCart API token

URL = "https://corp1.veracart.com/api/rest/catalog/?api_token=%s&call_name=" % API_TOKEN
STATIC_BASE = "https://static.veracart.com/" + APP_NAME
category_cols = ['id', 'unique_id', 'parent_id', 'name', 'header', 'footer',
                 'hide', 'sort', 'image_sm', 'image_md', 'image_lg', 'updated_at']
item_cols = ['id', 'unique_id', 'sku', 'name', 'subtitle', 'summary', 'detail',
             'price', 'sale_price', 'no_shipping', 'no_tax', 'weight',
             'status', 'image_sm', 'image_md', 'image_lg', 'seo_title_tag',
             'seo_meta_keywords', 'seo_meta_description',
             'updated_at', 'created_at']
done_items = set()  # unique_ids already exported, to skip duplicates across categories
def get(element, name):
    """Return the text of the first <name> child of element, or '' if empty."""
    try:
        node = element.getElementsByTagName(name)[0]
        if node.childNodes:
            return node.childNodes[0].data
        return ''
    except Exception, e:
        print "get(%s, %s)" % (element, name)
        print str(e)
        return ''
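
# Illustration only (made-up XML, not a real API response): how get()
# behaves on a hand-built document:
#
#   >>> doc = parseString("<category><name>Widgets</name><header/></category>")
#   >>> get(doc.documentElement, 'name')
#   u'Widgets'
#   >>> get(doc.documentElement, 'header')
#   ''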
# Fetch the full category list and prepare the two CSV output files.
category_dom = parseString(urlopen(URL + "get_category_list").read())
category_csv = csv.writer(open('categories.csv', 'wb'))
category_csv.writerow(category_cols)
categories = category_dom.lastChild.getElementsByTagName('categories')[0]

item_csv = csv.writer(open('products.csv', 'wb'))
item_csv.writerow(item_cols + ['category_id'])
for category in categories.getElementsByTagName('category'):
    name = get(category, 'name')
    row = []
    for col in category_cols:
        row.append(get(category, col))
    category_csv.writerow(row)

    # download category images in each size, mirroring the remote path locally
    for size in ('sm', 'md', 'lg'):
        img = get(category, 'image_%s' % size)
        if img:
            output = img.replace(STATIC_BASE, '')
            dirname = os.path.dirname(output)
            if os.path.exists(output):
                continue
            if dirname and not os.path.isdir(dirname):
                os.makedirs(dirname)
            urlretrieve(img, filename=output)

    # fetch this category's items, caching each raw XML response on disk
    catid = get(category, 'id')
    if os.path.exists(catid + '.xml'):
        item_xml = open(catid + '.xml').read()
    else:
        print "Downloading %s products" % name
        item_xml = urlopen(URL + "get_category_items&category_id=%s" % catid).read()
        out_xml = open(catid + '.xml', 'w')
        out_xml.write(item_xml)
        out_xml.close()
    if 'No Items found in this category' in item_xml:
        continue
    # parse the item XML with BeautifulSoup, which tolerates markup quirks
    item_soup = BeautifulSoup(item_xml)
    for item in item_soup.findAll('item'):
        item_id = item.find('unique_id').text
        if item_id in done_items:
            continue
        done_items.add(item_id)
        row = []
        print item.find('name').text
        for col in item_cols:
            # the csv module in Python 2 needs byte strings, so encode the text
            row.append(item.find(col).text.encode('utf-8'))
        item_csv.writerow(row + [catid])

        # download item images
        for size in ('sm', 'md', 'lg'):
            img = item.find('image_%s' % size).text
            if img:
                output = img.replace(STATIC_BASE, '')
                dirname = os.path.dirname(output)
                if os.path.exists(output):
                    continue
                if dirname and not os.path.isdir(dirname):
                    os.makedirs(dirname)
                print "Downloading", img
                urlretrieve(img, filename=output)
print "Done! %d items" % len(done_items)