Skip to content

Instantly share code, notes, and snippets.

@zerko
Created January 10, 2011 12:54
Show Gist options
  • Save zerko/772744 to your computer and use it in GitHub Desktop.
Save zerko/772744 to your computer and use it in GitHub Desktop.
<?php
/* yahoo_store_export_to_froogle.pl port to PHP
# Author: Anatoly Kudinov <zz@rialabs.org>
# Original description:
# yahoo_store_export_to_froogle.pl
#
# This is a Perl script to convert Yahoo! Stores XML feed into
# Froogle data feed, and FTP the file to Froogle's server.
# Note: Many stores are automatically indexed by Froogle.
#
# REQUIREMENTS
# A merchant agreement/ftp account with Froogle.
# A Yahoo! store account with store export enabled.
#
# INSTRUCTIONS
# This script requires no command line parameters and creates no output.
# It places the Froogle data feed into the system temporary directory.
# Parameters for the store and Froogle user account
# must be entered in the section labeled "CUSTOM INFORMATION".
# The script can be tested against a saved Yahoo store's xml file
# and alternate FTP server by setting the "DEBUG VARIABLES".
#
# FEATURES
# Fully automated. The Froogle data format is the simple format, not
# extended format. Entire store product library is processed.
# Product code is taken as the Yahoo! store created abbreviation.
# Image URLs and Section names are handled correctly.
# All HTML tags are removed from product captions.
# All tabs, carriage returns, and new lines are replaced with spaces.
# Only products with a price are sent. Orderable flag is ignored.
#
# LIMITATIONS
# No special handling for books, music, dvd, etc.
# No support for quantity pricing or alternate currency.
# No support for item options.
# No support for partial uploads.
#
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab)
#] # Send Yahoo Store data to Froogle daily at 6 am
#] 0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl
#
# LINKS
# Yahoo Store XML DTD
# http://store.yahoo.com/lib/vw/StoreExport.dtd
# Store export overview
# http://store.yahoo.com/storexport.html
# Froogle merchant info
# http://froogle.google.com/froogle/merchants.html
# CPAN Perl documentation
# http://search.cpan.org/
# UTF8 bug under red hat when warning enabled
# http://archive.develooper.com/perl5-porters@perl.org/msg88085.html
#
# AUTHOR
# Shailesh Humbad, March 21, 2003, http://www.somacon.com
# This code is hereby granted to the public domain.
*/
// DEBUG VARIABLES
// set debug to 1 to enable debug mode, otherwise set to 0
$debug = 0;
// enter file containing xml feed to use in debug mode
$debug_xmlfilename = "storeexportdebug.xml";
// ftp server to use in debug mode
$debug_username = "";
$debug_password = "";
$debug_ftpserver = "";

// CUSTOM INFORMATION
$froogle_username = "";
$froogle_password = "";
$froogle_ftpserver = "";
// Template for the store feed URL ("storename" is a placeholder).
$store_xmlfeed_url = "http://store.yahoo.com/storename/objinfo.xml";
// NOTE: this second assignment overrides the template above; it is the URL
// that is actually fetched when debug mode is off.
$store_xmlfeed_url = "http://store.yahoo.com/ygear-test/objinfo.xml";

// DEFINE XML PARSER SUBROUTINES
// The handlers (tag_start, tag_end, handle_char) are defined further down in
// this file; they collect priced products into $productarray.
$xmlparser = xml_parser_create();
xml_set_element_handler($xmlparser, "tag_start", "tag_end");
xml_set_character_data_handler($xmlparser, "handle_char");

// INITIALIZE VARIABLES
// These three globals are shared with the parser handlers.
$product = Array();
$productarray = Array();
$context = Array();

if ($debug) { echo "Yahoo Store Export to Froogle - Debug.\n"; }

// open the temporary froogle data file
$froogle_data_file = tmpfile();
if ($froogle_data_file === false)
{
    die ("Error while creating the temporary data file\nAborting");
}

// Column order of the feed; used for both the header line and every data row
// so the two can never disagree.
$froogle_columns = Array('product_url', 'name', 'description', 'price',
    'image_url', 'category', 'code');

// print column names
fwrite($froogle_data_file, implode("\t", $froogle_columns)."\n");

// RETRIEVE THE ENTIRE XML FEED INTO A STRING
// In debug mode the feed is read from the local file named by
// $debug_xmlfilename; otherwise it is fetched from the store URL
// (allow_url_fopen should be true, could be ported to curl).
$xml_source = $debug ? $debug_xmlfilename : $store_xmlfeed_url;
$xmldata = file_get_contents($xml_source);
if ($xmldata === false || $xmldata === "")
{
    die ("Error while getting ".$xml_source.
        "\nAborting");
}

// RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
// The whole document is passed in one call, so it is flagged as the final
// chunk. A parse failure is reported but not fatal: whatever products were
// collected before the error are still written out.
if (!xml_parse($xmlparser, $xmldata, true))
{
    trigger_error(
        sprintf("XML parse error: %s at line %d",
            xml_error_string(xml_get_error_code($xmlparser)),
            xml_get_current_line_number($xmlparser)),
        E_USER_WARNING);
}

// WRITE THE PARSED DATA TO THE TEMP FILE
// One tab-separated line per product, with exactly as many fields as the
// header (no trailing tab).
foreach ($productarray as $row)
{
    $fields = Array();
    foreach ($froogle_columns as $column)
    {
        $fields[] = $row[$column];
    }
    fwrite($froogle_data_file, implode("\t", $fields)."\n");
}

if ($debug)
{
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username = $debug_username;
    $froogle_password = $debug_password;
}

echo "Generated csv: \n";
fseek($froogle_data_file, 0);
// Compare against false explicitly so a line whose text is falsy (e.g. "0")
// does not end the loop early.
while (($str = fgets($froogle_data_file)) !== false)
{
    echo $str;
}

// The upload step below is the original Perl and has not been ported to PHP;
// it is left disabled, so the FTP settings above are currently unused.
/*
# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
$ftp = Net::FTP->new
(
$froogle_ftpserver,
Timeout => 30
) or die "Could not connect to FTP server: $froogle_ftpserver.\n";
$ftp->login($froogle_username, $froogle_password)
or die "Could not log in to FTP server.\n";
$ftp->put($froogle_data_filename);
$ftp->quit();
*/

// Closing the handle also removes the temporary file.
fclose($froogle_data_file);
// END OF SCRIPT
// --------- XML ROUTINES ---------
// XML TAG START ROUTINE
/**
 * Element-start handler for the XML parser.
 *
 * Normalises the tag name to lower case with a leading capital
 * (e.g. "PRODUCT" becomes "Product"), resets the shared $product record
 * whenever a Product element opens, and pushes the normalised tag name onto
 * the shared $context stack.
 *
 * @param mixed  $parser   Parser handle passed by the XML extension (unused).
 * @param string $tagvalue Element name as reported by the parser.
 * @param array  $attrs    Element attributes keyed by attribute name.
 */
function tag_start($parser, $tagvalue, $attrs) {
    global $product, $context;

    $tagvalue = ucfirst(strtolower($tagvalue));

    if ($tagvalue == "Product") {
        // reset the product data
        $product['product_url'] = "";
        $product['name'] = "";
        $product['description'] = "";
        $product['price'] = "";
        $product['image_url'] = "";
        $product['orderable'] = "";
        $product['category'] = "";
        // The product code comes from the element's ID attribute. Fall back
        // to an empty string when the attribute is absent so the record
        // always holds a string and no undefined-index notice is raised.
        $product['code'] = isset($attrs['ID']) ? $attrs['ID'] : "";
    }

    $context[] = $tagvalue;
}
// XML CHARACTER DATA ROUTINE
/**
 * Character-data handler for the XML parser.
 *
 * Joins the shared $context stack into a space-separated element path and,
 * when that path is one of the known product fields, appends the text to the
 * matching entry of the shared $product record. Text is always appended
 * rather than assigned, so an element's content may arrive in several calls
 * (e.g. around entities such as &gt;) and category names from successive
 * Productref elements accumulate.
 *
 * @param mixed  $parser   Parser handle passed by the XML extension (unused).
 * @param string $tagvalue The chunk of character data.
 */
function handle_char($parser, $tagvalue) {
    global $context, $product;

    // Element path in the store export => field of the product record.
    // The first five are froogle attributes, the last two are meta attributes.
    $field_by_path = array(
        "Storeexport Products Product Url" => 'product_url',
        "Storeexport Products Product Description" => 'name',
        "Storeexport Products Product Caption" => 'description',
        "Storeexport Products Product Pricing Baseprice" => 'price',
        "Storeexport Products Product Picture" => 'image_url',
        "Storeexport Products Product Orderable" => 'orderable',
        "Storeexport Products Product Path Productref" => 'category',
    );

    $path = implode(" ", $context);
    if (!isset($field_by_path[$path])) {
        // Text outside the elements of interest is ignored.
        return;
    }

    $product[$field_by_path[$path]] .= $tagvalue;
}
// XML TAG END ROUTINE
/**
 * Element-end handler for the XML parser.
 *
 * When a Product element closes, the shared $product record is cleaned up
 * and, if it has a price, a copy is appended to the shared $productarray.
 * When a Productref element closes, a " > " separator is appended to the
 * category being built. In every case the innermost entry is popped off the
 * shared $context stack.
 *
 * @param mixed  $parser   Parser handle passed by the XML extension (unused).
 * @param string $tagvalue Element name as reported by the parser.
 */
function tag_end($parser, $tagvalue) {
    global $product, $productarray, $context;

    $tagvalue = ucfirst(strtolower($tagvalue));

    if ($tagvalue == "Product") {
        // CLEAN UP THE PRODUCT DATA

        // Drop the last three characters of a non-empty category: the " > "
        // separator added when the final Productref closed.
        $category_length = strlen($product['category']);
        if ($category_length > 0) {
            $product['category'] = substr($product['category'], 0,
                $category_length - 3);
        }

        // Reduce the picture markup to whatever follows "src=" up to the
        // next ">"; leave the field untouched when nothing is captured.
        $found = Array();
        preg_match("/.*?src\=(.*?)\>/i", $product['image_url'], $found);
        if (!empty($found[1])) {
            $product['image_url'] = $found[1];
        }

        // Tabs and line breaks would corrupt the tab-separated output, so
        // turn each one into a space in every non-empty field.
        foreach (array_keys($product) as $field) {
            if ($product[$field]) {
                $product[$field] = str_replace(Array("\t","\n","\r"), " ",
                    $product[$field]);
            }
        }

        // replace all html tags with empty string
        $product['description'] = strip_tags($product['description']);

        // PUSH THE FINISHED PRODUCT
        // Records without a price are either sections or unpriced items
        // and are not exported.
        if ($product['price']) {
            $export_fields = Array('code', 'name', 'description',
                'product_url', 'image_url', 'price', 'orderable', 'category');
            $record = Array();
            foreach ($export_fields as $field) {
                $record[$field] = $product[$field];
            }
            $productarray[] = $record;
        }
    }

    // add separator for each ProductRef to create section name
    if ($tagvalue == "Productref") {
        $product['category'] .= " > ";
    }

    array_pop($context);
}
// (GitHub page footer captured with the gist; not part of the script) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment