Created
January 10, 2011 12:54
-
-
Save zerko/772744 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* yahoo_store_export_to_froogle.pl port to PHP | |
# Author: Anatoly Kudinov <zz@rialabs.org> | |
# Original description: | |
# yahoo_store_export_to_froogle.pl | |
# | |
# This is a Perl script to convert Yahoo! Stores XML feed into | |
# Froogle data feed, and FTP the file to Froogle's server. | |
# Note: Many stores are automatically indexed by Froogle. | |
# | |
# REQUIREMENTS | |
# A merchant agreement/ftp account with Froogle. | |
# A Yahoo! store account with store export enabled. | |
# | |
# INSTRUCTIONS | |
# This script requires no command line parameters and creates no output. | |
# It places the Froogle data feed into the system temporary directory. | |
# Parameters for the store and Froogle user account | |
# must be entered in the section labeled "CUSTOM INFORMATION". | |
# The script can be tested against a saved Yahoo store's xml file | |
# and alternate FTP server by setting the "DEBUG VARIABLES". | |
# | |
# FEATURES | |
# Fully automated. The Froogle data format is the simple format, not | |
# extended format. Entire store product library is processed. | |
# Product code is taken as the Yahoo! store created abbreviation. | |
# Image URLs and Section names are handled correctly. | |
# All HTML tags are removed from product captions. | |
# All tabs, carriage returns, and new lines are replaced with spaces.
# Only products with a price are sent. Orderable flag is ignored. | |
# | |
# LIMITATIONS | |
# No special handling for books, music, dvd, etc. | |
# No support for quantity pricing or alternate currency. | |
# No support for item options. | |
# No support for partial uploads. | |
# | |
# EXAMPLE CRONTAB ('crontab -e' to edit the crontab) | |
#] # Send Yahoo Store data to Froogle daily at 6 am | |
#] 0 6 * * * /home/u/user/bin/yahoo_store_export_to_froogle.pl | |
# | |
# LINKS | |
# Yahoo Store XML DTD | |
# http://store.yahoo.com/lib/vw/StoreExport.dtd | |
# Store export overview | |
# http://store.yahoo.com/storexport.html | |
# Froogle merchant info | |
# http://froogle.google.com/froogle/merchants.html | |
# CPAN Perl documentation | |
# http://search.cpan.org/ | |
# UTF8 bug under red hat when warning enabled | |
# http://archive.develooper.com/perl5-porters@perl.org/msg88085.html | |
# | |
# AUTHOR | |
# Shailesh Humbad, March 21, 2003, http://www.somacon.com | |
# This code is hereby granted to the public domain. | |
*/ | |
// DEBUG VARIABLES
// Set $debug to 1 to read the feed from a saved file (and to select the
// debug FTP account); set it to 0 to download the live feed.
$debug = 0;
// File containing a saved XML feed; only read in debug mode.
$debug_xmlfilename = "storeexportdebug.xml";
// FTP account to use in debug mode.
$debug_username = "";
$debug_password = "";
$debug_ftpserver = "";

// CUSTOM INFORMATION
$froogle_username = "";
$froogle_password = "";
$froogle_ftpserver = "";
// Feed URL pattern: "http://store.yahoo.com/storename/objinfo.xml".
// NOTE(review): the original assigned that placeholder and then immediately
// overwrote it with the URL below; only the effective value is kept here.
// Replace "ygear-test" with your own store name.
$store_xmlfeed_url = "http://store.yahoo.com/ygear-test/objinfo.xml";

// DEFINE XML PARSER SUBROUTINES
// The handlers are the functions tag_start(), tag_end() and handle_char()
// defined at the bottom of this file; they communicate through the globals
// $product, $productarray and $context initialised below.
$xmlparser = xml_parser_create();
xml_set_element_handler($xmlparser, "tag_start", "tag_end");
xml_set_character_data_handler($xmlparser, "handle_char");

// INITIALIZE VARIABLES
$product = array();       // fields of the product currently being parsed
$productarray = array();  // every priced product collected so far
$context = array();       // stack of the currently open element names

if ($debug) {
    echo "Yahoo Store Export to Froogle - Debug.\n";
}

// Open the temporary froogle data file (removed automatically when closed).
$froogle_data_file = tmpfile();
if ($froogle_data_file === false) {
    die("Could not create the temporary Froogle data file.\nAborting");
}

// Column order shared by the header row and every data row.
$froogle_columns = array(
    'product_url', 'name', 'description', 'price',
    'image_url', 'category', 'code',
);

// print column names
fwrite($froogle_data_file, implode("\t", $froogle_columns)."\n");

// RETRIEVE THE ENTIRE XML FEED INTO A STRING
// The live download needs allow_url_fopen enabled (could be ported to curl).
// Debug mode reads the saved file instead; the original referenced the
// undefined variable $debug_xml_filename here and therefore never loaded it.
$xmlsource = $debug ? $debug_xmlfilename : $store_xmlfeed_url;
$xmldata = file_get_contents($xmlsource);
if ($xmldata === false || $xmldata === "") {
    die("Error while getting ".$xmlsource.
        "\nAborting");
}

// RUN THE XML PARSER (PARSING IS DONE IN THE SUBROUTINES)
// The whole document is passed in one call, so it is flagged as the final
// chunk; this makes the parser report truncated input instead of waiting
// for more data.
if (!xml_parse($xmlparser, $xmldata, true)) {
    die("XML error in ".$xmlsource.": ".
        xml_error_string(xml_get_error_code($xmlparser)).
        " at line ".xml_get_current_line_number($xmlparser).
        "\nAborting");
}

// WRITE THE PARSED DATA TO THE TEMP FILE
// One tab-separated row per product with exactly the header's columns (the
// original emitted an extra trailing tab on every data row). A dedicated
// loop variable is used so the global $product is not overwritten.
foreach ($productarray as $froogle_row) {
    $fields = array();
    foreach ($froogle_columns as $column) {
        $fields[] = $froogle_row[$column];
    }
    fwrite($froogle_data_file, implode("\t", $fields)."\n");
}

// In debug mode the upload (currently not ported, see below) would go to
// the debug FTP account.
if ($debug) {
    $froogle_ftpserver = $debug_ftpserver;
    $froogle_username = $debug_username;
    $froogle_password = $debug_password;
}

// Dump the generated feed to standard output.
echo "Generated csv: \n";
fseek($froogle_data_file, 0);
// Compare against false explicitly so a falsy line cannot end the loop early.
while (($line = fgets($froogle_data_file)) !== false) {
    echo $line;
}

/*
# UPLOAD THE FROOGLE FORMAT DATA TO FROOGLE
# (Perl code from the original script, not yet ported to PHP.)
$ftp = Net::FTP->new
(
$froogle_ftpserver,
Timeout => 30
) or die "Could not connect to FTP server: $froogle_ftpserver.\n";
$ftp->login($froogle_username, $froogle_password)
or die "Could not log in to FTP server.\n";
$ftp->put($froogle_data_filename);
$ftp->quit();
*/
// END OF SCRIPT
// --------- XML ROUTINES --------- | |
/**
 * XML TAG START ROUTINE
 *
 * Start-element handler registered with xml_set_element_handler().
 * Normalises the element name to "Capitalised" form, resets the global
 * $product record whenever a Product element opens, and pushes the name
 * onto the global $context stack.
 *
 * @param mixed  $parser   The XML parser invoking the handler (unused).
 * @param string $tagvalue Element name as reported by the parser.
 * @param array  $attrs    Element attributes keyed by attribute name.
 * @return void
 */
function tag_start($parser, $tagvalue, $attrs) {
    global $product, $context;

    // e.g. "PRODUCTREF" -> "Productref"
    $tagvalue = ucfirst(strtolower($tagvalue));

    if ($tagvalue == "Product") {
        // reset the product data
        $product['product_url'] = "";
        $product['name'] = "";
        $product['description'] = "";
        $product['price'] = "";
        $product['image_url'] = "";
        $product['orderable'] = "";
        $product['category'] = "";
        // A Product element without an ID attribute gets an empty code
        // instead of triggering an undefined-index notice/warning.
        $product['code'] = isset($attrs['ID']) ? $attrs['ID'] : "";
    }

    $context[] = $tagvalue;
}
/**
 * XML CHARACTER DATA ROUTINE
 *
 * Character-data handler registered with
 * xml_set_character_data_handler(). Appends the text chunk to the field
 * of the global $product that corresponds to the current element path
 * (the global $context stack joined with spaces). Text found under any
 * other path is ignored.
 *
 * Values are always appended rather than assigned: the image markup and
 * the category are assembled from several chunks (the latter also from
 * several ProductRef elements).
 *
 * @param mixed  $parser   The XML parser invoking the handler (unused).
 * @param string $tagvalue The chunk of character data.
 * @return void
 */
function handle_char($parser, $tagvalue) {
    global $context, $product;

    // element path => product field receiving the text
    $field_for_path = array(
        // froogle attributes
        "Storeexport Products Product Url"               => 'product_url',
        "Storeexport Products Product Description"       => 'name',
        "Storeexport Products Product Caption"           => 'description',
        "Storeexport Products Product Pricing Baseprice" => 'price',
        "Storeexport Products Product Picture"           => 'image_url',
        // meta attributes
        "Storeexport Products Product Orderable"         => 'orderable',
        "Storeexport Products Product Path Productref"   => 'category',
    );

    $path = implode(" ", $context);
    if (!isset($field_for_path[$path])) {
        return;
    }

    $product[$field_for_path[$path]] .= $tagvalue;
}
/**
 * XML TAG END ROUTINE
 *
 * End-element handler registered with xml_set_element_handler().
 *
 * When a Product element closes, the collected fields in the global
 * $product are cleaned up (category separator trimmed, image URL
 * extracted, whitespace flattened, HTML stripped from the description)
 * and, if the product has a price, a copy is appended to the global
 * $productarray. When a Productref element closes, the " > " separator
 * is appended to the category. In every case the element is popped from
 * the global $context stack.
 *
 * @param mixed  $parser   The XML parser invoking the handler (unused).
 * @param string $tagvalue Element name as reported by the parser.
 * @return void
 */
function tag_end($parser, $tagvalue) {
    global $product, $productarray, $context;

    // e.g. "PRODUCTREF" -> "Productref"
    $tagvalue = ucfirst(strtolower($tagvalue));

    if ($tagvalue == "Product") {
        // CLEAN UP THE PRODUCT DATA

        // Trim off the trailing " > " separator that the Productref
        // branch below appends after every category component.
        if (substr($product['category'], -3) === " > ") {
            $product['category'] = substr($product['category'], 0, -3);
        }

        // Extract the URL for the image from the <img ...> markup. The
        // value stops at a quote, whitespace or '>', so surrounding
        // quotes and any attributes following src are not captured.
        // When nothing matches, the field is left untouched.
        $matches = array();
        if (preg_match('/\bsrc\s*=\s*["\']?([^"\'\s>]+)/i', $product['image_url'], $matches)
            && !empty($matches[1])) {
            $product['image_url'] = $matches[1];
        }

        // Replace all cr, lf, and tab with spaces in all fields, so no
        // value can break the tab-separated, line-oriented output.
        $product = str_replace(array("\t", "\n", "\r"), " ", $product);

        // replace all html tags with empty string
        $product['description'] = strip_tags($product['description']);

        // PUSH THE PRODUCT
        // do not push products without a price:
        // these are either sections or unpriced items
        if ($product['price']) {
            $productarray[] = array(
                'code'        => $product['code'],
                'name'        => $product['name'],
                'description' => $product['description'],
                'product_url' => $product['product_url'],
                'image_url'   => $product['image_url'],
                'price'       => $product['price'],
                'orderable'   => $product['orderable'],
                'category'    => $product['category'],
            );
        }
    }

    // add separator for each ProductRef to create section name
    if ($tagvalue == "Productref") {
        $product['category'] .= " > ";
    }

    array_pop($context);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment