Created
January 28, 2011 17:12
-
-
Save wezm/800580 to your computer and use it in GitHub Desktop.
Example of using genx to generate a Sphinx xmlpipe2 stream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var genx = require('genx') | |
, http = require('http') | |
, jsdom = require('jsdom').jsdom | |
, qs = require('querystring'); | |
/**
 * Convert a Google search results page into a Sphinx xmlpipe2 stream.
 *
 * The HTML in `body` is loaded into a jsdom window and queried with jQuery.
 * A genx Writer then emits a <sphinx:docset> containing the schema and one
 * <sphinx:document> per search result. Every chunk the writer produces is
 * forwarded to stdout as it is generated; nothing is returned.
 *
 * @param {string} body - HTML of the search results page.
 */
function generateXml(body) {
  var w = new genx.Writer();
  var window = jsdom(body, null, {
    features: {
      FetchExternalResources: false,
      MutationEvents: false
    }
  }).createWindow();
  var $ = require('jquery').create(window);

  // Stream the XML to stdout as the writer produces it
  w.on('data', function(data) {
    process.stdout.write(data);
  });

  // Declare the elements and attributes that we'll need
  var sphinx = w.declareNamespace('http://sphinxsearch.com/', 'sphinx');

  // Sphinx elements (in the sphinx namespace)
  var field = w.declareElement(sphinx, 'field');
  var docset = w.declareElement(sphinx, 'docset');
  var attr = w.declareElement(sphinx, 'attr');
  var schema = w.declareElement(sphinx, 'schema');
  var document = w.declareElement(sphinx, 'document');

  // Document elements
  var href = w.declareElement('href');
  var title = w.declareElement('title');
  var description = w.declareElement('description');

  // Attributes
  var idAttr = w.declareAttribute('id');
  var nameAttr = w.declareAttribute('name');

  // Write out the start of the document and the schema
  w.startDocument()
    .startElement(docset)
      .startElement(schema)
        .startElement(field).addAttribute(nameAttr, 'href').endElement()
        .startElement(field).addAttribute(nameAttr, 'title').endElement()
        .startElement(field).addAttribute(nameAttr, 'description').endElement()
      .endElement();

  // Remove the green URL from the result, don't want it in the description
  $('#ires ol li .s .f').remove();

  // Loop over each result
  $("#ires ol li").each(function(i, li) {
    var link = $('h3.r a', li);
    var result = {
      title: link.text(),
      // attr() yields undefined when this <li> has no result link; fall back
      // to an empty string so the writer is always handed text.
      href: link.attr('href') || '',
      // Scope the lookup to this <li>. A page-wide selector would return the
      // concatenated descriptions of every result for each document.
      description: $('.s', li).text()
    };

    // start each document on a new line
    w.addText("\n")
      .startElement(document)
        // Document ids are 1-based, following the order of the results
        .addAttribute(idAttr, String(i + 1))
        .startElement(href).addText(result.href).endElement()
        .startElement(title).addText(result.title).endElement()
        .startElement(description).addText(result.description).endElement()
      .endElement();
  });

  // Close the docset and finish the document
  w.addText("\n")
    .endElement()
    .endDocument();
}
// Make the request and when finished generate the XML
// http://www.google.com/search?q=node.js+xml
var query = qs.stringify({
  q: "node.js xml",
  ie: 'UTF-8',
  oe: 'UTF-8'
});

// http.request replaces the deprecated http.createClient API; the Host
// header is derived from the `host` option.
var request = http.request({
  host: 'www.google.com',
  port: 80,
  method: 'GET',
  path: '/search?' + query
});

// Without this listener a connection failure would surface as an unhandled
// 'error' event and crash the process with a bare stack trace.
request.on('error', function (err) {
  console.error("Error: Request failed: " + err.message);
});

request.on('response', function (response) {
  if (response.statusCode !== 200) {
    // stdout carries the xmlpipe2 stream, so diagnostics go to stderr
    console.error("Error: Unexpected response code " + response.statusCode);
    return; // Would be nice to abort the request here
  }

  // Accumulate the whole page before parsing it
  var body = '';
  response.setEncoding('utf8');
  response.on('data', function (chunk) {
    body += chunk;
  });
  response.on('end', function() {
    generateXml(body);
  });
});

request.end();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment