bdelacretaz/camel.POST.groovy

## camel.POST.groovy
// WHAT'S THIS?
//
// This is a Stanbol/Groovy/Camel proof of concept: the goal is
// to define Stanbol content enhancement chains as flowchart-like
// structures from a script language. Intermediate results (as
// the detected language in this example) can be used to route
// the content being analyzed to different paths in the flowchart.
//
// Apache Camel is used as the flowchart execution engine.
//
// This script sets up and runs an Apache Camel context
// that includes custom components written in
// Groovy, directly in this script.
//
// The final goal is to allow such scripts to define
// dynamic Stanbol enhancer chains, where intermediate
// results can influence the sequence of operations
// applied to the content that's being enhanced.
//
// Example use cases (once this is integrated in Stanbol):
// -Run a different part of speech tagger for German
//
// -Run an additional domain-specific enhancement engine
//  based on entities that have been identified by a generic
//  engine at the beginning of the flowchart.
//
// -Add user-supplied components (written in groovy as
//  in this script) to the enhancement chain.
//
//  This is just a proof of concept, that runs on Sling
//  as I knew how to run groovy scripts out of the box with Sling.
//  We'll probably need to add a few bundles and glue
//  code to Stanbol to do the same thing there.
//
//  This is the first time I've used Camel, so I probably
//  overlooked a few things, but it works.
//
//  HOW TO RUN THIS IN SLING:
//
//  1. Start the Sling Launchpad
//  (found at http://svn.apache.org/repos/asf/sling/trunk/launchpad/builder)
//
//  2. Install and start these two bundles, via /system/console/bundles:
//
//  <groupId>org.apache.camel</groupId>
//  <artifactId>camel-core-osgi</artifactId>
//  <version>2.8.1</version>
//
//  <groupId>org.fusesource.commonman</groupId>
//  <artifactId>commons-management</artifactId>
//  <version>1.0</version>
//
//  3. Create a test node in the Sling repository:
//  $ curl -u admin:admin -Fsling:resourceType=camel http://localhost:8080/camel
//
//  4. Upload this script so that Sling uses it for a POST with the .camel extension:
//  $ curl -u admin:admin -X MKCOL http://localhost:8080/apps/camel
//  $ curl -u admin:admin -T camel.POST.groovy http://localhost:8080/apps/camel/camel.POST.groovy
//
//  5. Enjoy, and note that our French text is processed using a slightly different chain than
//     the one used to process German - that's what I meant to demonstrate, along with the ability
//     to inject simple scripted components like WordCount in the pipeline.
//
//  $ curl -s -X POST http://localhost:8080/camel.camel?text=Salut,+Le+monsieur+dans+le+train
//  *** Messages received from mock://result ***
//  content='Salut, Le monsieur dans le train', metadata=[words:6, length:32, language:fr]
//  content='path=/camel', metadata=[words:1, length:11, something:about english, language:en]
//  content='sling:resourceType=camel', metadata=[words:1, length:24, something:about english, language:en]
//  content='jcr:primaryType=nt:unstructured', metadata=[words:1, length:31, something:about english, language:en]
//
//  $ curl -s -X POST http://localhost:8080/camel.camel?text=Der+Mann+im+Zug
//  *** Messages received from mock://result ***
//  content='Der Mann im Zug', metadata=[words:4, length:15, etwas:ueber Deutsch, language:de]
//  content='path=/camel', metadata=[words:1, length:11, something:about english, language:en]
//  content='sling:resourceType=camel', metadata=[words:1, length:24, something:about english, language:en]
//  content='jcr:primaryType=nt:unstructured', metadata=[words:1, length:31, something:about english, language:en]
//
//  NEXT STEPS:
//  -Setup Stanbol so that it can run this
//  -Hook up the actual Stanbol enhancement engines (or parts of them to make it easier to mix
//   and match), and combine with custom Groovy components like this script does.

import org.apache.camel.Exchange
import org.apache.camel.Predicate

// Payload that travels through the enhancement pipeline: the raw
// text plus a map of metadata to which the pipeline components
// add their results.
class EnhancedString {
    // The text being analyzed
    String content

    // Metadata collected so far, stored in a plain HashMap
    Map attributes = new HashMap()

    // Wraps str; the metadata map starts out empty
    EnhancedString(String str) {
        this.content = str
    }

    // Renders the content followed by the metadata map
    String toString() {
        "content='" + content + "', metadata=" + attributes
    }
}

// First step of the pipeline: wraps a plain String in an
// EnhancedString so that later components can attach metadata
// (there's probably a more integrated way to do this with Camel)
class EnhancedStringConverter {
    // Returns a new EnhancedString holding str
    public EnhancedString pipe(String str) {
        def wrapped = new EnhancedString(str)
        wrapped
    }
}

// Example pipeline component: count the number of words in
// our content and add the result as metadata. I didn't want
// to go into deep semantic analysis for this example...but the
// principle is the same as Stanbol's enhancement engines
class WordCount {
    // Stores the number of whitespace-separated words of es.content
    // under the "words" key and returns the same es instance.
    //
    // Words are separated by runs of whitespace, so repeated, leading
    // or trailing blanks do not inflate the count. Null, empty or
    // blank content counts as 0 words.
    public EnhancedString pipe(EnhancedString es) {
        String text = es.content?.trim()
        // Groovy truth: null and "" are both false, which avoids
        // "".split(...) reporting one (empty) word
        int words = text ? text.split(/\s+/).length : 0
        es.attributes.put("words", words)
        return es
    }
}

// A second trivial "enhancement engine": records how many
// characters the content has
class StringLength {
    // Stores es.content.length() under the "length" key and
    // returns the same es instance
    public EnhancedString pipe(EnhancedString es) {
        def size = es.content.length()
        es.attributes["length"] = size
        es
    }
}

// A deliberately naive language detector, driven by two keywords
class LanguageDetector {
    // Stores the detected language code under the "language" key
    // and returns the same es instance. Content containing "mann"
    // (any case) is tagged "de", otherwise content containing
    // "salut" is tagged "fr", and everything else is tagged "en".
    public EnhancedString pipe(EnhancedString es) {
        def lowered = es.content.toLowerCase()

        // "mann" is checked first, so it wins if both keywords occur
        String detected
        if (lowered.contains("mann")) {
            detected = "de"
        } else {
            detected = lowered.contains("salut") ? "fr" : "en"
        }

        es.attributes.put("language", detected)
        es
    }
}

// Pipeline component that adds one fixed name/value pair to the
// metadata; used to simulate running different engines once the
// language has been detected
class AddAttribute {
    // Metadata key to add
    String n
    // Metadata value to add
    String v

    // Remembers the name/value pair that pipe() will add
    AddAttribute(name, value) {
        this.n = name
        this.v = value
    }

    // Stores v under key n and returns the same es instance
    public EnhancedString pipe(EnhancedString es) {
        es.attributes[n] = v
        es
    }
}

// Camel Predicate used for routing: accepts an exchange when the
// "language" metadata of its body equals the expected language
class MatchLanguage implements org.apache.camel.Predicate {
    // The language code this predicate accepts
    String language

    // Remembers the language code to compare against
    MatchLanguage(expectedLanguage) {
        this.language = expectedLanguage
    }

    // Reads the "language" entry from the attributes of the
    // exchange's input body and compares it with the expected one
    boolean matches(Exchange e) {
        def body = e.getIn().getBody()
        def detected = body.attributes.get("language")
        language.equals(detected)
    }
}

// Defines the Camel "route" (I'd rather call it a flowchart) that
// all content sent to direct://input travels through
class MyRouteBuilder extends org.apache.camel.builder.RouteBuilder {
    // Assembles the route step by step; every step invokes the
    // "pipe" method of the given bean
    void configure() {
        // Entry point: wrap the incoming String, then detect its language
        def flow = from("direct://input")
        flow = flow.bean(new EnhancedStringConverter(), "pipe")
        flow = flow.bean(new LanguageDetector(), "pipe")

        // Language-dependent part: German and the fallback each add
        // an attribute, the French branch is intentionally empty
        def branch = flow.choice()
        branch = branch.when(new MatchLanguage("de"))
        branch = branch.bean(new AddAttribute("etwas", "ueber Deutsch"), "pipe")
        branch = branch.when(new MatchLanguage("fr"))
        branch = branch.otherwise()
        branch = branch.bean(new AddAttribute("something", "about english"), "pipe")
        flow = branch.end()

        // Engines that run for every language
        flow = flow.bean(new StringLength(), "pipe")
        flow = flow.bean(new WordCount(), "pipe")

        // Collect the results in the mock endpoint
        flow.to("mock://result")
    }
}

// Camel setup boilerplate: a fresh context is created and started
// for each execution of this script. It is stopped again in the
// finally block below, once the results have been written, so that
// contexts do not pile up from one request to the next.
mrb = new MyRouteBuilder()
ctx = new org.apache.camel.impl.DefaultCamelContext()
ctx.addRoutes mrb
ctx.start()
try {
    p = ctx.createProducerTemplate()

    // Send some data to our input based on request parameters.
    // Without a "text" parameter getParameter() yields null, which
    // the pipeline cannot process (LanguageDetector calls
    // toLowerCase() on the content), so nothing is sent in that case.
    def text = request.getParameter("text")
    if (text != null) {
        p.sendBody "direct:input", text
    }

    // Send some Sling-supplied data...won't happen with Stanbol
    // but useful for my tests
    p.sendBody "direct:input", "path=" + currentNode.getPath()
    props = currentNode.getProperties()
    while (props.hasNext()) {
        prop = props.nextProperty()
        // NOTE(review): getValue() presumably only works for
        // single-valued properties - confirm for the nodes used here
        p.sendBody "direct:input", prop.getName() + "=" + prop.getValue().getString()
    }

    // Output the results to the response as plain text. The content
    // type is set before the writer is obtained.
    pt = "mock://result"
    response.setContentType("text/plain")
    w = response.getWriter()
    w.write("*** Messages received from " + pt + " ***\n")
    ctx.getEndpoint(pt).exchanges.each { ex ->
        w.write("${ex.getIn().getBody()}\n")
    }
} finally {
    // The exchanges have been read at this point (or the request
    // failed), so the context is no longer needed
    ctx.stop()
}
	// WHAT'S THIS?
	//
	// This is a Stanbol/Groovy/Camel proof of concept: the goal is
	// to define Stanbol content enhancement chains as flowchart-like
	// structures from a script language. Intermediate results (as
	// the detected language in this example) can be used to route
	// the content being analyzed to different paths in the flowchart.
	//
	// Apache Camel is used as the flowchart execution engine.
	//
	// This script sets up and runs an Apache Camel context
	// that includes custom components written in
	// Groovy, directly in this script.
	//
	// The final goal is to allow such scripts to define
	// dynamic Stanbol enhancer chains, where intermediate
	// results can influence the sequence of operations
	// applied to the content that's being enhanced.
	//
	// Example use cases (once this is integrated in Stanbol):
	// -Run a different part of speech tagger for German
	//
	// -Run an additional domain-specific enhancement engine
	// based on entities that have been identified by a generic
	// engine at the beginning of the flowchart.
	//
	// -Add user-supplied components (written in groovy as
	// in this script) to the enhancement chain.
	//
	// This is just a proof of concept, that runs on Sling
	// as I knew how to run groovy scripts out of the box with Sling.
	// We'll probably need to add a few bundles and glue
	// code to Stanbol to do the same thing there.
	//
	// This is the first time I've used Camel, so I probably
	// overlooked a few things, but it works.
	//
	// HOW TO RUN THIS IN SLING:
	//
	// 1. Start the Sling Launchpad
	// (found at http://svn.apache.org/repos/asf/sling/trunk/launchpad/builder)
	//
	// 2. Install and start these two bundles, via /system/console/bundles:
	//
	// <groupId>org.apache.camel</groupId>
	// <artifactId>camel-core-osgi</artifactId>
	// <version>2.8.1</version>
	//
	// <groupId>org.fusesource.commonman</groupId>
	// <artifactId>commons-management</artifactId>
	// <version>1.0</version>
	//
	// 3. Create a test node in the Sling repository:
	// $ curl -u admin:admin -Fsling:resourceType=camel http://localhost:8080/camel
	//
	// 4. Upload this script so that Sling uses it for a POST with the .camel extension:
	// $ curl -u admin:admin -X MKCOL http://localhost:8080/apps/camel
	// $ curl -u admin:admin -T camel.POST.groovy http://localhost:8080/apps/camel/camel.POST.groovy
	//
	// 5. Enjoy, and note that our French text is processed using a slightly different chain than
	// the one used to process German - that's what I meant to demonstrate, along with the ability
	// to inject simple scripted components like WordCount in the pipeline.
	//
	// $ curl -s -X POST http://localhost:8080/camel.camel?text=Salut,+Le+monsieur+dans+le+train
	// * Messages received from mock://result *
	// content='Salut, Le monsieur dans le train', metadata=[words:6, length:32, language:fr]
	// content='path=/camel', metadata=[words:1, length:11, something:about english, language:en]
	// content='sling:resourceType=camel', metadata=[words:1, length:24, something:about english, language:en]
	// content='jcr:primaryType=nt:unstructured', metadata=[words:1, length:31, something:about english, language:en]
	//
	// $ curl -s -X POST http://localhost:8080/camel.camel?text=Der+Mann+im+Zug
	// * Messages received from mock://result *
	// content='Der Mann im Zug', metadata=[words:4, length:15, etwas:ueber Deutsch, language:de]
	// content='path=/camel', metadata=[words:1, length:11, something:about english, language:en]
	// content='sling:resourceType=camel', metadata=[words:1, length:24, something:about english, language:en]
	// content='jcr:primaryType=nt:unstructured', metadata=[words:1, length:31, something:about english, language:en]
	//
	// NEXT STEPS:
	// -Setup Stanbol so that it can run this
	// -Hook up the actual Stanbol enhancement engines (or parts of them to make it easier to mix
	// and match), and combine with custom Groovy components like this script does.

	import org.apache.camel.Exchange
	import org.apache.camel.Predicate

	// Payload that travels through the enhancement pipeline: the raw
	// text plus a map of metadata to which the pipeline components
	// add their results.
	class EnhancedString {
		// The text being analyzed
		String content

		// Metadata collected so far, stored in a plain HashMap
		Map attributes = new HashMap()

		// Wraps str; the metadata map starts out empty
		EnhancedString(String str) {
			this.content = str
		}

		// Renders the content followed by the metadata map
		String toString() {
			"content='" + content + "', metadata=" + attributes
		}
	}

	// First step of the pipeline: wraps a plain String in an
	// EnhancedString so that later components can attach metadata
	// (there's probably a more integrated way to do this with Camel)
	class EnhancedStringConverter {
		// Returns a new EnhancedString holding str
		public EnhancedString pipe(String str) {
			def wrapped = new EnhancedString(str)
			wrapped
		}
	}

	// Example pipeline component: count the number of words in
	// our content and add the result as metadata. I didn't want
	// to go into deep semantic analysis for this example...but the
	// principle is the same as Stanbol's enhancement engines
	class WordCount {
		// Stores the number of whitespace-separated words of es.content
		// under the "words" key and returns the same es instance.
		//
		// Words are separated by runs of whitespace, so repeated, leading
		// or trailing blanks do not inflate the count. Null, empty or
		// blank content counts as 0 words.
		public EnhancedString pipe(EnhancedString es) {
			String text = es.content?.trim()
			// Groovy truth: null and "" are both false, which avoids
			// "".split(...) reporting one (empty) word
			int words = text ? text.split(/\s+/).length : 0
			es.attributes.put("words", words)
			return es
		}
	}

	// A second trivial "enhancement engine": records how many
	// characters the content has
	class StringLength {
		// Stores es.content.length() under the "length" key and
		// returns the same es instance
		public EnhancedString pipe(EnhancedString es) {
			def size = es.content.length()
			es.attributes["length"] = size
			es
		}
	}

	// A deliberately naive language detector, driven by two keywords
	class LanguageDetector {
		// Stores the detected language code under the "language" key
		// and returns the same es instance. Content containing "mann"
		// (any case) is tagged "de", otherwise content containing
		// "salut" is tagged "fr", and everything else is tagged "en".
		public EnhancedString pipe(EnhancedString es) {
			def lowered = es.content.toLowerCase()

			// "mann" is checked first, so it wins if both keywords occur
			String detected
			if (lowered.contains("mann")) {
				detected = "de"
			} else {
				detected = lowered.contains("salut") ? "fr" : "en"
			}

			es.attributes.put("language", detected)
			es
		}
	}

	// Pipeline component that adds one fixed name/value pair to the
	// metadata; used to simulate running different engines once the
	// language has been detected
	class AddAttribute {
		// Metadata key to add
		String n
		// Metadata value to add
		String v

		// Remembers the name/value pair that pipe() will add
		AddAttribute(name, value) {
			this.n = name
			this.v = value
		}

		// Stores v under key n and returns the same es instance
		public EnhancedString pipe(EnhancedString es) {
			es.attributes[n] = v
			es
		}
	}

	// Camel Predicate used for routing: accepts an exchange when the
	// "language" metadata of its body equals the expected language
	class MatchLanguage implements org.apache.camel.Predicate {
		// The language code this predicate accepts
		String language

		// Remembers the language code to compare against
		MatchLanguage(expectedLanguage) {
			this.language = expectedLanguage
		}

		// Reads the "language" entry from the attributes of the
		// exchange's input body and compares it with the expected one
		boolean matches(Exchange e) {
			def body = e.getIn().getBody()
			def detected = body.attributes.get("language")
			language.equals(detected)
		}
	}

	// Defines the Camel "route" (I'd rather call it a flowchart) that
	// all content sent to direct://input travels through
	class MyRouteBuilder extends org.apache.camel.builder.RouteBuilder {
		// Assembles the route step by step; every step invokes the
		// "pipe" method of the given bean
		void configure() {
			// Entry point: wrap the incoming String, then detect its language
			def flow = from("direct://input")
			flow = flow.bean(new EnhancedStringConverter(), "pipe")
			flow = flow.bean(new LanguageDetector(), "pipe")

			// Language-dependent part: German and the fallback each add
			// an attribute, the French branch is intentionally empty
			def branch = flow.choice()
			branch = branch.when(new MatchLanguage("de"))
			branch = branch.bean(new AddAttribute("etwas", "ueber Deutsch"), "pipe")
			branch = branch.when(new MatchLanguage("fr"))
			branch = branch.otherwise()
			branch = branch.bean(new AddAttribute("something", "about english"), "pipe")
			flow = branch.end()

			// Engines that run for every language
			flow = flow.bean(new StringLength(), "pipe")
			flow = flow.bean(new WordCount(), "pipe")

			// Collect the results in the mock endpoint
			flow.to("mock://result")
		}
	}

	// Camel setup boilerplate: a fresh context is created and started
	// for each execution of this script. It is stopped again in the
	// finally block below, once the results have been written, so that
	// contexts do not pile up from one request to the next.
	mrb = new MyRouteBuilder()
	ctx = new org.apache.camel.impl.DefaultCamelContext()
	ctx.addRoutes mrb
	ctx.start()
	try {
		p = ctx.createProducerTemplate()

		// Send some data to our input based on request parameters.
		// Without a "text" parameter getParameter() yields null, which
		// the pipeline cannot process (LanguageDetector calls
		// toLowerCase() on the content), so nothing is sent in that case.
		def text = request.getParameter("text")
		if (text != null) {
			p.sendBody "direct:input", text
		}

		// Send some Sling-supplied data...won't happen with Stanbol
		// but useful for my tests
		p.sendBody "direct:input", "path=" + currentNode.getPath()
		props = currentNode.getProperties()
		while (props.hasNext()) {
			prop = props.nextProperty()
			// NOTE(review): getValue() presumably only works for
			// single-valued properties - confirm for the nodes used here
			p.sendBody "direct:input", prop.getName() + "=" + prop.getValue().getString()
		}

		// Output the results to the response as plain text. The content
		// type is set before the writer is obtained.
		pt = "mock://result"
		response.setContentType("text/plain")
		w = response.getWriter()
		w.write("* Messages received from " + pt + " *\n")
		ctx.getEndpoint(pt).exchanges.each { ex ->
			w.write("${ex.getIn().getBody()}\n")
		}
	} finally {
		// The exchanges have been read at this point (or the request
		// failed), so the context is no longer needed
		ctx.stop()
	}