Skip to content

Instantly share code, notes, and snippets.

@mumrah
Created October 27, 2015 19:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mumrah/98a0c0f3863cc6700103 to your computer and use it in GitHub Desktop.
Save mumrah/98a0c0f3863cc6700103 to your computer and use it in GitHub Desktop.
Things that must match up:
package com.lucidworks.fusion.pipeline.index.config;
import com.lucidworks.apollo.pipeline.schema.Annotations.Schema;
import com.lucidworks.apollo.pipeline.schema.Annotations.SchemaProperty;
import com.lucidworks.apollo.pipeline.schema.validation.RESTValidationError;
import com.lucidworks.apollo.pipeline.schema.validation.SchemaValidator;
import com.lucidworks.fusion.pipeline.Configuration;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.annotate.JsonTypeName;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Configuration for the regular-expression filter stage.
 *
 * <p>Holds an immutable list of {@link RegexFilterRule}s, each pairing a source field name
 * with a compiled pattern. Instances are built by Jackson through the {@code @JsonCreator}
 * constructor.
 */
@JsonTypeName(RegexFilterConfig.TYPE)
@Schema(
    type = RegexFilterConfig.TYPE,
    title = "Regular Expression Filter",
    description = "This stage can filter entities using regular expressions"
)
public class RegexFilterConfig extends Configuration {

  /** Type identifier shared by the Jackson type name and the schema declaration. */
  public static final String TYPE = "regex-filter";

  @SchemaProperty(title = "Filters")
  private final List<RegexFilterRule> filters;

  /**
   * Creates the configuration.
   *
   * @param id    identifier passed through to {@link Configuration}
   * @param rules filter rules; {@code null} is treated as "no rules"
   */
  @JsonCreator
  public RegexFilterConfig(
      @JsonProperty("id") String id,
      @JsonProperty("filters") List<RegexFilterRule> rules) {
    super(id);
    if (rules == null) {
      this.filters = Collections.emptyList();
    } else {
      // Snapshot the list: an unmodifiable view alone would still reflect later
      // mutations made by whoever owns the list that was passed in.
      this.filters = Collections.unmodifiableList(new ArrayList<RegexFilterRule>(rules));
    }
  }

  /**
   * Returns the configured rules.
   *
   * @return an unmodifiable list, never {@code null}
   */
  @JsonProperty("filters")
  public List<RegexFilterRule> getFilters() {
    return filters;
  }

  /** A single filter rule: a field name and the pattern applied to that field's values. */
  public static class RegexFilterRule {

    @SchemaProperty(name = "sourceField", title = "Source Field", required = true)
    private final String sourceField;

    @SchemaProperty(name = "pattern", title = "Filter Pattern", required = true)
    private final Pattern pattern;

    /**
     * Creates a rule.
     *
     * @param sourceField name of the field this rule applies to
     * @param pattern     regular expression source, converted to a {@link Pattern} by
     *                    {@code SchemaValidator.parseRegex}
     * @throws RESTValidationError propagated from {@code SchemaValidator.parseRegex}
     *                             (presumably when the expression is invalid -- confirm
     *                             against that helper)
     */
    @JsonCreator
    public RegexFilterRule(
        @JsonProperty("sourceField") String sourceField,
        @JsonProperty("pattern") String pattern) throws RESTValidationError {
      this.sourceField = sourceField;
      this.pattern = SchemaValidator.parseRegex("pattern", pattern);
    }

    /** @return the name of the field this rule applies to */
    @JsonProperty("sourceField")
    public String getSourceField() {
      return sourceField;
    }

    /** @return the pattern produced from the configured expression */
    @JsonProperty("pattern")
    public Pattern getPattern() {
      return pattern;
    }
  }
}
package com.lucidworks.fusion.pipeline.index.stages;
import com.google.inject.assistedinject.Assisted;
import com.lucidworks.apollo.common.pipeline.PipelineDocument;
import com.lucidworks.apollo.common.pipeline.PipelineField;
import com.lucidworks.fusion.pipeline.AutoDiscover;
import com.lucidworks.fusion.pipeline.Context;
import com.lucidworks.fusion.pipeline.StageAssistFactory;
import com.lucidworks.fusion.pipeline.StageOutput;
import com.lucidworks.fusion.pipeline.index.DocumentProcessor;
import com.lucidworks.fusion.pipeline.index.config.RegexFilterConfig;
import javax.inject.Inject;
import java.util.regex.Matcher;
/**
 * Index pipeline stage that drops field values matching configured regular expressions.
 *
 * <p>For every incoming document a new {@link PipelineDocument} is built that carries over
 * the id and metadata and only those fields that no {@link RegexFilterConfig.RegexFilterRule}
 * rejects. The new document is then sent downstream.
 */
public class RegexFilterStage extends DocumentProcessor<RegexFilterConfig> {

  /** Assisted-inject factory that ties {@link RegexFilterConfig#TYPE} to this stage. */
  @AutoDiscover(value=RegexFilterConfig.TYPE, to=RegexFilterStage.class)
  public interface RegexFilterStageFactory extends StageAssistFactory<RegexFilterStage> { }

  /**
   * @param id stage identifier, passed through to the superclass
   */
  @Inject
  public RegexFilterStage(@Assisted String id) {
    super(id);
  }

  /** No per-configuration initialization is performed. */
  @Override
  public void init(RegexFilterConfig config) {
  }

  /**
   * Copies {@code document} without the fields rejected by the configured rules and sends
   * the copy to {@code output}.
   *
   * <p>Each removed field value increments the {@code removed.field} counter.
   *
   * @param document incoming document; it is read but not modified here
   * @param context  pipeline context, forwarded unchanged to {@code output}
   * @param config   supplies the filter rules
   * @param output   receives the filtered document
   */
  @Override
  public void process(PipelineDocument document, Context context, RegexFilterConfig config, StageOutput<PipelineDocument> output) {
    PipelineDocument filtered = new PipelineDocument();
    filtered.setId(document.getId());
    filtered.setMetadata(document.getMetadata());

    for (String name : document.getFieldNames()) {
      for (PipelineField field : document.getFields(name)) {
        if (isRejected(name, field, config)) {
          // Count only fields that are actually dropped; the counter used to be
          // bumped for every field that was kept.
          getMetricRegistry().counter("removed.field").inc();
        } else {
          filtered.addField(field);
        }
      }
    }

    output.send(filtered, context);
  }

  /**
   * Tells whether any rule for {@code name} fully matches the string form of the field value.
   * Fields with a {@code null} value are never rejected.
   */
  private static boolean isRejected(String name, PipelineField field, RegexFilterConfig config) {
    Object value = field.getValue();
    if (value == null) {
      return false;
    }
    String text = value.toString();
    for (RegexFilterConfig.RegexFilterRule rule : config.getFilters()) {
      String source = rule.getSourceField();
      // A rule without a source field cannot apply to any field; skip it instead of failing.
      if (source == null || !source.equalsIgnoreCase(name)) {
        continue;
      }
      Matcher matcher = rule.getPattern().matcher(text);
      // matches() requires the whole value to match, not just a substring.
      if (matcher.matches()) {
        return true;
      }
    }
    return false;
  }

  @Override
  public Class<RegexFilterConfig> getConfigurationClass() {
    return RegexFilterConfig.class;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment