Skip to content

Instantly share code, notes, and snippets.

@tysm
Created May 16, 2019 12:26
Show Gist options
  • Save tysm/195578566e227d869d166852c5706b01 to your computer and use it in GitHub Desktop.
Save tysm/195578566e227d869d166852c5706b01 to your computer and use it in GitHub Desktop.
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.potelo.prelude.searcher;
import com.potelo.prelude.hitfield.HighlightRange;
import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.component.chain.dependencies.Provides;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.fastsearch.FastHit;
import com.yahoo.prelude.hitfield.HitField;
import com.yahoo.prelude.searcher.JuniperSearcher;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;
import java.util.*;
/**
 * Lists all HighlightRange from Juniper highlighting tags.
 * <p>
 * Note: This searcher only gathers information about the position of backend binary
 * highlighting tags in highlighted fields. Based on JuniperSearcher.
 * <p>
 * For every {@link FastHit} that has at least one highlighted field, the hit gets a
 * field named {@code "hiranges"} holding a map from field name to the list of
 * {@link HighlightRange}s found in that field. Hits without any highlight range get
 * the field reset to {@code null}.
 *
 * @author Thalles Medrado
 */
@After(com.yahoo.prelude.searcher.JuniperSearcher.JUNIPER_TAG_REPLACING)
@Provides(com.potelo.prelude.searcher.JuniperMapSearcher.HIGHLIGHTING_TAG_INFORMATION)
public class JuniperMapSearcher extends Searcher {

    private static final char RAW_HIGHLIGHT_CHAR = JuniperSearcher.RAW_HIGHLIGHT_CHAR;
    private static final char RAW_SEPARATOR_CHAR = JuniperSearcher.RAW_SEPARATOR_CHAR;

    // The name of the field containing document type
    private static final String MAGIC_FIELD = Hit.SDDOCNAME_FIELD;

    // The name of the hit field that receives the gathered highlight ranges
    private static final String HIGHLIGHT_RANGES_FIELD = "hiranges";

    public static final String HIGHLIGHTING_TAG_INFORMATION = "HighlightingTagInformation";

    /**
     * Produce a List of HighlightRange from Juniper highlighting tags for each field of a Hit.
     *
     * @param query the query being executed; its presentation settings decide whether bolding is on
     * @param execution the execution used to run the rest of the chain
     * @return the result returned by {@code execution.search(query)}, with its hits annotated
     */
    @Override
    public Result search(Query query, Execution execution) {
        // Run the rest of the chain first; the hits it returns are what gets annotated.
        Result result = execution.search(query);
        processHits(query.getPresentation().getBolding(), result.hits().deepIterator(), null,
                    execution.context().getIndexFacts().newSession(query));
        return result;
    }

    /**
     * Fills the result and then gathers highlight ranges for the hits that were not
     * already filled with {@code summaryClass} before this call.
     *
     * @param result the result whose hits should be filled
     * @param summaryClass the summary class to fill
     * @param execution the execution used to run the rest of the chain
     */
    @Override
    public void fill(Result result, String summaryClass, Execution execution) {
        // Remember which hits are about to be filled: only those need (re)processing.
        List<Hit> hitsToFill = new ArrayList<>(result.getHitCount());
        for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext();) {
            Hit candidate = i.next();
            if (!(candidate instanceof FastHit)) {
                continue;
            }
            if (((FastHit) candidate).isFilled(summaryClass)) {
                continue;
            }
            hitsToFill.add(candidate);
        }
        execution.fill(result, summaryClass);
        processHits(result.getQuery().getPresentation().getBolding(), hitsToFill.iterator(), summaryClass,
                    execution.context().getIndexFacts().newSession(result.getQuery()));
    }

    /**
     * Computes and stores the highlight ranges of every hit produced by the iterator.
     *
     * @param bolding whether bolding is enabled; when false no ranges are recorded
     * @param hitsToProcess the hits to annotate
     * @param summaryClass when non-null, hits not filled with this summary class are skipped
     * @param indexFacts the index facts session used to find dynamic/highlight summary fields
     */
    private void processHits(boolean bolding, Iterator<Hit> hitsToProcess,
                             String summaryClass, IndexFacts.Session indexFacts) {
        while (hitsToProcess.hasNext()) {
            Hit hit = hitsToProcess.next();
            // Reset any earlier value. The same hit can be seen more than once (search()
            // processes all hits, fill() processes each hit once per newly filled summary
            // class), so a previous value is legitimate and must not be asserted absent.
            hit.setField(HIGHLIGHT_RANGES_FIELD, null);
            if (!(hit instanceof FastHit)) {
                continue;
            }
            FastHit fastHit = (FastHit) hit;
            if (summaryClass != null && !fastHit.isFilled(summaryClass)) {
                continue;
            }
            Object searchDefinitionField = fastHit.getField(MAGIC_FIELD);
            if (searchDefinitionField == null) {
                continue;
            }
            Map<String, List<HighlightRange>> hiRanges = new HashMap<>();
            for (Index index : indexFacts.getIndexes(searchDefinitionField.toString())) {
                if (!index.getDynamicSummary() && !index.getHighlightSummary()) {
                    continue;
                }
                HitField fieldValue = fastHit.buildHitField(index.getName(), true);
                if (fieldValue == null) {
                    continue;
                }
                List<HighlightRange> fieldHiRanges = gatherPositions(fieldValue, bolding);
                if (!fieldHiRanges.isEmpty()) {
                    hiRanges.put(index.getName(), fieldHiRanges);
                }
            }
            if (!hiRanges.isEmpty()) {
                hit.setField(HIGHLIGHT_RANGES_FIELD, hiRanges);
            }
        }
    }

    /**
     * Scans the content of a field for raw highlight and separator characters and
     * records one HighlightRange per highlighted stretch.
     * <p>
     * NOTE(review): a highlight that is opened but never closed leaves its range with
     * a null length; confirm that consumers of HighlightRange expect this.
     *
     * @param field the field whose content is scanned
     * @param bolding whether bolding is enabled; when false the returned list is empty
     * @return the ranges found, in order of appearance; never null
     */
    private List<HighlightRange> gatherPositions(HitField field, boolean bolding) {
        List<HighlightRange> highlightRanges = null;
        int tagCount = 0;
        boolean previousWasHighlight = false;
        boolean insideHighlight = false;
        String toProcess = field.getContent();
        for (int i = 0; i < toProcess.length(); ++i) {
            char key = toProcess.charAt(i);
            boolean isHighlight = key == RAW_HIGHLIGHT_CHAR;
            if (isHighlight) {
                if (highlightRanges == null) {
                    highlightRanges = new ArrayList<>();
                }
                addPosition(bolding, insideHighlight, highlightRanges, i, tagCount, previousWasHighlight);
                insideHighlight = !insideHighlight;
            }
            // Highlight and separator characters both count as tags: they are skipped
            // when translating an index in the content to a position in the tag-free text.
            if (isHighlight || key == RAW_SEPARATOR_CHAR) {
                tagCount++;
            }
            previousWasHighlight = isHighlight;
        }
        return highlightRanges != null ? highlightRanges : Collections.emptyList();
    }

    /**
     * Handles one raw highlight character: opens a new range, closes the current one,
     * or reopens the previous one when a close tag is immediately followed by an open tag.
     *
     * @param bolding whether bolding is enabled; when false nothing is recorded
     * @param insideHighlight whether a highlight is open before this character
     * @param highlightRanges the ranges gathered so far; modified in place
     * @param i the index of the highlight character in the field content
     * @param tagCount the number of tag characters seen before index {@code i}
     * @param previousWasHighlight whether the character at {@code i - 1} was a highlight character
     */
    private void addPosition(boolean bolding, boolean insideHighlight, List<HighlightRange> highlightRanges,
                             int i, int tagCount, boolean previousWasHighlight) {
        if (!bolding) {
            return;
        }
        // Position in the tag-free text (content index minus preceding tags), shifted by one.
        int rawPos = i - tagCount + 1;
        if (insideHighlight) {
            // Closing tag: a range was added when this highlight was opened.
            assert !highlightRanges.isEmpty();
            HighlightRange lastPosition = highlightRanges.get(highlightRanges.size() - 1);
            lastPosition.setLength(rawPos - lastPosition.getOffset());
        } else if (previousWasHighlight) {
            // Not inside a highlight and the previous character was a highlight character:
            // that was a closing tag and this is an opening tag right after it. Merge the
            // two intervals instead of opening a new HighlightRange, since JuniperSearcher
            // does the same. Reopening the last interval is enough; the next closing tag
            // will set its length again.
            assert !highlightRanges.isEmpty();
            HighlightRange lastPosition = highlightRanges.get(highlightRanges.size() - 1);
            lastPosition.setLength(null);
        } else {
            // Plain opening tag: start a new range whose length is set on close.
            highlightRanges.add(new HighlightRange(rawPos, null));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment