Skip to content

Instantly share code, notes, and snippets.

@adichad
Last active October 15, 2015 18:57
Show Gist options
  • Save adichad/06ea4460d9bb6fe55e2c to your computer and use it in GitHub Desktop.
Save adichad/06ea4460d9bb6fe55e2c to your computer and use it in GitHub Desktop.
/**
 * Builds a bool query over `field` with one `should` clause per shingle
 * (contiguous run of tokens) taken from `w`.
 *
 * Shingle sizes run from `minShingle` up to `min(maxShingle, w.length)`. Each
 * shingle is joined with single spaces and handed to `fuzzyOrTermQuery`
 * together with a boost of `boost * superBoost(size)`. The bool query is
 * created with `minimumShouldMatch("33%")` and is finally passed through
 * `nestIfNeeded(field, ...)`, whose result is returned.
 *
 * NOTE(review): `superBoost`, `fuzzyOrTermQuery` and `nestIfNeeded` are defined
 * outside this view; their exact semantics should be confirmed there.
 *
 * @param field       name of the field to query
 * @param boost       base boost, scaled per shingle size by `superBoost`
 * @param w           the query tokens
 * @param fuzzyprefix forwarded unchanged to `fuzzyOrTermQuery`
 * @param maxShingle  largest shingle size to generate (capped at `w.length`)
 * @param minShingle  smallest shingle size to generate; `sliding` rejects
 *                    sizes below 1, so callers should pass at least 1
 * @param fuzzy       forwarded unchanged to `fuzzyOrTermQuery`
 */
def shingleFull(field: String, boost: Float, w: Array[String], fuzzyprefix: Int, maxShingle: Int, minShingle: Int = 1, fuzzy: Boolean = true) = {
  val shingleQuery = boolQuery.minimumShouldMatch("33%")
  val largest = math.min(maxShingle, w.length)

  for (size <- minShingle to largest) {
    // The boost depends only on the shingle size, so compute it once per size.
    val sizeBoost = boost * superBoost(size)
    for (window <- w.sliding(size)) {
      val phrase = window.mkString(" ")
      shingleQuery.should(fuzzyOrTermQuery(field, phrase, sizeBoost, fuzzyprefix, fuzzy))
    }
  }

  nestIfNeeded(field, shingleQuery)
}
/**
 * Builds a bool query over `field` with one span-near `should` clause per
 * shingle (contiguous run of tokens) taken from `w`.
 *
 * Every token is first turned into a span clause: when `fuzzy` is set and the
 * token is longer than 8 characters it becomes a span-multi-term wrapper
 * around a fuzzy query (prefix length `fuzzyprefix`, fuzziness TWO for tokens
 * longer than 12 characters, ONE otherwise); all other tokens become plain
 * span-term queries.
 *
 * For each shingle size from `minShingle` up to `min(w.length, maxShingle)`,
 * every window of that many clauses is combined into a span-near query with
 * boost `boost * 2 * size`. When `sloppy` is set the slop is `size / 3`
 * (integer division) and order is not enforced; otherwise the slop is 0 and
 * the clauses must appear in order. The bool query is created with
 * `minimumShouldMatch("33%")` and returned via `nestIfNeeded(field, ...)`.
 *
 * NOTE(review): `nestIfNeeded` and the query-builder factories are defined
 * outside this view; confirm their behavior there.
 *
 * @param field       name of the field to query
 * @param boost       base boost, multiplied by twice the shingle size
 * @param w           the query tokens
 * @param fuzzyprefix prefix length applied to the fuzzy queries
 * @param maxShingle  largest shingle size to generate (capped at `w.length`)
 * @param minShingle  smallest shingle size to generate; `sliding` rejects
 *                    sizes below 1, so callers should pass at least 1
 * @param sloppy      allow slop and out-of-order matches inside a shingle
 * @param fuzzy       allow fuzzy matching for tokens longer than 8 characters
 */
def shingleSpan(field: String, boost: Float, w: Array[String], fuzzyprefix: Int, maxShingle: Int, minShingle: Int = 1, sloppy: Boolean = true, fuzzy: Boolean = true) = {
  // Picks the span clause for a single token: fuzzy for long tokens, exact otherwise.
  def toSpanClause(token: String): BaseQueryBuilder with SpanQueryBuilder =
    if (fuzzy && token.length > 8) {
      val edits = if (token.length > 12) Fuzziness.TWO else Fuzziness.ONE
      spanMultiTermQueryBuilder(fuzzyQuery(field, token).prefixLength(fuzzyprefix).fuzziness(edits))
    } else {
      spanTermQuery(field, token)
    }

  val spanQuery = boolQuery.minimumShouldMatch("33%")
  val clauses: Array[BaseQueryBuilder with SpanQueryBuilder] = w.map(token => toSpanClause(token))
  val largest = math.min(clauses.length, maxShingle)

  for (size <- minShingle to largest) {
    val allowedSlop = if (sloppy) size / 3 else 0
    for (window <- clauses.sliding(size)) {
      val near = spanNearQuery.slop(allowedSlop).inOrder(!sloppy).boost(boost * 2 * size)
      window.foreach(clause => near.clause(clause))
      spanQuery.should(near)
    }
  }

  nestIfNeeded(field, spanQuery)
}
/**
 * Builds one query for the token run `w` by combining per-field shingle
 * queries under a `disMaxQuery`.
 *
 * When `span` is set, every entry of `tokenFields` contributes a
 * `shingleSpan` query; otherwise every entry of `recomFields` contributes a
 * `shingleFull` query. In both cases the map key is used as the field name,
 * the map value as the boost, the fuzzy prefix is 1, the maximum shingle size
 * is `w.length`, and the minimum shingle size is
 * `max(w.length - tokenRelax, 1)`.
 *
 * NOTE(review): `addAll` is not visible here; it presumably adds every
 * sub-query to the dis-max builder and returns that builder. Confirm against
 * its definition.
 *
 * @param tokenFields field name -> boost, used only when `span` is true
 * @param recomFields field name -> boost, used only when `span` is false
 * @param w           the query tokens
 * @param fuzzy       forwarded to the shingle builders
 * @param sloppy      forwarded to `shingleSpan`; unused when `span` is false
 * @param span        selects span-based shingles over `tokenFields`
 * @param tokenRelax  how far below `w.length` the minimum shingle size may go
 */
def currQuery(tokenFields: Map[String, Float],
              recomFields: Map[String, Float],
              w: Array[String], fuzzy: Boolean = false, sloppy: Boolean = false, span: Boolean = false, tokenRelax: Int = 0) = {
  // Both branches use the same shingle bounds: at most the whole token run,
  // at least the run shortened by tokenRelax (but never below one token).
  val longest = w.length
  val shortest = math.max(w.length - tokenRelax, 1)

  if (span) {
    disMaxQuery.addAll(tokenFields.map { case (name, weight) =>
      shingleSpan(name, weight, w, 1, longest, shortest, sloppy, fuzzy)
    })
  } else {
    disMaxQuery.addAll(recomFields.map { case (name, weight) =>
      shingleFull(name, weight, w, 1, longest, shortest, fuzzy)
    })
  }
}
/**
 * Recursively builds a bool query covering the ways `w` can be cut into a
 * leading chunk followed by a partition of the remaining tokens.
 *
 * For an empty `w` a fresh, empty `boolQuery` is returned. Otherwise the
 * leading chunk size ranges from `max(1, min(minShingle, w.length))` to
 * `min(maxShingle, w.length)`. For each size:
 *  - if tokens remain after the chunk, the remainder is partitioned
 *    recursively and the chunk's `currQuery` is attached to that result as a
 *    `must` clause;
 *  - if the chunk consumes all of `w`, the chunk's `currQuery` is used as is.
 *
 * All alternatives are handed to `shouldAll` on a bool query created with
 * `minimumNumberShouldMatch(1)`.
 *
 * NOTE(review): `shouldAll` is not visible here; it presumably adds each
 * alternative as a `should` clause. Confirm against its definition.
 *
 * @param tokenFields forwarded to `currQuery`
 * @param recomFields forwarded to `currQuery`
 * @param w           the tokens still to be partitioned
 * @param maxShingle  largest leading chunk size (capped at `w.length`)
 * @param minShingle  smallest leading chunk size (clamped into `1..w.length`)
 * @param fuzzy       forwarded to `currQuery`
 * @param sloppy      forwarded to `currQuery`
 * @param span        forwarded to `currQuery`
 * @param tokenRelax  forwarded to `currQuery`
 */
def shinglePartition(tokenFields: Map[String, Float], recomFields: Map[String, Float], w: Array[String],
                     maxShingle: Int, minShingle: Int = 1, fuzzy: Boolean = false, sloppy: Boolean = false,
                     span: Boolean = false, tokenRelax: Int = 0): BoolQueryBuilder = {
  if (w.isEmpty) {
    boolQuery
  } else {
    val smallest = math.max(1, math.min(minShingle, w.length))
    val largest = math.min(maxShingle, w.length)

    boolQuery.minimumNumberShouldMatch(1).shouldAll(
      (smallest to largest).map { size =>
        val (head, rest) = w.splitAt(size)
        if (rest.nonEmpty) {
          // Partition the remainder first, then require the leading chunk on top of it.
          shinglePartition(tokenFields, recomFields, rest, maxShingle, minShingle, fuzzy, sloppy, span, tokenRelax)
            .must(currQuery(tokenFields, recomFields, head, fuzzy, sloppy, span, tokenRelax))
        } else {
          currQuery(tokenFields, recomFields, head, fuzzy, sloppy, span, tokenRelax)
        }
      }
    )
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment