Skip to content

Instantly share code, notes, and snippets.

@alextp
Created January 22, 2013 01:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alextp/4591270 to your computer and use it in GitHub Desktop.
Save alextp/4591270 to your computer and use it in GitHub Desktop.
jinho features style
/** Sets the entry of `v` at the index `ClassifierPosFeatureDomain` assigns to the feature string `f` to 1.0. */
def addFeature(v: SparseIndexedTensor1, f: String): Unit = {
  val featureIndex = ClassifierPosFeatureDomain.index(f)
  v.update(featureIndex, 1.0)
}
/**
 * Adds the feature `prefix + f` to `v`, but only when `f` is present in
 * `w.ambiguityClasses`; otherwise `v` is left untouched.
 */
def addLemma(v: SparseIndexedTensor1, w: WordData, f: String, prefix: String): Unit = {
  val isKnown = w.ambiguityClasses.contains(f)
  if (isKnown) {
    addFeature(v, s"${prefix}${f}")
  }
}
/**
 * Looks up the ambiguity-class entry for the lemma at position `pos` of `sent`.
 *
 * Returns `w.ambiguityClasses(lemma)` when the lemma is present in
 * `w.ambiguityClasses`, and the empty string otherwise.
 *
 * The previous definition used procedure syntax (`def f(...) { ... }`), which
 * makes the result type `Unit` and silently discards the `if/else` value, so
 * callers building `"A=" + getAffinity(...)` always got `"A=()"`.
 * The result type is left inferred because the value type of
 * `ambiguityClasses` is not visible here (presumably String -- TODO confirm).
 */
def getAffinity(sent: SentenceData, w: WordData, pos: Int) = {
  val lemma = sent.get(sent.lemmas, pos)
  if (w.ambiguityClasses.contains(lemma)) w.ambiguityClasses(lemma) else ""
}
/**
 * Builds the lemma feature string for the token `dif` positions away from `pos`.
 *
 * The result always starts with `"W<dif>="`. The lemma read from
 * `sent.get(sent.lemmas, pos + dif)` is appended only when it is present in
 * `w.ambiguityClasses`; otherwise the bare prefix is returned.
 */
def getLemmaFeature(sent: SentenceData, w: WordData, pos: Int, dif: Int): String = {
  val tag = s"W${dif}="
  val candidate = sent.get(sent.lemmas, pos + dif)
  val isKnown = w.ambiguityClasses.contains(candidate)
  if (!isKnown) tag
  else tag + candidate
}
/**
 * Adds the full feature set for the token at index `pos` of `sent` to `f`.
 *
 * Features, added in this order:
 *  - lemma features for offsets +3..-3 (via `getLemmaFeature`),
 *  - label features for offsets -3..-1 (read from `sent.labels`),
 *  - affinity features for offsets 0..+3 (via `getAffinity`),
 *  - bigram and trigram conjunctions of the above,
 *  - orthographic features of the current form (prefix, suffix, shape, and
 *    period / digit / hyphen flags).
 *
 * Fixes relative to the earlier version:
 *  - `ap3` is now added; previously `ap2` was added twice and `ap3` was unused.
 *  - Orthographic features are computed on the current lemma without the
 *    `"W0="` feature-name prefix. Before, `PREFX3` was always `"W0="`.
 *  - `HasDigit` is true for any digit rather than only the character `'0'`
 *    (which the `"W0="` prefix always supplied).
 *
 * @param sent sentence data supplying `lemmas` and `labels`
 * @param pos  index of the token being featurized
 * @param f    tensor that receives the features
 * @param w    word data supplying `ambiguityClasses`
 */
def addFeatures(sent: SentenceData, pos: Int, f: SparseIndexedTensor1, w: WordData): Unit = {
  // Lemma features in a +/-3 window around the current token.
  val wp3 = getLemmaFeature(sent, w, pos, +3)
  val wp2 = getLemmaFeature(sent, w, pos, +2)
  val wp1 = getLemmaFeature(sent, w, pos, +1)
  val wf = getLemmaFeature(sent, w, pos, 0)
  val wm1 = getLemmaFeature(sent, w, pos, -1)
  val wm2 = getLemmaFeature(sent, w, pos, -2)
  val wm3 = getLemmaFeature(sent, w, pos, -3)

  // Labels of the three preceding positions.
  val pm3 = "POS-3=" + sent.get(sent.labels, pos - 3)
  val pm2 = "POS-2=" + sent.get(sent.labels, pos - 2)
  val pm1 = "POS-1=" + sent.get(sent.labels, pos - 1)

  // Affinity (ambiguity class) of the current and the three following positions.
  val a0 = "A=" + getAffinity(sent, w, pos)
  val ap1 = "A+1=" + getAffinity(sent, w, pos + 1)
  val ap2 = "A+2=" + getAffinity(sent, w, pos + 2)
  val ap3 = "A+3=" + getAffinity(sent, w, pos + 3)

  // getLemmaFeature(…, 0) yields "W0=" + lemma; drop the feature-name prefix so
  // the orthographic features describe the word itself.
  // NOTE(review): for lemmas not in w.ambiguityClasses, wf is just "W0=", so
  // `form` is empty and these features carry no information -- confirm intended.
  val form = wf.stripPrefix("W0=")

  // Order is preserved from the original sequence of addFeature calls, in case
  // ClassifierPosFeatureDomain.index assigns indices on first sight.
  val features = Seq(
    // Unigrams.
    wp3, wp2, wp1, wf, wm1, wm2, wm3,
    pm3, pm2, pm1,
    a0, ap1, ap2, ap3,
    // Bigrams.
    wm2 + wm1,
    wm1 + wf,
    wf + wp1,
    wp1 + wp2,
    wm1 + wp1,
    pm2 + pm1,
    ap1 + ap2,
    pm1 + ap1,
    pm1 + a0,
    a0 + ap1,
    // Trigrams.
    wm2 + wm1 + wf,
    wm1 + wf + wp1,
    wf + wp1 + wp2,
    wm2 + wm1 + wp1,
    wm1 + wp1 + wp2,
    pm2 + pm1 + a0,
    pm1 + a0 + ap1,
    pm2 + pm1 + ap1,
    pm1 + ap1 + ap2,
    a0 + ap1 + ap2,
    // Orthographic features of the current form.
    "PREFX3=" + form.take(3),
    "SUFX4=" + form.takeRight(4),
    "Shape=" + strings.stringShape(form, 2), // TODO(apassos): add the remaining jinho features not contained in shape
    "HasPeriod=" + form.contains("."),
    "HasDigit=" + form.exists(_.isDigit),
    "HasHyphen=" + form.contains("-")
  )
  features.foreach(feature => addFeature(f, feature))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment