// Create an inverted index from a file
import scala.collection.immutable.Map

/**
 * Builds an inverted index from a tab-separated file.
 *
 * Each line of the file is expected to have the form
 * `doc_id<TAB>w1<TAB>w2<TAB>w3 ...`: the first field is the document id and
 * every following field is a word occurring in that document. Lines that
 * contain only a document id (or nothing at all) contribute no entries.
 *
 * The file is read completely and closed before this method returns, even
 * if reading fails part-way through.
 *
 * @param filename path of the tab-separated input file
 * @return a map from each word to the set of document ids whose line
 *         contains that word
 */
def invertedIndex(filename: String): Map[String, Set[String]] = {
  val source = scala.io.Source.fromFile(filename)
  try {
    source.getLines()                       // lazy iterator over the lines
      .map(_.split("\t"))                   // fields: doc id followed by words
      .filter(_.nonEmpty)                   // a tabs-only line splits to an empty array
      .flatMap { fields =>
        val docId = fields(0)
        // Pair every word with its document id: (word, docId).
        fields.iterator.drop(1).map(word => (word, docId))
      }
      // Iterator has no groupBy, and everything must be read before the
      // source is closed in `finally`, so materialize the pairs here.
      .toList
      .groupBy(_._1)                        // word -> List((word, docId))
      .map { case (word, pairs) => (word, pairs.map(_._2).toSet) }
  } finally {
    source.close()
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment