Skip to content

Instantly share code, notes, and snippets.

@alaz
Created January 20, 2012 13:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alaz/1647302 to your computer and use it in GitHub Desktop.
Save alaz/1647302 to your computer and use it in GitHub Desktop.
REPLable Subset play
import scala.io._
import collection.JavaConversions._
import com.mongodb._
import com.osinka.subset._
import SmartValues._
// Connect to MongoDB and select the collection that will hold the tweets.
val mongo = new Mongo("192.168.0.164")
val db = mongo.getDB("test")
val coll = db.getCollection("timeline")
// Build a unique index on "id" so that the same tweet is not stored twice.
val id = "id".fieldOf[Long]
coll.ensureIndex(
  id.int === 1,
  "unique".fieldOf[Boolean] === true
)
/**
 * Downloads a user's timeline from the Twitter REST API and returns every
 * entry of the parsed JSON document as a DBObject.
 *
 * @param name  screen name substituted into the request URL
 * @param count number of statuses requested from the API (default 20)
 * @return the entries of the parsed document, one DBObject per key, in the
 *         iteration order of the document's key set
 */
def tweets(name: String, count: Int = 20): Iterable[DBObject] = {
  import com.mongodb.util._
  val stream = Source.fromURL("http://api.twitter.com/1/statuses/user_timeline.json?screen_name=%s&count=%d".format(name, count))
  // Read the whole response eagerly, then release the underlying stream
  // even if reading fails; the original never closed it.
  val body = try stream.mkString finally stream.close()
  val timeline = JSON.parse(body).asInstanceOf[DBObject]
  // keySet is a java.util.Set viewed as a Scala Set through the file-level
  // JavaConversions import. Mapping over a Set would build another Set, which
  // loses the iteration order and merges equal documents, so go through a List.
  timeline.keySet.toList map { k => timeline.get(k).asInstanceOf[DBObject] }
}
// Store a batch of tweets and report every write for which the driver returns an error.
for (doc <- tweets("planetscala", 50)) {
  val outcome = coll.save(doc)
  if (outcome.getError != null) println("Failed to save: "+outcome.getError)
}
// feel free to repeat!
// We want "created_at" as a Joda DateTime, but Twitter sends dates as strings in its own format
import org.joda.time.DateTime
// Implicit reader that turns Twitter's textual timestamps into Joda DateTime values.
// Only String inputs are handled; they are parsed with an English-locale formatter.
implicit val myDateReader = {
  import org.joda.time.format.DateTimeFormat
  import java.util.Locale
  val pattern = DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss Z yyyy")
  val twitterFormat = pattern.withLocale(Locale.ENGLISH)
  ValueReader[DateTime]({ case raw: String => twitterFormat.parseDateTime(raw) })
}
// Typed field definitions; each one also serves as an extractor in patterns.
val text = "text".fieldOf[String]
val createdAt = "created_at".fieldOf[DateTime]
// Print the text and creation time of every stored document.
coll.find.iterator.foreach {
  case text(body) ~ createdAt(when) => println(body + " @ " + when)
}
// ok, fine, how about a User subdocument?
/** Field definitions for the "user" subdocument. */
object User {
  val name = "screen_name".fieldOf[String]
  val description = "description".fieldOf[String]
  val tweets = "statuses_count".fieldOf[Int]
}
val user = "user".fieldOf[DBObject]
val U = User.name ~ User.tweets
// U is an extractor that yields a ("name", "tweets") tuple from a DBObject,
// so it can be applied to the subdocuments that `user` extracts:
coll.find.iterator.take(2).flatMap { doc => user.unapply(doc).toIterator }.foreach {
  case U(n, t) => println(n+" has "+t)
}
// `user` is an extractor as well, so the two patterns can be nested:
coll.find.iterator.take(5).foreach {
  case user(U(n, t)) => println(n+" has "+t)
}
// Subdocument fields and top-level document fields can be extracted in one pattern:
coll.find.iterator.take(5).foreach {
  case user(U(n, cnt)) ~ text(t) ~ createdAt(dt) => println("%s [%d] @ %s: %s".format(n, cnt, dt, t))
}
// A MongoDB collection is not required: DBObjects from any source can be
// deserialized the same way, e.g. the ones coming straight from tweets(...)
tweets("planetscala", 2).foreach {
  case text(t) ~ createdAt(dt) => println(t+" @ "+dt)
}
// want User to return a useful object?
// :paste
/** A user as read from the "user" subdocument. */
case class User(name: String, tweets: Int, description: String)
/** Field definitions for the subdocument, plus the reader that assembles a User from them. */
object User {
  val name = "screen_name".fieldOf[String]
  val description = "description".fieldOf[String]
  val tweets = "statuses_count".fieldOf[Int]
  // Builds a User when all three fields can be extracted from the value.
  implicit val userReader =
    ValueReader[User]({
      case name(screenName) ~ tweets(statuses) ~ description(bio) => User(screenName, statuses, bio)
    })
}
// :paste off
// Redefine `user` so that it reads the subdocument as a User object.
val user = "user".subset(User).of[User]
// The pattern now binds a User value instead of a raw DBObject.
coll.find.iterator.take(5).foreach {
  case user(u) ~ text(t) ~ createdAt(dt) => println("%s @ %s: %s".format(u, dt, t))
}
// just in case you wonder if you can save something
// Writer that stores a Joda DateTime as the value returned by its toDate.
implicit val dateTimeWriter = ValueWriter[DateTime](when => when.toDate)
val coll1 = db.getCollection("timeline1")
// Copy only the text and creation time of each document into the new collection.
// The user field must still match, but its value is not used.
coll.find.iterator.foreach {
  case user(_) ~ text(t) ~ createdAt(dt) => coll1.insert(text(t) ~ createdAt(dt))
}
// did you notice which fields the documents in the new collection have?.. how would you keep all the original fields?
// Empty the target collection before copying again.
coll1.remove(Query.empty)
for (dbo <- coll.find.iterator) {
  dbo match {
    // Only the presence of both fields matters here; their values are replaced below.
    case text(_) ~ createdAt(_) =>
      // A fresh DateTime is taken for every document.
      // NOTE(review): presumably the mutation keeps all other fields of dbo; confirm against Subset docs.
      val overwrite = createdAt(new DateTime) ~ text("new timeline")
      coll1.save(overwrite(dbo))
  }
}
// Now go and check that every record in "timeline1" has the current timestamp in its "created_at" field
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment