Skip to content

Instantly share code, notes, and snippets.

@jpablo
Last active December 10, 2018 07:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpablo/5a76d9af9c0a37c82fd26fe1de6d41fd to your computer and use it in GitHub Desktop.
Save jpablo/5a76d9af9c0a37c82fd26fe1de6d41fd to your computer and use it in GitHub Desktop.
Twitter to csv
/** Twitter API credentials, pulled into the main script with `import $file.credentials`.
  *
  * All four values are blank placeholders; fill them in before running the script.
  */
object creds {
  // Consumer (application) key and secret
  val consumer_key: String = ""
  val consumer_secret: String = ""

  // Access token and secret
  val access_token: String = ""
  val access_secret: String = ""
}
// 1. Install java 8: https://www.java.com/en/download/
// 2. Install ammonite:
// sudo sh -c '(echo "#!/usr/bin/env sh" && curl -L https://github.com/lihaoyi/Ammonite/releases/download/1.4.2/2.12-1.4.2) > ./amm && chmod +x ./amm' && ./amm
// 3. Add the correct credentials to credentials.sc
// 4. Run this script:
// amm twitter2csv.sc user "JRCossio" --out jrcossio.csv
// amm twitter2csv.sc search "M_OlgaSCordero" --count 100 --out out.csv
// amm twitter2csv.sc followers "JRCossio" --count 100 --out out.csv
import $ivy.`com.danielasfregola::twitter4s:5.5`
import $ivy.`com.github.tototoshi::scala-csv:1.3.5`
import $ivy.`io.circe::circe-core:0.10.0`
import $ivy.`io.circe::circe-generic:0.10.0`
import $ivy.`io.circe::circe-parser:0.10.0`
import $ivy.`io.circe::circe-java8:0.10.0`
import java.io.File
import java.nio.file.Files
import java.time.Instant
import scala.concurrent.Await
import scala.concurrent.duration._
import com.danielasfregola.twitter4s.TwitterRestClient
import com.danielasfregola.twitter4s.entities._
import com.github.tototoshi.csv._
import $file.credentials
import credentials.creds
// Twitter REST client setup: both token pairs are read from `creds`.
val consumerToken: ConsumerToken =
  ConsumerToken(key = creds.consumer_key, secret = creds.consumer_secret)
val accessToken: AccessToken =
  AccessToken(key = creds.access_token, secret = creds.access_secret)
val client: TwitterRestClient = TwitterRestClient(consumerToken, accessToken)

// Upper bound handed to every Await.result call in main
val timeout: FiniteDuration = 10.seconds
/** Column layouts for the CSV output.
  *
  * For each entity there is a header list and an extractor that produces one row;
  * the extractor emits its values in the same order as the matching header list.
  */
object ColumnDescription {

  // ----------------------
  // User
  // ----------------------

  /** Header names for the columns produced by `extractUser`, in the same order.
    *
    * NOTE(review): "created_at.toString" looks like a leftover expression rather than
    * an intended column name; it is kept unchanged so existing CSV output is not altered.
    */
  val userHeaders: List[String] = List(
    "id_str", "name", "screen_name",
    "location", "description", "verified",
    "followers_count", "friends_count", "listed_count",
    "favourites_count", "statuses_count",
    "created_at.toString",
    "lang",
    "profile_background_image_url_https"
  )

  // available attributes:
  // https://github.com/DanielaSfregola/twitter4s/blob/master/src/main/scala/com/danielasfregola/twitter4s/entities/User.scala#L5

  /** Flattens a user into one row of cell values.
    *
    * Missing `location` / `description` become "", and a null background image URL
    * is likewise written as "".
    */
  def extractUser(u: User): List[Any] = {
    val location = u.location.getOrElse("")
    val description = u.description.getOrElse("")
    // The field may be null, hence the Option(...) wrapper
    val backgroundImage = Option(u.profile_background_image_url_https).getOrElse("")

    List(
      u.id_str, u.name, u.screen_name,
      location, description, u.verified,
      u.followers_count, u.friends_count, u.listed_count,
      u.favourites_count, u.statuses_count,
      u.created_at,
      u.lang,
      backgroundImage
    )
  }

  // -----------------------------------
  // Tweet
  // -----------------------------------

  /** Header names for the columns produced by `extractTweet`, in the same order. */
  val tweetHeaders: List[String] = List(
    "created_at", "id_str", "text", "source",
    "in_reply_to_status_id_str", "in_reply_to_user_id_str", "in_reply_to_screen_name",
    "coordinates", "place",
    "quoted_status_id_str", "retweeted_status",
    "retweet_count", "favorite_count",
    "hashtags", "user_mentions", "urls",
    "lang"
  )

  // list of available attributes can be found here:
  // https://github.com/DanielaSfregola/twitter4s/blob/master/src/main/scala/com/danielasfregola/twitter4s/entities/Tweet.scala#L7

  /** Flattens a tweet into one row of cell values.
    *
    * Absent optional fields become "". Multi-valued fields (coordinates, hashtags,
    * user mentions, urls) are joined with "|".
    */
  def extractTweet(t: Tweet): List[Any] = {
    val coordinates = t.coordinates.fold("")(_.coordinates.map(_.toString).mkString("|"))
    val place = t.place.map(_.full_name).getOrElse("")
    // Only the id of the retweeted status is written, not the whole status
    val retweetedId = t.retweeted_status.fold("")(_.id.toString)

    val hashtags = t.entities.fold("")(_.hashtags.map(_.text).mkString("|"))
    val mentions = t.entities.fold("")(_.user_mentions.map(_.name).mkString("|"))
    val urls = t.entities.fold("")(_.urls.map(_.url).mkString("|"))

    List(
      t.created_at, t.id_str, t.text, t.source,
      t.in_reply_to_status_id_str.getOrElse(""),
      t.in_reply_to_user_id_str.getOrElse(""),
      t.in_reply_to_screen_name.getOrElse(""),
      coordinates, place,
      t.quoted_status_id_str.getOrElse(""), retweetedId,
      t.retweet_count, t.favorite_count,
      hashtags, mentions, urls,
      t.lang.getOrElse("")
    )
  }
}
import io.circe._, io.circe.generic.semiauto._, io.circe.syntax._
import io.circe.java8.time._
/** Script entry point: runs one Twitter query and writes the result to `out`.
  *
  * The output file is only touched after the API call has succeeded, so an
  * unsupported `queryType` or a failed/timed-out request leaves an existing file intact.
  *
  * @param queryType "user", "followers" or "search"; any other value prints a message
  *                  and exits with status 1
  * @param query     argument handed to the selected API call
  * @param count     forwarded to the followers/friends/search calls; not used by "user"
  * @param json      when true, write compact UTF-8 JSON instead of CSV
  * @param out       path of the output file
  */
@main
def main(queryType: String, query: String, count: Int = 10, json: Boolean = false, out: String): Unit = {
  import ColumnDescription._
  import Encoders._

  val target = new File(out)

  // JSON is encoded as UTF-8 explicitly instead of relying on the platform default charset.
  def writeJson(payload: Json): Unit = {
    Files.write(target.toPath, payload.noSpaces.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    ()
  }

  // The CSV writer is opened only when CSV output is requested and is closed even if writing fails.
  def writeCsv(rows: Seq[Seq[Any]]): Unit = {
    val writer = CSVWriter.open(target)
    try writer.writeAll(rows)
    finally writer.close()
  }

  queryType match {
    case "user" =>
      val user: User = Await.result(client.user(query), timeout).data
      if (json) writeJson(user.asJson)
      else writeCsv(List(userHeaders, extractUser(user)))
      println("Wrote 1 row")

    case "followers" =>
      val followers = Await.result(client.followersForUser(query, count = count), timeout)
        .data.users.map(u => List(u.id_str, u.name, "follower"))
      val friends = Await.result(client.friendsForUser(query, count = count), timeout)
        .data.users.map(u => List(u.id_str, u.name, "friend"))
      val rows = (followers ++ friends).toList
      val headers = List("id_str", "name", "type")
      if (json) writeJson(rows.asJson)
      else writeCsv(headers :: rows)
      println(s"Wrote ${rows.size} rows")

    case "search" =>
      val statusSearch = Await.result(client.searchTweet(query, count), timeout).data
      val tweets = statusSearch.statuses
      if (json) writeJson(tweets.asJson)
      else {
        // A tweet without a user still gets one (blank) cell per user column,
        // so every row has the same width as the header row.
        val blankUser: List[Any] = List.fill(userHeaders.size)("")
        val rows = tweets.map(t => extractTweet(t) ++ t.user.map(extractUser).getOrElse(blankUser))
        writeCsv((tweetHeaders ++ userHeaders) :: rows)
      }
      println(s"Wrote ${tweets.size} rows")

    case _ =>
      println(s"Operation `$queryType` not supported. Exiting")
      sys.exit(1)
  }
}
// circe encoders for the twitter4s entity types. `main` imports these so that
// `.asJson` works on User, Tweet and the types nested inside them when `json` is true.
// Most instances are produced with semi-automatic derivation (`deriveEncoder`).
// NOTE(review): plain vals initialise in declaration order, and each derived encoder
// appears after the encoders of the types it presumably depends on - keep that ordering
// when adding entries (assumption; confirm).
object Encoders {
// Dates are rendered with java.util.Date#toString
implicit val d: Encoder[java.util.Date] = Encoder { d => Json.fromString(d.toString) }
implicit val e2: Encoder[TweetId] = deriveEncoder
implicit val e1: Encoder[Contributor] = deriveEncoder
implicit val e3: Encoder[Coordinates] = deriveEncoder
implicit val e411: Encoder[Variant] = deriveEncoder
implicit val e410: Encoder[VideoInfo] = deriveEncoder
implicit val e412: Encoder[Size] = deriveEncoder
implicit val e41: Encoder[Media] = deriveEncoder
implicit val e42: Encoder[HashTag] = deriveEncoder
implicit val e43: Encoder[Symbol] = deriveEncoder
implicit val e45: Encoder[UrlDetails] = deriveEncoder
implicit val e44: Encoder[Urls] = deriveEncoder
implicit val e46: Encoder[UserMention] = deriveEncoder
implicit val e4: Encoder[Entities] = deriveEncoder
implicit val e5: Encoder[ExtendedTweet] = deriveEncoder
implicit val e61: Encoder[Area] = deriveEncoder
// lazy - presumably because GeoPlace takes part in a recursive encoder dependency; TODO confirm
lazy implicit val e6: Encoder[GeoPlace] = deriveEncoder
implicit val e7: Encoder[Geo] = deriveEncoder
implicit val e72: Encoder[ProfileImage] = deriveEncoder
implicit val e8: Encoder[StatusMetadata] = deriveEncoder
// User encoder: null background-image URLs (plain and https) are replaced with ""
// on a copy of the user before it is handed to the derived encoder.
lazy implicit val eUser: Encoder[User] = deriveEncoder[User].contramap { u =>
val u1 = if (u.profile_background_image_url == null) u.copy(profile_background_image_url = "") else u
val u2 = if (u1.profile_background_image_url_https == null) u1.copy(profile_background_image_url_https = "") else u1
u2
}
// lazy, like eUser - presumably because User and Tweet reference each other; TODO confirm
lazy implicit val te: Encoder[Tweet] = deriveEncoder[Tweet]
}
@jpablo
Copy link
Author

jpablo commented Nov 18, 2018

Instalar ammonite:

sudo sh -c '(echo "#!/usr/bin/env sh" && curl -L https://github.com/lihaoyi/Ammonite/releases/download/1.4.2/2.12-1.4.2) > /usr/local/bin/amm && chmod +x /usr/local/bin/amm' && amm

Agregar tokens en el archivo credentials.sc.

Ejemplo para buscar datos de usuario:

amm twitter2csv.sc user "JRCossio" --out jrcossio.csv

Ejemplo para hacer queries arbitrarios:

amm twitter2csv.sc search "Guillermo I. Ortiz" --count 100 --out query1.csv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment