Last active
December 10, 2018 07:54
-
-
Save jpablo/5a76d9af9c0a37c82fd26fe1de6d41fd to your computer and use it in GitHub Desktop.
Twitter to csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Twitter API credentials read by twitter2csv.sc (imported there as `credentials.creds`).
  *
  * All four values are empty placeholders; fill them in before running the script.
  */
object creds {
  // Consumer (application) key/secret pair
  val consumer_key: String = ""
  val consumer_secret: String = ""

  // Access token/secret pair
  val access_token: String = ""
  val access_secret: String = ""
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. Install java 8: https://www.java.com/en/download/ | |
// 2. Install ammonite: | |
// sudo sh -c '(echo "#!/usr/bin/env sh" && curl -L https://github.com/lihaoyi/Ammonite/releases/download/1.4.2/2.12-1.4.2) > ./amm && chmod +x ./amm' && ./amm | |
// 3. Add the correct credentials to credentials.sc (imported below as `creds`)
// 4. Run this script: | |
// amm twitter2csv.sc user "JRCossio" --out jrcossio.csv | |
// amm twitter2csv.sc search "M_OlgaSCordero" --count 100 --out out.csv | |
// amm twitter2csv.sc followers "JRCossio" --count 100 --out out.csv | |
import $ivy.`com.danielasfregola::twitter4s:5.5` | |
import $ivy.`com.github.tototoshi::scala-csv:1.3.5` | |
import $ivy.`io.circe::circe-core:0.10.0` | |
import $ivy.`io.circe::circe-generic:0.10.0` | |
import $ivy.`io.circe::circe-parser:0.10.0` | |
import $ivy.`io.circe::circe-java8:0.10.0` | |
import java.io.File | |
import java.nio.file.Files | |
import java.time.Instant | |
import scala.concurrent.Await | |
import scala.concurrent.duration._ | |
import com.danielasfregola.twitter4s.TwitterRestClient | |
import com.danielasfregola.twitter4s.entities._ | |
import com.github.tototoshi.csv._ | |
import $file.credentials | |
import credentials.creds | |
// Twitter REST client, authenticated with the values from credentials.sc
val consumerToken = ConsumerToken(key = creds.consumer_key, secret = creds.consumer_secret)
val accessToken = AccessToken(key = creds.access_token, secret = creds.access_secret)
val client = TwitterRestClient(consumerToken, accessToken)

// Maximum time each Await.result call in this script waits for an API response
val timeout: FiniteDuration = 10.seconds
/** CSV column layout: header names and the row extractors that go with them.
  *
  * Rows are written positionally, so each `*Headers` list must keep the same
  * length and order as the list returned by the matching `extract*` function.
  */
object ColumnDescription {

  // ----------------------
  // User
  // ----------------------

  /** Header row for user columns, in the order produced by [[extractUser]]. */
  // NOTE(review): "created_at.toString" looks like leftover code rather than a column
  // name; it is kept unchanged because renaming it would change the CSV output.
  val userHeaders: List[String] = List(
    "id_str",
    "name",
    "screen_name",
    "location",
    "description",
    "verified",
    "followers_count",
    "friends_count",
    "listed_count",
    "favourites_count",
    "statuses_count",
    "created_at.toString",
    "lang",
    "profile_background_image_url_https"
  )

  // available attributes:
  // https://github.com/DanielaSfregola/twitter4s/blob/master/src/main/scala/com/danielasfregola/twitter4s/entities/User.scala#L5

  /** Flattens a user into one CSV row; missing optional fields become empty strings. */
  def extractUser(u: User): List[Any] = {
    // Wrapped in Option so that a null value is written as an empty cell.
    val backgroundImage = Option(u.profile_background_image_url_https).getOrElse("")

    List(
      u.id_str,
      u.name,
      u.screen_name,
      u.location.getOrElse(""),
      u.description.getOrElse(""),
      u.verified,
      u.followers_count,
      u.friends_count,
      u.listed_count,
      u.favourites_count,
      u.statuses_count,
      u.created_at,
      u.lang,
      backgroundImage
    )
  }

  // -----------------------------------
  // Tweet
  // -----------------------------------

  /** Header row for tweet columns, in the order produced by [[extractTweet]]. */
  val tweetHeaders: List[String] = List(
    "created_at",
    "id_str",
    "text",
    "source",
    "in_reply_to_status_id_str",
    "in_reply_to_user_id_str",
    "in_reply_to_screen_name",
    "coordinates",
    "place",
    "quoted_status_id_str",
    "retweeted_status",
    "retweet_count",
    "favorite_count",
    "hashtags",
    "user_mentions",
    "urls",
    "lang"
  )

  // list of available attributes can be found here:
  // https://github.com/DanielaSfregola/twitter4s/blob/master/src/main/scala/com/danielasfregola/twitter4s/entities/Tweet.scala#L7

  /** Flattens a tweet into one CSV row.
    *
    * Multi-valued fields (coordinates, hashtags, mentions, urls) are joined with "|";
    * missing optional fields become empty strings.
    */
  def extractTweet(t: Tweet): List[Any] = {
    val sep = "|"

    val coordinates = t.coordinates.fold("")(_.coordinates.map(_.toString).mkString(sep))
    val placeName = t.place.map(_.full_name).getOrElse("")
    // Only the id of the retweeted status is exported, not the nested tweet.
    val retweetedId = t.retweeted_status.fold("")(_.id.toString)

    val hashtags = t.entities.fold("")(_.hashtags.map(_.text).mkString(sep))
    val mentions = t.entities.fold("")(_.user_mentions.map(_.name).mkString(sep))
    val urls = t.entities.fold("")(_.urls.map(_.url).mkString(sep))

    List(
      t.created_at,
      t.id_str,
      t.text,
      t.source,
      t.in_reply_to_status_id_str.getOrElse(""),
      t.in_reply_to_user_id_str.getOrElse(""),
      t.in_reply_to_screen_name.getOrElse(""),
      coordinates,
      placeName,
      t.quoted_status_id_str.getOrElse(""),
      retweetedId,
      t.retweet_count,
      t.favorite_count,
      hashtags,
      mentions,
      urls,
      t.lang.getOrElse("")
    )
  }
}
import io.circe._, io.circe.generic.semiauto._, io.circe.syntax._ | |
import io.circe.java8.time._ | |
/** Script entry point: runs one Twitter query and writes the result to `out`.
  *
  * The output file is only created once the query has succeeded, so an
  * unsupported `queryType` or a failed API call does not leave an empty file behind.
  *
  * @param queryType "user", "followers" or "search"; anything else prints an error and exits with status 1
  * @param query     value passed to the API call (user lookup, followers/friends lookup, or tweet search)
  * @param count     forwarded as the result count to the followers, friends and search calls (unused for "user")
  * @param json      when true, write compact JSON instead of CSV
  * @param out       path of the output file
  */
@main
def main(queryType: String, query: String, count: Int = 10, json: Boolean = false, out: String): Unit = {
  import ColumnDescription._
  import Encoders._

  val target = new File(out)

  // Writes `doc` as compact JSON. The charset is fixed to UTF-8 so the file does not
  // depend on the platform default encoding.
  def writeJson(doc: Json): Unit = {
    Files.write(target.toPath, doc.noSpaces.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    ()
  }

  // Writes all rows as CSV; the writer is opened only here and always closed.
  def writeCsv(rows: Seq[Seq[Any]]): Unit = {
    val writer = CSVWriter.open(target)
    try writer.writeAll(rows)
    finally writer.close()
  }

  queryType match {
    case "user" =>
      val user: User = Await.result(client.user(query), timeout).data
      if (json) writeJson(user.asJson)
      else writeCsv(List(userHeaders, extractUser(user)))
      println("Wrote 1 row")

    case "followers" =>
      val followers = Await.result(client.followersForUser(query, count = count), timeout)
        .data.users.map(u => List(u.id_str, u.name, "follower"))
      val friends = Await.result(client.friendsForUser(query, count = count), timeout)
        .data.users.map(u => List(u.id_str, u.name, "friend"))
      val rows = (followers ++ friends).toList
      val headers = List("id_str", "name", "type")
      if (json) writeJson(rows.asJson)
      else writeCsv(headers :: rows)
      println(s"Wrote ${rows.size} rows")

    case "search" =>
      val statuses = Await.result(client.searchTweet(query, count), timeout).data.statuses
      if (json) writeJson(statuses.asJson)
      else {
        // A tweet without a user still gets one (empty) cell per user column,
        // so every row has the same width as the header row.
        val blankUser = List.fill(userHeaders.size)("")
        val rows = statuses.map(t => extractTweet(t) ++ t.user.map(extractUser).getOrElse(blankUser))
        writeCsv((tweetHeaders ++ userHeaders) :: rows)
      }
      println(s"Wrote ${statuses.size} rows")

    case _ =>
      println(s"Operation `$queryType` not supported. Exiting")
      sys.exit(1)
  }
}
/** circe encoders for the twitter4s entities that are written out in JSON mode.
  *
  * NOTE(review): the declaration order and the `lazy` modifiers look deliberate
  * (component encoders are declared before the entities that contain them);
  * keep both as they are unless a reordering has been verified.
  */
object Encoders {
  // Dates are encoded as their `toString` representation.
  implicit val d: Encoder[java.util.Date] = Encoder.instance(date => Json.fromString(date.toString))

  implicit val e2: Encoder[TweetId] = deriveEncoder[TweetId]
  implicit val e1: Encoder[Contributor] = deriveEncoder[Contributor]
  implicit val e3: Encoder[Coordinates] = deriveEncoder[Coordinates]

  // Media and its components
  implicit val e411: Encoder[Variant] = deriveEncoder[Variant]
  implicit val e410: Encoder[VideoInfo] = deriveEncoder[VideoInfo]
  implicit val e412: Encoder[Size] = deriveEncoder[Size]
  implicit val e41: Encoder[Media] = deriveEncoder[Media]

  // Remaining entity components, then Entities itself
  implicit val e42: Encoder[HashTag] = deriveEncoder[HashTag]
  implicit val e43: Encoder[Symbol] = deriveEncoder[Symbol]
  implicit val e45: Encoder[UrlDetails] = deriveEncoder[UrlDetails]
  implicit val e44: Encoder[Urls] = deriveEncoder[Urls]
  implicit val e46: Encoder[UserMention] = deriveEncoder[UserMention]
  implicit val e4: Encoder[Entities] = deriveEncoder[Entities]

  implicit val e5: Encoder[ExtendedTweet] = deriveEncoder[ExtendedTweet]
  implicit val e61: Encoder[Area] = deriveEncoder[Area]
  implicit lazy val e6: Encoder[GeoPlace] = deriveEncoder[GeoPlace]
  implicit val e7: Encoder[Geo] = deriveEncoder[Geo]
  implicit val e72: Encoder[ProfileImage] = deriveEncoder[ProfileImage]
  implicit val e8: Encoder[StatusMetadata] = deriveEncoder[StatusMetadata]

  // The two background-image URLs can be null; they are replaced with "" before the
  // derived encoder sees the user.
  implicit lazy val eUser: Encoder[User] = deriveEncoder[User].contramap { user =>
    user.copy(
      profile_background_image_url = Option(user.profile_background_image_url).getOrElse(""),
      profile_background_image_url_https = Option(user.profile_background_image_url_https).getOrElse("")
    )
  }

  implicit lazy val te: Encoder[Tweet] = deriveEncoder[Tweet]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Instalar ammonite:
Agregar tokens en el archivo
credentials.sc.
Ejemplo para buscar datos de usuario:
Ejemplo para hacer queries arbitrarios: