Skip to content

Instantly share code, notes, and snippets.

@cjmatta
Last active April 12, 2017 15:48
Show Gist options
  • Save cjmatta/503c142d67036b9080be to your computer and use it in GitHub Desktop.
Save cjmatta/503c142d67036b9080be to your computer and use it in GitHub Desktop.
Examining Tweets with Drill

Tweets

SELECT 
CAST(`t`.`dir0` AS VARCHAR(255)) AS `topic`,
CAST(`t`.`dir1` AS INTEGER) AS `year`,
CAST(`t`.`dir2` AS INTEGER) AS `month`,
CAST(`t`.`dir3` AS INTEGER) AS `day`,
CAST(`t`.`dir4` AS INTEGER) AS `hour`,
CAST(`t`.`id` AS BIGINT) AS `id`,
CAST(`t`.`user`['id'] AS BIGINT) AS `user_id`,
CAST(`t`.`text` AS VARCHAR(140)) AS `tweet`,
`to_timestamp`(`t`.`created_at`, 'EEE MMM dd HH:mm:ss Z YYYY') AS `created_at`,
CAST(`t`.`favorited` AS BOOLEAN) AS `favorited`,
CAST(`t`.`favorite_count` AS INTEGER) AS `favorite_count`,
CAST(`t`.`retweeted` AS BOOLEAN) AS `retweeted`,
CAST(`t`.`retweet_count` AS INTEGER) AS `retweet_count`,
CAST(`t`.`possibly_sensitive` AS BOOLEAN) AS `possibly_sensetive`,
CAST(`t`.`place`['name'] AS VARCHAR(140)) AS `place_name`,
CAST(`t`.`place`['country'] AS VARCHAR(140)) AS `place_country`,
CAST(`t`.`source` AS VARCHAR(140)) AS `source`
FROM `maprfs`.`cmatta`.`tweets/data` AS `t`;

Users

create or replace view mfs.views.twitter_users as
select CAST(t.`user`.`id` as BIGINT) as `user_id`,
CAST(t.`user`.`name` as VARCHAR(128)) as `name`,
CAST(t.`user`.`screen_name` as VARCHAR(128)) as `screen_name`,
CAST(t.`user`.`location` as VARCHAR(128)) as `location`,
CAST(t.`user`.`lang` as VARCHAR(128)) as `language`
from mfs.`/user/cmatta/tweets` t;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment