Skip to content

Instantly share code, notes, and snippets.

rockset> SELECT
text
FROM
twitter_collection:entities.hashtags AS hashtags
WHERE
text IS NOT NULL
LIMIT 5;
+----------------+
| text |
|----------------|
{
"hashtags": [
{ "text": "AmazonMusic",
"indices": [ 15, 27 ]
},
{ "text": "ジョニ・ミッチェル",
"indices": [ 33, 43 ]
},
{ "text": "Blue",
"indices": [ 46, 51 ]
rockset> DESCRIBE twitter_collection;
+-----------------------------------------------+---------------+---------+-----------+
| field | occurrences | total | type |
|-----------------------------------------------+---------------+---------+-----------|
| ['id'] | 4181419 | 4181419 | string |
| ['_event_time'] | 4181419 | 4181419 | timestamp |
| ['coordinates'] | 4178582 | 4181419 | null_type |
| ['coordinates'] | 2837 | 4181419 | object |
| ['coordinates', 'type'] | 2837 | 2837 | string |
| ['coordinates', 'coordinates'] | 2837 | 2837 | array |
-- unnest tweets with stock ticker symbols from the past 1 day
WITH stock_tweets AS
(SELECT t.user.name, t.text, upper(sym.text) AS ticker
FROM "twitter-firehose" AS t, unnest(t.entities.symbols) AS sym
WHERE t.entities.symbols[1] is not null
AND t._event_time > current_timestamp() - INTERVAL 1 day),
-- aggregate stock ticker symbol tweet occurrences
top_stock_tweets AS
(SELECT ticker, count(*) AS tweet_count
SELECT t.user.screen_name,
t.user.followers_count AS fc
FROM "twitter-kinesis-demo" t
WHERE 'music' IN
(SELECT hashtags.text
FROM unnest(t.entities.hashtags) hashtags)
GROUP BY (t.user.screen_name,
t.user.followers_count)
ORDER BY t.user.followers_count DESC
LIMIT 5;
SELECT count(*)
FROM "twitter-kinesis-demo"
WHERE cast(timestamp_ms AS INT) > unix_millis(current_timestamp() - seconds(2));
WITH lastest_hashtags AS
(SELECT lower(ht.text) AS hashtag
FROM "twitter-kinesis-demo" t,
unnest(t.extended_tweet.entities.hashtags) ht
WHERE CAST(t.timestamp_ms AS INT) > UNIX_MILLIS(current_timestamp() - hours(1)))
SELECT count(hashtag) AS tweet_count,
hashtag
FROM latest_hashtags
GROUP BY hashtag
ORDER BY tweet_count DESC
SELECT t.timestamp_ms,
t.created_at AS created_at,
t.text AS text,
t.user.screen_name AS screen_name
FROM "twitter-kinesis-demo" t
WHERE CAST(timestamp_ms AS INT) > UNIX_MILLIS(current_timestamp() - minutes(1))
ORDER BY timestamp_ms DESC
LIMIT 100;
from rockset import Client, Q, F
rs=Client(api_key=...)
aws_integration=rs.Integration.retrieve(...)
sources=[
rs.Source.kinesis(
stream_name="twitter-stream",
integration=aws_integration)]
twitter_kinesis_demo=rs.Collection.create("twitter-kinesis-demo", sources=sources)