public
Created

Perform secondary index (2i) searches on Twitter hashtags stored in Riak and count all hashtags in the matching tweets.

  • Download Gist
2i-more.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
#!/usr/local/bin/python
 
import riak
 
# Connect over Protocol Buffers and open the bucket that holds the tweets.
Riak = riak.RiakClient(protocol='pbc', pb_port=10017)
TweetsBucket = Riak.bucket('tweets')

# Secondary-index range query on hashtags_bin from "android" to "androie".
# NOTE(review): the upper bound is presumably chosen so the range covers
# every hashtag that starts with "android" -- confirm against the data.
results = TweetsBucket.get_index(
    "hashtags_bin",
    "android",
    "androie",
    return_terms=False,
)
 
# Maps each hashtag to the number of matching tweets that carry it.
other_tags = {}

# With return_terms=False, each result is simply a Riak key. Fetch each
# object and tally every value it carries in the hashtags_bin index; the
# values are not filtered, so hashtags outside the queried range that
# appear alongside a matching one are counted too.
for riak_key in results.results:
    riak_object = TweetsBucket.get(riak_key)
    for index in riak_object.indexes:
        # Each index entry is read as (index name, value).
        if index[0] == 'hashtags_bin':
            hashtag = index[1]
            other_tags[hashtag] = other_tags.get(hashtag, 0) + 1
 
# Report one line per hashtag in sorted order: the count right-aligned in
# a five-character column, a space, then the tag.
# sorted() is used instead of keys() + .sort() because dict.keys() returns
# a view without .sort() on Python 3; the single-argument print(...) form
# produces the same output on Python 2 and Python 3.
all_tags = sorted(other_tags)
for tag in all_tags:
    print('{0: >5}'.format(str(other_tags[tag])) + " " + tag)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.