Skip to content

Instantly share code, notes, and snippets.

@mrflip
Created Jun 23, 2009
Embed
What would you like to do?
#
# Per-user word frequency job: the mapper emits one [user_id, word] record
# per TweetToken, and the reducer is a bare CountLines subclass.
#
module FreqUser
  class Mapper < Wukong::Streamer::StructStreamer
    #
    # Emit the [user_id, word] pair for each TweetToken record;
    # records of any other type are skipped.
    #
    def process(thing, &block)
      # `return`, not `next`: this is a method body, not a block, and Ruby
      # rejects `next` outside of a block or loop.
      return unless thing.is_a? TweetToken

      yield [thing.user_id, thing.word]
    end
  end

  # No overrides: all behavior comes from Wukong::Streamer::CountLines.
  class Reducer < Wukong::Streamer::CountLines
  end

  # Execute the script
  # NOTE(review): partition/sort on 2 fields presumably groups records by the
  # (user_id, word) pair emitted above -- confirm against the Wukong docs.
  Wukong::Script.new(
    Mapper,
    Reducer,
    :partition_fields => 2,
    :sort_fields => 2
  ).run
end
#
# Whole-corpus word frequency job: the mapper emits the word of every
# TweetToken and Wukong::Streamer::CountKeys is used as the reducer.
#
module FreqWholeCorpus
  class Mapper < Wukong::Streamer::StructStreamer
    #
    # Extract just the word from each TweetToken record;
    # records of any other type are skipped.
    #
    def process(thing, &block)
      # `return`, not `next`: this is a method body, not a block, and Ruby
      # rejects `next` outside of a block or loop.
      return unless thing.is_a? TweetToken

      yield thing.word
    end
  end

  # Execute the script
  Wukong::Script.new(
    Mapper,
    Wukong::Streamer::CountKeys
  ).run
end
#
# Tokenizing job: the mapper emits every token produced by Tweet#tokenize.
# The script is launched with no reducer class and :reduce_tasks => 0.
#
module WordCount
  class Mapper < Wukong::Streamer::StructStreamer
    #
    # Extract all the semantic items (smilies, hashtags, etc)
    # and all the remaining words from each tweet.
    # Records that are not Tweets are skipped.
    #
    def process(thing, &block)
      # `return`, not `next`: this is a method body, not a block, and Ruby
      # rejects `next` outside of a block or loop.
      return unless thing.is_a? Tweet

      # NOTE(review): the meaning of the `true` argument to #tokenize is not
      # visible here -- confirm against Tweet#tokenize.
      thing.tokenize(true).each do |token|
        yield token
      end
    end
  end

  # Execute the script
  # The comma must precede the trailing comment; with the comma inside the
  # comment the argument list does not parse.
  Wukong::Script.new(
    Mapper,
    nil, # WordFreq::Reducer
    :reduce_tasks => 0
  ).run
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment