Skip to content

Instantly share code, notes, and snippets.

@kypkyp
Created January 23, 2023 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kypkyp/eb952228dd5aa4c9a5586f28ebec1aca to your computer and use it in GitHub Desktop.
Save kypkyp/eb952228dd5aa4c9a5586f28ebec1aca to your computer and use it in GitHub Desktop.
タグ分析 for content-based recommendation
namespace :interaction_task do
# Provisional analysis ("kari bunseki"): for each interaction event listed in
# ./output.json, look up the referenced Nweet and record the event type
# together with the tag names and author of the Nweet's first link. Events
# whose Nweet is missing, has no link, or whose first link has no tags are
# dropped. The result is written to analyzed.json as a single JSON array.
#
# How output.json was produced:
# aws dynamodb query --table-name nuita-prd-interaction --key-condition-expression 'user_id = :user_id' --expression-attribute-value '{":user_id": {"N": "24"}}' > raw.json
# cat raw.json | jq '.Items[] | {event_type: .event_type.S, item_id: .item_id.N}' | jq -s '.' > output.json
# A bare array was not recognized, so the file was edited by hand in the end;
# this task reads the event array from the top-level "items" key.
desc 'kari bunseki'
task analyze: :environment do
  items = JSON.parse(File.read('./output.json')).fetch('items')

  rows = items.map do |item|
    # Resolve the first link once; both tags and author are read from it.
    link = Nweet.find_by(id: item['item_id'].to_i)&.links&.first
    tags = link&.tags&.pluck(:name)
    next unless tags&.any?

    { event_type: item['event_type'], tags: tags, author: link.author }
  end
  formatted = rows.compact

  # Truncate instead of appending: appending a second JSON array to an
  # existing file yields "[...][...]", which is not a parseable JSON document.
  File.open('analyzed.json', 'w') { |f| JSON.dump(formatted, f) }
end
# Provisional analysis #2 ("karibunseki2"): count how often each well-defined
# tag name and each author appears on the first link of the Nweets referenced
# by ./output.json, then write the counts to frequency.json, most frequent
# first. Authors are counted under the key "a:<author>". Each output entry
# has:
#   name      - tag name or "a:<author>" key
#   count     - occurrences among the events in output.json
#   all_count - number of LinkTag rows for Tag records with that name
#   pct       - count / all_count, or null when all_count is 0
desc 'karibunseki2'
task analyze2: :environment do
  items = JSON.parse(File.read('./output.json')).fetch('items')

  tag_frequency = Hash.new(0)
  items.each do |item|
    link = Nweet.find_by(id: item['item_id'].to_i)&.links&.first
    # Without a link there are neither tags nor an author to count.
    next unless link

    tag_names = link.tags&.where(well_defined: true)&.pluck(:name) || []
    tag_names.each { |tag_name| tag_frequency[tag_name] += 1 }

    # Skip nil as well as empty authors; counting nil would create a bogus
    # "a:" entry.
    author = link.author
    tag_frequency["a:#{author}"] += 1 if author && !author.empty?
  end

  ranked = tag_frequency.sort_by { |_, count| count }.reverse
  obj = ranked.map do |name, count|
    tag = Tag.where(name: name)
    all_count = LinkTag.where(tag: tag).count
    {
      name: name,
      count: count,
      all_count: all_count,
      # Names with no LinkTag rows (presumably the "a:<author>" keys, which
      # are not real tag names) would divide by zero and produce Infinity,
      # which is not valid JSON; emit null instead.
      pct: all_count.zero? ? nil : count / all_count.to_f
    }
  end

  File.open('frequency.json', 'w') { |f| JSON.dump(obj, f) }
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment