Last active
April 1, 2020 01:55
-
-
Save lnanase/17d8afa1eb203c4a72afb13220d78202 to your computer and use it in GitHub Desktop.
4/1ネタなのでクソコードです
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# imastodonにあかり大好きbot的なことをする | |
# | |
# usage: push_imastodon_misaki.rb | |
require 'rubygems' | |
require 'bundler' | |
Bundler.require(:default) | |
Dotenv.load | |
# CONST
# User-Agent string sent with every request (mimics desktop Chrome 80).
# Frozen so the shared constant cannot be mutated by accident.
UA = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'.freeze
# Collects roughly the last hour of non-bot posts from the local timeline (LTL).
#
# Pages backwards through `api/v1/timelines/public` (40 statuses per request)
# using `max_id`, and stops at the first status older than one hour or as soon
# as the server returns an empty page.
#
# @param agent [#get] HTTP client; `get(uri, params)` must return an object
#   whose `body` is a JSON array of statuses (a Mechanize agent in this script)
# @return [String] cleaned post texts joined by single spaces; texts posted
#   within the last 20 minutes are repeated five times so they weigh more
# @raise [KeyError] if the MASTODON_HOST environment variable is not set
def find_latest_timeline(agent)
  now = Time.now
  recent_cutoff = now - (20 * 60)
  oldest_cutoff = now - (60 * 60)

  recent = []
  older = []

  get_uri = "#{ENV.fetch('MASTODON_HOST')}api/v1/timelines/public"
  get_params = { local: true, limit: 40 }

  # Only 40 statuses can be fetched per request, so page until done.
  loop do
    statuses = JSON.parse(agent.get(get_uri, get_params).body)
    # An empty page means the timeline is exhausted; without this check the
    # same request would be repeated forever.
    break if statuses.empty?

    reached_cutoff = false
    statuses.each do |status|
      created_at = Time.parse(status['created_at'])
      if created_at < oldest_cutoff
        reached_cutoff = true
        break
      end

      # Exclude bot accounts (only an explicit `false` counts as non-bot).
      next unless status['account']['bot'] == false

      bucket = created_at >= recent_cutoff ? recent : older
      bucket << extract_status_text(status)
    end
    break if reached_cutoff

    # Continue with statuses older than the last one seen on this page.
    get_params[:max_id] = statuses.last['id']
  end

  # Words from the last 20 minutes get five times the weight.
  (recent * 5 + older).join(' ')
end

# Converts one status hash into plain text suitable for morphological analysis.
def extract_status_text(status)
  spoiler = status['spoiler_text'].to_s
  body = status['content'].to_s
  html = spoiler.empty? ? body : "#{spoiler} #{body}"

  # Keep the text of hashtag links, then drop every other link entirely.
  # Negated classes / lazy quantifiers keep each match inside a single anchor,
  # so text between two links in the same post is not swallowed.
  html = html.gsub(%r{<a href="https://imastodon\.net/tags/[^"]*" [^>]*>#(.*?)</a>}, '\1')
  html = html.gsub(%r{<a href="[^"]*"[^>]*>.*?</a>}, '')

  # Strip the remaining HTML tags.
  text = Nokogiri::HTML(html).xpath('//text()').to_s
  text = text.gsub(/(&|<|>|"|©)/, '')

  # Convert full-width alphanumerics and half-width kana.
  NKF.nkf('-m0XZ1 -W -w', text)
end
# Runs MeCab morphological analysis over the text and picks out the nouns.
#
# A node is kept when its feature string starts with the noun tag, contains
# none of the excluded sub-categories, and its surface is not a stop word.
#
# @param text [String] text to analyse
# @return [Array<String>] surface forms of the kept nodes, in input order
def parse_in_mecab(text)
  # Selection rules
  wanted_pos = ['名詞']
  rejected_features = ['非自立', 'ナイ形容詞語幹', '数']
  stop_words = ['/', '.', ',', ':', '-', '_', '#', '?', '!', 'http', 'https', '://', '(', ')', '₍', '₎', '()','[', ']','ー','一', '~', 'いい' ,'ない']

  words = []
  tagger = Natto::MeCab.new
  tagger.enum_parse(text).each do |node|
    next unless wanted_pos.any? { |pos| node.feature.start_with?(pos) }
    # Exclusion conditions
    next if rejected_features.any? { |fragment| node.feature.include?(fragment) }
    next if stop_words.include?(node.surface)

    words << node.surface
  end
  words
end
# imastodon: set up the HTTP client.
agent = Mechanize.new
agent.user_agent = UA
# All custom headers go into ONE assignment: assigning request_headers twice
# replaces the first hash, which silently dropped the language header.
# "utf-8" is a charset rather than a content-coding, so it is sent as
# accept-charset instead of accept-encoding.
agent.request_headers = {
  'accept-language' => 'ja, ja-JP',
  'accept-charset' => 'utf-8',
  'Authorization' => "Bearer #{ENV.fetch('TB_RANKING_BEARER')}"
}

# Pick one random noun from the recent local timeline.
str = find_latest_timeline(agent)
result = parse_in_mecab(str)
word = result.sample
p word
# Do not post a toot with an empty word when nothing usable was found.
abort 'no candidate word found in the local timeline; nothing posted' if word.nil?

# toot
res = agent.post("#{ENV.fetch('MASTODON_HOST')}api/v1/statuses", {
  status: "なんとぉー!#{word} 美咲#{word}大好きっ♪",
  visibility: 'public'
})
p res.code
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment