Skip to content

Instantly share code, notes, and snippets.

@okbm
Created June 10, 2018 07:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save okbm/aa17e961b9985fff5fccded636bc15df to your computer and use it in GitHub Desktop.
Save okbm/aa17e961b9985fff5fccded636bc15df to your computer and use it in GitHub Desktop.
import urllib.parse
import urllib.request
import json
import pdb # pdb.set_trace()
# pip install Janome
# from janome.tokenfilter import POSKeepFilter
# from janome.tokenizer import Tokenizer
# from janome.analyzer import Analyzer
# pip install mecab-python3
import MeCab
# Shared MeCab tagger; the -d option points it at the mecab-ipadic-neologd
# dictionary directory (this path must exist on the machine running the script).
mecab = MeCab.Tagger ('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
# Hatena Bookmark "jsonlite" entry endpoint. The `url` query value already ends
# with the percent-encoded scheme prefix "https://" (https%3A%2F%2F).
base_url = 'http://b.hatena.ne.jp/entry/jsonlite/?url=https%3A%2F%2F'
# Scheme-less address of the article to look up; request_hatena() appends it
# to base_url as-is.
article_url = "anond.hatelabo.jp/20180604174309"
def request_hatena():
    """Fetch the bookmark JSON for the configured article and print it.

    Requests ``base_url + article_url`` (both module-level constants),
    decodes the response body as UTF-8 and writes it to stdout.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    # The context manager closes the HTTP response even when read() or
    # decode() raises; previously the response was never closed.
    with urllib.request.urlopen(base_url + article_url) as res:
        body = res.read().decode('utf-8')
    print(body)
def debug_print():
    """Print the comment of every bookmark stored in ``a.json``.

    Reads ``a.json`` from the current working directory, expects a top-level
    ``bookmarks`` list whose items each carry a ``comment`` key, and prints
    one comment per line.

    Raises:
        FileNotFoundError: if ``a.json`` does not exist.
        KeyError: if ``bookmarks`` or a row's ``comment`` key is missing.
    """
    # `with` guarantees the handle is closed even if parsing or a key lookup
    # fails. The encoding is pinned to UTF-8 so that non-ASCII comments do not
    # depend on the platform's locale default.
    with open('a.json', 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    for row in json_data['bookmarks']:
        print(row['comment'])
def analyzer_json():
    """Count proper nouns in the bookmark comments of ``a.json`` and print them.

    Each ``comment`` under the top-level ``bookmarks`` list is tokenized with
    the module-level ``mecab`` tagger. A token is counted when the second
    comma-separated field of its feature string equals '固有名詞' (proper noun).
    The result is printed as ``word count`` lines, highest count first; words
    with equal counts keep the order in which they were first seen.

    Raises:
        FileNotFoundError: if ``a.json`` does not exist.
        KeyError: if ``bookmarks`` or a row's ``comment`` key is missing.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # Load the whole document up front so the handle is closed (even on
    # error) before tokenizing. UTF-8 is pinned so Japanese text does not
    # depend on the platform's locale default.
    with open('a.json', 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    manga = Counter()
    for row in json_data['bookmarks']:
        # parseToNode returns the head of a linked list of tokens.
        node = mecab.parseToNode(row['comment'])
        while node:
            word = node.surface
            fields = node.feature.split(",")
            # Guard against feature strings with fewer than two fields
            # instead of raising IndexError; such tokens are simply skipped.
            pos = fields[1] if len(fields) > 1 else None
            if word is not None and pos == '固有名詞':
                manga[word] += 1
            node = node.next

    # most_common() sorts by count descending and keeps first-seen order for
    # ties, matching sorted(..., key=count, reverse=True) on the items.
    for k, v in manga.most_common():
        print(k, v)
# Script entry point: runs the proper-noun analysis on a.json. There is no
# __main__ guard, so this also executes when the file is imported.
analyzer_json()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment