-
-
Save miracle777/28c622ed470487fc2cbbe6d34507f75a to your computer and use it in GitHub Desktop.
RT100.ipynb の公開用
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "RT100.ipynb の公開用", | |
"provenance": [], | |
"authorship_tag": "ABX9TyP7+kb3rCiDPGLmduLgBLyj", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/miracle777/28c622ed470487fc2cbbe6d34507f75a/rt100-ipynb.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab_type": "code", | |
"outputId": "01545109-a5b0-4142-bb12-5af100dbcfab", | |
"id": "Ao-mt7hm3zOv", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
} | |
}, | |
"source": [ | |
"%pip install requests requests_oauthlib\n" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (2.21.0)\n", | |
"Requirement already satisfied: requests_oauthlib in /usr/local/lib/python3.6/dist-packages (1.3.0)\n", | |
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests) (2.8)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests) (2019.11.28)\n", | |
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests) (1.24.3)\n", | |
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests) (3.0.4)\n", | |
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests_oauthlib) (3.1.0)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WPrhysUfV-DW", | |
"colab_type": "code", | |
"outputId": "f72e1396-b1c8-4864-ec66-6542f74dfe3f", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 102 | |
} | |
}, | |
"source": [ | |
"%pip install pandas\n" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (0.25.3)\n", | |
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.17.5)\n", | |
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2018.9)\n", | |
"Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.6.1)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NzQown4YmG6J", | |
"colab_type": "code", | |
"outputId": "0c841dc7-219b-4c7a-9833-009afc583b01", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 306 | |
} | |
}, | |
"source": [ | |
"# Collect the retweets of one tweet through the Twitter v1.1 search API,\n", | |
"# paging backwards with max_id, and store every hit both in a CSV file and\n", | |
"# in the DataFrame `df`.\n", | |
"import csv\n", | |
"import json\n", | |
"import os\n", | |
"from time import sleep\n", | |
"\n", | |
"import pandas as pd\n", | |
"from requests_oauthlib import OAuth1Session\n", | |
"\n", | |
"# API credentials. Environment variables are read first so that real secrets\n", | |
"# never have to be saved in the notebook; the masked strings are fallbacks\n", | |
"# that can still be replaced by hand as before.\n", | |
"CK = os.environ.get('TWITTER_CONSUMER_KEY', '**********************') #Consumer key\n", | |
"CS = os.environ.get('TWITTER_CONSUMER_SECRET', '**********************') #Consumer secret\n", | |
"AT = os.environ.get('TWITTER_ACCESS_TOKEN', '**********************') #Access token\n", | |
"ATS = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '**********************') #Access token secret\n", | |
"\n", | |
"url = 'https://api.twitter.com/1.1/search/tweets.json'\n", | |
"output_csv_file_name = 'retweet_info.csv' # file that receives the collected rows\n", | |
"columns = ['ユーザー名','ユーザー表示名','ユーザーID','リツイートID','時間', 'ツイート']\n", | |
"\n", | |
"moto_tweet = ' ' # text of the original tweet to search for\n", | |
"moto_tweet_screen_name = '@user' # screen name of the original author\n", | |
"\n", | |
"# Restrict the search to retweets of the original tweet.\n", | |
"keyword = moto_tweet +' filter:retweets '+ moto_tweet_screen_name\n", | |
"\n", | |
"count = 100 # results requested per page\n", | |
"max_id = -1 # -1 means that no page has been fetched yet\n", | |
"# max_id is only added once a first page exists (see the loop below).\n", | |
"params = {'q' : keyword, 'count' : count}\n", | |
"\n", | |
"twitter = OAuth1Session(CK, CS, AT, ATS)\n", | |
"\n", | |
"# The file is opened in append mode, so only write the header when the file\n", | |
"# is new or empty; otherwise every re-run would add another header row.\n", | |
"write_header = (not os.path.exists(output_csv_file_name)\n", | |
"                or os.path.getsize(output_csv_file_name) == 0)\n", | |
"\n", | |
"rows = [] # one list per retweet, in `columns` order\n", | |
"\n", | |
"# Open the CSV once for the whole crawl; newline='' is what the csv module\n", | |
"# expects, and UTF-8 is needed for arbitrary tweet text.\n", | |
"with open(output_csv_file_name, 'a', newline='', encoding='utf-8') as f:\n", | |
"    writer = csv.writer(f, lineterminator='\\n')\n", | |
"    if write_header:\n", | |
"        writer.writerow(columns)\n", | |
"\n", | |
"    while True:\n", | |
"        if max_id != -1:\n", | |
"            # Ask only for tweets older than the oldest one already stored.\n", | |
"            params['max_id'] = max_id - 1\n", | |
"        req = twitter.get(url, params = params)\n", | |
"\n", | |
"        if req.status_code == 429:\n", | |
"            # Rate limited: wait for the 15 minute window, then retry.\n", | |
"            print('15分待ちます')\n", | |
"            sleep(15*60)\n", | |
"            continue\n", | |
"\n", | |
"        if req.status_code != 200:\n", | |
"            # Any other failure (bad credentials, bad query, ...) will not\n", | |
"            # fix itself by waiting, so stop and keep what was collected.\n", | |
"            print('リクエストに失敗しました:', req.status_code, req.text)\n", | |
"            break\n", | |
"\n", | |
"        search_timeline = json.loads(req.text)\n", | |
"        statuses = search_timeline['statuses']\n", | |
"        if not statuses:\n", | |
"            break\n", | |
"\n", | |
"        for tweet in statuses:\n", | |
"            append_list = [\n", | |
"                tweet['user']['name'],\n", | |
"                tweet['user']['screen_name'],\n", | |
"                tweet['user']['id'],\n", | |
"                tweet['id'],\n", | |
"                tweet['created_at'],\n", | |
"                tweet['text'],\n", | |
"            ]\n", | |
"            rows.append(append_list)\n", | |
"            writer.writerow(append_list)\n", | |
"\n", | |
"        # Flush per page so an interrupted run still leaves the rows on disk.\n", | |
"        f.flush()\n", | |
"        max_id = statuses[-1]['id']\n", | |
"\n", | |
"# Build the frame once from all rows (DataFrame.append was removed in\n", | |
"# pandas 2.0 and also left every row with index 0).\n", | |
"df = pd.DataFrame(rows, columns=columns)\n", | |
"\n", | |
"# Quick checks of the resulting DataFrame.\n", | |
"print('dataframeの行数・列数の確認==>\\n', df.shape)\n", | |
"print('indexの確認==>\\n', df.index)\n", | |
"print('columnの確認==>\\n', df.columns)\n", | |
"print('dataframeの各列のデータ型を確認==>\\n', df.dtypes)\n", | |
"\n", | |
"df.head()\n" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"dataframeの行数・列数の確認==>\n", | |
" (203, 6)\n", | |
"indexの確認==>\n", | |
" Int64Index([0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" ...\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", | |
" dtype='int64', length=203)\n", | |
"columnの確認==>\n", | |
" Index(['ユーザー名', 'ユーザー表示名', 'ユーザーID', 'リツイートID', '時間', 'ツイート'], dtype='object')\n", | |
"dataframeの各列のデータ型を確認==>\n", | |
" ユーザー名 object\n", | |
"ユーザー表示名 object\n", | |
"ユーザーID object\n", | |
"リツイートID object\n", | |
"時間 object\n", | |
"ツイート object\n", | |
"dtype: object\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "RZ630az_m1AC", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"データ分析で頻出のPandas\n", | |
"https://qiita.com/ysdyt/items/9ccca82fc5b504e7913a\n", | |
"\n", | |
"ソース元\n", | |
"https://datumstudio.jp/blog/twitterapi%E3%82%92%E7%94%A8%E3%81%84%E3%81%9F%E3%82%AF%E3%83%AD%E3%83%BC%E3%83%A9%E3%83%BC%E4%BD%9C%E6%88%90\n", | |
"\n", | |
"【Pandas】データフレームから特定の行・列を取得する方法を総まとめ!\n", | |
"https://yolo.love/pandas/loc-iloc-at-iat/\n", | |
"\n", | |
"キーワード項目\n", | |
"https://qiita.com/mida12251141/items/97bce06e4167db8ed3d5\n", | |
"\n", | |
"IDのチェック\n", | |
"https://idtwi.com/\n" | |
] | |
} | |
] | |
} |
今回のプログラミングにあたり、私は、下記の二つのソースコードの作者に、心から感謝しています。
ありがとうございます。
https://datumstudio.jp/blog/twitterapi%E3%82%92%E7%94%A8%E3%81%84%E3%81%9F%E3%82%AF%E3%83%AD%E3%83%BC%E3%83%A9%E3%83%BC%E4%BD%9C%E6%88%90
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
公式リツイートを100件以上、収集する事ができます。
また、データフレームと、CSVの両方に出力しています。
エクセルやデータベースに、CSVファイルをインポートして、フィルタをかける事で、精度をあげる事ができます。