Skip to content

Instantly share code, notes, and snippets.

@miracle777
Created January 20, 2020 02:54
Show Gist options
  • Save miracle777/28c622ed470487fc2cbbe6d34507f75a to your computer and use it in GitHub Desktop.
Save miracle777/28c622ed470487fc2cbbe6d34507f75a to your computer and use it in GitHub Desktop.
RT100.ipynb の公開用
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "RT100.ipynb の公開用",
"provenance": [],
"authorship_tag": "ABX9TyP7+kb3rCiDPGLmduLgBLyj",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/miracle777/28c622ed470487fc2cbbe6d34507f75a/rt100-ipynb.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"outputId": "01545109-a5b0-4142-bb12-5af100dbcfab",
"id": "Ao-mt7hm3zOv",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
}
},
"source": [
"# Install the HTTP / OAuth client libraries used by the crawler cell.\n",
"# %pip (rather than a bare `pip`) always installs into the running kernel's environment.\n",
"%pip install requests requests_oauthlib\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (2.21.0)\n",
"Requirement already satisfied: requests_oauthlib in /usr/local/lib/python3.6/dist-packages (1.3.0)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests) (2.8)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests) (2019.11.28)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests) (1.24.3)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests) (3.0.4)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests_oauthlib) (3.1.0)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WPrhysUfV-DW",
"colab_type": "code",
"outputId": "f72e1396-b1c8-4864-ec66-6542f74dfe3f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
}
},
"source": [
"# Install pandas for the DataFrame built by the crawler cell.\n",
"# %pip (rather than a bare `pip`) always installs into the running kernel's environment.\n",
"%pip install pandas\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (0.25.3)\n",
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.17.5)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2018.9)\n",
"Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.6.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "NzQown4YmG6J",
"colab_type": "code",
"outputId": "0c841dc7-219b-4c7a-9833-009afc583b01",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 306
}
},
"source": [
"import csv\n",
"import json\n",
"from time import sleep\n",
"\n",
"import pandas as pd\n",
"from requests_oauthlib import OAuth1Session\n",
"\n",
"# Collect the retweets of one tweet through the Twitter v1.1 search endpoint,\n",
"# page by page. Every retweet is appended to a CSV file as it arrives, and a\n",
"# DataFrame holding all rows is built once the search is exhausted.\n",
"\n",
"# API credentials: replace the placeholders locally and never publish real keys.\n",
"CK = \"**********************\" #Consumer key\n",
"CS = \"**********************\" #Consumer secret\n",
"AT = \"**********************\" #Access token\n",
"ATS = \"**********************\" #Access token secret\n",
"\n",
"url = 'https://api.twitter.com/1.1/search/tweets.json'\n",
"\n",
"# CSV file that receives the collected values (opened in append mode below).\n",
"output_csv_file_name = 'retweet_info.csv'\n",
"\n",
"# Column labels shared by the CSV header and the DataFrame.\n",
"columns = ['ユーザー名','ユーザー表示名','ユーザーID','リツイートID','時間', 'ツイート']\n",
"\n",
"# Search target: text of the original tweet and the screen name of its author.\n",
"moto_tweet = ' '\n",
"moto_tweet_screen_name = '@user'\n",
"\n",
"# Restrict the search to retweets of that tweet.\n",
"keyword = moto_tweet +' filter:retweets '+ moto_tweet_screen_name\n",
"\n",
"count = 100  # tweets requested per page\n",
"max_id = -1  # -1 means no page has been fetched yet\n",
"params = {'q' : keyword, 'count' : count}\n",
"\n",
"twitter = OAuth1Session(CK, CS, AT, ATS)\n",
"\n",
"rows = []  # one list per retweet; turned into the DataFrame in a single step\n",
"\n",
"# newline='' lets the csv writer control line endings itself; the file is opened\n",
"# once for the whole crawl instead of once per row.\n",
"with open(output_csv_file_name, 'a', newline='', encoding='utf-8') as f:\n",
"    writer = csv.writer(f, lineterminator='\\n')\n",
"    # Append mode starts at the end of the file, so position 0 means the file is\n",
"    # new or empty: write the header only then, not again on every re-run.\n",
"    if f.tell() == 0:\n",
"        writer.writerow(columns)\n",
"\n",
"    while True:\n",
"        if max_id != -1:\n",
"            # Ask only for tweets older than the oldest one already collected.\n",
"            params['max_id'] = max_id - 1\n",
"        # The request is issued on every pass so that a failed attempt is really retried.\n",
"        req = twitter.get(url, params = params)\n",
"\n",
"        if req.status_code != 200:\n",
"            # Client errors other than 429 (e.g. bad credentials) will not fix\n",
"            # themselves by waiting, so surface them instead of looping forever.\n",
"            if 400 <= req.status_code < 500 and req.status_code != 429:\n",
"                req.raise_for_status()\n",
"            f.flush()  # keep what has been collected so far safe during the wait\n",
"            print('15分待ちます')\n",
"            sleep(15*60)\n",
"            continue\n",
"\n",
"        statuses = json.loads(req.text).get('statuses', [])\n",
"        if not statuses:\n",
"            break  # no older retweets left\n",
"\n",
"        for tweet in statuses:\n",
"            append_list = [\n",
"                tweet['user']['name'],\n",
"                tweet['user']['screen_name'],\n",
"                tweet['user']['id'],\n",
"                tweet['id'],\n",
"                tweet['created_at'],\n",
"                tweet['text'],\n",
"            ]\n",
"            rows.append(append_list)\n",
"            writer.writerow(append_list)\n",
"\n",
"        f.flush()\n",
"        # The last status of the page is used as the paging cursor for the next request.\n",
"        max_id = statuses[-1]['id']\n",
"\n",
"# Build the frame once; this also gives a normal 0..n-1 index.\n",
"df = pd.DataFrame(rows, columns=columns)\n",
"\n",
"# Quick sanity checks on the resulting DataFrame.\n",
"print('dataframeの行数・列数の確認==>\\n', df.shape)\n",
"print('indexの確認==>\\n', df.index)\n",
"print('columnの確認==>\\n', df.columns)\n",
"print('dataframeの各列のデータ型を確認==>\\n', df.dtypes)\n",
"\n",
"df.head()\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"dataframeの行数・列数の確認==>\n",
" (203, 6)\n",
"indexの確認==>\n",
" Int64Index([0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" ...\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" dtype='int64', length=203)\n",
"columnの確認==>\n",
" Index(['ユーザー名', 'ユーザー表示名', 'ユーザーID', 'リツイートID', '時間', 'ツイート'], dtype='object')\n",
"dataframeの各列のデータ型を確認==>\n",
" ユーザー名 object\n",
"ユーザー表示名 object\n",
"ユーザーID object\n",
"リツイートID object\n",
"時間 object\n",
"ツイート object\n",
"dtype: object\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "RZ630az_m1AC",
"colab_type": "text"
},
"source": [
"データ分析で頻出のPandas\n",
"https://qiita.com/ysdyt/items/9ccca82fc5b504e7913a\n",
"\n",
"ソース元\n",
"https://datumstudio.jp/blog/twitterapi%E3%82%92%E7%94%A8%E3%81%84%E3%81%9F%E3%82%AF%E3%83%AD%E3%83%BC%E3%83%A9%E3%83%BC%E4%BD%9C%E6%88%90\n",
"\n",
"【Pandas】データフレームから特定の行・列を取得する方法を総まとめ!\n",
"https://yolo.love/pandas/loc-iloc-at-iat/\n",
"\n",
"キーワード項目\n",
"https://qiita.com/mida12251141/items/97bce06e4167db8ed3d5\n",
"\n",
"IDのチェック\n",
"https://idtwi.com/\n"
]
}
]
}
@miracle777
Copy link
Author

公式リツイートを100件以上、収集する事ができます。
また、データフレームと、CSVの両方に出力しています。
エクセルやデータベースに、CSVファイルをインポートして、フィルタをかける事で、精度をあげる事ができます。

@miracle777
Copy link
Author

今回のプログラミングにあたり、私は、下記の二つのソースコードの作者に、心から感謝しています。
ありがとうございます。
https://datumstudio.jp/blog/twitterapi%E3%82%92%E7%94%A8%E3%81%84%E3%81%9F%E3%82%AF%E3%83%AD%E3%83%BC%E3%83%A9%E3%83%BC%E4%BD%9C%E6%88%90

https://www.topgate.co.jp/rookie-python-twitter-api

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment