Skip to content

Instantly share code, notes, and snippets.

Last active July 20, 2021 13:18
Show Gist options
  • Save koonagi/12c5e2c6901cf6b1d704e4478f1ca196 to your computer and use it in GitHub Desktop.
Save koonagi/12c5e2c6901cf6b1d704e4478f1ca196 to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
# Lift pandas' display row limit so printed DataFrames are not truncated.
pd.set_option('display.max_rows', None)
import json
def main():
    """Search Instagram for one hashtag and print the de-duplicated media table.

    Resolves the hashtag ID with ``hashtag_id``, fetches the result pages with
    ``hashtag_info``, merges every page into a single DataFrame sorted by
    ``timestamp`` with duplicate ``id`` rows removed, and prints it.
    """
    # Access credentials
    business_account_id = '<user_business_account_id>'
    token = '<your_token>'
    fields = 'id,media_type,media_url,permalink,like_count,comments_count,caption,timestamp'
    search_type = "top_media"  # search type: recent_media or top_media

    # Search keyword
    query = "<keyword>"
    print('ハッシュタグ:' + query)

    # Resolve the hashtag ID
    hash_id = hashtag_id(business_account_id, query, token)

    # Fetch the hashtag search results (one list entry per response page)
    result = hashtag_info(hash_id, search_type, business_account_id, query, token, fields)

    # Merge the pages. Empty pages are skipped so that the 'timestamp' and
    # 'id' columns are guaranteed to exist before sorting / de-duplicating.
    frames = [pd.DataFrame(page) for page in result if page]
    if not frames:
        print('検索結果がありません')
        return

    df_concat = pd.concat(frames, sort=True)
    df_concat_sort = (
        df_concat.sort_values('timestamp')
        .drop_duplicates('id')
        .reset_index(drop=True)
    )

    # Output the result
    print(df_concat_sort)
# Look up the hashtag ID
def hashtag_id(business_account_id, query, token):
    """Return the ID of the hashtag named ``query``.

    Calls the Instagram Graph API ``ig_hashtag_search`` endpoint and returns
    the ``id`` of the first entry in the response's ``data`` list.

    Args:
        business_account_id: Sent as the ``user_id`` request parameter.
        query: Hashtag name to look up, sent as ``q``.
        token: Sent as the ``access_token`` request parameter.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        KeyError, IndexError: If the response carries no ``data`` entries.
    """
    id_search_url = "https://graph.facebook.com/ig_hashtag_search"
    # Passing values through ``params`` lets requests URL-encode them, which
    # matters for non-ASCII hashtags.
    params = {
        "user_id": business_account_id,
        "q": query,
        "access_token": token,
    }
    response = requests.get(id_search_url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()['data'][0]['id']
# Fetch the media tagged with a hashtag
def hashtag_info(hash_id, search_type, business_account_id, query, token, fields):
    """Fetch media for a hashtag, following pagination links.

    Requests ``{hash_id}/{search_type}`` from the Instagram Graph API and then
    follows ``paging.next`` links. The first page is always fetched; after it,
    at most ``count_limit + 1`` follow-up pages are requested.

    Args:
        hash_id: Hashtag ID as returned by ``hashtag_id``.
        search_type: Edge name placed in the URL path (the caller uses
            ``"top_media"`` or ``"recent_media"``).
        business_account_id: Sent as the ``user_id`` request parameter.
        query: Sent as the ``q`` request parameter.
        token: Sent as the ``access_token`` request parameter.
        fields: Comma-separated field list, sent as ``fields``.

    Returns:
        A list with one element per fetched page; each element is that
        response's ``data`` value (an empty list when the key is absent).

    Raises:
        requests.HTTPError: If any request answers with an HTTP error status.
    """
    all_response = []
    count = 0
    count_limit = 3

    request_url = "https://graph.facebook.com/{hash_id}/{search_type}".format(
        hash_id=hash_id, search_type=search_type
    )
    params = {
        "user_id": business_account_id,
        "q": query,
        "access_token": token,
        "fields": fields,
    }
    response = requests.get(request_url, params=params, timeout=30)
    response.raise_for_status()
    result = response.json()
    all_response.append(result.get('data', []))

    # Keep going only while the response advertises another page.
    next_url = result.get('paging', {}).get('next')
    while next_url is not None:
        # The "next" link is already a complete URL, query string included.
        response = requests.get(next_url, timeout=30)
        response.raise_for_status()
        result = response.json()
        all_response.append(result.get('data', []))

        following = result.get('paging', {}).get('next')
        if following is not None and count < count_limit:
            next_url = following
            count += 1
        else:
            next_url = None

    return all_response
# Run the search only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment