Skip to content

Instantly share code, notes, and snippets.

@south1907
Last active May 12, 2020 17:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save south1907/d9358a91cbd1c3aab33655f13d35ca25 to your computer and use it in GitHub Desktop.
Save south1907/d9358a91cbd1c3aab33655f13d35ca25 to your computer and use it in GitHub Desktop.
Get the comments posted through the Facebook comments plugin for one film on Phimmoi
import requests
import json
import time
# Wall-clock reference taken when the script starts; the end of the script
# subtracts it from a second reading to obtain the total run time.
start_time = time.time()

# Number of entries requested per call to the comment pager.
limit = 20

# HTTP headers sent with each comment-pager request.
headers = {
    'cookie': 'fr=0shZ5eUbOjaYtgLs0..BeuWpJ...1.0.BeuWpJ.',
}
def get_id_film(url_film, timeout=30):
    """Return the ``targetFBID`` of the Facebook comment thread for a film URL.

    Requests the Facebook comments plugin page with ``href`` set to
    ``url_film`` and extracts the value that follows ``"targetFBID":"`` in
    the returned text.

    Args:
        url_film: URL of the film page whose comment thread is wanted.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``targetFBID`` value as a string.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
        ValueError: If the response does not contain a complete
            ``"targetFBID":"..."`` field.
    """
    plugin_comment_root = 'https://www.facebook.com/plugins/feedback.php'
    marker = '"targetFBID":"'

    # Request the plugin page with href = url_film.
    r = requests.get(plugin_comment_root, params={'href': url_film},
                     timeout=timeout)
    r.raise_for_status()
    resp = r.text

    # Locate the targetFBID marker. str.find() returns -1 when it is absent;
    # adding the marker length to that would yield a bogus index and a
    # garbage ID, so fail loudly instead.
    marker_index = resp.find(marker)
    if marker_index == -1:
        raise ValueError(
            'targetFBID not found in plugin response for {!r}'.format(url_film))
    start_index = marker_index + len(marker)

    # The ID ends at the next double quote.
    end_index = resp.find('"', start_index)
    if end_index == -1:
        raise ValueError(
            'unterminated targetFBID in plugin response for {!r}'.format(url_film))

    return resp[start_index:end_index]
def get_page(film_id, after_cursor='', timeout=30):
    """Fetch one page of comments for a film from the Facebook comment pager.

    Posts ``after_cursor``, the module-level ``limit`` and ``__a=1`` to the
    ``/plugins/comments/async/<film_id>/pager/reverse_time/`` endpoint using
    the module-level ``headers``, then walks ``payload.idMap`` in the JSON
    reply.

    Args:
        film_id: Facebook target ID of the comment thread (a string; it is
            concatenated into the URL).
        after_cursor: Pagination cursor from the previous page, or ``''``
            for the first page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with three keys:
        ``'data'``: list of dicts, one per ``comment`` entry, each holding
        ``comment_user``, ``comment_content``, ``comment_timestamp`` and
        ``film_id``;
        ``'next'``: the ``afterCursor`` value from the payload;
        ``'film_name'``: name of the first ``ogobject`` entry with a
        non-empty name, or ``''`` when there is none.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
        ValueError: If the response body is not valid JSON after the
            prefix is removed.
        KeyError: If the payload lacks an expected field.
    """
    url = 'https://www.facebook.com/plugins/comments/async/' + film_id + '/pager/reverse_time/'
    form = {
        'after_cursor': after_cursor,
        'limit': limit,
        '__a': '1',
    }
    http_response = requests.post(url, headers=headers, data=form,
                                  timeout=timeout)
    http_response.raise_for_status()

    # The first 9 characters of the body are not part of the JSON document.
    # NOTE(review): presumably this is the `for (;;);` guard Facebook puts in
    # front of async responses - confirm against a live response.
    payload = json.loads(http_response.text[9:])['payload']

    comments = []
    film_name = ''
    # idMap entries come in three types: 'user', 'ogobject' and 'comment'.
    # 'user' entries carry nothing this function returns, so they are skipped.
    for item in payload['idMap'].values():
        item_type = item['type']
        if item_type == 'ogobject':
            # Keep only the first non-empty object name.
            if film_name == '':
                film_name = item['name']
        elif item_type == 'comment':
            comments.append({
                'comment_user': item['authorID'],
                'comment_content': item['body']['text'],
                'comment_timestamp': item['timestamp'],
                'film_id': film_id,
            })

    return {
        'data': comments,
        'next': payload['afterCursor'],
        'film_name': film_name,
    }
def get_all_of_film(url_film):
    """Collect every comment of a film by walking the comment pager.

    Resolves the film's Facebook ID with ``get_id_film`` and then calls
    ``get_page`` repeatedly, feeding each page's ``'next'`` cursor into the
    following request. The cursor is printed before every request.

    Args:
        url_film: URL of the film page whose comments are wanted.

    Returns:
        A dict with ``'data'`` (the concatenated comment dicts of all pages)
        and ``'film_id'`` (the ID returned by ``get_id_film``).
    """
    results = []
    after_cursor = ''
    film_id = get_id_film(url_film)

    while True:
        print('after_cursor: {}'.format(after_cursor))
        res = get_page(film_id, after_cursor)

        # No comments left: stop.
        if not res['data']:
            break

        # Otherwise keep the page and request the next one from its cursor.
        results += res['data']
        next_cursor = res['next']

        # An empty/null cursor would restart from the first page and an
        # unchanged cursor would re-fetch the same page; either way the loop
        # would never end, so treat both as the end of the thread.
        if not next_cursor or next_cursor == after_cursor:
            break
        after_cursor = next_cursor

    return {
        'data': results,
        'film_id': film_id,
    }
# Film page whose comments are downloaded.
url_film = 'http://www.phimmoi.net/phim/spongebob-bot-bien-dao-tau-9881/'
all_data = get_all_of_film(url_film)

# Write the comments to "<film_id>.json". The dump keeps non-ASCII characters
# as-is (ensure_ascii=False), so the file is opened explicitly as UTF-8
# rather than with the platform default encoding, which may be unable to
# encode them.
with open(all_data['film_id'] + '.json', 'w', encoding='utf-8') as outfile:
    json.dump(all_data['data'], outfile, indent=4, ensure_ascii=False)

# Total wall-clock run time in seconds; uncomment the print to display it.
end_time = time.time()
total_time = end_time - start_time
# print(total_time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment