Created
July 27, 2018 15:34
-
-
Save nagayev/8bc82086d6b5f4d844ce953aa5716721 to your computer and use it in GitHub Desktop.
Save messages from VK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
from __future__ import print_function | |
from sys import version_info as py_ver | |
import vk_api | |
import time | |
import re | |
# Python 2 compatibility: io.open accepts the `encoding` keyword that
# save_to_file() passes, which the Python 2 built-in open() does not.
py2 = py_ver < (3,)
if py2:
    from io import open

# Placeholder credentials: replace with real values before running.
login = 'YOR_LOGIN'
password = 'YOUR_PASSWORD'
session = vk_api.VkApi(login=login, password=password, api_version='5.13')
try:
    session.auth()
except vk_api.AuthError:
    # Stop here: without a successful login there is no API object, and
    # carrying on would only fail later with a NameError on `vkapi`.
    print('Incorrect login or password')
    raise SystemExit(1)
vkapi = session.get_api()

# Id of the account in use, read from the owner_id of the first wall item
# returned. Raises IndexError when the wall response has no items.
# NOTE(review): presumably user_id=0 resolves to the current user - confirm.
SELF_ID = vkapi.wall.get(user_id=0)['items'][0]['owner_id']  # your_id
SLEEP_TIME = 0.3  # pause between API requests, in seconds
# Friends of the current user.
# NOTE(review): the [6:8] slice keeps only two entries, although the original
# comment described fetching the whole list - confirm which is intended.
friends = vkapi.friends.get()['items'][6:8]
def get_dialogs(user_id):
    """Return the raw ``messages.getDialogs`` response for *user_id*."""
    return vkapi.messages.getDialogs(user_id=user_id)
def get_history(friends, sleep_time=0.3):
    """Download the message history shared with every user in *friends*.

    For each friend the ``count`` field of ``get_dialogs(friend)`` is used as
    the number of messages to fetch, and ``messages.getHistory`` is called in
    pages of 200 until that many messages have been requested.

    Conversations of any length are fetched; previously those with 200 or
    fewer messages were skipped because paging only ran for larger dialogs.

    Args:
        friends: sequence of user ids to fetch conversations for.
        sleep_time: pause in seconds after every API request.

    Returns:
        A flat list with the ``items`` of every fetched page, for all
        friends, in request order.
    """
    page_size = 200
    all_history = []
    frlen = len(friends)
    for i, friend in enumerate(friends, 1):
        friend_dialog = get_dialogs(friend)
        time.sleep(sleep_time)
        # NOTE(review): assumes 'count' is the number of messages exchanged
        # with this friend - confirm against the messages.getDialogs docs.
        dialog_len = friend_dialog['count']
        friend_history = []
        resid = dialog_len
        offset = 0
        while resid > 0:
            page = vkapi.messages.getHistory(
                user_id=friend,
                count=page_size,
                offset=offset)['items']
            time.sleep(sleep_time)
            if not page:
                # Nothing more to read even though the counter says
                # otherwise; further requests would be wasted.
                break
            friend_history += page
            resid -= page_size
            offset += page_size
            if resid > 0:
                print('--processing', friend, ':', resid,
                      'of', dialog_len, 'messages left')
        all_history += friend_history
        print('processed', i, 'friends of', frlen)
    return all_history
def get_messages_for_user(data, user_id):
    """Collect message bodies from *data* that do not involve *user_id*.

    A message is kept when neither its ``from_id`` nor its ``user_id`` field
    equals *user_id*. Newlines inside each kept body are replaced by spaces.

    Args:
        data: list of message dicts with ``from_id``, ``user_id``, ``body``.
        user_id: id to compare against both id fields.

    Returns:
        The string "Нет сообщений" when *data* is an empty list, otherwise
        the list of cleaned-up message bodies.
    """
    if data == []:
        return "Нет сообщений"
    extracted = [
        re.sub("\\n", " ", message['body'])
        for message in data
        if message['from_id'] != user_id and message['user_id'] != user_id
    ]
    print('Extracted', len(extracted), 'messages in total')
    return extracted
def save_to_file(data, file_name='output.txt'):
    """Write *data* to *file_name* as UTF-8 text, replacing any old content.

    Args:
        data: either a single string, written verbatim, or an iterable of
            items, written one per line. Items that are not text (for
            example message dicts) are serialized with ``json.dumps``.
        file_name: path of the file to create or overwrite.
    """
    import json  # local import: only needed for non-text items

    text_type = type(u'')

    def as_text(item):
        # Convert one item to text that the UTF-8 text file accepts.
        if not isinstance(item, (text_type, bytes)):
            item = json.dumps(item, ensure_ascii=False)
        if isinstance(item, bytes):
            item = item.decode('utf-8')
        return item

    # `with` guarantees the file is closed even when a write fails.
    with open(file_name, 'w', encoding='utf-8') as f:
        if isinstance(data, (text_type, bytes)):
            f.write(as_text(data))
        else:
            # f.write() rejects lists, so emit the items line by line.
            for item in data:
                f.write(as_text(item))
                f.write(u'\n')
def main():
    """Fetch the friends' history and write the raw and filtered outputs."""
    history = get_history(friends, SLEEP_TIME)
    save_to_file(history, 'raw1.txt')
    save_to_file(get_messages_for_user(history, SELF_ID), 'sm_corpus.txt')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment