Skip to content

Instantly share code, notes, and snippets.

@nagayev
Created July 27, 2018 15:34
Show Gist options
  • Save nagayev/8bc82086d6b5f4d844ce953aa5716721 to your computer and use it in GitHub Desktop.
Save nagayev/8bc82086d6b5f4d844ce953aa5716721 to your computer and use it in GitHub Desktop.
Save messages from VK
#coding:utf-8
from __future__ import print_function
from sys import version_info as py_ver
import vk_api
import time
import re
# True when running under Python 2. There the built-in open() has no
# ``encoding`` argument, so io.open (the Python 3 implementation) replaces it.
py2 = py_ver < (3,)
if py2:
    from io import open
# Credentials of the account whose messages are exported; replace the
# placeholders before running.
login = 'YOR_LOGIN'
password = 'YOUR_PASSWORD'
session = vk_api.VkApi(login=login, password=password, api_version='5.13')
try:
    session.auth()
except vk_api.AuthError:
    # Nothing below can work without an authorised session, so stop here
    # instead of continuing with ``vkapi`` undefined.
    print('Incorrect login or password')
    raise SystemExit(1)
vkapi = session.get_api()

# Presumably the ID of the logged-in user: the owner of the first post
# returned by wall.get(user_id=0).
# NOTE(review): this fails if 'items' is empty (no posts) - confirm.
SELF_ID = vkapi.wall.get(user_id=0)['items'][0]['owner_id']

# Pause, in seconds, passed to get_history() for its time.sleep() calls.
SLEEP_TIME = 0.3

# IDs of the current user's friends. Only the [6:8] slice (at most two
# entries) of the returned list is kept, not the whole list.
friends = vkapi.friends.get()['items'][6:8]
def get_dialogs(user_id):
    """Return the response of ``messages.getDialogs`` called with *user_id*."""
    return vkapi.messages.getDialogs(user_id=user_id)
def _fetch_friend_history(friend, total, sleep_time):
    """Download *total* messages exchanged with *friend*, 200 per request."""
    page_size = 200
    messages = []
    for offset in range(0, total, page_size):
        messages += vkapi.messages.getHistory(
            user_id=friend,
            count=page_size,
            offset=offset)['items']
        time.sleep(sleep_time)
        left = total - offset - page_size
        if left > 0:
            print('--processing', friend, ':', left,
                  'of', total, 'messages left')
    return messages


def get_history(friends, sleep_time=0.3):
    """Collect the message history for every user in *friends*.

    Args:
        friends: sequence of user IDs (``len()`` is taken for the progress
            output).
        sleep_time: seconds to sleep after every API request.

    Returns:
        One flat list holding the ``'items'`` entries returned by
        ``messages.getHistory`` for all friends, in iteration order.
    """
    all_history = []
    frlen = len(friends)
    for i, friend in enumerate(friends, 1):
        friend_dialog = get_dialogs(friend)
        time.sleep(sleep_time)
        # NOTE(review): 'count' of the getDialogs response is used as the
        # number of messages in the conversation - confirm against the VK API.
        dialog_len = friend_dialog['count']
        # Conversations of any length are paged, including those that fit
        # into a single request; otherwise short conversations would be lost.
        all_history += _fetch_friend_history(friend, dialog_len, sleep_time)
        print('processed', i, 'friends of', frlen)
    return all_history
def get_messages_for_user(data, user_id):
    """Extract message texts from *data*, filtered against *user_id*.

    A message is kept when neither its ``'from_id'`` nor its ``'user_id'``
    equals *user_id*; newlines inside the kept ``'body'`` are replaced by
    spaces so every message occupies a single line.
    NOTE(review): confirm this is the intended direction of the filter.

    Args:
        data: iterable of message dicts with ``'from_id'``, ``'user_id'``
            and ``'body'`` keys.
        user_id: ID the messages are compared against.

    Returns:
        The list of extracted texts, or the string "Нет сообщений"
        ("No messages") when *data* is empty or ``None``.
    """
    if not data:
        return "Нет сообщений"
    self_messages = [
        message['body'].replace("\n", " ")
        for message in data
        if message['from_id'] != user_id and message['user_id'] != user_id
    ]
    print('Extracted', len(self_messages), 'messages in total')
    return self_messages
def save_to_file(data, file_name='output.txt'):
    """Write *data* to *file_name* as UTF-8 text, replacing any existing file.

    A string is written verbatim. Any other iterable (for example a list of
    messages) is written one item per line; items that are not text are
    converted to their text representation first.

    Args:
        data: a string, or an iterable of items to write line by line.
        file_name: path of the file to create or overwrite.
    """
    text_type = type(u'')  # unicode on Python 2, str on Python 3

    def as_text(value):
        # The file is opened in text mode, so everything written must be text.
        if isinstance(value, text_type):
            return value
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return text_type(value)

    # ``with`` closes the file even when a write fails.
    with open(file_name, 'w', encoding='utf-8') as f:
        if isinstance(data, (text_type, bytes)):
            f.write(as_text(data))
        else:
            for item in data:
                f.write(as_text(item) + u'\n')
if __name__ == '__main__':
    # Download the conversations first and keep an unfiltered dump on disk.
    raw_history = get_history(friends, SLEEP_TIME)
    save_to_file(raw_history, 'raw1.txt')
    # Then reduce the dump to message texts and store them as the corpus.
    corpus = get_messages_for_user(raw_history, SELF_ID)
    save_to_file(corpus, 'sm_corpus.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment