Skip to content

Instantly share code, notes, and snippets.

@theasder
Created February 25, 2017 19:28
Show Gist options
  • Save theasder/f3a5d3a632b7c5288b67abee323ab1bf to your computer and use it in GitHub Desktop.
Save theasder/f3a5d3a632b7c5288b67abee323ab1bf to your computer and use it in GitHub Desktop.
import time
import datetime
import re
import string
import sqlite3
from numpy import array, median
import vk
# Module-wide VK API client. The session is created with no arguments
# (no access token is passed here); `api` is the handle the Page class
# uses for every groups.* / wall.* request.
session = vk.Session()
api = vk.API(session)
class Post:
    """A single VK wall post together with the metrics used to rank it.

    Attributes:
        owner_id: Id of the wall the post belongs to.
        id: Id of the post on that wall.
        likes, reposts, comments: Counters read from the API record.
        date: Value of the record's ``date`` field.
        text: Raw post text as returned by the API.
        filtered_text: ``text`` after the cleanup done by ``word_freq``.
        link: ``https://vk.com/wall<owner_id>_<id>`` URL of the post.
        virality: ``likes + 5 * reposts``.
        score: Relative score; starts at 0 and is assigned by the owner
            of the post collection.
    """

    # Compiled once for all instances; raw strings avoid the invalid
    # "\w" / "\@" escape sequences of a plain string literal.
    _WHITESPACE_RE = re.compile(r"\s+")
    _QUOTES_RE = re.compile(r"[«»\"]+")
    _HASHTAG_RE = re.compile(r"#[\wа-яА-ЯёЁ]+(@\w+)?")

    def __init__(self, owner_id, id, post=None, members=-1):
        """Build a post from one API wall record.

        Args:
            owner_id: Id of the wall owner.
            id: Id of the post.
            post: Mapping with the keys ``likes``, ``reposts`` and
                ``comments`` (each holding a ``count``), plus ``date``
                and ``text``.
            members: Accepted for call compatibility; not used.

        Raises:
            TypeError: If ``post`` is None.
        """
        if post is None:
            raise TypeError("post must be a wall record mapping, not None")
        self.owner_id = owner_id
        self.id = id
        self.likes = post['likes']['count']
        self.date = post['date']
        self.text = post['text']
        self.reposts = post['reposts']['count']
        self.comments = post['comments']['count']
        self.link = "https://vk.com/wall" + str(self.owner_id) + "_" + str(self.id)
        # A repost is weighted five times as much as a like.
        self.virality = self.likes + 5 * self.reposts
        self.score = 0
        self.word_freq()

    def __repr__(self):
        return str((self.score, self.filtered_text, self.link, self.date))

    def query(self):
        """Return ``(filtered_text, link, score, date)``."""
        return self.filtered_text, self.link, self.score, self.date

    def word_freq(self):
        """Store a cleaned copy of ``text`` in ``filtered_text``.

        Despite the name, no frequencies are computed. ``<br>`` markers
        become spaces, whitespace runs collapse to a single space, the
        quote characters « » " are dropped, and hashtags (optionally
        followed by ``@name``) are removed. Hashtags are removed last, so
        the spaces that surrounded one are left in place.
        """
        text = " ".join(self.text.split("<br>"))
        text = self._WHITESPACE_RE.sub(" ", text)
        text = self._QUOTES_RE.sub("", text)
        text = self._HASHTAG_RE.sub("", text)
        self.filtered_text = text
class Page:
    """A VK community wall and the posts collected from it.

    Attributes:
        id: Wall owner id (the negated group id when built from a name).
        members: ``count`` field of the groups.getMembers response.
        number_of_posts: First element of the wall.get response.
        posts: ``Post`` objects whose date lies in the requested window,
            each with ``score`` assigned by ``set_score``.
    """

    # Posts requested per wall.get call; also the offset step.
    _PAGE_SIZE = 100

    # Optional scheme and vk.com host in front of a community screen name.
    _URL_PREFIX_RE = re.compile(
        r'^(?:https?://)?(?:(?:www\.|m\.)?vk\.com(?:/|$))?', re.IGNORECASE
    )

    def __init__(self, id, since_time=None, until_time=None):
        """Load the community and its posts.

        Args:
            id: Screen name or vk.com URL of the community (str), or an
                owner id that is used as is.
            since_time: Lower bound of the post date window, in seconds
                since the epoch. Defaults to ``2 * 366`` days before
                ``until_time``.
            until_time: Upper bound of the window. Defaults to now.
        """
        if isinstance(id, str):
            response = api.groups.getById(group_ids=self._screen_name(id))
            # The wall owner id is the group id negated.
            self.id = -response[0]['gid']
        else:
            self.id = id
        response = api.groups.getMembers(group_id=-self.id, count=1)
        self.members = response['count']
        response = api.wall.get(owner_id=self.id, count=1)
        # Element 0 of the response is used as the total post count.
        self.number_of_posts = response[0]

        if until_time is None:
            until_time = int(round(time.time()))
        if since_time is None:
            window = datetime.timedelta(days=2 * 366)
            since_time = until_time - int(window.total_seconds())
        self.posts = self.get_posts(since_time, until_time)
        self.set_score()

    def __repr__(self):
        return "".join(str(post) + "\n" for post in self.posts)

    @staticmethod
    def _screen_name(reference):
        """Strip an optional scheme / vk.com host and slashes from *reference*."""
        cleaned = Page._URL_PREFIX_RE.sub('', reference.strip(), count=1)
        return cleaned.strip('/')

    def get_posts(self, since_time, until_time):
        """Return the wall posts dated within ``[since_time, until_time]``.

        The wall is read in pages of ``_PAGE_SIZE`` posts. Paging stops
        after the first page that contains a post older than
        ``since_time``.
        """
        # NOTE(review): the stop rule assumes posts arrive newest first; a
        # post out of that order (e.g. a pinned one) would end paging
        # early -- confirm against the API behaviour.
        page_count = self.number_of_posts // self._PAGE_SIZE + 1
        posts = []
        for offset in range(0, page_count * self._PAGE_SIZE, self._PAGE_SIZE):
            # Element 0 is skipped: it is not a post record (see __init__).
            response = api.wall.get(
                owner_id=self.id, count=self._PAGE_SIZE, offset=offset
            )[1:]
            reached_older = False
            for post in response:
                if since_time <= post['date'] <= until_time:
                    posts.append(Post(self.id, post['id'], post, self.members))
                if post['date'] < since_time:
                    reached_older = True
            if reached_older:
                break
        return posts

    def set_score(self):
        """Set each post's ``score`` to its virality divided by the median.

        With no posts this is a no-op. When the median virality is 0 the
        raw virality is used as the score, so the division never yields
        inf or nan.
        """
        if not self.posts:
            return
        med = float(median(array([post.virality for post in self.posts])))
        divisor = med if med > 0 else 1.0
        for post in self.posts:
            post.score = float(post.virality / divisor)
# Collect the posts of the 'iamdev' community and turn each one into a
# (text, url, score, date) row for the database.
page = Page('iamdev')
queries = [post.query() for post in page.posts]

conn = sqlite3.connect('posts/my_posts.db')
try:
    c = conn.cursor()

    # Create the table on the first run only, so that running the script
    # again does not fail with "table posts already exists".
    c.execute('''CREATE TABLE IF NOT EXISTS posts(
id INTEGER PRIMARY KEY AUTOINCREMENT,
text VARCHAR(500),
url VARCHAR(100),
virality FLOAT,
time BIGINT);''')

    # # remove duplicates
    # c.execute("""
    # DELETE FROM posts
    # WHERE ID NOT IN (SELECT MAX(ID)
    # FROM posts
    # GROUP BY url
    # HAVING MAX(ID) IS NOT NULL)
    # """)

    # The `virality` column receives the third element of each row, which
    # is the post's score.
    c.executemany('INSERT INTO posts (text, url, virality, time) VALUES (?,?,?,?)', queries)

    # NOTE(review): the commit is disabled, so the rows inserted above are
    # discarded when the connection closes. Enabling it without the
    # duplicate removal above would add the same posts again on every run.
    # conn.commit()

    # Print the URLs of the 20 highest-scored posts.
    for row in c.execute('SELECT url FROM posts ORDER BY virality DESC LIMIT 20'):
        print(row[0])
finally:
    # Close the connection even when a statement above raised.
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment