Created
June 16, 2012 17:33
-
-
Save agudulin/2942039 to your computer and use it in GitHub Desktop.
Python: Looking for the most beautiful girl in vk.com/girlsview
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import vkontakte | |
import json | |
import sqlite3 | |
import time | |
from pprint import pprint | |
def _take_field(text, label):
    """Cut one labelled value off the front of a post body.

    The value is the text between ``label`` (plus the single character that
    follows it) and the first ``'<br>'`` found in ``text``.

    Returns a ``(value, remaining_text)`` tuple.  When either the label or a
    ``'<br>'`` is missing, ``value`` is ``None`` and ``text`` is returned
    unchanged.
    """
    label_pos = text.find(label)
    br_pos = text.find('<br>')
    if label_pos == -1 or br_pos == -1:
        return None, text
    # Skip the label itself plus the one separator character after it.
    value = text[label_pos + len(label) + 1:br_pos]
    return value, text[br_pos + len('<br>'):]


def _parse_profile(post_text):
    """Extract ``(name, city, age, link)`` from the text of a wall post.

    Fields are read in the order Name, City, Age, Link; each of the first
    three consumes the text up to and including its ``'<br>'``.  Missing
    fields keep the placeholder values ``'None'`` (strings) or ``0`` (age).
    """
    name, city, age, link = 'None', 'None', 0, 'None'

    value, post_text = _take_field(post_text, 'Name:')
    if value is not None:
        name = value

    value, post_text = _take_field(post_text, 'City:')
    if value is not None:
        city = value

    value, post_text = _take_field(post_text, 'Age:')
    if value is not None:
        try:
            age = int(value)
        except ValueError:
            # Non-numeric age text: fall back to the placeholder.
            age = 0

    link_pos = post_text.find('Link:')
    if link_pos != -1:
        # The link is whatever remains after the label and its separator.
        link = post_text[link_pos + len('Link:') + 1:]

    return name, city, age, link


def _largest_photo_url(attachment):
    """Return the biggest photo URL found in ``attachment``.

    Sizes are tried from smallest to largest and the last one present wins.
    The string ``'None'`` is returned when the attachment has no ``'photo'``
    entry or no ``'src_big'`` key.
    """
    url = 'None'
    try:
        sizes = attachment['photo']
        for size_key in ('src_big', 'src_xbig', 'src_xxbig'):
            url = sizes[size_key]
    except KeyError:
        # Stop at the first missing size; keep the best URL seen so far.
        pass
    return url


def fill_db(db_name):
    """Download the community wall and store every post in a SQLite database.

    Creates the ``girls`` and ``photos`` tables in ``db_name`` if they do not
    exist yet, then pages through ``wall.get`` 100 posts at a time.  For each
    post it stores the parsed profile fields, the like/reply/repost counters
    and a rating, plus one ``photos`` row per attachment.

    The rating is ``(likes + replies + reposts) * 10000`` divided by the
    post's age in seconds, rounded to two decimals.

    Args:
        db_name: path of the SQLite database file to fill.

    NOTE(review): rows are always appended, so running this twice against the
    same file stores every post twice.
    """
    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()
        # IF NOT EXISTS makes re-runs safe without hiding real schema errors.
        c.execute("""CREATE TABLE IF NOT EXISTS girls (
            id integer primary key autoincrement,
            name text,
            city text,
            age int,
            link text,
            likes int,
            date int,
            reply_count int,
            reposts_count int,
            parse_date int,
            rating float
            )
        """)
        c.execute("""CREATE TABLE IF NOT EXISTS photos (
            id integer primary key autoincrement,
            id_girl int,
            url text,
            likes int)
        """)

        vk = vkontakte.API('my_app_id', 'my_app_secret')
        offset = 0
        date_now = int(time.time())

        while True:
            girls = vk.wall.get(owner_id='-37862023', offset=offset, count=100)
            offset += 100

            for _post in girls:
                # Non-dict entries are skipped; presumably the leading element
                # is the total post count (it is used as such below).
                if not isinstance(_post, dict):
                    continue

                likes = int(_post['likes']['count'])
                date = int(_post['date'])
                reply_count = int(_post['reply_count'])
                reposts_count = int(_post['reposts']['count'])
                name, city, age, link = _parse_profile(_post['text'])

                # Clamp the age to one second so a post stamped "now" (or in
                # the future) cannot divide by zero or go negative.
                age_seconds = max(date_now - date, 1)
                activity = likes + reply_count + reposts_count
                rating = round(float(activity) * 10000 / age_seconds, 2)

                # Bound parameters: post text is untrusted and may contain
                # quotes that would break (or hijack) a string-built query.
                c.execute(
                    """INSERT INTO girls
                    (name, city, age, link, likes, date, reply_count,
                     reposts_count, parse_date, rating)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (name, city, age, link, likes, date, reply_count,
                     reposts_count, date_now, rating))
                girls_id = c.lastrowid

                for _photo in _post.get('attachments', ()):
                    c.execute(
                        """INSERT INTO photos (id_girl, url, likes)
                        VALUES (?, ?, ?)
                        """,
                        (girls_id, _largest_photo_url(_photo), likes))

            conn.commit()
            # Stop on an empty response or once the offset passes girls[0].
            if not girls or offset > girls[0]:
                break
            # Pause between requests.
            time.sleep(0.7)

        c.close()
    finally:
        conn.close()
def find_most_beautiful_girl(db_name):
    """Print every stored profile whose rating is above 40.0.

    Profiles are read from the ``girls`` table of ``db_name`` in ascending
    rating order, so the highest-rated one is printed last.  Each profile is
    followed by the URLs stored for it in the ``photos`` table.

    Args:
        db_name: path of the SQLite database file to read.
    """
    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()
        c.execute("""SELECT id, rating, name, age, link FROM girls WHERE rating > 40.0 ORDER BY rating""")
        girls = c.fetchall()
        for girl in girls:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("\n--- Rating: %s ---" % (girl[1]))
            print("Name: %s\nAge: %s\nLink: %s\nPhotos:" % (girl[2], girl[3], girl[4]))
            # The cursor is reused safely: the profile rows are already in memory.
            c.execute("""SELECT url FROM photos WHERE id_girl=?""", (girl[0],))
            for photo in c.fetchall():
                print("\t> %s" % (photo[0]))
        c.close()
    finally:
        # Release the database handle even if a query or print fails.
        conn.close()
# Script entry point: populate the database, then report the top-rated profiles.
if __name__ == '__main__':
    for _step in (fill_db, find_most_beautiful_girl):
        _step('girls.db')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment