Created
January 18, 2018 11:47
-
-
Save jh0l/332636f8fd07893de8a89b798de38c61 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Scrape the Vine yearly playlist timeline.
""" | |
import re | |
class UserClass:
    """Group the posts of a scraped timeline page by the user who made them.

    The constructor fills three attributes:

    * ``dic``   -- maps a username key (the quoted ``href`` value, with its
      double quotes kept) to ``{"posts": [post, ...]}``.  It is seeded with
      a ``0: 0`` placeholder entry, so "no users found" shows up as fewer
      than two keys.
    * ``arr``   -- list of ``{"user": key, "posts": dic[key]}`` records, one
      per real user (the placeholder entry is left out).
    * ``posts`` -- every chunk of lines cut from the input, in input order.
    """

    # Substring identifying the first line of a post.
    _POST_MARKER = "ember-view grid post"
    # Substring identifying the line that introduces a post's username.
    _USERNAME_MARKER = "class=\"username\""
    # Matches a single quoted href attribute.  ``[^"]+`` stops at the closing
    # quote so later attributes on the same line do not leak into the key.
    _HREF_RE = re.compile(r'href="[^"]+"')

    def __init__(self, lines):
        """Parse *lines* (an iterable of text lines) into dic, posts and arr."""
        self.dic = {0: 0}
        self.arr = []
        self.posts = []
        self.populate_user_dict(lines)
        self.populate_user_arr()
        print("timeline success")

    def populate_user_dict(self, lines):
        """Cut *lines* into posts and register each post under its user(s).

        A line containing the post marker closes the chunk collected so far
        and starts a new one.  Each closed chunk is passed to
        ``assign_post_to_user`` and appended to ``self.posts``.
        """
        # NOTE(review): lines that precede the first marker are collected and
        # stored as a chunk too -- confirm whether that is wanted.
        post = []
        for line in lines:
            if self._POST_MARKER in line and post:
                self.assign_post_to_user(post)
                self.posts.append(post)
                post = []
            post.append(line)
        # No marker follows the last post, so it has to be flushed here;
        # otherwise it would be dropped silently.
        if post:
            self.assign_post_to_user(post)
            self.posts.append(post)

    def assign_post_to_user(self, post):
        """File *post* under every username found in it.

        For each line containing the username marker, the href attribute is
        read from the line two positions further down.  The dictionary key is
        the href value including its surrounding double quotes.  Username
        lines without such a line, or whose target line has no href, are
        skipped instead of raising.
        """
        for i, line in enumerate(post):
            if self._USERNAME_MARKER not in line:
                continue
            if i + 2 >= len(post):
                # The chunk ends before the line that should hold the link.
                continue
            match = self._HREF_RE.search(post[i + 2])
            if match is None:
                continue
            username = match.group(0)[len("href="):]
            self.dic.setdefault(username, {"posts": []})["posts"].append(post)

    def populate_user_arr(self):
        """Copy the users in ``self.dic`` into ``self.arr`` as records.

        Prints ``users undefined`` and leaves ``arr`` untouched when ``dic``
        holds nothing beyond its placeholder entry.  Otherwise each user's
        data is printed and appended as ``{"user": key, "posts": data}``.
        """
        if len(self.dic) < 2:
            print("users undefined")
            return
        for key, data in self.dic.items():
            if key == 0:
                # Placeholder seeded in __init__, not a user.
                continue
            print(data)
            self.arr.append({"user": key, "posts": data})

    def sort_user_arr(self):
        """Print the ``"posts"`` value of every record in ``self.arr``.

        Each value is followed by three newlines.
        NOTE(review): despite the name, no sorting is performed here; the
        records are printed in their existing order.
        """
        for record in self.arr:
            print(record["posts"], end="\n\n\n")
def main(path='vineYear4Timeline.html'):
    """Build a UserClass from a saved timeline page and print its records.

    Args:
        path: HTML file to read.  Defaults to the file name the script has
            always used, so calling ``main()`` with no arguments is unchanged.
    """
    # Explicit encoding: without it the platform default is used, which can
    # fail or mis-decode non-ASCII text depending on the machine.
    with open(path, 'r', encoding='utf-8') as file:
        viners = UserClass(file.readlines())
    viners.sort_user_arr()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment