Skip to content

Instantly share code, notes, and snippets.

@jh0l
Created January 18, 2018 11:47
Show Gist options
  • Save jh0l/332636f8fd07893de8a89b798de38c61 to your computer and use it in GitHub Desktop.
Save jh0l/332636f8fd07893de8a89b798de38c61 to your computer and use it in GitHub Desktop.
"""
Scrape a saved Vine yearly-playlist timeline HTML page and group its posts by user.
"""
import re
class UserClass:
    """Group the posts of a timeline HTML page by the user named in them.

    Attributes:
        dic: maps a username key to ``{"posts": [post, ...]}``.  The key is
            the quoted href value found in the post markup, quotes included
            (for example ``'"/u/alice"'``).
        arr: list of ``{"user": key, "posts": dic[key]}`` entries, one per
            user in ``dic``.
        posts: every chunk of lines split off the input, in input order.
    """

    # A line containing this text starts a new post.
    _POST_MARKER = "ember-view grid post"
    # A line containing this text announces a username; the href itself is
    # read from the line two positions further down.
    _USERNAME_MARKER = "class=\"username\""
    # NOTE: greedy on purpose to keep existing keys stable -- the match runs
    # up to the last double quote on the line.
    _HREF_RE = re.compile('href=".+"')

    def __init__(self, lines):
        """Split *lines* into posts, group them by user and fill ``arr``.

        Args:
            lines: iterable of text lines of the timeline page.
        """
        self.dic = {}
        self.arr = []
        self.posts = []
        self.populate_user_dict(lines)
        self.populate_user_arr()
        print("timeline success")

    def populate_user_dict(self, lines):
        """Split *lines* into posts and file each post under its user(s).

        A line containing the post marker starts a new post; every following
        line belongs to that post until the next marker.  Lines seen before
        the first marker are collected into a leading chunk that is handled
        the same way as a post.

        Args:
            lines: iterable of text lines of the timeline page.
        """
        post = []
        for line in lines:
            if self._POST_MARKER in line and post:
                self._store_post(post)
                post = []
            post.append(line)
        # Flush the chunk still being collected when the input ends;
        # otherwise the last post of the page would be silently dropped.
        if post:
            self._store_post(post)

    def _store_post(self, post):
        """Assign a finished post to its user(s) and record it in ``posts``."""
        self.assign_post_to_user(post)
        self.posts.append(post)

    def assign_post_to_user(self, post):
        """File *post* under every username announced inside it.

        For each line containing ``class="username"`` the href is searched
        in the line two positions further down.  A username line with no
        such following line, or whose following line holds no href, is
        skipped instead of raising.

        Args:
            post: list of text lines making up one post.
        """
        for i, line in enumerate(post):
            if self._USERNAME_MARKER not in line:
                continue
            if i + 2 >= len(post):
                # Markup is cut short: there is no line to read the href from.
                continue
            match = self._HREF_RE.search(post[i + 2])
            if match is None:
                continue
            username = match.group(0).replace("href=", "")
            self.dic.setdefault(username, {"posts": []})["posts"].append(post)

    def populate_user_arr(self):
        """Append one ``{"user": key, "posts": record}`` entry per user.

        Prints ``users undefined`` and leaves ``arr`` untouched when ``dic``
        holds no user.
        """
        if not self.dic:
            print("users undefined")
            return
        for key, data in self.dic.items():
            # Dump each user's record to stdout as it is added.
            print(data)
            self.arr.append({"user": key, "posts": data})

    def sort_user_arr(self, key=None, reverse=False):
        """Sort ``arr`` in place, then print the posts of every entry.

        Args:
            key: function taking one ``arr`` entry and returning its sort
                key.  Defaults to the number of posts filed under the user.
            reverse: sort in descending order when true.
        """
        if key is None:
            key = self._post_count
        self.arr.sort(key=key, reverse=reverse)
        for entry in self.arr:
            print(entry["posts"], end="\n\n\n")

    @staticmethod
    def _post_count(entry):
        """Return how many posts are filed under an ``arr`` entry."""
        # entry["posts"] is the user's record from ``dic``; the actual list
        # of posts sits under its own "posts" key.
        return len(entry["posts"]["posts"])
def main():
    """Read the saved timeline page, build a UserClass from it and list it.

    Opens ``vineYear4Timeline.html`` from the current working directory,
    passes all of its lines to ``UserClass`` and then calls
    ``sort_user_arr()`` on the result.
    """
    with open('vineYear4Timeline.html', 'r') as handle:
        timeline_lines = handle.readlines()
    timeline = UserClass(timeline_lines)
    timeline.sort_user_arr()
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment