Skip to content

Instantly share code, notes, and snippets.

@pcchou
Last active May 22, 2021 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pcchou/992c424d695894b4fc44d3c4b5995171 to your computer and use it in GitHub Desktop.
Save pcchou/992c424d695894b4fc44d3c4b5995171 to your computer and use it in GitHub Desktop.
Facebook post parser class built on BeautifulSoup (bs4), 2017
from bs4 import BeautifulSoup as Soup
from requests import get as GET, post as POST
def parse_cookie(cookies):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    The string is split on ``';'`` into ``name=value`` pairs.  Whitespace
    around each name and value is dropped.

    Compared with a naive ``dict(pair.split('=') ...)``:

    * an empty string, a trailing ``';'`` or a segment without ``'='`` is
      skipped instead of raising ``ValueError``;
    * only the first ``'='`` separates name from value, so values that
      themselves contain ``'='`` (e.g. base64 padding) are kept intact;
    * spaces inside a value are preserved rather than deleted.

    Args:
        cookies: raw cookie string, e.g. ``"c_user=1; xs=abc"``.

    Returns:
        dict mapping cookie names to values; empty if nothing parseable.
    """
    jar = {}
    for segment in cookies.split(';'):
        name, sep, value = segment.partition('=')
        name = name.strip()
        if not sep or not name:
            # Blank segment or a bare token with no "name=" part: nothing
            # usable to store, so ignore it rather than crash.
            continue
        jar[name] = value.strip()
    return jar
# Raw cookie string for the session; it is handed to parse_cookie() below.
COOKIE_STRING = ""

# Keyword arguments unpacked into every GET/POST call made in this file:
# the parsed cookies plus a fixed desktop-Chrome User-Agent header.
REQARGS = dict(
    cookies=parse_cookie(COOKIE_STRING),
    headers={
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/90.0.4430.212 Safari/537.36'
        ),
    },
)
class Post:
    """One Facebook post, read and acted on via ``mbasic.facebook.com`` pages.

    Constructing a ``Post`` immediately downloads and parses the story page.

    Attributes:
        sid: value sent as ``story_fbid`` (and as ``ft_id`` when reacting).
        uid: value sent as the ``id`` query parameter.
        s: the parsed story page (BeautifulSoup document).
        contents: text extracted from the story, ``""`` if none was found.
        type: ``'P'`` when the page's first ``<strong>`` has no enclosing
            ``<h3>``; otherwise ``'S'``, ``'SP'`` or ``'M'`` depending on
            the header wording (see ``__init__``).
            NOTE(review): the intended meaning of the letters is not
            spelled out in the code -- presumably plain post / share /
            shared photo-or-video / other; confirm with the author.
    """

    _BASE_URL = 'https://mbasic.facebook.com'
    _STORY_VIEW_ID = "m_story_permalink_view"
    # Inline style of the <div> holding large-font post text.
    _BIG_TEXT_STYLE = 'font-size:24px;font-weight:300;line-height:1.2;display:inline'

    def __init__(self, sid, uid):
        """Fetch the story page for (*sid*, *uid*) and extract its fields.

        Missing page elements never raise here: absent text yields an empty
        ``contents`` and an absent ``<h3>`` header yields ``type == 'P'``.
        """
        self.sid = sid
        self.uid = uid
        page = GET(
            self._BASE_URL + '/story.php?story_fbid={}&id={}'.format(sid, uid),
            **REQARGS
        )
        self.s = Soup(page.text, 'html5lib')

        # Story text: the first <p> (strings joined by newlines) followed
        # directly by the large-font <div>, if either exists.
        view = self._story_view()
        if view is None:
            paragraph = big_text = None
        else:
            paragraph = view.find('p')
            big_text = view.find('div', attrs={'style': self._BIG_TEXT_STYLE})
        self.contents = self._text_of(paragraph) + self._text_of(big_text)

        # Classify by the wording of the <h3> header around the first
        # <strong>.
        first_strong = self.s.strong
        header = None if first_strong is None else first_strong.findParent('h3')
        if header is None:
            self.type = 'P'
            return
        h = ''.join(map(str, header.strings))
        # "分享了" = "shared"; "貼文" = "post"; "相片" = "photo"; "影片" = "video".
        if "分享了" in h and "貼文" in h:
            self.type = 'S'
        elif "分享了" in h and ("相片" in h or "影片" in h):
            self.type = 'SP'
        else:
            self.type = 'M'

    def _story_view(self):
        """Return the story container enclosing the first ``<strong>``, or None."""
        first_strong = self.s.strong
        if first_strong is None:
            return None
        return first_strong.findParent(id=self._STORY_VIEW_ID)

    @staticmethod
    def _text_of(tag):
        """Join the strings inside *tag* with newlines; ``""`` when *tag* is None."""
        if tag is None:
            return ""
        return '\n'.join(map(str, tag.strings))

    def react(self, name):
        """Apply the reaction labelled *name* to this post.

        Downloads the reactions picker page, looks up the text node equal
        to *name*, and follows the link found in its second enclosing
        ``<td>``.

        Returns:
            True if the follow-up request answered with HTTP 302; False if
            it answered otherwise or the reaction link could not be located.
        """
        picker_page = GET(
            self._BASE_URL + '/reactions/picker/?ft_id={}'.format(self.sid),
            **REQARGS
        )
        picker = Soup(picker_page.text, 'html5lib')
        try:
            # findParents() lists ancestors nearest-first; [1] is the second
            # enclosing <td>.
            action_url = picker.find(text=name).findParents('td')[1].find('a').attrs['href']
        except (AttributeError, IndexError, KeyError):
            # Label not on the page, fewer than two <td> ancestors, no <a>,
            # or an <a> without href: treat all as "reaction unavailable".
            return False
        # Redirects are not followed so the 302 itself can be observed.
        # NOTE(review): presumably a redirect means the reaction was
        # accepted -- confirm.
        response = GET(self._BASE_URL + action_url, **REQARGS, allow_redirects=False)
        return 302 == response.status_code

    def comment(self, text="", sticker=None):
        """Submit a comment on this post through the story page's form.

        Args:
            text: comment text; sent as ``comment_text`` when non-empty.
            sticker: sticker id; sent as ``sticker_id`` when truthy.

        Raises:
            AttributeError: if the story page has no comment form or the
                form lacks the ``fb_dtsg`` field.
        """
        view = self._story_view()
        form = None if view is None else view.find('form')
        if form is None:
            raise AttributeError("comment form not found on the story page")
        action_url = form.attrs['action']
        # fb_dtsg is a hidden form field that is echoed back on submit.
        data = {'fb_dtsg': form.find(attrs={'name': "fb_dtsg"}).attrs['value']}
        if sticker:
            data['sticker_id'] = sticker
        if text:
            data['comment_text'] = text
        POST(self._BASE_URL + action_url, **REQARGS, data=data)
# Demo: load one post by its story id / owner id and look at its text.
q = Post(
    sid="3931951836874187",
    uid="134395566629852",
)
# Bare expression: shows the value only in a REPL or notebook cell.
q.contents
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment