Skip to content

Instantly share code, notes, and snippets.

@chriskyfung
Last active July 25, 2021 21:13
Show Gist options
  • Save chriskyfung/e4c863699223206c23a05eebb89faa13 to your computer and use it in GitHub Desktop.
Save chriskyfung/e4c863699223206c23a05eebb89faa13 to your computer and use it in GitHub Desktop.
Get Posts from Facebook Pages and Convert them to WordPress XML Import File using Python with Facebook Scraper
# source file: https://gist.github.com/chriskyfung/e4c863699223206c23a05eebb89faa13
# last update at 2021-07-17T19:36:05+00:00
import html
import urllib
import urllib.parse
from datetime import datetime
from xml.sax.saxutils import escape as xml_escape

from facebook_scraper import get_posts
# Parameters
FBPAGE_ID = 'medium'  # Name of the target Facebook page (interpolated into facebook.com/<FBPAGE_ID>)
# Number of result *pages* requested from the scraper (passed as get_posts(pages=...)).
# This is a page count, not a post count.
NUMOFPAGES = 3
EXPORTFILE = f'{FBPAGE_ID}-fb-page-export.xml'  # Output file name, e.g. FB_PAGE_NAME-fb-page-export.xml
TITLELEN = 16  # Maximum number of characters of the post text copied into the WordPress title
CATEGORY = 'Imported from Fb Page'  # WordPress category assigned to every imported post
# URL-encoded form of CATEGORY, written as the category element's "nicename" attribute
CATEGORY_SLUG = urllib.parse.quote_plus(CATEGORY)
COOKIES = '.cookies.json'  # Path of the cookies file handed to the scraper
def _cdata(value):
    """Return *value* wrapped in an XML CDATA section.

    A literal ``]]>`` inside the payload would terminate the section early,
    so it is split across two adjacent CDATA sections.
    """
    return '<![CDATA[{}]]>'.format(str(value).replace(']]>', ']]]]><![CDATA[>'))


def _post_title(text, limit):
    """Return *text* cut to *limit* characters, with '...' appended when cut."""
    if len(text) > limit:
        return '{}...'.format(text[:limit])
    return text


print(f'Start scraping https://www.facebook.com/{FBPAGE_ID}\n')
# NOTE(review): get_posts is documented as returning an iterator, so the posts
# are presumably fetched while the loop below consumes it rather than here --
# the "Finish" message may therefore be printed before any scraping happens.
fb_posts = get_posts(FBPAGE_ID, pages=NUMOFPAGES, timeout=30, cookies=COOKIES)
print('\nFinish Facebook scrapping\n')
print(f'\nStart convert and output to {EXPORTFILE}\n')

# The category element is the same for every item, so build it once.
category_xml = '<category domain="category" nicename="{}">{}</category>'.format(
    CATEGORY_SLUG, _cdata(CATEGORY))

# Explicit UTF-8 so the bytes on disk match the encoding declared in the XML
# prolog regardless of the platform default; `with` guarantees the file is
# closed even if processing a post raises.
with open(EXPORTFILE, 'w', encoding='utf-8') as x:
    # Write the File Header
    x.write('<?xml version="1.0" encoding="UTF-8" ?>')
    x.write(
        '<rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wp="http://wordpress.org/export/1.2/">'
    )
    x.write('<channel><wp:wxr_version>1.2</wp:wxr_version>')

    # Process Web Scraping
    for idx, post in enumerate(fb_posts):
        # Normalise fields that may be None so the rest of the loop can treat
        # them uniformly (the original crashed on len(None) for text-less posts).
        text = post['text'] or ''
        images = post['images'] or []
        title = _post_title(text, TITLELEN)

        # Use the FB post time as the WP post date; fall back to the scrape
        # time (UTC) when the scraper could not determine it.
        post_time = post['time']
        if isinstance(post_time, datetime):
            post_date = post_time.strftime('%Y-%m-%d %H:%M:%S')
        elif post_time:
            post_date = str(post_time)
        else:
            post_date = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

        # Print essential values to screen
        print('#{}'.format(idx))
        print('title: {}'.format(title))
        print('fb_url: {}'.format(post['post_url']))
        if post['image']:
            print('image: {}'.format(post['image']))
        if images:
            print('images: {}'.format(images))
        print(text)
        print('wp:post_name: {}'.format(post['post_id']))
        print('wp:post_date_gmt: {}'.format(post_date))
        print('wp:status: draft')

        # Post body: one <p><img></p> per image, followed by the text.
        # Scraped values are HTML-escaped so they cannot break the markup.
        img_tags = ''.join(
            '<p><img src="{}"></p>'.format(html.escape(str(img), quote=True))
            for img in images)
        body_html = '{}<p>{}</p>'.format(img_tags, html.escape(text, quote=False))

        # "--" is not allowed inside an XML comment; %2D is the URL-encoded hyphen.
        safe_url = str(post['post_url']).replace('--', '-%2D')

        # Assemble the whole item first and write it in one go, so a failure
        # while processing a post never leaves a half-written <item> behind.
        item_parts = [
            '<!-- ITEM #{} -->'.format(idx),
            '<item>',
            # First TITLELEN characters of the text as the WP post title
            '<title>{}</title>'.format(xml_escape(title)),
            '<content:encoded>{}</content:encoded>'.format(_cdata(body_html)),
            # FB post ID as the WP post name
            '<wp:post_name>{}</wp:post_name>'.format(xml_escape(str(post['post_id']))),
            '<wp:post_date_gmt>{}</wp:post_date_gmt>'.format(_cdata(post_date)),
            # Import as a draft, with comments open, as a regular post
            '<wp:status><![CDATA[draft]]></wp:status>',
            '<wp:comment_status><![CDATA[open]]></wp:comment_status>',
            '<wp:post_type><![CDATA[post]]></wp:post_type>',
            # Assign the category in WordPress
            category_xml,
            # Keep the original FB post URL as an XML comment
            '<!-- orig_fb_post_url="{}" -->'.format(safe_url),
            '</item>',
        ]
        x.write(''.join(item_parts))

    # Write File Footer
    x.write('</channel>')
    x.write('</rss>')

print('\nComplete!')
<?xml version="1.0" encoding="UTF-8" ?><rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wp="http://wordpress.org/export/1.2/"><channel><wp:wxr_version>1.2</wp:wxr_version><!-- ITEM #0 --><item><title>The key to succe...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*7ZWFAD12i7xHcbE1FGySoA.jpeg"></p><p>The key to success is found in being persistently generous, Mike Thompson writes.
FORGE.MEDIUM.COM
How to Stand Out Without Showing Off</p>]]></content:encoded><wp:post_name>4474666145878509</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 22:51:29]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474666145878509" --></item><!-- ITEM #1 --><item><title>"The consistency...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*JqCG2d_E6UxOyJOy"></p><p>"The consistency with which I have been racially profiled while wearing sweatpants is so predictable, that I sometimes even bet money with my white friends that I‘ll get stopped. Guess what? I’ve won every single bet." —Rebecca Stevens A.
MEDIUM.COM
I Envy My White Friends Who Wear Sweatpants To Travel</p>]]></content:encoded><wp:post_name>4474558489222608</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 21:51:30]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474558489222608" --></item><!-- ITEM #2 --><item><title>Naomi Osaka, Ste...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*690_67v6Gf0eeetZ"></p><p>Naomi Osaka, Steph Curry, and Roger Federer have set a precedent for professional athletes to speak up about the need to take a break as part of their healing, placing their health above the push to perform.
Physician Dr. Amitha Kalaichandran on how healing requires rest:
ELEMENTAL.MEDIUM.COM
When Recovery Requires Rest</p>]]></content:encoded><wp:post_name>4474441485900975</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 20:51:31]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474441485900975" --></item><!-- ITEM #3 --><item><title>"The victor will...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*aVnABr6yNfJZDFknJNLsoQ.png"></p><p>"The victor will write the history of January 6—and likely lead us after Biden. And the loser’s story will never be told again." —Julio Vincent Gambuto
JULIOVINCENT.MEDIUM.COM
The Great Story War Is Raging</p>]]></content:encoded><wp:post_name>4474310852580705</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 19:51:32]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474310852580705" --></item><!-- ITEM #4 --><item><title>➡️ Fast is bette...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*Xiwy-2eHyIMj83rZ"></p><p>➡️ Fast is better than good
➡️ Unlearn what you know about technical debt
➡️ There aren’t stupid questions
SVPINO.COM
Lessons learned from the smartest Software Engineer I’ve met</p>]]></content:encoded><wp:post_name>4474179275927196</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 18:51:33]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474179275927196" --></item><!-- ITEM #5 --><item><title>Writer Susan Orl...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*lqj5dHfaThR33v_5OVH1Iw@2x.jpeg"></p><p>Writer Susan Orlean has realized that her dog is now older than she is—in dog years.
"Soon she won’t just be a little older than me; she will be much older than me, moving past our mutual middle age and into something more fragile. I miss her already."
SUSANORLEAN.MEDIUM.COM
Lessons from an Old Dog About Creaky Bones and Graying Hair</p>]]></content:encoded><wp:post_name>4474039199274537</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 17:51:34]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474039199274537" --></item><!-- ITEM #6 --><item><title>Yoga asks us to ...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*_mueb5znFVvESZ1q"></p><p>Yoga asks us to stay still, which can mimic a defensive state—explaining why survivors find yoga to be triggering.
But strength training and/or a trauma-sensitive yoga practice *can* likely help those living with trauma, Laura Khoudari writes:
ELEMENTAL.MEDIUM.COM
Why Yoga Can Be Triggering for Trauma Survivors</p>]]></content:encoded><wp:post_name>4473881829290274</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 16:51:35]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473881829290274" --></item><!-- ITEM #7 --><item><title>Having a time cr...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*8Jl_l9AXtnaOfWl4Lep7SA.jpeg"></p><p>Having a time crunch can actually make your writing better, Sadie Hoagland writes.
MEDIUM.COM
On Writing Without Solitude</p>]]></content:encoded><wp:post_name>4473731309305326</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 14:51:36]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473731309305326" --></item><!-- ITEM #8 --><item><title>"Identify one sk...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*z0aDFyiQ-QppR_TMdt3IQQ.jpeg"></p><p>"Identify one skill you’d like to improve, carve out a few hours a day, and do it until you’re no longer bad at it." —Mike Thompson
MEDIUM.COM
If You Want to Stand Out, Put Your Head Down</p>]]></content:encoded><wp:post_name>4473569789321478</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 13:51:36]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473569789321478" --></item><!-- ITEM #9 --><item><title>"The advent of g...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*v624MIXwjDL_0Yq_"></p><p>"The advent of global positioning systems (GPS) means we’re getting worse at learning routes. It’s been suggested that using navigation systems when driving creates ‘inattention blindness,' a failure to 'see' elements in the environment.’" —Steph 丨凌姿
STEPHSTEPHWONG.MEDIUM.COM
Have we lost our way? Navigation and wayfinding in the 21st century</p>]]></content:encoded><wp:post_name>4473422929336164</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 12:51:37]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473422929336164" --></item></channel></rss>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment