Skip to content

Instantly share code, notes, and snippets.

@kamawanu
Last active February 6, 2016 03:54
Show Gist options
  • Save kamawanu/637384 to your computer and use it in GitHub Desktop.
Save kamawanu/637384 to your computer and use it in GitHub Desktop.
Clean up the HTML of Blogger (blogspot) posts.
#!/usr/bin/env python
import sys, logging
sys.path.insert(0,"./gdata-2.0.11.final.zip/src")
from gdata import service
import gdata
import atom
import simplejson
# Load the login arguments from auth.json. The decoded JSON value is unpacked
# positionally into GDataService below, so it must be a JSON array
# (presumably [email, password] -- confirm against the gdata signature).
# The file is read inside a `with` block so the handle is closed promptly
# instead of being left for the garbage collector.
with open("auth.json") as auth_file:
    accountinfo = simplejson.loads(auth_file.read())

# Configure a generic GData client for the Blogger service and log in.
blogger_service = service.GDataService(*accountinfo)
blogger_service.source = 'exampleCo-exampleApp-1.0'
blogger_service.service = 'blogger'
blogger_service.account_type = 'GOOGLE'
blogger_service.server = 'www.blogger.com'
blogger_service.ProgrammaticLogin()
def get_blogs(blogger_service):
    """Return a list of ``(blog_id, title)`` tuples from the default blogs feed.

    Requests '/feeds/default/blogs' through *blogger_service*, prints the
    feed's title, and builds one tuple per feed entry. The blog id is the
    last '/'-separated segment of the entry's self link.
    """
    blogs_query = service.Query()
    blogs_query.feed = '/feeds/default/blogs'
    blog_feed = blogger_service.Get(blogs_query.ToUri())
    print(blog_feed.title.text)
    return [
        (item.GetSelfLink().href.rsplit("/", 1)[-1], item.title.text)
        for item in blog_feed.entry
    ]
# Fetch the (blog_id, title) tuples once, at module level; the cleanup loop
# further down iterates over this list. Note that this performs a request
# through blogger_service as soon as the script runs.
###PrintUserBlogTitles(blogger_service)
blogs = get_blogs(blogger_service)
def get_posts(blogger_service, blog_id):
    """Return one tuple per entry of the blog's default posts feed.

    Each tuple is ``(title text, content text, edit link, entry)``, where the
    edit link is whatever ``entry.GetEditLink()`` returns and the last item
    is the feed entry object itself.

    Only the entries contained in the single feed returned by ``GetFeed``
    are included; no further pages are requested here.
    """
    feed_path = ''.join(('/feeds/', blog_id, '/posts/default'))
    posts_feed = blogger_service.GetFeed(feed_path)
    return [
        (post.title.text, post.content.text, post.GetEditLink(), post)
        for post in posts_feed.entry
    ]
def UpdatePostTitle(blogger_service, entry_to_update, new_title='The REAL answer'):
    """Replace the entry's title and send the entry to its edit link.

    The title is set to an ``atom.Title`` built from ``'xhtml'`` and
    *new_title*. Returns whatever ``blogger_service.Put`` returns.
    """
    replacement_title = atom.Title('xhtml', new_title)
    entry_to_update.title = replacement_title
    edit_uri = entry_to_update.GetEditLink().href
    return blogger_service.Put(entry_to_update, edit_uri)
import re

# Ordered (compiled pattern, replacement) rewrite rules. Each rule runs on
# the output of the previous one, so the order is significant. Compiled once
# here instead of being looked up again for every post.
_CLEANUP_RULES = [
    # Drop opening and closing <span>/<div> tags (their inner text is kept).
    (re.compile(r'</?(span|div)[^>]*>'), ""),
    # Merge back-to-back blockquotes into one, separated by a line break.
    (re.compile(r'</blockquote>((<br />)?)<blockquote>'), r'<br/>'),
    # Collapse runs of three or more line breaks down to two.
    (re.compile(r'(<br />){3,}'), r"<br /><br />"),
    # Strip whitespace that directly precedes a line break.
    (re.compile(r'\s+<br />'), r"<br />"),
    # Remove whitespace between two consecutive line breaks.
    (re.compile(r'<br />\s+<br />'), r"<br /><br />"),
]


def _clean_html(text):
    """Return *text* after applying every cleanup rule in order."""
    for pattern, replacement in _CLEANUP_RULES:
        text = pattern.sub(replacement, text)
    return text


# Walk every post of every blog and upload a cleaned body only when the
# cleanup actually changed something.
for blog_id, _blog_title in blogs:
    for _post_title, raw_content, _edit_link, entry in get_posts(blogger_service, blog_id):
        if raw_content is None:
            # A post without body text has nothing to clean; decoding None
            # would raise TypeError and abort the whole run.
            continue
        if isinstance(raw_content, unicode):
            # Already decoded; unicode(<unicode>, "utf-8") would raise.
            original = raw_content
        else:
            original = unicode(raw_content, "utf-8")
        cleaned = _clean_html(original)
        if cleaned != original:
            logging.warning(str(entry.title) + ":updated..")
            entry.content = atom.Content(content_type='html', text=cleaned)
            blogger_service.Put(entry, entry.GetEditLink().href)
wget https://github.com/kamawanu/gdata-python-client/archive/v2.zip
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment