# joeydi/batch.py

## batch.py
import os

import lxml.html
import urllib
import BeautifulSoup

from pprint import pprint
from wordpress_xmlrpc import Client, WordPressPage
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo


def get_doc_title(url):
    """Return a title for the page at *url*.

    The page is fetched and parsed, and the stripped text of its second
    ``<h1>`` element (index 1) is returned.
    NOTE(review): presumably the first ``<h1>`` is a site-wide banner --
    confirm against the scraped pages.

    If anything fails (the fetch, the parse, fewer than two ``<h1>``
    elements, or an ``<h1>`` whose ``.string`` is ``None``), the URL itself
    is returned with every occurrence of ``'http://fscb.com'`` removed.

    Surrounding whitespace on *url* is ignored, so a line read straight from
    a text file does not leak its trailing newline into the fallback title.
    """
    url = url.strip()
    try:
        doc = urllib.urlopen(url)
        try:
            soup = BeautifulSoup.BeautifulSoup(doc)
            return soup.findAll('h1')[1].string.strip()
        finally:
            # Always release the HTTP response, even when parsing fails.
            doc.close()
    except Exception:
        # Deliberate best-effort: any failure falls back to the URL path.
        return url.replace('http://fscb.com', '')


def get_doc_content(url):
    """Return the markup of the page's ``right-content`` element.

    The page at *url* is fetched and parsed, and ``str()`` of the first
    element whose ``id`` attribute is ``right-content`` is returned.

    Returns an empty string if anything fails (the fetch, the parse, or no
    element with that id).
    """
    try:
        doc = urllib.urlopen(url)
        try:
            soup = BeautifulSoup.BeautifulSoup(doc)
            return str(soup.findAll(id='right-content')[0])
        finally:
            # Always release the HTTP response, even when parsing fails.
            doc.close()
    except Exception:
        # Deliberate best-effort: any failure yields empty content.
        return ''


def create_page(client, title, content):
    """Publish a new WordPress page and return the result of the call.

    A ``WordPressPage`` is filled in with *title* and *content*, marked as
    ``'publish'``, wrapped in a ``NewPost`` request and sent through
    ``client.call``. Whatever ``client.call`` returns is passed back to the
    caller (presumably the new post's id -- confirm against the
    wordpress_xmlrpc documentation).
    """
    new_page = WordPressPage()
    new_page.title, new_page.content = title, content
    new_page.post_status = 'publish'

    request = NewPost(new_page)
    return client.call(request)


def process_file(in_file, wp):
    """Create one WordPress page for every URL listed in a text file.

    *in_file* is the path of a text file holding one URL per line. Each
    non-blank line is stripped of surrounding whitespace, its title and
    content are scraped with ``get_doc_title`` / ``get_doc_content``, and the
    result is posted through ``create_page`` using the client *wp*.

    ``'Success'`` or ``'Error on line: <url>'`` is printed for every URL
    processed. A failure on one URL does not stop the batch.

    Always returns ``None``.
    """
    # ``with`` closes the file even if reading it raises part-way through.
    with open(in_file, 'r') as url_file:
        for line in url_file:
            url = line.strip()
            if not url:
                # A blank line would fall through the scrapers' fallbacks and
                # be published as a page with no content, so skip it.
                continue
            try:
                title = get_doc_title(url)
                content = get_doc_content(url)
                create_page(wp, title, content)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # SystemExit still abort the batch.
                # Single-argument print(...) behaves the same as a Python 2
                # statement and as a Python 3 function call.
                print('Error on line: %s' % url)
            else:
                print('Success')

    return None


if __name__ == "__main__":
    # Script entry point: read the URL list first, then open the XML-RPC
    # client and hand both to the batch importer.
    # NOTE(review): the endpoint and credentials below look like
    # placeholders -- replace them before running.
    in_file = 'urls.txt'
    wp = Client('http://example.com/xmlrpc.php', 'username', 'password')
    process_file(in_file, wp)
	import os

	import lxml.html
	import urllib
	import BeautifulSoup

	from pprint import pprint
	from wordpress_xmlrpc import Client, WordPressPage
	from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
	from wordpress_xmlrpc.methods.users import GetUserInfo


	def get_doc_title(url):
	    """Return a title for the page at *url*.

	    The page is fetched and parsed, and the stripped text of its second
	    ``<h1>`` element (index 1) is returned.
	    NOTE(review): presumably the first ``<h1>`` is a site-wide banner --
	    confirm against the scraped pages.

	    If anything fails (the fetch, the parse, fewer than two ``<h1>``
	    elements, or an ``<h1>`` whose ``.string`` is ``None``), the URL
	    itself is returned with every occurrence of ``'http://fscb.com'``
	    removed.

	    Surrounding whitespace on *url* is ignored, so a line read straight
	    from a text file does not leak its trailing newline into the
	    fallback title.
	    """
	    url = url.strip()
	    try:
	        doc = urllib.urlopen(url)
	        try:
	            soup = BeautifulSoup.BeautifulSoup(doc)
	            return soup.findAll('h1')[1].string.strip()
	        finally:
	            # Always release the HTTP response, even when parsing fails.
	            doc.close()
	    except Exception:
	        # Deliberate best-effort: any failure falls back to the URL path.
	        return url.replace('http://fscb.com', '')


	def get_doc_content(url):
	    """Return the markup of the page's ``right-content`` element.

	    The page at *url* is fetched and parsed, and ``str()`` of the first
	    element whose ``id`` attribute is ``right-content`` is returned.

	    Returns an empty string if anything fails (the fetch, the parse, or
	    no element with that id).
	    """
	    try:
	        doc = urllib.urlopen(url)
	        try:
	            soup = BeautifulSoup.BeautifulSoup(doc)
	            return str(soup.findAll(id='right-content')[0])
	        finally:
	            # Always release the HTTP response, even when parsing fails.
	            doc.close()
	    except Exception:
	        # Deliberate best-effort: any failure yields empty content.
	        return ''


	def create_page(client, title, content):
	    """Publish a new WordPress page and return the result of the call.

	    A ``WordPressPage`` is filled in with *title* and *content*, marked
	    as ``'publish'``, wrapped in a ``NewPost`` request and sent through
	    ``client.call``. Whatever ``client.call`` returns is passed back to
	    the caller (presumably the new post's id -- confirm against the
	    wordpress_xmlrpc documentation).
	    """
	    new_page = WordPressPage()
	    new_page.title, new_page.content = title, content
	    new_page.post_status = 'publish'

	    request = NewPost(new_page)
	    return client.call(request)


	def process_file(in_file, wp):
	    """Create one WordPress page for every URL listed in a text file.

	    *in_file* is the path of a text file holding one URL per line. Each
	    non-blank line is stripped of surrounding whitespace, its title and
	    content are scraped with ``get_doc_title`` / ``get_doc_content``, and
	    the result is posted through ``create_page`` using the client *wp*.

	    ``'Success'`` or ``'Error on line: <url>'`` is printed for every URL
	    processed. A failure on one URL does not stop the batch.

	    Always returns ``None``.
	    """
	    # ``with`` closes the file even if reading it raises part-way through.
	    with open(in_file, 'r') as url_file:
	        for line in url_file:
	            url = line.strip()
	            if not url:
	                # A blank line would fall through the scrapers' fallbacks
	                # and be published as a page with no content, so skip it.
	                continue
	            try:
	                title = get_doc_title(url)
	                content = get_doc_content(url)
	                create_page(wp, title, content)
	            except Exception:
	                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
	                # SystemExit still abort the batch.
	                # Single-argument print(...) behaves the same as a Python 2
	                # statement and as a Python 3 function call.
	                print('Error on line: %s' % url)
	            else:
	                print('Success')

	    return None


	if __name__ == "__main__":
	    # Script entry point: read the URL list first, then open the XML-RPC
	    # client and hand both to the batch importer.
	    # NOTE(review): the endpoint and credentials below look like
	    # placeholders -- replace them before running.
	    in_file = 'urls.txt'
	    wp = Client('http://example.com/xmlrpc.php', 'username', 'password')
	    process_file(in_file, wp)