Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Django management command to import flat Drupal page sets into a wagtail tree
from datetime import datetime
import json
import sys

from bs4 import BeautifulSoup
import requests

from django.contrib.auth.models import User
from django.core.management.base import BaseCommand

from cms.models import StandardPage, StandardIndexPage
from people.models import Profile
"""
Import content from Drupal JSON URL.

Be sure to tweak the import URL first.

Assumes that we'll be deriving a hierarchy of pages from existing
Drupal URLs in the feed, e.g. titles such as:

    <a href="/academics/community-arts/visiting-artists">Visiting Artists</a>
    <a href="/academics/graduate/social-practice/chair">Meet the Chair</a>
    <a href="/academics/graduate/social-practice/apply">Apply</a>
    <a href="/academics/graduate/social-practice/curriculum">Curriculum</a>
    <a href="/academics/illustration/chair">Meet the Chair</a>
    <a href="/academics/interaction-design/internships/tips">Tips for Success</a>
    <a href="/academics/interaction-design/internships">Internships</a>
    <a href="/academics/interior-design/internships/guidelines">Guidelines</a>
    <a href="/academics/graphic-design/internships/guidelines">Guidelines</a>
    <a href="/academics/industrial-design/internships/guidelines">Guidelines</a>

Should end up generating a hierarchy like this, where the branches will be
StandardIndexPage and the leaves StandardPage:

    Community Arts
    Social Practice
    Meet the Chair

If satisfied with the import, use wagtail's management command to move the
tree to a permanent location:

    ./manage.py move_pages 78 11    (move_pages from_id to_id)

where from and to are the parent IDs.
"""
# URL of the Drupal JSON feed to import; left empty here and must be filled in
# before the command is run.
json_url = ""

# Slug of the existing StandardIndexPage under which imported pages are created.
import_slug = "imported-content"
class Command(BaseCommand):
    """Import a flat Drupal JSON page feed into a Wagtail page tree.

    Each feed node is expected to carry the keys 'Title' (an HTML link whose
    href is the Drupal URL path and whose text is the page title), 'Body',
    'Post date', 'Updated date' and 'Name' (the author's username).

    The URL path of every node is split into segments. All segments but the
    last become StandardIndexPage branches (created on demand) beneath the
    page whose slug is `import_slug`; the last segment becomes a StandardPage
    leaf holding the node's content.
    """

    help = "Import content from Drupal JSON URL."

    # strptime format of the 'Post date' / 'Updated date' strings in the feed.
    DATE_FORMAT = '%A, %B %d, %Y - %I:%M%p'

    def handle(self, *args, **options):
        """Confirm with the operator, wipe the previous import, then import every node."""
        print("JSON URL is '{u}'".format(u=json_url))
        print("New content will be imported under Wagtail parent page with slug '{p}'\n".format(p=import_slug))
        answer = input("Is this OK? (y/n) ")
        if answer != "y":
            # Anything other than an explicit "y" aborts before any data is touched.
            return

        # Start each run by deleting old imported content.
        for old_page in StandardIndexPage.objects.get(slug=import_slug).get_children():
            old_page.delete()

        # Pull JSON from Drupal; fail loudly on an HTTP error instead of trying
        # to parse an error page as JSON.
        response = requests.get(json_url)
        response.raise_for_status()
        json_data = json.loads(response.text)

        for node in json_data:
            self._import_node(node)

    def _import_node(self, node):
        """Create the StandardPage for one feed node, plus any missing ancestor pages."""
        # Title comes through as HTML link - extract pieces with BeautifulSoup and
        # split URL path into components which will dictate wagtail hierarchy.
        # e.g. <a href="/academics/graphic-design/internships/guidelines">Guidelines</a>
        link = BeautifulSoup(node['Title'], "html5lib").find('a')
        if link is None:
            print("Skipping node without a title link")
            return

        title = link.get_text()  # "Guidelines"
        urlpath = link.get('href', '').strip("/")  # academics/graphic-design/internships/guidelines
        parts = [part for part in urlpath.split("/") if part]  # List of segments
        if not parts:
            print("Skipping node with empty URL path")
            return

        # Split by position rather than by comparing slugs, so that a path whose
        # final segment also occurs earlier (e.g. a/guidelines/guidelines) is
        # still walked all the way down.
        *branches, leaf = parts

        # Compose python datetime objects from string-formatted dates
        post_datetime = datetime.strptime(node['Post date'], self.DATE_FORMAT)
        revised_datetime = datetime.strptime(node['Updated date'], self.DATE_FORMAT)

        # Get or create corresponding Django user
        user, created = User.objects.get_or_create(username=node['Name'])
        if created:
            Profile.objects.create(user=user)  # Also create a linked Profile

        # Every node starts its descent from the import root.
        parent = StandardIndexPage.objects.get(slug=import_slug)
        print("urlpath is ", urlpath)
        print("parent is ", parent)
        print("parts is ", parts)

        # Make sure all components in the path hierarchy exist, moving the
        # parent down one level per segment.
        for slug in branches:
            print("path part is ", slug)
            parent = self._get_or_create_branch(parent, slug)

        # A branch created for an earlier node may already occupy this slug
        # (e.g. ".../internships/tips" followed by ".../internships").
        # NOTE(review): skipping assumes sibling slugs must be unique - confirm
        # whether the existing page should be updated instead.
        if parent.get_children().filter(slug=leaf).exists():
            print("Page with slug", leaf, "already exists within parent, skipping")
            return

        print("Last element, creating final page")
        page = StandardPage()
        page.title = title
        page.slug = leaf
        page.body = node['Body']
        page.owner = user
        page.post_datetime = post_datetime
        page.revised_datetime = revised_datetime
        page.show_in_menus = True
        # Attaching to the parent is what actually saves the page into the tree.
        parent.add_child(instance=page)

    def _get_or_create_branch(self, parent, slug):
        """Return the child of `parent` with this slug, creating a StandardIndexPage if absent."""
        print("Getting or creating parent", slug)
        existing = parent.get_children().filter(slug=slug).first()
        if existing is not None:
            print("Page with this slug already exists within parent")
            print("New parent is ", existing)
            return existing

        print("Page does not already exist within parent, creating")
        print("Parent is ", parent, ", slug is ", slug)
        page = StandardIndexPage()
        page.title = slug.title().replace("-", " ")  # Convert slug into usable page title
        page.slug = slug
        page.show_in_menus = True
        parent.add_child(instance=page)
        return page
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment