Created
July 8, 2011 13:51
-
-
Save thomasfl/1071868 to your computer and use it in GitHub Desktop.
Migrering av summerschool.uio.no
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'vortex_migration/static' | |
# To run, first download the content of the site to local disk with wget: | |
# | |
# $ wget --mirror -p --html-extension --convert-links --force-directories -e robots=off -P . http://www.summerschool.uio.no/
# then run this script.
# | |
# Custom code for migrating the www.summerschool.uio.no site to Vortex:
# Site-specific extraction rules for migrating www.summerschool.uio.no.
#
# Every extract_* method reads the parsed page held in @doc; the title and
# introduction extractors also remove the node they consumed, so the same
# content is not repeated by extract_body.
# NOTE(review): @doc is presumably a Nokogiri document assigned by
# StaticSiteMigration before these methods are called - confirm in the base class.
class SummerSchoolMigration < StaticSiteMigration
  # Migrate all files with .html extension as articles in vortex.
  #
  # @param filename [String] name or path of a mirrored file
  # @return [String, nil] the matched ".html" suffix (truthy), or nil when
  #   the name does not end in .html
  def is_article?(filename)
    filename[/\.html$/]
  end

  # Returns the page title.
  #
  # Uses the first <h3> directly under #mainContent and removes it from the
  # document. When there is no such heading, falls back to the text of
  # "#header>h2" with any leading "... :: " prefix stripped.
  #
  # @return [String] the title text
  def extract_title
    title_element = @doc.css("#mainContent>h3").first
    if title_element
      title = title_element.text
      title_element.remove # Remove title from body
      title
    else
      @doc.css("#header>h2").text.gsub(/.* :: /, '')
    end
  end

  # Returns the introduction taken from the first <p> directly under
  # #mainContent, and removes the consumed node from the document.
  #
  # When the paragraph's first child is a <strong> element, only that element
  # is used (and removed); otherwise the whole paragraph is used.
  #
  # @return [String] inner HTML of the introduction, or "" when there is no
  #   paragraph
  def extract_introduction
    first_paragraph = @doc.css("#mainContent>p").first
    return "" unless first_paragraph

    lead = first_paragraph.children.first
    # This must be a comparison: the original used `=`, which renamed the node
    # to "strong" and made the condition always true. `lead` is nil for an
    # empty paragraph, hence the guard.
    source = (lead && lead.name == "strong") ? lead : first_paragraph
    intro = source.inner_html
    source.remove
    intro
  end

  # Returns the article body: the inner HTML of #mainContent as it stands
  # when this method is called.
  #
  # @return [String] inner HTML of #mainContent, or "" when the page has no
  #   such element (consistent with the other extractors, which also return
  #   "" for missing content)
  def extract_body
    main_content = @doc.css("#mainContent").first
    main_content ? main_content.inner_html : ""
  end
end
# Run the migration.
# NOTE(review): the arguments are presumably (local mirror directory written
# by wget, destination URL to publish to) - confirm against StaticSiteMigration#initialize.
local_mirror = '../site/www.summerschool.uio.no/'
destination = 'https://www-dav.vortex-demo.uio.no/konv/iss/'
SummerSchoolMigration.new(local_mirror, destination).start
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment