Created
November 25, 2020 01:09
-
-
Save ehibun/71eb16019729ba29ac3fc50b84783846 to your computer and use it in GitHub Desktop.
WordPress to Statamic v3 importer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Console\Commands; | |
use League\HTMLToMarkdown\HtmlConverter; | |
use Illuminate\Console\Command; | |
use Illuminate\Support\Facades\Http; | |
use Illuminate\Support\Facades\Cache; | |
use Illuminate\Support\Carbon; | |
use Symfony\Component\HttpFoundation\File\UploadedFile; | |
use Statamic\Facades\Site; | |
use Statamic\Facades\Asset; | |
use Statamic\Facades\AssetContainer; | |
use Statamic\Facades\Entry; | |
use Statamic\Facades\Term; | |
use Statamic\Facades\Taxonomy; | |
use Statamic\Facades\User; | |
class MigrateWordPress extends Command | |
{ | |
/**
 * Artisan signature; the command is run as `php artisan migrate:wordpress`.
 *
 * @var string
 */
protected $signature = 'migrate:wordpress';

/**
 * One-line summary of what the command does.
 *
 * @var string
 */
protected $description = 'Scrapes content from a WordPress API and imports it into Statamic';

/** @var array WordPress category ID => slugified category slug, filled by getCategoriesFromWordPress(). */
public $categories = [];

/** @var array WordPress tag ID => slugified tag slug, filled by getTagsFromWordPress(). */
public $tags = [];

/** @var array WordPress user ID => Statamic user ID, filled by getUsersFromWordPress(). */
public $users = [];

/** @var array WordPress media ID => asset path inside the "assets" container, filled by getMediaFromWordPress(). */
public $media = [];

/** @var array Not read or written by any method visible in this file. */
public $genres = [];

/** @var array Not read or written by any method visible in this file. */
public $studios = [];
/**
 * Build the command.
 *
 * No extra setup happens here; construction is delegated to the parent
 * command class.
 *
 * @return void
 */
public function __construct()
{
    parent::__construct();
}
/**
 * Run the full import.
 *
 * Categories, tags, users and media are imported first because the post
 * import reads the lookup arrays those steps fill ($this->categories,
 * $this->tags, $this->users, $this->media).
 *
 * @return mixed
 */
public function handle()
{
    // Lookup data the posts depend on.
    $this->getCategoriesFromWordPress();
    $this->getTagsFromWordPress();
    $this->getUsersFromWordPress();
    $this->getMediaFromWordPress();

    // The posts themselves.
    $this->getPostsFromWordPress();

    $this->info('OK');
}
/**
 * Import one page of WordPress posts as Statamic entries, then continue
 * with the next page.
 *
 * The raw API page is cached for two hours under "wp-import-posts-{page}"
 * and the page count from the X-WP-TotalPages header under
 * "wp-import-posts-pages". Posts whose slug already exists in the target
 * collection are skipped.
 *
 * Reads $this->categories, $this->tags, $this->users and $this->media, so
 * the corresponding import steps must have run first.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getPostsFromWordPress($page = 1)
{
    $site = Site::default();
    $totalPages = Cache::get("wp-import-posts-pages", 1);

    if ($totalPages && $page > $totalPages) {
        return;
    }

    $response = Cache::remember(
        "wp-import-posts-$page",
        7200,
        function () use ($page) {
            $this->info("Downloading posts (page $page)");

            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/posts?per_page=100&page=' .
                    $page,
            );

            Cache::set(
                'wp-import-posts-pages',
                $res->header('X-WP-TotalPages'),
                7200,
            );

            return $res->json();
        },
    );

    // The download above may have just stored the real page count; re-read
    // it so the progress line does not show the stale default.
    $totalPages = Cache::get('wp-import-posts-pages', $totalPages);

    $this->info(
        "== Importing posts (page " . $page . " of " . $totalPages . ")",
    );

    foreach ($response as $post) {
        // Reset for every post so a post without a usable category never
        // inherits the previous post's collection or blueprint.
        $collection = null;
        $blueprint = null;

        foreach ($post['categories'] as $categoryId) {
            $categorySlug = $this->categories[$categoryId] ?? null;
            if ($categorySlug === null) {
                continue;
            }

            // Later categories still win, but a category that maps to
            // nothing must not wipe out a match found earlier.
            $collection =
                $this->_getCollectionFromCategory($categorySlug) ?? $collection;
            $blueprint =
                $this->_getBlueprintFromCategory($categorySlug) ?? $blueprint;
        }

        if (!$collection) {
            $this->error('Collection not found');
            dd($post);
        }

        $slug = str_slug($post['slug'], '-');

        if (Entry::findBySlug($slug, $collection)) {
            continue;
        }

        $tags = [];
        foreach ($post['tags'] as $tagId) {
            if (!isset($this->tags[$tagId])) {
                $this->warn("Couldn't import tag $tagId: unknown tag ID");
                continue;
            }
            $tags[] = $this->tags[$tagId];
        }

        // ACF data is optional; treat anything that is not an array as absent.
        $acf = is_array($post['acf'] ?? null) ? $post['acf'] : [];

        $genres = $this->_syncTermsFromList($acf['genre'] ?? null, 'genres', 'genre');
        $studios = $this->_syncTermsFromList($acf['studio'] ?? null, 'studios', 'studio');

        $title = html_entity_decode($post['title']['rendered']);

        $entry = Entry::make()
            ->collection($collection)
            ->slug($slug)
            // Must be a real boolean: the string 'false' is truthy and
            // would mark drafts as published.
            ->published($post['status'] === 'publish')
            ->locale($site->handle())
            ->date(Carbon::parse($post['date_gmt'], 'UTC'))
            ->blueprint($blueprint);

        $data = [
            'title' => $title,
            'excerpt' => $this->_htmlToMarkdown(
                $post['excerpt']['rendered'],
            ),
            'body_text' => [
                [
                    'type' => 'set',
                    'attrs' => [
                        'values' => [
                            'type' => 'markdown_html',
                            'content' => $this->_htmlToMarkdown(
                                $post['content']['rendered'],
                            ),
                        ],
                    ],
                ],
            ],
            // Store an empty list rather than [null] when nothing matched.
            'featured_image' => isset($this->media[$post['featured_media']])
                ? [$this->media[$post['featured_media']]]
                : [],
            'authors' => isset($this->users[$post['author']])
                ? [$this->users[$post['author']]]
                : [], // @TODO account for multiple authors
            'tags' => $tags,
            'updated_at' => Carbon::parse(
                $post['modified_gmt'],
                'UTC',
            )->format('U'),
        ];

        switch ($collection) {
            case 'news':
            case 'features':
            case 'articles':
                $entry->data($data);
                break;
            case 'reviews':
                $entry->data(
                    array_merge($data, [
                        'publisher' =>
                            $acf['anime_publisher'] ??
                            ($acf['game_publisher'] ?? null),
                        'developer' => $acf['developer'] ?? null,
                        'book_authors' => $acf['author(s):_'] ?? null,
                        'genres' => $genres, // @TODO: Convert to taxonomy
                        'studios' => $studios, // @TODO: Convert to taxonomy
                        'type' => $acf['type'] ?? null,
                        'original_vintage' => $acf['year'] ?? null,
                        'format' => $acf['format:_'] ?? null,
                        'platform' => $acf['platform'] ?? null,
                        'languages' => isset($acf['language_options:'])
                            ? $this->_parseLanguageString(
                                $acf['language_options:'],
                            )
                            : null,
                        'age_rating' => $acf['bbfc_rating:_'] ?? null,
                        'material_length' =>
                            $acf['material_length:_'] ?? null,
                        'running_time' =>
                            $acf['running_time_(minutes)'] ?? null,
                        'score' => $acf['score:_'] ?? null,
                    ]),
                );
                break;
            default:
                die('Collection not found for ' . $title);
        }

        $entry->save();

        $this->line("Imported " . $title);
    }

    $this->getPostsFromWordPress($page + 1);
}

/**
 * Turn a comma-separated list of names into term slugs, creating any term
 * that does not exist yet in the given taxonomy.
 *
 * @param mixed  $list     Comma-separated names; anything that is not a string yields no terms.
 * @param string $taxonomy Taxonomy handle the terms belong to.
 * @param string $label    Singular name used in the debug output.
 * @return array Slugs of all non-empty names in the list, in input order.
 */
private function _syncTermsFromList($list, $taxonomy, $label)
{
    if (!is_string($list)) {
        return [];
    }

    $slugs = [];

    foreach (explode(',', $list) as $title) {
        $title = trim($title);
        $slug = str_slug($title, '-');

        // Skip blanks and names that slugify to nothing.
        if ($title === '' || $slug === '') {
            continue;
        }

        $slugs[] = $slug;

        if (Term::findBySlug($slug, $taxonomy)) {
            continue;
        }

        $this->line("[DEBUG] saving new $label: $title");

        Term::make()
            ->taxonomy($taxonomy)
            ->slug($slug)
            ->data(['title' => $title])
            ->save();
    }

    return $slugs;
}
/**
 * Import one page of WordPress categories as Statamic "categories" terms,
 * then continue with the next page.
 *
 * Every category is recorded in $this->categories (WordPress ID => slug),
 * including the ones that are not imported as terms because
 * _getSkippedCategory() flags them. The raw API page and the page count
 * are cached for two hours.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getCategoriesFromWordPress($page = 1)
{
    $taxonomy = Taxonomy::find('categories');
    $totalPages = Cache::get("wp-import-categories-pages", 1);

    if ($totalPages && $page > $totalPages) {
        return;
    }

    $response = Cache::remember(
        "wp-import-categories-$page",
        7200,
        function () use ($page) {
            $this->info("Trying to get categories page $page");

            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/categories?per_page=100&page=' .
                    $page,
            );

            Cache::set(
                'wp-import-categories-pages',
                $res->header('X-WP-TotalPages'),
                7200,
            );

            return $res->json();
        },
    );

    // A closure cannot update the caller's variable through a by-value
    // capture, so pick up the freshly stored page count from the cache.
    $totalPages = Cache::get('wp-import-categories-pages', $totalPages);

    $this->info(
        "== Importing categories (page " .
            $page .
            " of " .
            $totalPages .
            ")",
    );

    foreach ($response as $category) {
        $slug = str_slug($category['slug'], '-');

        $this->categories[$category['id']] = $slug;

        if ($this->_getSkippedCategory($slug)) {
            // This will be a collection - skipping import.
            continue;
        }

        if (Term::findBySlug($slug, 'categories')) {
            continue;
        }

        Term::make()
            ->taxonomy($taxonomy)
            ->slug($slug)
            ->data(['title' => $category['name']])
            ->save();

        $this->line("Imported " . $category['name']);
    }

    $this->getCategoriesFromWordPress($page + 1);
}
/**
 * Import one page of WordPress tags as Statamic "tags" terms, then
 * continue with the next page.
 *
 * Every tag is recorded in $this->tags (WordPress ID => slug). Tags whose
 * slug is empty after slugifying, or that already exist, are not saved
 * again. The raw API page and the page count are cached for two hours.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getTagsFromWordPress($page = 1)
{
    $taxonomy = Taxonomy::find('tags');
    $totalPages = Cache::get("wp-import-tags-pages", 1);

    if ($totalPages && $page > $totalPages) {
        return;
    }

    $response = Cache::remember(
        "wp-import-tags-$page",
        7200,
        function () use ($page) {
            $this->info("Downloading tags (page $page)");

            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/tags?per_page=100&page=' .
                    $page,
            );

            Cache::set(
                'wp-import-tags-pages',
                $res->header('X-WP-TotalPages'),
                7200,
            );

            return $res->json();
        },
    );

    // A closure cannot update the caller's variable through a by-value
    // capture, so pick up the freshly stored page count from the cache.
    $totalPages = Cache::get('wp-import-tags-pages', $totalPages);

    $this->info("== Importing tags (page $page of $totalPages)");

    foreach ($response as $tag) {
        $slug = str_slug($tag['slug'], '-');

        $this->tags[$tag['id']] = $slug;

        try {
            // Check the slug that would actually be saved, not the raw one.
            if ($slug === '' || Term::findBySlug($slug, 'tags')) {
                continue;
            }

            Term::make()
                ->taxonomy($taxonomy)
                ->slug($slug)
                ->data(['title' => $tag['name']])
                ->save();
        } catch (\Exception $exception) {
            // Stop and dump the offending tag for inspection.
            dd($tag, $slug, $exception);
        }

        $this->line("Imported " . $tag['name']);
    }

    $this->getTagsFromWordPress($page + 1);
}
/**
 * Import one page of WordPress users as Statamic users, then continue
 * with the next page of users.
 *
 * Every user is recorded in $this->users (WordPress ID => Statamic user
 * ID). Users that already exist (matched by e-mail) are reused rather than
 * recreated. Unlike the other import steps this one is not cached.
 *
 * NOTE(review): the basic-auth credentials are hard-coded here and look
 * like placeholders - confirm they come from configuration before running
 * this against a real site.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getUsersFromWordPress($page = 1)
{
    $response = Http::withBasicAuth(
        'Blank',
        'Password',
    )->get(
        'https://example.org/wp-json/wp/v2/users?per_page=100&context=edit&page=' .
            $page,
    );

    // A missing header becomes 0, which ends the import just like before.
    $totalPages = (int) $response->header('X-WP-TotalPages');

    if ($page > $totalPages) {
        return;
    }

    $this->info("== Importing users (page $page of $totalPages)");

    foreach ($response->json() as $user) {
        if ($statamicUser = User::findByEmail($user['email'])) {
            $this->users[$user['id']] = $statamicUser->id();
            continue;
        }

        $statamicUser = User::make()
            ->email($user['email'])
            ->data([
                'name' => $user['name'],
                'first_name' => $user['first_name'],
                'last_name' => $user['last_name'],
                'description' => $user['description'],
            ]);

        // Save separately so the ID lookup below does not depend on what
        // save() happens to return.
        $statamicUser->save();

        $this->users[$user['id']] = $statamicUser->id();

        $this->line("Imported " . $user['name']);
    }

    // Continue with the next page of *users*; the original recursed into
    // the category import here, so only the first page was ever imported.
    if ($page < $totalPages) {
        $this->getUsersFromWordPress($page + 1);
    }
}
/**
 * Import one page of the WordPress media library into the "assets"
 * container, then continue with the next page.
 *
 * Every media item is recorded in $this->media (WordPress ID => asset
 * path), even when the asset already exists or its download fails. The
 * asset path is the URL path of the file with the leading slash removed;
 * for everything except VideoPress items the "app/uploads/" prefix is
 * stripped as well. The raw API page and the page count are cached for two
 * hours.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getMediaFromWordPress($page = 1)
{
    $totalPages = Cache::get("wp-import-media-pages", 1);

    if ($page > $totalPages) {
        return;
    }

    $response = Cache::remember(
        "wp-import-media-$page",
        7200,
        function () use ($page) {
            $this->info("Downloading media (page $page)");

            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/media?per_page=100&page=' .
                    $page,
            );

            Cache::set(
                'wp-import-media-pages',
                $res->header('X-WP-TotalPages'),
                7200,
            );

            return $res->json();
        },
    );

    // A closure cannot update the caller's variable through a by-value
    // capture, so pick up the freshly stored page count from the cache.
    $totalPages = Cache::get('wp-import-media-pages', $totalPages);

    $this->info("== Importing media (page $page of $totalPages)");

    // The container does not change between items; look it up once.
    $container = AssetContainer::findByHandle('assets');

    foreach ($response as $media) {
        $isVideoPress = $media['mime_type'] === 'video/videopress';

        $locationUrl = $isVideoPress
            ? ($media['media_details']['original']['url'] ?? $media['source_url'])
            : $media['source_url'];

        $urlPath = parse_url(
            stripslashes(urldecode($locationUrl)),
            PHP_URL_PATH,
        );

        $lastSlash = is_string($urlPath) ? strrpos($urlPath, '/') : false;
        $file = $lastSlash === false ? '' : substr($urlPath, $lastSlash + 1);

        if ($file === '') {
            $this->warn("Couldn't work out a file name for media " . $media['id']);
            continue;
        }

        // Build the path from the URL path directly so the folder and file
        // are joined by exactly one slash.
        $path = $isVideoPress
            ? ltrim($urlPath, '/')
            : ltrim(str_replace('app/uploads/', '', $urlPath), '/');

        $this->media[$media['id']] = $path;

        if (Asset::find('assets::' . $path)) {
            continue;
        }

        $urlToDownload =
            $media['media_details']['original']['url'] ??
            $media['source_url'];
        $tempFile = sys_get_temp_dir() . '/' . $file;

        try {
            $download = Http::get(stripslashes(urldecode($urlToDownload)));

            // Do not store an error page as if it were the media file.
            if (!$download->successful()) {
                $this->warn(
                    "Couldn't download $file: HTTP " . $download->status(),
                );
                continue;
            }

            if (file_put_contents($tempFile, $download->body()) === false) {
                $this->warn("Couldn't write $file to the temp directory");
                continue;
            }
        } catch (\Exception $e) {
            // Best effort: report the failure and move on to the next item.
            $this->warn("Couldn't download $file: " . $e->getMessage());
            continue;
        }

        $image = new UploadedFile($tempFile, $file);
        $container->makeAsset($path)->upload($image);

        $this->line("Imported " . $file);
    }

    $this->getMediaFromWordPress($page + 1);
}
/**
 * Convert an HTML fragment to Markdown.
 *
 * The converter is configured with strip_tags and hard_break enabled and
 * with the "atx" header style.
 *
 * @param string $htmlString HTML to convert.
 * @return string The converter's Markdown output.
 */
private function _htmlToMarkdown($htmlString)
{
    $converter = new HtmlConverter();
    $config = $converter->getConfig();

    $options = [
        'strip_tags' => true,
        'hard_break' => true,
        'header_style' => 'atx',
    ];

    foreach ($options as $name => $value) {
        $config->setOption($name, $value);
    }

    return $converter->convert($htmlString);
}
/**
 * Tell whether a category slug is one of those that are not imported as
 * taxonomy terms.
 *
 * @param string $slug Slugified category slug.
 * @return bool True for "reviews", "news", "features" and "uncategorized".
 */
private function _getSkippedCategory($slug)
{
    $skipped = ['reviews', 'news', 'features', 'uncategorized'];

    return in_array($slug, $skipped);
}
/**
 * Map a category slug to the handle of the collection its posts go into.
 *
 * @param string $slug Slugified category slug.
 * @return string|null "reviews", "news", "features" or "articles"; null
 *                     when the category does not map to a collection.
 */
private function _getCollectionFromCategory($slug)
{
    // Collection handle => category slugs that belong to it. Checked in
    // this order; the first group containing the slug wins.
    $slugsByCollection = [
        'reviews' => [
            'reviews',
            'anime-review',
            'film-review',
            'manga-review',
            'book-review',
            'game-review',
        ],
        'news' => ['news'],
        'features' => ['features', 'opinions', 'roundups', 'interviews'],
        'articles' => [
            'aukn-announcements',
            'podcasts',
            'giveaways',
            'miscellaneous',
        ],
    ];

    foreach ($slugsByCollection as $collection => $slugs) {
        if (in_array($slug, $slugs)) {
            return $collection;
        }
    }

    return null;
}
/**
 * Map a category slug to the handle of the blueprint its posts use.
 *
 * Note that the plain "reviews" category has no blueprint of its own and
 * yields null.
 *
 * @param string $slug Slugified category slug.
 * @return string|null Blueprint handle, or null when the category does not
 *                     map to one.
 */
private function _getBlueprintFromCategory($slug)
{
    // Blueprint handle => category slugs that use it. Checked in this
    // order; the first group containing the slug wins.
    $slugsByBlueprint = [
        'review_anime' => ['anime-review'],
        'review_film' => ['film-review'],
        'review_video_game' => ['game-review'],
        'review_book' => ['manga-review', 'book-review'],
        'news' => ['news'],
        'features' => ['features', 'opinions', 'roundups', 'interviews'],
        'articles' => [
            'aukn-announcements',
            'podcasts',
            'giveaways',
            'miscellaneous',
        ],
    ];

    foreach ($slugsByBlueprint as $blueprint => $slugs) {
        if (in_array($slug, $slugs)) {
            return $blueprint;
        }
    }

    return null;
}
/**
 * Translate a "language options" label into a list of language flags.
 *
 * @param string $language One of the known label strings.
 * @return array Flags such as "english_audio" or "english_subtitles";
 *               empty when the label is not recognised.
 */
private function _parseLanguageString($language)
{
    // Label => flags. Checked top to bottom with a loose comparison.
    $flagsByLabel = [
        'English dub audio only' => ['english_audio'],
        'Japanese audio with English subtitles' => [
            'japanese_audio',
            'english_subtitles',
        ],
        'Japanese audio with English subtitles and English dub audio' => [
            'english_audio',
            'japanese_audio',
            'english_subtitles',
        ],
        'Japanese audio (no subtitles)' => ['japanese_audio'],
        'Another language audio with English subtitles' => [
            'foreign_audio',
            'english_subtitles',
        ],
        'Japanese audio with Subtitles for the English dub (dubtitles) and English dub audio' => [
            'english_audio',
            'japanese_audio',
            'english_dubtitles',
        ],
        'Japanese audio with English/Dutch/French/German subtitles and English/French/German dub audio' => [
            'english_audio',
            'japanese_audio',
            'foreign_audio',
            'english_subtitles',
            'foreign_subtitles',
        ],
    ];

    foreach ($flagsByLabel as $label => $flags) {
        if ($language == $label) {
            return $flags;
        }
    }

    return [];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment