Skip to content

Instantly share code, notes, and snippets.

@ehibun
Created November 25, 2020 01:09
Show Gist options
  • Save ehibun/71eb16019729ba29ac3fc50b84783846 to your computer and use it in GitHub Desktop.
Save ehibun/71eb16019729ba29ac3fc50b84783846 to your computer and use it in GitHub Desktop.
WordPress to Statamic v3 importer
<?php
namespace App\Console\Commands;
use League\HTMLToMarkdown\HtmlConverter;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Carbon;
use Symfony\Component\HttpFoundation\File\UploadedFile;
use Statamic\Facades\Site;
use Statamic\Facades\Asset;
use Statamic\Facades\AssetContainer;
use Statamic\Facades\Entry;
use Statamic\Facades\Term;
use Statamic\Facades\Taxonomy;
use Statamic\Facades\User;
class MigrateWordPress extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'migrate:wordpress';
/**
* The console command description.
*
* @var string
*/
protected $description = 'Scrapes content from a WordPress API and imports it into Statamic';
/**
* WordPress category id => slugified category slug.
* Filled by getCategoriesFromWordPress() and read by getPostsFromWordPress().
*
* @var array
*/
public $categories = [];
/**
* WordPress tag id => slugified tag slug.
* Filled by getTagsFromWordPress() and read by getPostsFromWordPress().
*
* @var array
*/
public $tags = [];
/**
* WordPress user id => Statamic user id.
* Filled by getUsersFromWordPress() and read by getPostsFromWordPress().
*
* @var array
*/
public $users = [];
/**
* WordPress media id => asset path relative to the "assets" container.
* Filled by getMediaFromWordPress() and read by getPostsFromWordPress().
*
* @var array
*/
public $media = [];
/**
* NOTE(review): none of the methods visible in this class read or write this
* property; getPostsFromWordPress() uses a local $genres instead.
*
* @var array
*/
public $genres = [];
/**
* NOTE(review): none of the methods visible in this class read or write this
* property; getPostsFromWordPress() uses a local $studios instead.
*
* @var array
*/
public $studios = [];
/**
* Create a new command instance.
*
* Only delegates to the parent constructor; no additional state is set up here.
*
* @return void
*/
public function __construct()
{
parent::__construct();
}
/**
* Execute the console command.
*
* Runs the import steps in a fixed order. Categories, tags, users and media
* come first because getPostsFromWordPress() reads the lookup arrays
* ($categories, $tags, $users, $media) that those steps populate.
*
* @return void Nothing is returned explicitly.
*/
public function handle()
{
$this->getCategoriesFromWordPress();
$this->getTagsFromWordPress();
$this->getUsersFromWordPress();
$this->getMediaFromWordPress();
// Posts go last: they reference the ids collected by the four steps above.
$this->getPostsFromWordPress();
$this->info("OK");
}
/**
 * Import one page of WordPress posts as Statamic entries, then recurse into the next page.
 *
 * Relies on the lookup arrays filled by the category, tag, user and media
 * steps ($this->categories, $this->tags, $this->users, $this->media).
 * The API response is cached under "wp-import-posts-<page>" with a TTL of 7200,
 * and the page count from the X-WP-TotalPages header under "wp-import-posts-pages".
 *
 * Posts whose slug already exists in the target collection are skipped.
 * A post that cannot be mapped to a collection aborts the run via dd().
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getPostsFromWordPress($page = 1)
{
    $site = Site::default();
    $totalPages = Cache::get("wp-import-posts-pages", 1);
    if ($totalPages && $page > $totalPages) {
        return;
    }
    $response = Cache::remember(
        "wp-import-posts-$page",
        7200,
        // $totalPages is captured by reference so the progress line below
        // shows the page count reported by this very request.
        function () use ($page, &$totalPages) {
            $this->info("Downloading posts (page $page)");
            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/posts?per_page=100&page=' .
                    $page,
            );
            $totalPages = $res->header('X-WP-TotalPages');
            Cache::set('wp-import-posts-pages', $totalPages, 7200);
            return $res->json();
        },
    );
    $this->info(
        "== Importing posts (page " . $page . " of " . $totalPages . ")",
    );
    foreach ($response as $post) {
        // Reset for every post: otherwise a post without a mappable category
        // would silently inherit the previous post's collection/blueprint.
        $collection = null;
        $blueprint = null;
        foreach ($post['categories'] as $categoryId) {
            $categorySlug = $this->categories[$categoryId] ?? null;
            if ($categorySlug === null) {
                continue;
            }
            // Keep the last non-null mapping; an unmapped category must not
            // wipe out a mapping found through an earlier category.
            $collection = $this->_getCollectionFromCategory($categorySlug) ?? $collection;
            $blueprint = $this->_getBlueprintFromCategory($categorySlug) ?? $blueprint;
        }
        if (!$collection) {
            $this->error('Collection not found');
            dd($post);
        }
        $slug = str_slug($post['slug'], '-');
        if (Entry::findBySlug($slug, $collection)) {
            continue;
        }
        $title = html_entity_decode($post['title']['rendered']);
        $tags = [];
        foreach ($post['tags'] as $tagId) {
            if (!isset($this->tags[$tagId])) {
                $this->warn("Couldn't import tag $tagId: unknown tag id");
                continue;
            }
            $tags[] = $this->tags[$tagId];
        }
        $genres = isset($post['acf']['genre'])
            ? $this->_ensureTermsFromList($post['acf']['genre'], 'genres', 'genre')
            : [];
        $studios = isset($post['acf']['studio'])
            ? $this->_ensureTermsFromList($post['acf']['studio'], 'studios', 'studio')
            : [];
        $entry = Entry::make()
            ->collection($collection)
            ->slug($slug)
            // Must be a real boolean: the string 'false' is not treated as unpublished.
            ->published($post['status'] === 'publish')
            ->locale($site->handle())
            ->date(Carbon::parse($post['date_gmt'], 'UTC'))
            ->blueprint($blueprint);
        $data = [
            'title' => $title,
            'excerpt' => $this->_htmlToMarkdown(
                $post['excerpt']['rendered'],
            ),
            'body_text' => [
                [
                    'type' => 'set',
                    'attrs' => [
                        'values' => [
                            'type' => 'markdown_html',
                            'content' => $this->_htmlToMarkdown(
                                $post['content']['rendered'],
                            ),
                        ],
                    ],
                ],
            ],
            'featured_image' => [
                $this->media[$post['featured_media']] ?? null,
            ],
            'authors' => [$this->users[$post['author']] ?? null], // @TODO account for multiple authors
            'tags' => $tags,
            'updated_at' => Carbon::parse(
                $post['modified_gmt'],
                'UTC',
            )->format('U'),
        ];
        switch ($collection) {
            case 'news':
            case 'features':
            case 'articles':
                $entry->data($data);
                break;
            case 'reviews':
                // Reviews carry extra metadata taken from the post's ACF fields.
                $entry->data(
                    array_merge($data, [
                        'publisher' =>
                            $post['acf']['anime_publisher'] ??
                            ($post['acf']['game_publisher'] ?? null),
                        'developer' => $post['acf']['developer'] ?? null,
                        'book_authors' =>
                            $post['acf']['author(s):_'] ?? null,
                        'genres' => $genres, // @TODO: Convert to taxonomy
                        'studios' => $studios, // @TODO: Convert to taxonomy
                        'type' => $post['acf']['type'] ?? null,
                        'original_vintage' => $post['acf']['year'] ?? null,
                        'format' => $post['acf']['format:_'] ?? null,
                        'platform' => $post['acf']['platform'] ?? null,
                        'languages' => isset(
                            $post['acf']['language_options:'],
                        )
                            ? $this->_parseLanguageString(
                                $post['acf']['language_options:'],
                            )
                            : null,
                        'age_rating' =>
                            $post['acf']['bbfc_rating:_'] ?? null,
                        'material_length' =>
                            $post['acf']['material_length:_'] ?? null,
                        'running_time' =>
                            $post['acf']['running_time_(minutes)'] ?? null,
                        'score' => $post['acf']['score:_'] ?? null,
                    ]),
                );
                break;
            default:
                die('Collection not found for ' . $title);
        }
        $entry->save();
        $this->line("Imported " . $title);
    }
    $this->getPostsFromWordPress($page + 1);
}

/**
 * Turn a comma-separated list of names into term slugs, creating missing terms.
 *
 * Blank items are skipped. Every remaining name is slugified; when no term with
 * that slug exists in the taxonomy, a new one titled with the name is saved.
 *
 * @param string $list     Comma-separated names as found in the ACF field.
 * @param string $taxonomy Taxonomy handle to look up / create terms in.
 * @param string $label    Singular label used in the debug output line.
 * @return array Slugs of all non-blank items, in input order.
 */
private function _ensureTermsFromList($list, $taxonomy, $label)
{
    $slugs = [];
    foreach (explode(',', $list) as $name) {
        $name = trim($name);
        if ($name === '') {
            continue;
        }
        $slug = str_slug($name, '-');
        $slugs[] = $slug;
        if (Term::findBySlug($slug, $taxonomy)) {
            continue;
        }
        $this->line("[DEBUG] saving new $label: $name");
        Term::make()
            ->taxonomy($taxonomy)
            ->slug($slug)
            ->data(['title' => $name])
            ->save();
    }
    return $slugs;
}
/**
 * Import one page of WordPress categories as Statamic terms, then recurse into the next page.
 *
 * Every category's id => slug pair is recorded in $this->categories, including
 * the ones that are not stored as terms (already existing, or reported by
 * _getSkippedCategory()). The API response is cached under
 * "wp-import-categories-<page>" with a TTL of 7200.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getCategoriesFromWordPress($page = 1)
{
    $taxonomy = Taxonomy::find('categories');
    $totalPages = Cache::get("wp-import-categories-pages", 1);
    if ($totalPages && $page > $totalPages) {
        return;
    }
    $response = Cache::remember(
        "wp-import-categories-$page",
        7200,
        // By-reference capture: a by-value copy would discard the page count
        // assigned inside the closure and the log below would show a stale value.
        function () use ($page, &$totalPages) {
            $this->info("Trying to get categories page $page");
            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/categories?per_page=100&page=' .
                    $page,
            );
            $totalPages = $res->header('X-WP-TotalPages');
            Cache::set('wp-import-categories-pages', $totalPages, 7200);
            return $res->json();
        },
    );
    $this->info(
        "== Importing categories (page " .
            $page .
            " of " .
            $totalPages .
            ")",
    );
    foreach ($response as $category) {
        $slug = str_slug($category['slug'], '-');
        $this->categories[$category['id']] = $slug;
        if (Term::findBySlug($slug, 'categories')) {
            continue;
        }
        if ($this->_getSkippedCategory($slug)) {
            // This will be a collection - skipping import.
            continue;
        }
        Term::make()
            ->taxonomy($taxonomy)
            ->slug($slug)
            ->data(['title' => $category['name']])
            ->save();
        $this->line("Imported " . $category['name']);
    }
    $this->getCategoriesFromWordPress($page + 1);
}
/**
 * Import one page of WordPress tags as Statamic terms, then recurse into the next page.
 *
 * Every tag's id => slug pair is recorded in $this->tags, even when the term
 * already exists or the slug is blank and nothing is saved. The API response
 * is cached under "wp-import-tags-<page>" with a TTL of 7200.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getTagsFromWordPress($page = 1)
{
    $taxonomy = Taxonomy::find('tags');
    $totalPages = Cache::get("wp-import-tags-pages", 1);
    if ($totalPages && $page > $totalPages) {
        return;
    }
    $response = Cache::remember(
        "wp-import-tags-$page",
        7200,
        // By-reference capture so the freshly fetched page count reaches the
        // progress line below instead of being lost with a by-value copy.
        function () use ($page, &$totalPages) {
            $this->info("Downloading tags (page $page)");
            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/tags?per_page=100&page=' .
                    $page,
            );
            $totalPages = $res->header('X-WP-TotalPages');
            Cache::set('wp-import-tags-pages', $totalPages, 7200);
            return $res->json();
        },
    );
    $this->info("== Importing tags (page $page of $totalPages)");
    foreach ($response as $tag) {
        $slug = str_slug($tag['slug'], '-');
        $this->tags[$tag['id']] = $slug;
        try {
            if (!trim($tag['slug']) || Term::findBySlug($slug, 'tags')) {
                continue;
            }
            Term::make()
                ->taxonomy($taxonomy)
                ->slug($slug)
                ->data(['title' => $tag['name']])
                ->save();
        } catch (\Exception $exception) {
            // Dump the offending tag and stop the run.
            dd($tag, $slug, $exception);
        }
        $this->line("Imported " . $tag['name']);
    }
    $this->getTagsFromWordPress($page + 1);
}
/**
 * Import one page of WordPress users as Statamic users, then continue with the next page.
 *
 * Users are matched by e-mail address: an existing Statamic user is reused,
 * otherwise a new one is created. In both cases the WordPress id => Statamic id
 * pair is recorded in $this->users. The request uses HTTP basic auth and
 * context=edit; responses are not cached.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getUsersFromWordPress($page = 1)
{
    $response = Http::withBasicAuth(
        'Blank',
        'Password',
    )->get(
        'https://example.org/wp-json/wp/v2/users?per_page=100&context=edit&page=' .
            $page,
    );
    // A missing header casts to 0, which ends the import just like the
    // original string comparison did.
    $totalPages = (int) $response->header('X-WP-TotalPages');
    if ($page > $totalPages) {
        return;
    }
    $this->info("== Importing users (page $page of $totalPages)");
    foreach ($response->json() as $user) {
        if ($statamicUser = User::findByEmail($user['email'])) {
            $this->users[$user['id']] = $statamicUser->id();
            continue;
        }
        $statamicUser = User::make()
            ->email($user['email'])
            ->data([
                'name' => $user['name'],
                'first_name' => $user['first_name'],
                'last_name' => $user['last_name'],
                'description' => $user['description'],
            ]);
        // Saved as a separate statement so the id lookup below does not depend
        // on what save() returns.
        $statamicUser->save();
        $this->users[$user['id']] = $statamicUser->id();
        $this->line("Imported " . $user['name']);
    }
    // Recurse into the *users* importer (the original called the categories
    // importer here, so users beyond the first page were never imported).
    // Skipping the call on the last page avoids a request past the end.
    if ($page < $totalPages) {
        $this->getUsersFromWordPress($page + 1);
    }
}
/**
 * Import one page of the WordPress media library into the "assets" container,
 * then recurse into the next page.
 *
 * For every media item the container-relative path is recorded in
 * $this->media (keyed by WordPress media id), whether or not the file still
 * has to be downloaded. Items whose asset already exists are not downloaded
 * again. The API response is cached under "wp-import-media-<page>" with a TTL
 * of 7200.
 *
 * @param int $page 1-based page number to import.
 * @return void
 */
private function getMediaFromWordPress($page = 1)
{
    $totalPages = Cache::get("wp-import-media-pages", 1);
    if ($page > $totalPages) {
        return;
    }
    $response = Cache::remember(
        "wp-import-media-$page",
        7200,
        // By-reference capture so the freshly fetched page count reaches the
        // progress line below instead of being lost with a by-value copy.
        function () use ($page, &$totalPages) {
            $this->info("Downloading media (page $page)");
            $res = Http::withHeaders([
                'Accept-Encoding' => 'utf-8',
                'Content-Type' => 'application/json',
            ])->get(
                'https://example.org/wp-json/wp/v2/media?per_page=100&page=' .
                    $page,
            );
            $totalPages = $res->header('X-WP-TotalPages');
            Cache::set('wp-import-media-pages', $totalPages, 7200);
            return $res->json();
        },
    );
    $this->info("== Importing media (page $page of $totalPages)");
    // The container does not change between items, so look it up once.
    $container = AssetContainer::findByHandle('assets');
    foreach ($response as $media) {
        $isVideoPress = $media['mime_type'] == 'video/videopress';
        $sourceUrl = $isVideoPress
            ? $media['media_details']['original']['url']
            : $media['source_url'];
        $urlPath = (string) parse_url(
            stripslashes(urldecode($sourceUrl)),
            PHP_URL_PATH,
        );
        preg_match_all(
            $isVideoPress ? '/(\/.*\/)(.*)/m' : '/(.*)\/(.*)/m',
            $urlPath,
            $matches,
            PREG_SET_ORDER,
            0,
        );
        if (empty($matches)) {
            $this->warn("Couldn't derive a path for media " . $media['id']);
            continue;
        }
        if ($isVideoPress) {
            $file = $matches[0][2];
            // The captured folder already ends in "/"; trim it so the joined
            // path does not contain a double slash.
            $path = ltrim(rtrim($matches[0][1], '/') . '/' . $file, '/');
        } else {
            $folder = str_replace('app/uploads/', '', $matches[0][1]);
            $file = str_replace('app/uploads/', '', $matches[0][2]);
            $path = ltrim($folder . '/' . $file, '/');
        }
        $this->media[$media['id']] = $path;
        if (Asset::find('assets::' . $path)) {
            continue;
        }
        $tmpFile = sys_get_temp_dir() . '/' . $file;
        try {
            $urlToDownload =
                $media['media_details']['original']['url'] ??
                $media['source_url'];
            $download = Http::get(stripslashes(urldecode($urlToDownload)));
            if (!$download->successful()) {
                // Do not store an error page as if it were the media file.
                $this->warn(
                    "Couldn't download $file: HTTP " . $download->status(),
                );
                continue;
            }
            file_put_contents($tmpFile, $download->body());
        } catch (\Exception $e) {
            $this->warn("Couldn't download $file: " . $e->getMessage());
            continue;
        }
        try {
            $image = new UploadedFile($tmpFile, $file);
            $container->makeAsset($path)->upload($image);
        } finally {
            // Remove the temporary download if the upload left it behind.
            if (is_file($tmpFile)) {
                unlink($tmpFile);
            }
        }
        $this->line("Imported " . $file);
    }
    $this->getMediaFromWordPress($page + 1);
}
/**
 * Convert an HTML string to Markdown with league/html-to-markdown.
 *
 * A fresh converter is configured on every call with strip_tags = true,
 * hard_break = true and header_style = 'atx'.
 *
 * @param string $htmlString HTML to convert.
 * @return string The converter's Markdown output.
 */
private function _htmlToMarkdown($htmlString)
{
    $converter = new HtmlConverter();
    $config = $converter->getConfig();
    $options = [
        'strip_tags' => true,
        'hard_break' => true,
        'header_style' => 'atx',
    ];
    foreach ($options as $name => $value) {
        $config->setOption($name, $value);
    }
    return $converter->convert($htmlString);
}
/**
 * Tell whether a category slug is on the skip list
 * ('reviews', 'news', 'features', 'uncategorized').
 *
 * @param string $slug Category slug to check.
 * @return bool True when the slug is on the skip list.
 */
private function _getSkippedCategory($slug)
{
    $skipped = ['reviews', 'news', 'features', 'uncategorized'];

    return in_array($slug, $skipped);
}
/**
 * Map a category slug to the handle of the collection its posts belong to.
 *
 * @param string $slug Category slug.
 * @return string|null 'reviews', 'news', 'features' or 'articles'; null for an unmapped slug.
 */
private function _getCollectionFromCategory($slug)
{
    // Collection handle => category slugs that map to it, checked in order.
    $slugsByCollection = [
        'reviews' => [
            'reviews',
            'anime-review',
            'film-review',
            'manga-review',
            'book-review',
            'game-review',
        ],
        'news' => ['news'],
        'features' => ['features', 'opinions', 'roundups', 'interviews'],
        'articles' => [
            'aukn-announcements',
            'podcasts',
            'giveaways',
            'miscellaneous',
        ],
    ];
    foreach ($slugsByCollection as $collection => $slugs) {
        if (in_array($slug, $slugs)) {
            return $collection;
        }
    }
    return null;
}
/**
 * Map a category slug to the handle of the blueprint used for its posts.
 *
 * The generic 'reviews' slug has no blueprint of its own and yields null.
 *
 * @param string $slug Category slug.
 * @return string|null Blueprint handle, or null for an unmapped slug.
 */
private function _getBlueprintFromCategory($slug)
{
    // Blueprint handle => category slugs that map to it, checked in order.
    $slugsByBlueprint = [
        'review_anime' => ['anime-review'],
        'review_film' => ['film-review'],
        'review_video_game' => ['game-review'],
        'review_book' => ['manga-review', 'book-review'],
        'news' => ['news'],
        'features' => ['features', 'opinions', 'roundups', 'interviews'],
        'articles' => [
            'aukn-announcements',
            'podcasts',
            'giveaways',
            'miscellaneous',
        ],
    ];
    foreach ($slugsByBlueprint as $blueprint => $slugs) {
        if (in_array($slug, $slugs)) {
            return $blueprint;
        }
    }
    return null;
}
/**
 * Translate a language-options description into a list of language flags.
 *
 * @param string $language Description text as stored in the WordPress field.
 * @return array Flag handles for a known description; an empty array otherwise.
 */
private function _parseLanguageString($language)
{
    // Known descriptions and their flags, compared in this order.
    $flagsByDescription = [
        'English dub audio only' => ['english_audio'],
        'Japanese audio with English subtitles' => [
            'japanese_audio',
            'english_subtitles',
        ],
        'Japanese audio with English subtitles and English dub audio' => [
            'english_audio',
            'japanese_audio',
            'english_subtitles',
        ],
        'Japanese audio (no subtitles)' => ['japanese_audio'],
        'Another language audio with English subtitles' => [
            'foreign_audio',
            'english_subtitles',
        ],
        'Japanese audio with Subtitles for the English dub (dubtitles) and English dub audio' => [
            'english_audio',
            'japanese_audio',
            'english_dubtitles',
        ],
        'Japanese audio with English/Dutch/French/German subtitles and English/French/German dub audio' => [
            'english_audio',
            'japanese_audio',
            'foreign_audio',
            'english_subtitles',
            'foreign_subtitles',
        ],
    ];
    foreach ($flagsByDescription as $description => $flags) {
        // Loose comparison on purpose: it mirrors how switch/case matches.
        if ($language == $description) {
            return $flags;
        }
    }
    return [];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment