Last active
November 14, 2023 08:51
-
-
Save uzielweb/f347cb50ee0430fcf7a6b7a3c3dbb6bb to your computer and use it in GitHub Desktop.
Import Articles Cli for Joomla 4, importing articles from form2content and Joomla Articles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
Instructions for use: | |
- To skip the first 100 articles, use the argument "--skip 100". | |
- To manually define the limit of records to be imported, use the argument "--limit" followed by the desired number, for example, "--limit 50".
- To define the start date of the import, use the argument "--start_date" followed by the date in "YYYY-MM-DD" format, for example, "--start_date 2023-01-01".
- To define the end date of the import (exclusive), use the argument "--end_date" followed by the date in "YYYY-MM-DD" format, for example, "--end_date 2023-12-31".
- To define the source category, use the argument "--source_category" followed by the desired category ID, for example, "--source_category 10".
- To define the destination category, use the argument "--destination_category" followed by the desired category ID, for example, "--destination_category 5". | |
- To select Joomla standard articles from the content table in the old database, use the argument "--standard_articles True".
Example to import Joomla's standard articles:
php ImportArticles.php --standard_articles True --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To import articles from F2C (Form2Content), simply run the script without the argument "--standard_articles":
Example to import F2C articles:
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To avoid cleaning HTML in imported articles, use the argument "--no_clean_html". | |
Example to import articles without cleaning HTML: | |
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5 --no_clean_html
- The log file is written to "import_log.txt" inside the directory configured as "log_path" in Joomla's configuration.php.
*/ | |
// Flag this script as a Joomla entry point before any framework file is loaded.
define('_JEXEC', 1);

use Joomla\CMS\Date\Date;
use Joomla\CMS\Factory;
use Joomla\CMS\HTML\HTMLHelper;
use Joomla\CMS\Language\Text;
use Joomla\Console\Application;

// JPATH_BASE is the parent of the directory that contains this script.
define('JPATH_BASE', dirname(__DIR__));
define('DS', DIRECTORY_SEPARATOR);

// Load the Joomla path definitions and the framework bootstrap.
require_once JPATH_BASE . DS . 'includes' . DS . 'defines.php';
require_once JPATH_BASE . DS . 'includes' . DS . 'framework.php';

// Default import settings, used when the matching command-line argument is absent.
define('SOURCE_CATEGORY', 90);
define('TARGET_CATEGORY', 90);
define('START_DATE', '0000-01-01');
define('END_DATE', '3500-12-31');
define('DEFAULT_LIMIT', 100);

// Time zone used by date() for the log timestamps.
date_default_timezone_set('America/Sao_Paulo');
class ImportArticles extends JApplicationCli | |
{ | |
/**
 * Build a lowercase ASCII alias from a title and make it unique in #__content.
 *
 * Runs of non-letter/non-digit characters become single dashes, the text is
 * transliterated to ASCII, and anything that is not a word character or a
 * dash is dropped. If nothing usable remains, "n-a" is used as the base.
 * When the alias is already taken, "-2", "-3", ... is appended until a free
 * one is found.
 *
 * @param   string  $text  Source text, normally the article title.
 *
 * @return  string  An alias that is not present in the #__content table.
 */
public function slugify($text)
{
    // Replace every run of characters that are neither letters nor digits with a dash.
    $slug = preg_replace('~[^\\pL\d]+~u', '-', (string) $text);

    // preg_replace() returns null when the input is not valid UTF-8.
    if ($slug === null) {
        $slug = '';
    }

    $slug = trim($slug, '-');

    // Transliterate to ASCII; iconv() returns false when conversion fails,
    // in which case the untransliterated text goes through the filter below.
    $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $slug);

    if ($ascii !== false) {
        $slug = $ascii;
    }

    $slug = strtolower($slug);

    // Remove whatever is left that is not a word character or a dash.
    $slug = (string) preg_replace('~[^-\w]+~', '', $slug);
    $slug = trim($slug, '-');

    // Strict comparison so that a legitimate slug such as "0" is kept.
    $baseSlug = $slug === '' ? 'n-a' : $slug;

    $db      = Factory::getContainer()->get('DatabaseDriver');
    $counter = 1;

    while (true) {
        $slugToCheck = $counter > 1 ? $baseSlug . '-' . $counter : $baseSlug;

        // Count matches instead of loading the alias, so an alias of "0" is not mistaken for "not found".
        $query = $db->getQuery(true)
            ->select('COUNT(*)')
            ->from($db->quoteName('#__content'))
            ->where($db->quoteName('alias') . ' = ' . $db->quote($slugToCheck));
        $db->setQuery($query);

        if ((int) $db->loadResult() === 0) {
            return $slugToCheck;
        }

        $counter++;
    }
}
/**
 * Get a database driver connected to the old (source) database.
 *
 * The connection settings are hard-coded in this method and have to be
 * edited before the script is run.
 *
 * @return  JDatabaseDriver  Driver instance for the old database.
 */
private function getOldDatabase()
{
    $connection = [
        'driver'   => 'mysql',
        'host'     => 'yourhost.something',
        'user'     => 'olddatabaseuser',
        'password' => 'olddatabasepassword',
        'database' => 'joomla_olddatabase',
        'prefix'   => 'oldprefix_',
    ];

    return JDatabaseDriver::getInstance($connection);
}
/**
 * Import a batch of articles from the old database into #__content.
 *
 * Source rows come from the old #__content table (standard articles) or from
 * #__f2c_form (Form2Content, project 5, published forms only). For each row the
 * target article id is the row's own id (standard articles) or its
 * reference_id (F2C). Rows whose target id already exists in the new
 * #__content table are not re-imported; their asset and workflow records are
 * only checked. New articles are inserted as published, in the destination
 * category, and then receive asset and workflow records.
 *
 * @param   int     $skipItems            Number of source rows to skip (query offset).
 * @param   int     $limit                Maximum number of source rows to read.
 * @param   string  $startDate            Only rows with created >= this date are read.
 * @param   string  $endDate              Only rows with created < this date are read.
 * @param   int     $sourceCategory       Category id in the old database.
 * @param   int     $destinationCategory  Category id assigned to imported articles.
 * @param   bool    $standardArticles     True to read the old #__content table instead of #__f2c_form.
 * @param   bool    $noCleanHtml          True to store intro/full text without passing it through cleanTheHtml().
 *
 * @return  void
 */
private function checkOldIds($skipItems = 0, $limit = DEFAULT_LIMIT, $startDate = START_DATE, $endDate = END_DATE, $sourceCategory = SOURCE_CATEGORY, $destinationCategory = TARGET_CATEGORY, $standardArticles = false, $noCleanHtml = false)
{
    $olddatabase = $this->getOldDatabase();
    $db          = Factory::getContainer()->get('DatabaseDriver');

    // Command-line values arrive as strings; normalise them before they reach the query.
    $skipItems = max(0, (int) $skipItems);
    $limit     = max(0, (int) $limit);

    $queryfromOld = $olddatabase->getQuery(true)
        ->select('*')
        ->from($standardArticles ? '#__content' : '#__f2c_form')
        ->where('catid = ' . $olddatabase->quote($sourceCategory))
        ->where('created >= ' . $olddatabase->quote($startDate))
        ->where('created < ' . $olddatabase->quote($endDate))
        ->order('id ASC');

    if (!$standardArticles) {
        // F2C import: only published forms of project 5.
        $queryfromOld->where('projectid = 5')->where('state = 1');
    }

    // An offset of 0 is the same as no offset, so one call covers both cases.
    $queryfromOld->setLimit($limit, $skipItems);

    $results = $olddatabase->setQuery($queryfromOld)->loadObjectList();

    if (!$results) {
        $results = [];
    }

    $this->echoAndLog("Total articles to be imported: " . count($results) . "\n");

    $contador = $skipItems; // running article counter, continues after the skipped rows

    foreach ($results as $oldarticle) {
        // Standard articles are the content row itself; F2C forms reference their article through reference_id.
        if ($standardArticles) {
            $articleId = $oldarticle->id;
        } else {
            $articleId = isset($oldarticle->reference_id) ? $oldarticle->reference_id : null;
        }

        $article = null;

        if (!empty($articleId)) {
            $existingQuery = $db->getQuery(true)
                ->select('*')
                ->from($db->quoteName('#__content'))
                ->where($db->quoteName('id') . ' = ' . $db->quote($articleId));
            $article = $db->setQuery($existingQuery)->loadObject();
        }

        if ($article) {
            // Already imported: only make sure the asset and workflow records exist.
            $this->echoAndLog("The article already exists: " . $article->id . ": " . $article->title . " :::>> ");
            $this->adicionarReferenciasAsset($article);
            $this->adicionarReferenciasWorkflow($article);

            continue;
        }

        $contador++;

        if ($standardArticles) {
            $introText             = $oldarticle->introtext;
            $fullText              = $oldarticle->fulltext;
            $oldDefaultArticleData = $oldarticle;
        } else {
            // F2C keeps the texts in separate field rows: field 58 is used as intro text, 59 as full text.
            $introText             = $this->loadF2cFieldContent($olddatabase, $oldarticle->id, 58);
            $fullText              = $this->loadF2cFieldContent($olddatabase, $oldarticle->id, 59);
            $oldDefaultArticleData = null;

            if (!empty($articleId)) {
                $oldDefaultArticleDataQuery = $olddatabase->getQuery(true)
                    ->select('*')
                    ->from('#__content')
                    ->where('id = ' . $olddatabase->quote($articleId));
                $oldDefaultArticleData = $olddatabase->setQuery($oldDefaultArticleDataQuery)->loadObject();
            }
        }

        // A missing text field must become an empty string, not null.
        $introText = (string) $introText;
        $fullText  = (string) $fullText;

        $article = new stdClass();

        if (!empty($articleId)) {
            $article->id = $articleId;

            // Reuse an asset that already exists for this article id, if any.
            $queryAsset = $db->getQuery(true)
                ->select('*')
                ->from($db->quoteName('#__assets'))
                ->where($db->quoteName('name') . ' = ' . $db->quote('com_content.article.' . $articleId));
            $asset = $db->setQuery($queryAsset)->loadObject();

            if ($asset) {
                $article->asset_id = $asset->id;
            }
        }

        $article->title            = $oldarticle->title;
        $article->alias            = $this->slugify($oldarticle->title);
        $article->introtext        = $noCleanHtml ? $introText : $this->cleanTheHtml($introText);
        $article->fulltext         = $noCleanHtml ? $fullText : $this->cleanTheHtml($fullText);
        $article->state            = 1;
        $article->catid            = $destinationCategory;
        $article->created          = $oldarticle->created;
        $article->created_by       = $oldarticle->created_by;
        $article->created_by_alias = isset($oldarticle->created_by_alias) ? $oldarticle->created_by_alias : '';
        // Fall back to the creation date when the source row has no modification date.
        $article->modified         = !empty($oldarticle->modified) ? $oldarticle->modified : $oldarticle->created;
        $article->modified_by      = isset($oldarticle->modified_by) ? $oldarticle->modified_by : null;
        $article->checked_out      = '';
        $article->checked_out_time = '';
        $article->publish_up       = isset($oldarticle->publish_up) ? $oldarticle->publish_up : null;
        $article->publish_down     = '';
        $article->images           = '{"image_intro":"","float_intro":"","image_intro_alt":"","image_intro_caption":"","image_fulltext":"","float_fulltext":"","image_fulltext_alt":"","image_fulltext_caption":""}';
        $article->urls             = '{"urla":"","urlatext":"","targeta":"","urlb":"","urlbtext":"","targetb":"","urlc":"","urlctext":"","targetc":""}';
        $article->attribs          = '{"article_layout":"","show_title":"","link_titles":"","show_tags":"","show_intro":"","info_block_position":"","info_block_show_title":"","show_category":"","link_category":"","show_parent_category":"","link_parent_category":"","show_author":"","link_author":"","show_create_date":"","show_modify_date":"","show_publish_date":"","show_item_navigation":"","show_hits":"","show_noauth":"","urls_position":"","alternative_readmore":"","article_page_title":"","show_publishing_options":"","show_article_options":"","show_urls_images_backend":"","show_urls_images_frontend":""}';
        $article->version          = 1;
        // The old content row may be missing (F2C form without an article), so guard every read.
        $article->ordering         = !empty($oldDefaultArticleData->ordering) ? $oldDefaultArticleData->ordering : '';
        $article->metakey          = !empty($oldDefaultArticleData->metakey) ? $oldDefaultArticleData->metakey : '';
        $article->metadesc         = !empty($oldDefaultArticleData->metadesc) ? $oldDefaultArticleData->metadesc : '';
        $article->access           = 1;
        $article->hits             = !empty($oldDefaultArticleData->hits) ? $oldDefaultArticleData->hits : 0;
        $article->metadata         = '{"robots":"","author":"","rights":""}';
        $article->featured         = 0;
        $article->language         = '*';
        $article->xreference       = '';
        $article->note             = '';

        $db->insertObject('#__content', $article, 'id');

        // Singular/plural, identical on screen and in the log.
        $this->echoAndLog($contador . ($contador > 1 ? " completed tasks :::>> " : " completed task :::>> "));
        $this->echoAndLog("Article created: " . $article->id . ": " . $article->title . " - Created at: " . $article->created . " :::>> ");

        $this->adicionarReferenciasAsset($article);
        $this->adicionarReferenciasWorkflow($article);
    }
}

/**
 * Load the content of one Form2Content field for one form from the old database.
 *
 * @param   object  $olddatabase  Driver connected to the old database.
 * @param   int     $formId       Id of the #__f2c_form row.
 * @param   int     $fieldId      Id of the F2C field.
 *
 * @return  string|null  The stored content, or null when there is no such row.
 */
private function loadF2cFieldContent($olddatabase, $formId, $fieldId)
{
    $query = $olddatabase->getQuery(true)
        ->select('content')
        ->from('#__f2c_fieldcontent')
        ->where('formid = ' . $olddatabase->quote($formId))
        ->where('fieldid = ' . (int) $fieldId);

    return $olddatabase->setQuery($query)->loadResult();
}

/**
 * Print a progress message and write the same message to the import log.
 *
 * @param   string  $message  Text to show and log.
 *
 * @return  void
 */
private function echoAndLog($message)
{
    echo $message;
    $this->logAction($message);
}
/**
 * Make sure an #__assets row exists for the given article.
 *
 * If no asset named "com_content.article.<id>" exists, one is inserted as a
 * child of the article's category asset (or of the "com_content" asset when
 * the category has none) and the article's asset_id is updated. Nothing is
 * created when neither parent asset can be found.
 *
 * @param   object  $article  Article row; id, catid and title are read.
 *
 * @return  void
 */
private function adicionarReferenciasAsset($article)
{
    $db        = Factory::getContainer()->get('DatabaseDriver');
    $assetName = 'com_content.article.' . $article->id;

    $queryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote($assetName));
    $db->setQuery($queryAsset);
    $asset = $db->loadObject();

    if ($asset) {
        echo "The asset already exists: " . $asset->id . ": " . $asset->name . " :::>> ";
        $this->logAction("The asset already exists: " . $asset->id . ": " . $asset->name . " :::>> ");

        return;
    }

    // The parent is the category asset; fall back to the component asset when the category has none.
    $parentAsset = null;

    foreach (['com_content.category.' . $article->catid, 'com_content'] as $parentName) {
        $queryParent = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__assets'))
            ->where($db->quoteName('name') . ' = ' . $db->quote($parentName));
        $db->setQuery($queryParent);
        $parentAsset = $db->loadObject();

        if ($parentAsset) {
            break;
        }
    }

    if (!$parentAsset) {
        // Without a parent there is no valid parent_id/level to write.
        echo "Parent asset not found for article: " . $article->id . "\n";
        $this->logAction("Parent asset not found for article: " . $article->id . "\n");

        return;
    }

    // NOTE(review): the new node is appended after the current maximum rgt instead of
    // opening a gap inside the parent, so lft/rgt do not mirror the parent_id hierarchy.
    // Consider rebuilding the assets table after the import.
    $queryMaxRgt = $db->getQuery(true)
        ->select('MAX(rgt)')
        ->from($db->quoteName('#__assets'));
    $db->setQuery($queryMaxRgt);
    $maxRgt = (int) $db->loadResult();

    $asset            = new stdClass();
    $asset->parent_id = $parentAsset->id;
    $asset->lft       = $maxRgt + 1;
    $asset->rgt       = $maxRgt + 2;
    // A child node sits one level below its parent.
    $asset->level     = (int) $parentAsset->level + 1;
    $asset->name      = $assetName;
    $asset->title     = $article->title;
    $asset->rules     = '{}';

    $db->insertObject('#__assets', $asset, 'id');

    echo "Asset created: " . $asset->id . ": " . $asset->name . " :::>> ";
    $this->logAction("Asset created: " . $asset->id . ": " . $asset->name . " :::>> ");

    // Point the article at the asset that was just created.
    $this->updateAssetId($article->id);
}
/**
 * Make sure a #__workflow_associations row exists for the given article.
 *
 * The lookup is restricted to the "com_content.article" extension so that a
 * row belonging to another extension with the same item_id is not mistaken
 * for this article's association. A missing row is inserted with stage 1.
 *
 * @param   object  $article  Article row; only id is read.
 *
 * @return  void
 */
private function adicionarReferenciasWorkflow($article)
{
    $db        = Factory::getContainer()->get('DatabaseDriver');
    $extension = "com_content.article";

    $queryWorkflow = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__workflow_associations'))
        ->where($db->quoteName('item_id') . ' = ' . $db->quote($article->id))
        ->where($db->quoteName('extension') . ' = ' . $db->quote($extension));
    $db->setQuery($queryWorkflow);
    $workflow = $db->loadObject();

    if ($workflow) {
        echo "The workflow already exists: " . $workflow->item_id . ": " . $workflow->extension . "\n";
        $this->logAction("The workflow already exists: " . $workflow->item_id . ": " . $workflow->extension . "\n");

        return;
    }

    $workflow            = new stdClass();
    $workflow->item_id   = $article->id;
    // NOTE(review): assumes stage id 1 is the stage imported articles should start in — confirm for this site.
    $workflow->stage_id  = 1;
    $workflow->extension = $extension;

    // No key argument: this code only ever writes item_id, stage_id and extension, never an "id" column.
    $db->insertObject('#__workflow_associations', $workflow);

    echo "The workflow was created: " . $workflow->item_id . ": " . $workflow->extension . "\n";
    $this->logAction("The workflow was created: " . $workflow->item_id . ": " . $workflow->extension . "\n");
}
// update asset_id in content table based on assets table and article id | |
/**
 * Copy the id of the asset named "com_content.article.<id>" into the
 * asset_id column of the matching #__content row.
 *
 * Prints and logs the outcome; when no such asset exists nothing is updated.
 *
 * @param   int  $articleId  Id of the article to update.
 *
 * @return  void
 */
private function updateAssetId($articleId)
{
    $db = Factory::getContainer()->get('DatabaseDriver');

    $lookup = $db->getQuery(true);
    $lookup->select('*');
    $lookup->from($db->quoteName('#__assets'));
    $lookup->where($db->quoteName('name') . ' = ' . $db->quote('com_content.article.' . $articleId));
    $db->setQuery($lookup);
    $asset = $db->loadObject();

    if (!$asset) {
        echo "Asset not found: " . $articleId . "\n";
        $this->logAction("Asset not found: " . $articleId . "\n");

        return;
    }

    $update = $db->getQuery(true);
    $update->update($db->quoteName('#__content'));
    $update->set($db->quoteName('asset_id') . ' = ' . $db->quote($asset->id));
    $update->where($db->quoteName('id') . ' = ' . $db->quote($articleId));
    $db->setQuery($update);
    $db->execute();

    echo "Asset updated: " . $asset->id . ": " . $asset->name . " :::>> ";
    $this->logAction("Asset updated: " . $asset->id . ": " . $asset->name . " :::>> ");
}
// Log actions | |
/**
 * Append one timestamped line to import_log.txt in the configured log path.
 *
 * Trailing line breaks in the message are removed first, because many callers
 * pass messages that already end in "\n" and would otherwise leave blank
 * lines in the log. A failed write is reported on STDERR and does not stop
 * the import.
 *
 * @param   string  $message  Text to log.
 *
 * @return  void
 */
private function logAction($message) {
    $logFile  = Factory::getConfig()->get('log_path') . '/import_log.txt';
    $logEntry = date('Y-m-d H:i:s') . ' - ' . rtrim((string) $message, "\r\n") . PHP_EOL;

    // LOCK_EX keeps entries intact if two imports write to the same log at once.
    if (file_put_contents($logFile, $logEntry, FILE_APPEND | LOCK_EX) === false) {
        fwrite(STDERR, "Could not write to log file: " . $logFile . PHP_EOL);
    }
}
// Clean HTML | |
/**
 * Strip presentational markup from an HTML fragment.
 *
 * Removes class and style attributes from opening tags (other attributes and
 * the text between tags are left alone), removes stray whitespace before the
 * closing ">" of a tag, replaces bullet characters with dashes and finally
 * strips every tag that is not on the allow-list (or every tag when
 * $limpaTudo is true).
 *
 * @param   string|null  $string     HTML to clean; null is treated as an empty string.
 * @param   bool         $limpaTudo  True to remove all tags and return plain text.
 *
 * @return  string  The cleaned text.
 */
private function cleanTheHtml($string, $limpaTudo = false)
{
    $string = (string) $string;

    if ($string === '') {
        return '';
    }

    // Work on one opening tag at a time so that words such as "class" or
    // "style" in the visible text, and attributes such as href/src, survive.
    $cleaned = preg_replace_callback(
        '/<[a-z][^<>]*>/i',
        function ($match) {
            // Drop class="..." / style='...' / class=unquoted, whatever the quoting.
            $tag = preg_replace(
                '/\s+(?:class|style)\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)/i',
                '',
                $match[0]
            );

            // "<p >" becomes "<p>"; self-closing " />" is left untouched.
            return preg_replace('/\s+>$/', '>', $tag);
        },
        $string
    );

    // preg_replace_callback() returns null on a PCRE error; keep the input in that case.
    if ($cleaned !== null) {
        $string = $cleaned;
    }

    // Replace bullet characters with dashes.
    $string = str_replace('•', '-', $string);

    if ($limpaTudo) {
        return strip_tags($string);
    }

    // "<br>" in the allow-list also covers "<br/>" and "<br />".
    return strip_tags($string, '<iframe><img><p><span><a><b><div><strong><em><br>');
}
/**
 * Empty import_log.txt in the configured log path, if the file exists.
 *
 * Prints whether the file was cleared or was not there.
 *
 * @return  void
 */
private function clearLog()
{
    $path = Factory::getConfig()->get('log_path') . '/import_log.txt';

    if (!file_exists($path)) {
        echo "Log file does not exist.\n";

        return;
    }

    // Overwrite the file with nothing to truncate it.
    file_put_contents($path, '');
    echo "Log file cleared.\n";
}
// Execute the checkOldIds function | |
/**
 * Entry point of the CLI application: read the command-line options and run the import.
 *
 * Switches (clear_screen, clear_log, standard_articles, no_clean_html) are on
 * when passed without a value or with "1", "true", "on" or "yes"
 * (case-insensitive); any other value, including "false", turns them off.
 * Numeric options are cast to integers and skip/limit are clamped at zero.
 *
 * @return  void
 */
public function doExecute()
{
    $input = $this->input;

    // Interpret a switch value; plain truthiness would treat the string "false" as true.
    $isOn = function ($value) {
        return filter_var($value, FILTER_VALIDATE_BOOLEAN);
    };

    if ($isOn($input->get('clear_screen', false))) {
        if (method_exists($this, 'clearTerminalScreen')) {
            $this->clearTerminalScreen();
        } else {
            // This class defines no clearTerminalScreen(); use the ANSI "erase display, cursor home" sequence instead.
            echo "\033[2J\033[H";
        }
    }

    if ($isOn($input->get('clear_log', false))) {
        $this->clearLog();
    }

    $skipItems           = max(0, (int) $input->get('skip', 0));
    $limit               = max(0, (int) $input->get('limit', DEFAULT_LIMIT));
    $startDate           = $input->get('start_date', START_DATE);
    $endDate             = $input->get('end_date', END_DATE);
    $sourceCategory      = (int) $input->get('source_category', SOURCE_CATEGORY);
    $destinationCategory = (int) $input->get('destination_category', TARGET_CATEGORY);
    $standardArticles    = $isOn($input->get('standard_articles', false));
    $noCleanHtml         = $isOn($input->get('no_clean_html', false));

    $this->checkOldIds($skipItems, $limit, $startDate, $endDate, $sourceCategory, $destinationCategory, $standardArticles, $noCleanHtml);
}
// Implement the getName method | |
/**
 * Name of this application.
 *
 * @return  string  Always "ImportArticles".
 */
public function getName()
{
    $name = 'ImportArticles';

    return $name;
}
} | |
// Resolve the ImportArticles application instance and run it.
$importer = JApplicationCli::getInstance('ImportArticles');
$importer->execute();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment