Skip to content

Instantly share code, notes, and snippets.

@uzielweb
Last active November 14, 2023 08:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save uzielweb/f347cb50ee0430fcf7a6b7a3c3dbb6bb to your computer and use it in GitHub Desktop.
Save uzielweb/f347cb50ee0430fcf7a6b7a3c3dbb6bb to your computer and use it in GitHub Desktop.
Import Articles Cli for Joomla 4, importing articles from form2content and Joomla Articles
<?php
/*
Instructions for use:
- To skip the first 100 articles, use the argument "--skip 100".
- To manually define the limit of records to be imported, use the argument "--limit" followed by the desired number, for example, "--limit 50".
- To define the start date of the import, use the argument "--start_date" followed by the date in "YYYY-MM-DD" format, for example, "--start_date 2023-01-01".
- To define the end date of the import (exclusive: only articles created before this date are selected), use the argument "--end_date" followed by the date in "YYYY-MM-DD" format, for example, "--end_date 2023-12-31".
- To define the source category, use the argument "--source_category" followed by the desired category ID, for example, "--source_category 10".
- To define the destination category, use the argument "--destination_category" followed by the desired category ID, for example, "--destination_category 5".
- To select Joomla standard articles from the content table in the old database, use the argument "--standard_articles true".
Example to import Joomla's standard articles:
php ImportArticles.php --standard_articles true --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To import articles from F2C (Form2Content), simply run the script without the "--standard_articles" argument:
Example to import F2C articles:
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To avoid cleaning HTML in imported articles, use the argument "--no_clean_html".
Example to import articles without cleaning HTML:
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5 --no_clean_html
- The log file is written to "import_log.txt" inside the directory set as log_path in Joomla's configuration.php (typically administrator/logs), as built in the $logFile variable.
*/
// Joomla entry-point flag; defined before any Joomla file is loaded.
define('_JEXEC', 1);
use Joomla\CMS\Factory;
use Joomla\Console\Application;
use Joomla\CMS\HTML\HTMLHelper;
use Joomla\CMS\Language\Text;
use Joomla\CMS\Date\Date;
// JPATH_BASE is the parent of the directory that contains this script
// (assumes the script is placed one level below the Joomla root, e.g. in cli/ - TODO confirm).
define('JPATH_BASE', dirname(dirname(__file__)));
define('DS', DIRECTORY_SEPARATOR);
// Load the Joomla path definitions and framework; order matters, keep these after JPATH_BASE.
require_once JPATH_BASE . DS . 'includes' . DS . 'defines.php';
require_once JPATH_BASE . DS . 'includes' . DS . 'framework.php';
// Defaults used when the matching command-line option is not given
// (--source_category, --destination_category, --start_date, --end_date, --limit).
define('SOURCE_CATEGORY', 90);
define('TARGET_CATEGORY', 90);
define('START_DATE', '0000-01-01');
define('END_DATE', '3500-12-31');
define('DEFAULT_LIMIT', 100);
// Set the user's time zone
date_default_timezone_set('America/Sao_Paulo');
class ImportArticles extends JApplicationCli
{
/**
 * Build a lowercase ASCII alias from a title and make it unique among the
 * aliases stored in #__content.
 *
 * When the alias is already taken, "-2", "-3", ... is appended until a free
 * one is found. Text that contains no usable character becomes "n-a", which
 * goes through the same uniqueness check.
 *
 * @param   string  $text  Title (or any text) to convert.
 *
 * @return  string  An alias that is not yet present in #__content.
 */
public function slugify($text)
{
    // Replace every run of non-letter, non-digit characters with a dash.
    // preg_replace() returns null on malformed UTF-8, hence the fallback.
    $text = preg_replace('~[^\\pL\d]+~u', '-', (string) $text) ?? '';
    $text = trim($text, '-');

    // Transliterate to ASCII. iconv() returns false when it cannot convert;
    // in that case keep the text and let the filter below drop what is left
    // instead of discarding the whole title.
    $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
    if ($ascii !== false) {
        $text = $ascii;
    }

    $text = strtolower($text);

    // Remove anything that is not a dash or a word character.
    $text = preg_replace('~[^-\w]+~', '', $text) ?? '';

    // Transliteration can leave placeholders that were just removed, so
    // collapse repeated dashes and trim the edges once more.
    $text = trim(preg_replace('~-+~', '-', $text) ?? '', '-');

    if ($text === '') {
        $text = 'n-a';
    }

    // Find the first free alias: base, base-2, base-3, ...
    $db = Factory::getContainer()->get('DatabaseDriver');
    $baseSlug = $text;
    $counter = 1;

    while (true) {
        $slugToCheck = $counter > 1 ? $baseSlug . '-' . $counter : $baseSlug;
        $query = $db->getQuery(true)
            ->select('COUNT(*)')
            ->from($db->quoteName('#__content'))
            ->where($db->quoteName('alias') . ' = ' . $db->quote($slugToCheck));
        $db->setQuery($query);

        // Counting rows (instead of testing the alias for truthiness) keeps
        // an existing alias such as "0" from being treated as "not found".
        if ((int) $db->loadResult() === 0) {
            return $slugToCheck;
        }

        $counter++;
    }
}
/**
 * Return a database driver connected to the old (source) site.
 *
 * The connection values below look like placeholders and presumably have to
 * be edited for the real source database before running the import.
 *
 * @return  JDatabaseDriver  Driver instance for the old database.
 */
private function getOldDatabase()
{
    $connectionSettings = [
        'driver' => 'mysql',
        'host' => 'yourhost.something',
        'user' => 'olddatabaseuser',
        'password' => 'olddatabasepassword',
        'database' => 'joomla_olddatabase',
        'prefix' => 'oldprefix_',
    ];

    return JDatabaseDriver::getInstance($connectionSettings);
}
/**
 * Import one batch of articles from the old database into #__content.
 *
 * Source rows come either from the old #__content table (standard articles)
 * or from #__f2c_form (Form2Content, project 5, published forms only). For
 * each row the article is created in the new database unless an article with
 * the same id already exists; in both cases the asset and workflow records
 * are ensured afterwards. Progress is echoed and written to the log file.
 *
 * @param   int     $skipItems            Number of source rows to skip (offset).
 * @param   int     $limit                Maximum number of source rows to read.
 * @param   string  $startDate            Lower bound for "created" (inclusive).
 * @param   string  $endDate              Upper bound for "created" (exclusive).
 * @param   int     $sourceCategory       Category id in the old database.
 * @param   int     $destinationCategory  Category id assigned to created articles.
 * @param   bool    $standardArticles     True to read the old #__content table instead of F2C.
 * @param   bool    $noCleanHtml          True to copy the texts without cleanTheHtml().
 *
 * @return  void
 */
private function checkOldIds($skipItems = 0, $limit = DEFAULT_LIMIT, $startDate = START_DATE, $endDate = END_DATE, $sourceCategory = SOURCE_CATEGORY, $destinationCategory = TARGET_CATEGORY, $standardArticles = false, $noCleanHtml = false)
{
    $olddatabase = $this->getOldDatabase();
    $db = Factory::getContainer()->get('DatabaseDriver');
    $skipItems = max(0, (int) $skipItems);

    // Every progress message goes to the terminal and to the log file.
    $report = function ($message) {
        echo $message;
        $this->logAction($message);
    };

    // Both sources share the category/date filter, ordering and paging;
    // only the table and the F2C-specific conditions differ.
    $queryfromOld = $olddatabase->getQuery(true)
        ->select('*')
        ->from($standardArticles ? '#__content' : '#__f2c_form');

    if (!$standardArticles) {
        // projectid 5 is hard-coded: the F2C project this script imports from.
        $queryfromOld
            ->where('projectid = 5')
            ->where('state = 1');
    }

    $queryfromOld
        ->where('catid = ' . $olddatabase->quote($sourceCategory))
        ->where('created >= ' . $olddatabase->quote($startDate))
        ->where('created < ' . $olddatabase->quote($endDate))
        ->order('id ASC');

    // An offset of 0 is the same as no offset, so one call covers both cases.
    $queryfromOld->setLimit((int) $limit, $skipItems);

    $results = $olddatabase->setQuery($queryfromOld)->loadObjectList() ?: [];
    $report("Total articles to be imported: " . count($results) . "\n");

    $contador = $skipItems; // articles counter

    foreach ($results as $oldarticle) {
        // Standard rows ARE the article; F2C forms point at their article
        // through reference_id (a column #__content rows do not have).
        $referenceId = $standardArticles ? $oldarticle->id : $oldarticle->reference_id;

        $existingQuery = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__content'))
            ->where($db->quoteName('id') . ' = ' . $db->quote($referenceId));
        $article = $db->setQuery($existingQuery)->loadObject();

        if ($article) {
            // Skip the article itself but still make sure its asset and workflow rows exist.
            $report("The article already exists: " . $article->id . ": " . $article->title . " :::>> ");
            $this->adicionarReferenciasAsset($article);
            $this->adicionarReferenciasWorkflow($article);
            continue;
        }

        $contador++;

        if ($standardArticles) {
            $introText = $oldarticle->introtext;
            $fullText = $oldarticle->fulltext;
            $oldDefaultArticleData = $oldarticle;
        } else {
            // 58 and 59 are the hard-coded F2C field ids used as intro and full text.
            $introText = $this->loadF2cFieldContent($olddatabase, $oldarticle->id, 58);
            $fullText = $this->loadF2cFieldContent($olddatabase, $oldarticle->id, 59);

            $oldDefaultArticleDataQuery = $olddatabase->getQuery(true)
                ->select('*')
                ->from('#__content')
                ->where('id = ' . $olddatabase->quote($referenceId));
            $oldDefaultArticleData = $olddatabase->setQuery($oldDefaultArticleDataQuery)->loadObject();
        }

        // The referenced old article may be missing; use an empty object so
        // the metadata below falls back to its defaults.
        $oldMeta = $oldDefaultArticleData ?: new stdClass();

        // Reuse an asset already registered for this article id, if any.
        $queryAsset = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__assets'))
            ->where($db->quoteName('name') . ' = ' . $db->quote('com_content.article.' . $referenceId));
        $asset = $db->setQuery($queryAsset)->loadObject();

        $introText = (string) $introText;
        $fullText = (string) $fullText;

        $article = new stdClass();
        $article->id = $referenceId;
        $article->asset_id = $asset ? $asset->id : 0;
        $article->title = $oldarticle->title;
        $article->alias = $this->slugify($oldarticle->title);
        $article->introtext = $noCleanHtml ? $introText : $this->cleanTheHtml($introText);
        $article->fulltext = $noCleanHtml ? $fullText : $this->cleanTheHtml($fullText);
        $article->state = 1;
        $article->catid = $destinationCategory;
        $article->created = $oldarticle->created;
        $article->created_by = $oldarticle->created_by;
        $article->created_by_alias = $oldarticle->created_by_alias;
        $article->modified = $oldarticle->modified;
        $article->modified_by = $oldarticle->modified_by;
        // NOTE(review): the empty strings below rely on the database layer
        // leaving empty int/datetime values to the column default - confirm.
        $article->checked_out = '';
        $article->checked_out_time = '';
        $article->publish_up = $oldarticle->publish_up;
        $article->publish_down = '';
        $article->images = '{"image_intro":"","float_intro":"","image_intro_alt":"","image_intro_caption":"","image_fulltext":"","float_fulltext":"","image_fulltext_alt":"","image_fulltext_caption":""}';
        $article->urls = '{"urla":"","urlatext":"","targeta":"","urlb":"","urlbtext":"","targetb":"","urlc":"","urlctext":"","targetc":""}';
        $article->attribs = '{"article_layout":"","show_title":"","link_titles":"","show_tags":"","show_intro":"","info_block_position":"","info_block_show_title":"","show_category":"","link_category":"","show_parent_category":"","link_parent_category":"","show_author":"","link_author":"","show_create_date":"","show_modify_date":"","show_publish_date":"","show_item_navigation":"","show_hits":"","show_noauth":"","urls_position":"","alternative_readmore":"","article_page_title":"","show_publishing_options":"","show_article_options":"","show_urls_images_backend":"","show_urls_images_frontend":""}';
        $article->version = 1;
        $article->ordering = !empty($oldMeta->ordering) ? $oldMeta->ordering : '';
        $article->metakey = !empty($oldMeta->metakey) ? $oldMeta->metakey : '';
        $article->metadesc = !empty($oldMeta->metadesc) ? $oldMeta->metadesc : '';
        $article->access = 1;
        $article->hits = !empty($oldMeta->hits) ? $oldMeta->hits : 0;
        $article->metadata = '{"robots":"","author":"","rights":""}';
        $article->featured = 0;
        $article->language = '*';
        $article->xreference = '';
        $article->note = '';
        $db->insertObject('#__content', $article, 'id');

        // Singular/plural wording, identical on screen and in the log.
        $report($contador . ($contador > 1 ? " completed tasks :::>> " : " completed task :::>> "));
        $report("Article created: " . $article->id . ": " . $article->title . " - Created at: " . $article->created . " :::>> ");

        $this->adicionarReferenciasAsset($article);
        $this->adicionarReferenciasWorkflow($article);
    }
}

/**
 * Read the content of one Form2Content field of one form from the old database.
 *
 * @param   object  $olddatabase  Driver connected to the old database.
 * @param   int     $formId       Id of the F2C form (#__f2c_form.id).
 * @param   int     $fieldId      Id of the F2C field to read.
 *
 * @return  string  The stored content, or an empty string when there is none.
 */
private function loadF2cFieldContent($olddatabase, $formId, $fieldId)
{
    $query = $olddatabase->getQuery(true)
        ->select('content')
        ->from('#__f2c_fieldcontent')
        ->where('formid = ' . $olddatabase->quote($formId))
        ->where('fieldid = ' . (int) $fieldId);

    return (string) $olddatabase->setQuery($query)->loadResult();
}
/**
 * Make sure the article has a row in #__assets ("com_content.article.<id>").
 *
 * When the asset is missing it is created as a child of the asset of the
 * article's category and #__content.asset_id is updated through
 * updateAssetId(). When the category asset cannot be found nothing is
 * created, so no orphan node is written to the asset tree.
 *
 * @param   object  $article  Article row/object; id, catid and title are read.
 *
 * @return  void
 */
private function adicionarReferenciasAsset($article)
{
    $db = Factory::getContainer()->get('DatabaseDriver');
    $assetName = 'com_content.article.' . $article->id;

    $queryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote($assetName));
    $db->setQuery($queryAsset);
    $asset = $db->loadObject();

    if ($asset) {
        echo "The asset already exists: " . $asset->id . ": " . $asset->name . " :::>> ";
        $this->logAction("The asset already exists: " . $asset->id . ": " . $asset->name . " :::>> ");

        return;
    }

    // The parent of an article asset is the asset of its category.
    $categoryAssetName = 'com_content.category.' . $article->catid;
    $queryCategoryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote($categoryAssetName));
    $db->setQuery($queryCategoryAsset);
    $categoryAsset = $db->loadObject();

    if (!$categoryAsset) {
        echo "Category asset not found: " . $categoryAssetName . "\n";
        $this->logAction("Category asset not found: " . $categoryAssetName . "\n");

        return;
    }

    // New node is appended after the current highest rgt value.
    // NOTE(review): this puts the node outside its parent's lft/rgt range;
    // consider rebuilding the asset tree after the import - confirm.
    $queryMaxRgt = $db->getQuery(true)
        ->select('MAX(rgt)')
        ->from($db->quoteName('#__assets'));
    $db->setQuery($queryMaxRgt);
    $maxRgt = (int) $db->loadResult();

    $asset = new stdClass();
    $asset->parent_id = $categoryAsset->id;
    $asset->lft = $maxRgt + 1;
    $asset->rgt = $maxRgt + 2;
    // A child node sits one level below its parent.
    $asset->level = (int) $categoryAsset->level + 1;
    $asset->name = $assetName;
    $asset->title = $article->title;
    $asset->rules = '{}';
    $db->insertObject('#__assets', $asset, 'id');

    echo "Asset created: " . $asset->id . ": " . $asset->name . " :::>> ";
    $this->logAction("Asset created: " . $asset->id . ": " . $asset->name . " :::>> ");
    $this->updateAssetId($article->id);
}
/**
 * Make sure the article has a row in #__workflow_associations.
 *
 * The lookup matches both the item id and the "com_content.article"
 * extension, so an association that belongs to another extension with the
 * same item id is not mistaken for the article's own.
 *
 * @param   object  $article  Article row/object; only id is read.
 * @param   int     $stageId  Workflow stage assigned to a new association
 *                            (defaults to 1, the value this script always used).
 *
 * @return  void
 */
private function adicionarReferenciasWorkflow($article, $stageId = 1)
{
    $db = Factory::getContainer()->get('DatabaseDriver');
    $extension = "com_content.article";

    $queryWorkflow = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__workflow_associations'))
        ->where($db->quoteName('item_id') . ' = ' . $db->quote($article->id))
        ->where($db->quoteName('extension') . ' = ' . $db->quote($extension));
    $db->setQuery($queryWorkflow);
    $workflow = $db->loadObject();

    if ($workflow) {
        echo "The workflow already exists: " . $workflow->item_id . ": " . $workflow->extension . "\n";
        $this->logAction("The workflow already exists: " . $workflow->item_id . ": " . $workflow->extension . "\n");

        return;
    }

    $workflow = new stdClass();
    $workflow->item_id = $article->id;
    $workflow->stage_id = (int) $stageId;
    $workflow->extension = $extension;
    // No key column is passed: this code never sets an "id" on the association.
    $db->insertObject('#__workflow_associations', $workflow);

    echo "The workflow was created: " . $workflow->item_id . ": " . $workflow->extension . "\n";
    $this->logAction("The workflow was created: " . $workflow->item_id . ": " . $workflow->extension . "\n");
}
// update asset_id in content table based on assets table and article id
/**
 * Copy the id of the asset named "com_content.article.<articleId>" into the
 * asset_id column of the matching #__content row.
 *
 * Reports "Asset not found" and changes nothing when no such asset exists.
 *
 * @param   int  $articleId  Id of the article in #__content.
 *
 * @return  void
 */
private function updateAssetId($articleId)
{
    $database = Factory::getContainer()->get('DatabaseDriver');

    $lookup = $database->getQuery(true);
    $lookup->select('*');
    $lookup->from($database->quoteName('#__assets'));
    $lookup->where($database->quoteName('name') . ' = ' . $database->quote('com_content.article.' . $articleId));
    $database->setQuery($lookup);
    $assetRow = $database->loadObject();

    // Guard clause: nothing to link when the asset is missing.
    if (!$assetRow) {
        echo "Asset not found: " . $articleId . "\n";
        $this->logAction("Asset not found: " . $articleId . "\n");

        return;
    }

    $update = $database->getQuery(true);
    $update->update($database->quoteName('#__content'));
    $update->set($database->quoteName('asset_id') . ' = ' . $database->quote($assetRow->id));
    $update->where($database->quoteName('id') . ' = ' . $database->quote($articleId));
    $database->setQuery($update);
    $database->execute();

    echo "Asset updated: " . $assetRow->id . ": " . $assetRow->name . " :::>> ";
    $this->logAction("Asset updated: " . $assetRow->id . ": " . $assetRow->name . " :::>> ");
}
// Log actions
/**
 * Append one timestamped line to import_log.txt in the configured log_path.
 *
 * @param   string  $message  Text to record.
 *
 * @return  void
 */
private function logAction($message)
{
    $targetFile = Factory::getConfig()->get('log_path') . '/import_log.txt';
    $timestamp = date('Y-m-d H:i:s');

    file_put_contents($targetFile, $timestamp . ' - ' . $message . PHP_EOL, FILE_APPEND);
}
// Clean HTML
/**
 * Clean imported HTML: drop class/style attributes, tidy empty attribute
 * space, replace bullet characters and strip tags that are not allowed.
 *
 * Only attributes inside tags are touched; the words "class" and "style" in
 * visible text and all other attributes (href, src, ...) are kept.
 *
 * @param   string  $string     HTML to clean.
 * @param   bool    $limpaTudo  True to strip every tag and return plain text.
 *
 * @return  string  The cleaned markup.
 */
private function cleanTheHtml($string, $limpaTudo = false)
{
    $string = (string) $string;

    // Match one class/style attribute (double-quoted, single-quoted or
    // unquoted value) inside an opening tag and keep the rest of the tag.
    $attributePattern = '/(<[a-z][^<>]*?)\s+(?:class|style)\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)/i';

    // One pass removes at most one attribute per tag, so repeat until a
    // pass finds nothing (e.g. a tag carrying both class and style).
    do {
        $cleaned = preg_replace($attributePattern, '$1', $string, -1, $removed);
        if ($cleaned === null) {
            // Regex engine error: keep what we have instead of losing the text.
            break;
        }
        $string = $cleaned;
    } while ($removed > 0);

    // Turn "<p >" into "<p>", and likewise for any other tag name.
    $tidied = preg_replace('/<([a-z][a-z0-9]*)\s+>/i', '<$1>', $string);
    if ($tidied !== null) {
        $string = $tidied;
    }

    // Replace the bullet character with a dash.
    $string = str_replace('•', '-', $string);

    return $limpaTudo ? strip_tags($string) : strip_tags($string, '<iframe><img><p><span><a><b><div><strong><em><br><br/><br />');
}
/**
 * Empty import_log.txt in the configured log_path, if the file exists,
 * and print what was done.
 *
 * @return  void
 */
private function clearLog()
{
    $targetFile = Factory::getConfig()->get('log_path') . '/import_log.txt';

    if (!file_exists($targetFile)) {
        echo "Log file does not exist.\n";

        return;
    }

    // Writing an empty string truncates the file.
    file_put_contents($targetFile, '');
    echo "Log file cleared.\n";
}
// Execute the checkOldIds function
/**
 * Entry point of the CLI application: read the command-line options and run
 * the import through checkOldIds().
 *
 * Options: --clear_screen, --clear_log, --skip, --limit, --start_date,
 * --end_date, --source_category, --destination_category,
 * --standard_articles, --no_clean_html.
 *
 * @return  void
 */
public function doExecute()
{
    // Boolean switches: "true", "1", "yes" and "on" enable an option, while
    // "false", "0", "no", "off" or an absent option leave it disabled.
    // NOTE(review): a bare flag without a value is presumably delivered as a
    // truthy value by the input class - confirm.
    $flag = function ($name) {
        return filter_var($this->input->get($name, false), FILTER_VALIDATE_BOOLEAN);
    };

    if ($flag('clear_screen')) {
        // ANSI escape sequence: erase the display and move the cursor home.
        // (This class defines no clearTerminalScreen() helper to call.)
        echo "\033[2J\033[H";
    }

    if ($flag('clear_log')) {
        $this->clearLog();
    }

    // Numeric options are cast so that stray text cannot reach the queries.
    $skipItems = max(0, (int) $this->input->get('skip', 0));
    $limit = max(0, (int) $this->input->get('limit', DEFAULT_LIMIT));
    $startDate = $this->input->get('start_date', START_DATE);
    $endDate = $this->input->get('end_date', END_DATE);
    $sourceCategory = (int) $this->input->get('source_category', SOURCE_CATEGORY);
    $destinationCategory = (int) $this->input->get('destination_category', TARGET_CATEGORY);
    $standardArticles = $flag('standard_articles');
    $noCleanHtml = $flag('no_clean_html');

    $this->checkOldIds($skipItems, $limit, $startDate, $endDate, $sourceCategory, $destinationCategory, $standardArticles, $noCleanHtml);
}
// Implement the getName method
/**
 * Return the name of this CLI application.
 *
 * @return  string  Always "ImportArticles".
 */
public function getName()
{
    $applicationName = 'ImportArticles';

    return $applicationName;
}
}
// Instantiate the import application and run it.
$importApplication = JApplicationCli::getInstance('ImportArticles');
$importApplication->execute();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment