Skip to content

Instantly share code, notes, and snippets.

@peta
Created April 27, 2012 16:23
Show Gist options
  • Save peta/2510556 to your computer and use it in GitHub Desktop.
Save peta/2510556 to your computer and use it in GitHub Desktop.
D7: Quick'n'dirty bulk import for article nodes

During several relaunches I had to migrate thousands of content items (mostly news articles) from Typo3 (WTF?!??) sites to the new (Drupal-based) ones. Because Drupal's Bulk Import API or the Feeds module seemed way to overdose, I decided to accomplish that using only MySQL's toolbelt: Stored Procedures.

Usage

CALL do_batch_import(0, 1000);

After running Pathauto's batch updater, the field new_url in table ats_content_transition.tmp_id_news had been updated with the according url alias. (see Drupal table url_alias). Finally the old realurl aliases were put into a low-level Redis-based URL redirection layer (cool URIs don't change and so) and I was done.

DELIMITER $$
CREATE DEFINER=`root`@`localhost` PROCEDURE `do_batch_import`(IN from_id INT, IN to_id INT)
BEGIN
-- temp data variables
DECLARE news_id, time_edited, time_created INT;
DECLARE title, body TEXT;
-- loop variables
DECLARE done BOOLEAN;
DECLARE n_imports INT;
DECLARE pending_imports CURSOR FOR
-- select only news articles that haven't bee imported yet
SELECT news.uid, news.tstamp, news.crdate, news.title, news.bodytext
FROM `ats_alt_id`.`news_view` news
LEFT JOIN `ats_content_transition`.`tmp_id_news` news_imported
ON news.uid=news_imported.tx_news_id
WHERE news_imported.tx_news_id IS NULL
AND ((news.uid >= from_id) AND (news.uid <= to_id));
DECLARE CONTINUE HANDLER FOR NOT FOUND
SET done = TRUE;
OPEN pending_imports;
SELECT FOUND_ROWS() INTO n_imports;
SET done = (n_imports = 0);
WHILE NOT done DO
-- try to fetch row
FETCH pending_imports
INTO news_id, time_edited, time_created, title, body;
IF NOT done THEN
-- could fetch another row; import it
CALL `ats_content_transition`.`import_news_article`(
news_id, time_edited, time_created, title, body);
END IF;
END WHILE;
CLOSE pending_imports;
-- report numer of imported articles
SELECT CONCAT('Imported ', n_imports, ' news articles');
END
$$
DELIMITER $$
CREATE DEFINER=`root`@`localhost` PROCEDURE `import_news_article`(
IN tx_news_id INT,
IN time_edited INT,
IN time_created INT,
IN title TEXT,
IN body TEXT
)
BEGIN
DECLARE new_nid INT(11);
DECLARE new_vid INT(11) DEFAULT 0;
-- insert row into `node` and get created `nid`
INSERT INTO `ats_thema_id`.`node` VALUES (
NULL, -- nid
new_vid, -- vid
'article', -- type
'de', -- language
title, -- title
1, -- uid
1, -- status
time_created, -- created
time_edited, -- changed
0, -- comment
0, -- promote
0, -- sticky
0, -- tnid
0 -- translate
);
SELECT LAST_INSERT_ID() INTO new_nid;
-- insert row into `node_revision` and get created `vid`
INSERT INTO `ats_thema_id`.`node_revision` VALUES (
new_nid, -- nid
NULL, -- vid
1, -- uid
title, -- title
'', -- log
time_edited, -- timestamp
1, -- status
0, -- comment
0, -- promote
0 -- sticky
);
SELECT LAST_INSERT_ID() INTO new_vid;
-- update `vid` column of previously created row in `node`
UPDATE `ats_thema_id`.`node`
SET vid=new_vid
WHERE nid=new_nid;
-- insert body field
INSERT INTO `ats_thema_id`.`field_data_body` VALUES (
'node', -- entity_type
'article', -- bundle
0, -- deleted
new_nid, -- entity_id
new_vid, -- revision_id
'und', -- language
0, -- delta
body, -- body_value
'', -- body_summary
'filtered_html' -- body_format
);
INSERT INTO `ats_thema_id`.`field_revision_body` VALUES (
'node', -- entity_type
'article', -- bundle
0, -- deleted
new_nid, -- entity_id
new_vid, -- revision_id
'und', -- language
0, -- delta
body, -- body_value
'', -- body_summary
'filtered_html' -- body_format
);
-- insert row into `tmp_id_news`
INSERT INTO `ats_content_transition`.`tmp_id_news` VALUES (
new_nid, -- nid
new_vid, -- vid
NULL, -- new_url
tx_news_id -- tx_news_id
);
-- find urlconf aliases and store them (9 slocs in SQL -- 70 in PHP)
SET @news_id_serialized = CONCAT('s:', LENGTH(tx_news_id), ':"', tx_news_id, '";');
SET @substr = CONCAT('s:7:"tt_news";', @news_id_serialized);
INSERT INTO `ats_content_transition`.`realurl_aliases` (
SELECT
NULL, -- pk+ai
new_nid, -- nid
spurl, -- url
tx_news_id -- tx_news_id
FROM
ats_alt_id.tx_realurl_urldecodecache
WHERE
INSTR(CONVERT(content USING utf8), @substr)
);
END
$$
DELIMITER $$
CREATE VIEW `ats_alt_id`.`news_view` AS
SELECT
`tt_news`.`uid` AS `uid`,
`tt_news`.`pid` AS `pid`,
`tt_news`.`tstamp` AS `tstamp`,
`tt_news`.`crdate` AS `crdate`,
`tt_news`.`datetime` AS `datetime`,
`tt_news`.`title` AS `title`,
`tt_news`.`bodytext` AS `bodytext`
FROM `tt_news`
WHERE (`tt_news`.`deleted` = 0) AND (`tt_news`.`hidden` = 0);
$$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment