Skip to content

Instantly share code, notes, and snippets.

@mbourgon
Created May 5, 2015 18:59
Show Gist options
  • Save mbourgon/d390aca43cf9252a402f to your computer and use it in GitHub Desktop.
Save mbourgon/d390aca43cf9252a402f to your computer and use it in GitHub Desktop.
Basic dynamic CSV ripper - complete implementation.
USE tempdb
/*
2015/04/22 - mdb - 2.00 - pull raw CSV, shred via function, pivot into a virtual table, then insert into
actual table based on existing fields. Practically: forwards/backwards compatibility.
Add new fields to your target table, and as they show up in the new file they'll be added.
Practically, they'll have to rename the file when they do this, so that we know the
names of the new fields. But that's just a parameter change in the job.
2015/05/05 - mdb - 2.1 - very basic implementation, complete code. Just fill out the parameters!
Downside: SLOW?! A 5mb file, which has 106k rows, takes 7 seconds.
PREREQs: Jeff Moden's 8k CSV parser, or something like it.
*/
--BULK INSERT requires dynamic SQL
--@sql is NVARCHAR(MAX): the generated pivot statement grows with the column count,
--and NVARCHAR(4000) would silently truncate it for wide files.
DECLARE @filename VARCHAR(500), @sql NVARCHAR(MAX), @base_filename VARCHAR(500), @servername VARCHAR(255) --@servername currently unused; kept for job plumbing
DECLARE @Columns VARCHAR (MAX), @Columns_Short VARCHAR (MAX), @Columns_Insert VARCHAR(MAX)
DECLARE @target_table sysname
DECLARE @ErrorMessage NVARCHAR(4000);
DECLARE @ErrorSeverity INT;
DECLARE @ErrorState INT;
--Parameters: source file and destination table.
SELECT @filename = 'c:\temp\blah.csv'
--Bare file name = text after the last backslash. Guard the no-backslash case:
--CHARINDEX returns 0 there, and RIGHT(@filename, -1) raises "Invalid length parameter".
SELECT @base_filename = CASE WHEN CHARINDEX('\', @filename) = 0
                             THEN @filename
                             ELSE RIGHT(@filename, CHARINDEX('\', REVERSE(@filename)) - 1)
                        END
SELECT @target_table = 'blahtemp'
--the code has to be run in database with the table, since I use nondynamic sql for the columns
-------------------------------
--Creating tables for process--
-------------------------------
--Raw BULK INSERT landing table for the header row. Kept ID-free so the bulk
--load sees a single varchar column.
IF OBJECT_ID('tempdb..#csv_shred_stage_headers_insert') IS NOT NULL
    DROP TABLE #csv_shred_stage_headers_insert
CREATE TABLE #csv_shred_stage_headers_insert
    (
      [resultant] [varchar](4000)
    )

--Raw BULK INSERT landing table for the data rows (same reason: no ID column).
IF OBJECT_ID('tempdb..#csv_shred_insert') IS NOT NULL
    DROP TABLE #csv_shred_insert
CREATE TABLE #csv_shred_insert
    (
      [resultant] [varchar](4000)
    )

--Second copy of the data carrying an IDENTITY, so every source row gets a
--stable id for the PIVOT step.
IF OBJECT_ID('tempdb..#csv_shred_stage') IS NOT NULL
    DROP TABLE #csv_shred_stage
CREATE TABLE #csv_shred_stage
    (
      ID INT IDENTITY,
      [resultant] [varchar](4000)
    )

--Holds the shredded (row id, field position, field value) keypairs.
IF OBJECT_ID('tempdb..#csv_shred_split') IS NOT NULL
    DROP TABLE #csv_shred_split
CREATE TABLE #csv_shred_split
    (
      [id] [int] NOT NULL,
      [ItemNumber] [int] NULL,
      [Item] [varchar](500)
    )
CREATE UNIQUE CLUSTERED INDEX ucidx__csv_shred_split ON #csv_shred_split ([id], [ItemNumber])
--------------------------
--Import JUST the Header--
--------------------------
--Load only row 1 (the column-name line) into the header staging table.
PRINT CONVERT(VARCHAR(30),GETDATE(),120) + ' BULK INSERT of header begins for ' + @filename
SET @sql =
'BULK INSERT #csv_shred_stage_headers_insert
FROM ''' + @filename + '''
WITH
(
LASTROW = 1,
FIELDTERMINATOR = ''\0'' --the \0 is "null terminator"; needed to make sure it doesnt try and parse
)'
BEGIN TRY
EXEC sp_executesql @sql
END TRY
BEGIN CATCH
PRINT 'File either locked, Does Not Exist, or format has changed; see error message for more details'
SELECT @ErrorMessage = ERROR_MESSAGE(),
@ErrorSeverity = ERROR_SEVERITY(),
@ErrorState = ERROR_STATE();
IF @@TRANCOUNT > 0
ROLLBACK TRANSACTION;
PRINT 'Error Severity: ' + CONVERT(VARCHAR(30), @ErrorSeverity)
PRINT 'Error State: ' + CONVERT(VARCHAR(30), @ErrorState)
--bug fix: this line printed the message text under an "Error Severity" label
PRINT 'Error Message: ' + @ErrorMessage
--commented this out so we can run it without files.
--RAISERROR (@ErrorMessage, -- Message text.
-- @ErrorSeverity, -- Severity.
-- @ErrorState -- State.
-- );
END CATCH
---------------
--Import Data--
---------------
--Load everything after the header line (FIRSTROW = 2) into the raw data table.
PRINT CONVERT(VARCHAR(30),GETDATE(),120) + ' BULK INSERT of data begins for ' + @filename
--inserts into view in order to add the ID column so the PIVOT works. insertview is just a select *, minus the ID
SET @sql =
'BULK INSERT #csv_shred_insert
FROM ''' + @filename + '''
WITH
(
FIRSTROW = 2,
FIELDTERMINATOR = ''\0'' --the \0 is "null terminator"; needed to make sure it doesnt try and parse
)'
BEGIN TRY
EXEC sp_executesql @sql
END TRY
BEGIN CATCH
PRINT 'File either locked, Does Not Exist, or format has changed; see error message for more details'
SELECT @ErrorMessage = ERROR_MESSAGE(),
@ErrorSeverity = ERROR_SEVERITY(),
@ErrorState = ERROR_STATE();
IF @@TRANCOUNT > 0
ROLLBACK TRANSACTION;
PRINT 'Error Severity: ' + CONVERT(VARCHAR(30), @ErrorSeverity)
PRINT 'Error State: ' + CONVERT(VARCHAR(30), @ErrorState)
--bug fix: this line printed the message text under an "Error Severity" label
PRINT 'Error Message: ' + @ErrorMessage
--commented this out so we can run it without files.
--RAISERROR (@ErrorMessage, -- Message text.
-- @ErrorSeverity, -- Severity.
-- @ErrorState -- State.
-- );
END CATCH
PRINT CONVERT(VARCHAR(30),GETDATE(),120) + ' BULK INSERT ends for ' + @filename
--Copy the raw rows into the staging table so each row picks up an IDENTITY id.
--(With a permanent table + view we could skip this intermediate copy.)
INSERT INTO #csv_shred_stage (resultant)
SELECT resultant
FROM #csv_shred_insert
--===== Shred every row's CSV into (id, position, value) keypairs via CROSS APPLY.
--NULLIF converts blank values ('') to NULL as they land; left as '' they later
--fail with "error converting varchar to decimal" against typed target columns.
INSERT INTO #csv_shred_split
(id, ItemNumber, Item)
SELECT stg.id, kv.ItemNumber, NULLIF(kv.Item, '')
FROM #csv_shred_stage stg
CROSS APPLY dbo.DelimitedSplit8K(stg.resultant, ',') kv
----------------------
--Building the PIVOT--
----------------------
--(I think we) have to do this part here, and not earlier - we need to match up all the tables
-- (though I wonder if we could fake the "split" and just self-join)
--Builds three comma lists from the intersection of (file header names) and
--(columns that actually exist on @target_table), keyed by field position:
--  @Columns        : '[1] as [colA],[2] as [colB]' -- pivot output -> real names
--  @Columns_Short  : '[1],[2]'                     -- the PIVOT IN () list
--  @Columns_Insert : '[colA],[colB]'               -- the INSERT/SELECT list
--The COALESCE(@v + ',','') trick seeds the list on the first row, appends after.
--NOTE(review): SELECT @v = @v + ... with ORDER BY is not a documented-guaranteed
--ordering in SQL Server; it works in practice on single-stream plans but a
--STRING_AGG/FOR XML PATH rewrite would be safer - confirm before relying on order.
SELECT @Columns=COALESCE(@Columns + ',','') + QUOTENAME(ItemNumber) + ' as ' + QUOTENAME(item)
, @Columns_Short = COALESCE(@Columns_Short + ',','') + QUOTENAME(ItemNumber)
, @Columns_Insert = COALESCE(@Columns_Insert + ',','') + QUOTENAME(item)
FROM
(
--Distinct (position, header-name) pairs...
SELECT DISTINCT split.ItemNumber, headers.item
From #csv_shred_split split
INNER JOIN
(
--...taken from the shredded header row...
SELECT split.ItemNumber, split.item
FROM #csv_shred_stage_headers_insert headers
CROSS APPLY dbo.DelimitedSplit8K(resultant,',') split
)
headers
ON headers.ItemNumber = split.ItemNumber
--...filtered to header names that match an existing column on the target table;
--this is what makes new/renamed file fields a no-op until the column is added.
INNER JOIN INFORMATION_SCHEMA.COLUMNS
ON TABLE_NAME = @target_table
AND columns.COLUMN_NAME = headers.item
) AS B
ORDER BY B.ItemNumber
--We need the CTE so that we can calculate the import_datetime more easily.
-- (import_datetime not included in this stripped down version)
--Pivot the keypairs back into columns by field position, then insert only the
--columns that matched the target table.
SET @SQL='
;with CTE_Import as
(
SELECT ' + @Columns
+ ' FROM
(
SELECT id, ItemNumber, item FROM
#csv_shred_split ) AS source
PIVOT
(max(item) FOR source.ItemNumber IN (' + @Columns_Short + ')
)AS pvt
)
insert into ' + QUOTENAME(@target_table) + ' (' + @Columns_Insert + ')
select ' + @Columns_Insert + '
from CTE_Import'
--QUOTENAME on @target_table: the raw name was concatenated into dynamic SQL,
--which breaks on odd names and is an injection vector if the parameter ever
--comes from outside. (Assumes an unqualified, single-part table name.)
PRINT @sql
--sp_executesql instead of EXEC(): consistent with the BULK INSERT calls above
--and plan-cache friendly.
EXEC sp_executesql @sql
PRINT CONVERT(VARCHAR(30),GETDATE(),120) + ' insert into _main ends for ' + @filename
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment