Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
parseDate_Islands_iTVF.sql #blog
-- http://spaghettidba.com/2012/03/23/sql-server-and-custom-date-formats/
-- =============================================
-- Author: Gianluca Sartori - @spaghettidba
-- Create date: 2011-10-14
-- Description: Parses a date from its string
-- representation, using the supplied
-- format string.
-- =============================================
CREATE FUNCTION [dbo].[parseDate](@date AS varchar(50), @format_string varchar(50))
RETURNS TABLE
AS
RETURN (
WITH
-- ================================================
-- Build 4 rows
-- ================================================
fourRows (N) AS (
SELECT 1
UNION ALL
SELECT 2
UNION ALL
SELECT 3
UNION ALL
SELECT 4
),
-- ================================================
-- build a small 64 rows tally table
-- ================================================
cteTally (N) AS (
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM fourRows AS A
CROSS JOIN fourRows AS B
CROSS JOIN fourRows AS C
),
allowedTokens (code, tokenGroup) AS (
SELECT code COLLATE Latin1_General_CS_AS, value
FROM (
SELECT 'YYYY', 'YEAR'
UNION ALL SELECT 'YY', 'YEAR'
UNION ALL SELECT 'Y', 'YEAR'
UNION ALL SELECT 'MMMM', 'MONTH'
UNION ALL SELECT 'MM', 'MONTH'
UNION ALL SELECT 'M', 'MONTH'
UNION ALL SELECT 'DD', 'DAY'
UNION ALL SELECT 'D', 'DAY'
UNION ALL SELECT 'HH', 'HOUR'
UNION ALL SELECT 'H', 'HOUR'
UNION ALL SELECT 'hh', 'HOUR'
UNION ALL SELECT 'h', 'HOUR'
UNION ALL SELECT 'mm', 'MINUTE'
UNION ALL SELECT 'm', 'MINUTE'
UNION ALL SELECT 'ss', 'SECOND'
UNION ALL SELECT 's', 'SECOND'
UNION ALL SELECT 'fff', 'MILLISECOND'
UNION ALL SELECT 'f', 'MILLISECOND'
UNION ALL SELECT 'tt', 'AMPM'
UNION ALL SELECT 't', 'AMPM'
) AS susbst (code, value)
),
allowedDelimiters (delimiter) AS (
SELECT ' '
UNION ALL SELECT '-'
UNION ALL SELECT '/'
UNION ALL SELECT '.'
UNION ALL SELECT ':'
UNION ALL SELECT ','
),
-- ================================================
-- make the parameter case-sensitive
-- ================================================
formatStringCS (format_string) AS (
SELECT @format_string COLLATE Latin1_General_CS_AS
),
inputStringCS (input_string) AS (
SELECT @date COLLATE Latin1_General_CS_AS
),
-- ================================================
-- join the format string to the tally table
-- and add some rankings to identify the "islands"
-- ================================================
tokenizedFormatString AS (
SELECT *,
subGroupId =
DENSE_RANK() OVER(
ORDER BY C, _subGroupId
),
delimitedGroupId =
DENSE_RANK() OVER(
ORDER BY isDelimiter, _delimitedGroupId
)
FROM (
SELECT N,
SUBSTRING(format_string, N, 1) AS C,
_subGroupId =
N - ROW_NUMBER() OVER (
PARTITION BY SUBSTRING(format_string, N, 1)
ORDER BY N
),
_delimitedGroupId =
ROW_NUMBER() OVER (
ORDER BY N
) -
ROW_NUMBER() OVER (
PARTITION BY CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END
ORDER BY N
),
isDelimiter = CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END
FROM cteTally AS TALLY
CROSS JOIN formatStringCS AS FMT
LEFT JOIN allowedDelimiters AS AD
ON delimiter = SUBSTRING(format_string, TALLY.N, 1)
WHERE N <= LEN(@format_string)
) AS fs
),
-- ================================================
-- join the input string to the tally table
-- and add some rankings to identify the "islands"
-- ================================================
tokenizedInputString AS (
SELECT *,
delimitedGroupId =
DENSE_RANK() OVER(
ORDER BY G, _delimitedGroupId
)
FROM (
SELECT N, C, G,
_delimitedGroupId =
N - ROW_NUMBER() OVER (
PARTITION BY G
ORDER BY N
)
FROM (
SELECT N,
SUBSTRING(input_string, N, 1) AS C,
CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END AS G
FROM cteTally AS TALLY
CROSS JOIN inputStringCS AS INSTR
LEFT JOIN allowedDelimiters AS AD
ON delimiter = SUBSTRING(input_string, TALLY.N, 1)
WHERE N <= LEN(input_string)
) AS in_str
) AS in_str
),
-- ================================================
-- Aggregate groups on format string
-- ================================================
groupedFormatString AS (
SELECT *,
delimitedGroupStart =
MIN(subGroupStart) OVER(
PARTITION BY delimitedGroupId
),
subGroupCount =
COUNT(*) OVER(
PARTITION BY delimitedGroupId
)
FROM (
SELECT delimitedGroupId,
subGroupId,
subGroupLen = COUNT(*),
subGroupStart = MIN(N),
C = MIN(C)
FROM tokenizedFormatString
GROUP BY delimitedGroupId, subGroupId
) AS src
),
-- ================================================
-- Aggregate groups on input string
-- ================================================
groupedInputString AS (
SELECT delimitedGroupId,
groupLen = COUNT(*),
delimitedGroupStart = MIN(N),
G = MIN(G)
FROM tokenizedInputString
GROUP BY delimitedGroupId
),
-- ================================================
-- Repeat ranking for format string
-- ================================================
rankedFormatString AS (
SELECT *,
delimitedGroupRank =
DENSE_RANK() OVER (
ORDER BY delimitedGroupStart
) ,
subGroupRank =
DENSE_RANK() OVER (
PARTITION BY delimitedGroupId
ORDER BY subGroupStart
)
FROM groupedFormatString AS GFS
LEFT JOIN allowedTokens AS AT
ON REPLICATE(GFS.C,GFS.subGroupLen) = AT.code
),
-- ================================================
-- Repeat ranking for input string
-- ================================================
rankedInputString AS (
SELECT *,
delimitedGroupRank =
DENSE_RANK() OVER (
ORDER BY delimitedGroupStart
)
FROM groupedInputString AS GFS
),
-- ================================================
-- Discover values in the input string
-- ================================================
valuedInputString AS (
SELECT RFS.tokenGroup, dateValues.value
FROM rankedInputString AS RIS
INNER JOIN rankedFormatString AS RFS
ON RIS.delimitedGroupRank = RFS.delimitedGroupRank
CROSS APPLY (
SELECT
CASE subGroupCount
WHEN 1 THEN
SUBSTRING(@date, RIS.delimitedGroupStart, RIS.groupLen)
ELSE
SUBSTRING(@date,
RIS.delimitedGroupStart +
RFS.subGroupStart -
RFS.delimitedGroupStart,
RFS.subGroupLen)
END
) AS dateValues (value)
WHERE tokenGroup IS NOT NULL
),
-- ================================================
-- Verify tokens
-- This is just to ensure that only one value per group
-- is specified in the format string, disallowing ambiguity
-- ================================================
verifiedTokens AS (
SELECT tokenGroup, value,
patternVerify = CONVERT(datetime,
CASE COUNT(*) OVER (PARTITION BY tokenGroup)
WHEN 1 THEN '2000-01-01 00:00:00.000' -- valid format
ELSE '2000-5 :00.000' -- random invalid format
END,
121)
FROM valuedInputString AS VIS
),
-- ================================================
-- Collapse all tokens in a single line
-- ================================================
pivotedTokens AS (
SELECT year_value = LEFT('2000',4 - LEN(ISNULL([YEAR],''))) + ISNULL([YEAR],''),
month_value = CASE
WHEN ISNUMERIC([MONTH]) = 1
THEN RIGHT('00' + ISNULL([MONTH],''),2)
ELSE [MONTH]
END,
day_value = RIGHT('00' + ISNULL([DAY],''),2),
hour_value = RIGHT('00' + ISNULL(
CASE
WHEN CAST([HOUR] AS int) < 12 AND LEFT([AMPM],1) = 'P'
THEN CAST(CAST([HOUR] AS int) + 12 AS varchar(2))
ELSE [HOUR]
END
,''),2),
minute_value = RIGHT('00' + ISNULL([MINUTE],''),2),
second_value = RIGHT('00' + ISNULL([SECOND],''),2),
millisecond_value = RIGHT('000' + ISNULL([MILLISECOND],''),3)
FROM (
SELECT [YEAR], [MONTH], [DAY],
[HOUR], [MINUTE], [SECOND],
[MILLISECOND], [AMPM]
FROM verifiedTokens AS VT
PIVOT (
MIN(value)
FOR tokenGroup IN (
[YEAR], [MONTH], [DAY],
[HOUR], [MINUTE], [SECOND],
[MILLISECOND], [AMPM]
)
) AS p
) AS src
)
-- ================================================
-- Build the output date
-- ================================================
SELECT parsedDate =
CASE
WHEN ISNUMERIC(month_value) = 1 THEN
CONVERT(datetime,
year_value + '-' + month_value + '-' + day_value + ' ' +
hour_value + ':' + minute_value + ':' + second_value + '.' + millisecond_value,
121)
ELSE
CONVERT(datetime,
month_value + ' ' + day_value + ' ' + year_value + ' ' +
hour_value + ':' + minute_value + ':' + second_value + ':' + millisecond_value,
109)
END
FROM pivotedTokens
)
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.