Skip to content

Instantly share code, notes, and snippets.

@NickCraver
Last active April 20, 2017 18:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NickCraver/7e645feb8afb173c3e99cea9d0028ddc to your computer and use it in GitHub Desktop.
Save NickCraver/7e645feb8afb173c3e99cea9d0028ddc to your computer and use it in GitHub Desktop.
Setup scripts for TrafficLogs.sql - table partitioning and archive movement fun.
-- For production
--CREATE DATABASE [TrafficLogs] CONTAINMENT = NONE ON
--PRIMARY ( NAME = N'TrafficLogs', FILENAME = N'E:\Data\TrafficLogs.mdf' , SIZE = 102400000KB , FILEGROWTH = 5120000KB),
--FILEGROUP [TrafficLogs_D] ( NAME = N'TrafficLogs_D', FILENAME = N'D:\Data\TrafficLogs_D.ndf' , SIZE = 102400000KB, FILEGROWTH = 51200000KB),
--FILEGROUP [TrafficLogs_E] ( NAME = N'TrafficLogs_E', FILENAME = N'E:\Data\TrafficLogs_E.ndf' , SIZE = 1024000000KB, FILEGROWTH = 51200000KB)
--LOG ON ( NAME = N'TrafficLogs_log', FILENAME = N'D:\Data\TrafficLogs.ldf' , SIZE = 51200000KB , FILEGROWTH = 10240000KB)
--GO
-- For local development
Declare @dataPath nvarchar(500) = Cast(ServerProperty('instancedefaultdatapath') as nvarchar(500)),
@logPath nvarchar(500) = Cast(ServerProperty('instancedefaultlogpath') as nvarchar(500)),
@sql nvarchar(4000) = '
CREATE DATABASE [TrafficLogs] CONTAINMENT = NONE ON
PRIMARY ( NAME = N''TrafficLogs'', FILENAME = N''<DataPath>TrafficLogs.mdf'' , SIZE = 1024000KB , FILEGROWTH = 512000KB),
FILEGROUP [TrafficLogs_D] ( NAME = N''TrafficLogs_D'', FILENAME = N''<DataPath>TrafficLogs_D.ndf'' , SIZE = 512000KB, FILEGROWTH = 512000KB),
FILEGROUP [TrafficLogs_E] ( NAME = N''TrafficLogs_E'', FILENAME = N''<DataPath>TrafficLogs_E.ndf'' , SIZE = 512000KB, FILEGROWTH = 512000KB)
LOG ON ( NAME = N''TrafficLogs_log'', FILENAME = N''<LogPath>TrafficLogs.ldf'' , SIZE = 512000KB , FILEGROWTH = 512000KB)';
Set @sql = Replace(Replace(@sql, '<DataPath>', @dataPath), '<LogPath>', @logPath);
Exec (@sql);
GO
ALTER DATABASE [TrafficLogs] SET RECOVERY SIMPLE WITH NO_WAIT;
ALTER DATABASE [TrafficLogs] SET COMPATIBILITY_LEVEL = 130;
ALTER DATABASE [TrafficLogs] SET ANSI_NULL_DEFAULT OFF;
ALTER DATABASE [TrafficLogs] SET ANSI_NULLS OFF;
ALTER DATABASE [TrafficLogs] SET ANSI_PADDING OFF;
ALTER DATABASE [TrafficLogs] SET ANSI_WARNINGS OFF;
ALTER DATABASE [TrafficLogs] SET ARITHABORT OFF;
ALTER DATABASE [TrafficLogs] SET AUTO_CLOSE OFF;
ALTER DATABASE [TrafficLogs] SET AUTO_SHRINK OFF;
ALTER DATABASE [TrafficLogs] SET AUTO_CREATE_STATISTICS ON(INCREMENTAL = OFF);
ALTER DATABASE [TrafficLogs] SET AUTO_UPDATE_STATISTICS ON;
ALTER DATABASE [TrafficLogs] SET CURSOR_CLOSE_ON_COMMIT OFF;
ALTER DATABASE [TrafficLogs] SET CURSOR_DEFAULT GLOBAL;
ALTER DATABASE [TrafficLogs] SET CONCAT_NULL_YIELDS_NULL OFF;
ALTER DATABASE [TrafficLogs] SET NUMERIC_ROUNDABORT OFF;
ALTER DATABASE [TrafficLogs] SET QUOTED_IDENTIFIER OFF;
ALTER DATABASE [TrafficLogs] SET RECURSIVE_TRIGGERS OFF;
ALTER DATABASE [TrafficLogs] SET DISABLE_BROKER;
ALTER DATABASE [TrafficLogs] SET AUTO_UPDATE_STATISTICS_ASYNC OFF;
ALTER DATABASE [TrafficLogs] SET DATE_CORRELATION_OPTIMIZATION OFF;
ALTER DATABASE [TrafficLogs] SET PARAMETERIZATION SIMPLE;
ALTER DATABASE [TrafficLogs] SET READ_COMMITTED_SNAPSHOT OFF;
ALTER DATABASE [TrafficLogs] SET READ_WRITE;
ALTER DATABASE [TrafficLogs] SET RECOVERY FULL;
ALTER DATABASE [TrafficLogs] SET MULTI_USER;
ALTER DATABASE [TrafficLogs] SET PAGE_VERIFY CHECKSUM;
ALTER DATABASE [TrafficLogs] SET TARGET_RECOVERY_TIME = 0 SECONDS;
ALTER DATABASE [TrafficLogs] SET DELAYED_DURABILITY = DISABLED;
GO
USE [TrafficLogs]
GO
ALTER DATABASE SCOPED CONFIGURATION SET MAXDOP = 0;
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET MAXDOP = PRIMARY;
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET LEGACY_CARDINALITY_ESTIMATION = PRIMARY;
ALTER DATABASE SCOPED CONFIGURATION SET PARAMETER_SNIFFING = ON;
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET PARAMETER_SNIFFING = PRIMARY;
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
ALTER DATABASE SCOPED CONFIGURATION FOR SECONDARY SET QUERY_OPTIMIZER_HOTFIXES = PRIMARY;
GO
IF NOT EXISTS (SELECT name FROM sys.filegroups WHERE is_default=1 AND name = N'PRIMARY') ALTER DATABASE [TrafficLogs] MODIFY FILEGROUP [PRIMARY] DEFAULT
GO
Use [TrafficLogs];
DECLARE @minDate datetime = '2010-12-01',
@maxDate datetime = '2026-01-01',
@fgRecent sysname = 'TrafficLogs_D',
@fgArchive sysname = 'TrafficLogs_E',
@recentCutoff datetime = DateAdd(Month, -4, GETUTCDATE()),
@sql nvarchar(max) = '';
/* Drops:
Drop Partition Scheme PS_HAProxyLogs;
Drop Partition Function PF_HAProxyLogs;
*/
With Months As
(Select DateAdd(Month, 1, @minDate) MonthStart
Union All
Select DateAdd(Month, 1, MonthStart)
From Months
Where MonthStart < @maxDate
)
Select *
Into #MonthStarts
From Months
Option (MaxRecursion 1000);
If Not Exists (Select 1 From sys.partition_functions Where name = N'PF_HAProxyLogs')
Begin
Set @sql = @sql +
'Create Partition Function PF_HAProxyLogs (datetime) As Range Right For Values (' + char(10) +
Reverse(Stuff(Reverse(Stuff((
Select char(10) + '''' + CONVERT(varchar, MonthStart, 120) + ''','
From #MonthStarts
For Xml Path(''), Type).value('.', 'nvarchar(MAX)'), 1, 1,'')
), 1, 1, '')) + ');' + char(10);
End;
If Not Exists (Select 1 From sys.partition_schemes Where name = N'PS_HAProxyLogs')
Begin
Set @sql = @sql +
'Create Partition Scheme PS_HAProxyLogs As Partition PF_HAProxyLogs To (' + char(10) + '/* Ancient */ ' + @fgArchive + ',' + char(10) +
Reverse(Stuff(Reverse(Stuff((
Select char(10) + '/* ' + CONVERT(varchar, MonthStart, 120) + '*/ ' + (Case When MonthStart > @recentCutoff Then @fgRecent Else @fgArchive End) + ','
From #MonthStarts
For Xml Path(''), Type).value('.', 'nvarchar(MAX)'), 1, 1,'')
), 1, 1, '')) + ');' + char(10);
End;
-- Everything on the archive drive should be ColumnStore_Archive
Declare @archiveCount int = (Select Count(*) From #MonthStarts Where MonthStart <= @recentCutoff) + 1;
Set @sql = @sql + char(10) + 'Alter Table HAProxyLogs Rebuild Partition = All With (Data_Compression = ColumnStore_Archive On Partitions (1 To ' + Cast(@archiveCount as varchar(10)) + '))';
Drop Table #MonthStarts;
If (Len(@sql) > 0)
Begin
Execute sp_executesql @sql;
End
Create Table HAProxyLogs (
[CreationDate] datetime Not Null,
[Host] varchar(50) Not Null,
[Server] varchar(20) Not Null,
[ResponseCode] smallint Not Null,
[Method] varchar(4) Not Null,
[Https] bit Not Null Default 0,
[Uri] varchar(150) Not Null,
[Query] varchar(500) Null,
[RouteName] varchar(64) Null,
[IsPageView] [bit] Not Null,
[ClientIp] varchar(45) Not Null,
[ForwardFor] varchar(45) Null,
[Country] varchar(2) Null,
[HttpVersion] varchar(10) Null,
[Referer] varchar(100) Null,
[RefererHost] varchar(50) Null,
[UserAgent] varchar(128) Null,
[AcceptEncoding] varchar(30) Null,
[ContentEncoding] varchar(10) Null,
[FrontEnd] varchar(30) Null,
[BackEnd] varchar(30) Null,
[Tq] int Null,
[Tw] int Null,
[Tc] int Null,
[Tr] int Null,
[Tt] int Null,
[Bytes] int Null,
[TermState] char(4) Null,
[ActConn] int Null,
[FeConn] int Null,
[BeConn] int Null,
[SrvConn] int Null,
[Retries] int Null,
[SrvQueue] int Null,
[BackEndQueue] int Null,
[AccountId] int Null,
[SqlCount] smallint Null,
[SqlDurationMs] smallint Null,
[RedisCount] smallint Null,
[RedisDurationMs] smallint Null,
[HttpCount] smallint Null,
[HttpDurationMs] smallint Null,
[TagEngineCount] smallint Null,
[TagEngineDurationMs] smallint Null,
[ElasticCount] smallint Null,
[ElasticDurationMs] smallint Null,
[AspNetDurationMs] smallint Null,
[ApplicationId] int Null,
[RequestGuid] uniqueidentifier Null,
[ProvidenceCookie] uniqueidentifier Null,
[IsCrawler] bit Not Null,
[AcceptLanguage] varchar(5) Null,
[ClientIpRaw] varchar(45) Null,
[Colo] varchar(3) Null,
[Flags] int Not Null Default 0,
[ItemId] int Null
) On PS_HAProxyLogs(CreationDate)
GO
Create Clustered Columnstore Index CCI_HAProxyLogs On HAProxyLogs On PS_HAProxyLogs(CreationDate);
GO
Create Or Alter View HAProxyToday As
Select *
From HAProxyLogs
Where CreationDate > GETUTCDATE() - 1;
GO
Create Or Alter View HAProxyYesterday As
Select *
From HAProxyLogs
Where CreationDate Between GETUTCDATE() - 2 And GETUTCDATE() - 1;
GO
Create Or Alter View HAProxyLast2Days As
Select *
From HAProxyLogs
Where CreationDate > GETUTCDATE() - 2;
GO
Create Or Alter Procedure sp_ArchiveHAProxyLogs
@month datetime
As
/*
Example Usage:
Exec sp_ArchiveHAProxyLogs @month = '2011-05-01';
If you have no idea how table partitiong works in SQL Server, start here: https://www.brentozar.com/archive/2013/01/sql-server-table-partitioning-tutorial-videos-and-scripts/
- It's an excellent video tutorial with examples and terminology explanations by Kendra Little
- Trust me, you'll save a lot of time learning this way, it's less than 45 minutes overall and walks through most of these steps end-to-end
Here's how this thing works:
- There's a single table for HAProxyLogs
- Data is partitioned by month
- Data is stored in a clustered columnstore
- Data on the recent drive (D) is columnstore (because I/O is not the bottleneck)
- Data on the archive drive (E) is columnstore_archive (because I/O is the bottleneck, CPU is comparatively available)
- Overall there are 2 filegroups:
- TrafficLogs_D: High speed PCIe NVMe SSD storage, holds as much recent data as we have space for
- TrafficLogs_E: Slower spinny storage (but far more space), holds all data older than recent months on the SSDs
When TrafficLogs_D runs low on space, we invoke this procedure on the oldest month on the recent drive, in order
to move it to the achive drive. Here's how that process works:
0. It's a big TRY/CATCH so any errors aborts the rest of the run.
1. Sanity check we haven't moved this month already - and abort if we have.
2. Create 2 move tables:
- HAProxyLogsMover: For moving data from the D drive to the E drive
- HAProxyLogsArchiveSwap: For temporarily holding data on the E drive during some metadata operations
- Each table has CreationDate constraints that match the partitions they'll be swapped into later, needed for SWITCH integrity
3. Swap the data from @month partition (oldest on the recent/D drive) into HAProxyLogsMover (SWITCH)
- @month partition (still on D) is now empty, HAProxyLogsMover has that month's rows
- (OFFLINE) @month data is now offline for queries
4. Swap the data from @prevMonth partition (newest on the archive/E drive) into HAProxyLogsArchiveSwap (SWITCH)
- @prevMonth partition (on E) is now empty, HAProxyLogsArchiveSwap has that month's rows
- (OFFLINE) @prevMonth data is now offline for queries
5. MERGE the empty @month partition (on D, columnstore) into the also empty @prevMonth partition (on E, columnstore_arhive)
6. Prep the PS_HAProxyLogs (partition scheme) to use TrafficLogs_E when step 7 occurs (NEXT USED)
7. SPLIT the 2-month long @prevMonth partition into @month and @prevMonth (both on E, both columnstore_archive now)
- Both partitions are still empty. Steps 5-7 are metadata-only operations.
8. Swap the data held in HAProxyLogsArchiveSwap for the MERGE/SPLIT back into the @prevMonth partition (SWITCH)
- (ONLINE) @prevMonth data is now online for queries
9. Create a CCI (replacing the existing one) on HAProxyLogsMover, this does the following:
- Moves the data to the E drive (to prep for the swap, via "On TrafficLogs_E")
- Compresses the data during the move (via "Data_Compression = Columnstore_Archive")
10. Swap the data in HAProxyLogsMover (on E, with columnstore_archive compression) into the @month parition (on E, clustered_columnstore)
- @month partition now has all of that month's data
- HAProxyLogsMover is now empty
11. Drop our temp tables (HAProxyLogsMover and HAProxyLogsArchiveSwap) if they are empty
- If they're not empty, we leave them alone...we don't want any data loss here
- They should be good to debug and fix any swap issues by virtue of existing
*/
Set NoCount On;
Declare @prevMonth datetime = DateAdd(Month, -1, @month);
Declare @nextMonth datetime = DateAdd(Month, 1, @month);
Begin Try
-- Step 1: Sanity check that we're not re-moving an old table...don't do that.
Declare @CurrentFG sysname = (Select ds.name
From sys.data_spaces ds
Join sys.destination_data_spaces dds
On ds.data_space_id = dds.data_space_id
Where dds.destination_id = $PARTITION.PF_HAProxyLogs(@month));
If (@CurrentFG = 'TrafficLogs_E')
Begin
Declare @error nvarchar(400) = 'The ' + Convert(varchar(10), @month, 120) + ' month is already on the archive partition (TrafficLogs_E), aborting.';
Throw 501337, @error, 1;
Return;
End
-- Step 2: Table creation
-- Simply so we don't declare this huge thing twice.
Declare @tableTemplate nvarchar(4000) = '
Create Table {Name} (
[CreationDate] datetime Not Null,
[Host] varchar(50) Not Null,
[Server] varchar(20) Not Null,
[ResponseCode] smallint Not Null,
[Method] varchar(4) Not Null,
[Https] bit Not Null Default 0,
[Uri] varchar(150) Not Null,
[Query] varchar(500) Null,
[RouteName] varchar(64) Null,
[IsPageView] [bit] Not Null,
[ClientIp] varchar(45) Not Null,
[ForwardFor] varchar(45) Null,
[Country] varchar(2) Null,
[HttpVersion] varchar(10) Null,
[Referer] varchar(100) Null,
[RefererHost] varchar(50) Null,
[UserAgent] varchar(128) Null,
[AcceptEncoding] varchar(30) Null,
[ContentEncoding] varchar(10) Null,
[FrontEnd] varchar(30) Null,
[BackEnd] varchar(30) Null,
[Tq] int Null,
[Tw] int Null,
[Tc] int Null,
[Tr] int Null,
[Tt] int Null,
[Bytes] int Null,
[TermState] char(4) Null,
[ActConn] int Null,
[FeConn] int Null,
[BeConn] int Null,
[SrvConn] int Null,
[Retries] int Null,
[SrvQueue] int Null,
[BackEndQueue] int Null,
[AccountId] int Null,
[SqlCount] smallint Null,
[SqlDurationMs] smallint Null,
[RedisCount] smallint Null,
[RedisDurationMs] smallint Null,
[HttpCount] smallint Null,
[HttpDurationMs] smallint Null,
[TagEngineCount] smallint Null,
[TagEngineDurationMs] smallint Null,
[ElasticCount] smallint Null,
[ElasticDurationMs] smallint Null,
[AspNetDurationMs] smallint Null,
[ApplicationId] int Null,
[RequestGuid] uniqueidentifier Null,
[ProvidenceCookie] uniqueidentifier Null,
[IsCrawler] bit Not Null,
[AcceptLanguage] varchar(5) Null,
[ClientIpRaw] varchar(45) Null,
[Colo] varchar(3) Null,
[Flags] int Not Null Default 0,
[ItemId] int Null,
Constraint CK_{Name}_Low Check (CreationDate >= ''{LowerDate}''),
Constraint CK_{Name}_High Check (CreationDate < ''{UpperDate}'')
) On {Filegorup};
Create Clustered Columnstore Index CCI_{Name} On {Name} With (Data_Compression = {Compression}) On {Filegorup};'
-- TrafficLogs_D Drive mover table
-- Constraints exist for metadata swap
Declare @table nvarchar(4000) = @tableTemplate;
Set @table = Replace(@table, '{Name}', 'HAProxyLogsMover');
Set @table = Replace(@table, '{Filegorup}', 'TrafficLogs_D');
Set @table = Replace(@table, '{LowerDate}', Convert(varchar(20), @month, 120));
Set @table = Replace(@table, '{UpperDate}', Convert(varchar(20), @nextMonth, 120));
Set @table = Replace(@table, '{Compression}', 'ColumnStore');
Exec sp_executesql @table;
-- TrafficLogs_E Drive swap table
-- Constraints exist for metadata swap
-- This exists to temporarily hold the data on the newest month on the archive drive during the SPLIT operation
Set @table = @tableTemplate;
Set @table = Replace(@table, '{Name}', 'HAProxyLogsArchiveSwap');
Set @table = Replace(@table, '{Filegorup}', 'TrafficLogs_E');
Set @table = Replace(@table, '{LowerDate}', Convert(varchar(20), @prevMonth, 120));
Set @table = Replace(@table, '{UpperDate}', Convert(varchar(20), @month, 120));
Set @table = Replace(@table, '{Compression}', 'ColumnStore_Archive');
Exec sp_executesql @table;
-- Step 3: Swap data into move table (metadata operation)
-- This exchanges data in the partition in HAProxyLogs into our CCI table with the same schema/constraints
Alter Table HAProxyLogs
Switch Partition $PARTITION.PF_HAProxyLogs(@month)
To dbo.HAProxyLogsMover;
-- Step 4: Swap old data into the archive move table (metadata operation) so we can split the partition which will have 0 rows after this swap
-- This exchanges data in the partition in HAProxyLogs into our CCI table with the same schema/constraints
Alter Table HAProxyLogs
Switch Partition $PARTITION.PF_HAProxyLogs(@prevMonth)
To dbo.HAProxyLogsArchiveSwap;
-- Step 5: Move the now-empty partition to the archive file group
-- This effectively eliminates the @month partition, by combining it with the month that came before it (already on the archive drive)
Alter Partition Function PF_HAProxyLogs() Merge Range (@month);
-- Step 6: Setup next function to land in the archive
-- This tells the next command which filegroup to use
Alter Partition Scheme PS_HAProxyLogs Next Used TrafficLogs_E;
-- Step 7: Re-create this month in the archive, by splitting the currently empty partition (due to the archive swap above)
-- This changes the empty @prevMonth partition (which was temporarily 2-months big with 0 rows, because the data was swapped out above)
-- into 2 (both columnstore_archive compression). Both are empty and ready for data to be swapped back in now.
Alter Partition Function PF_HAProxyLogs() Split Range (@month);
-- Step 8: Swap the archive month back into the main table
-- This data never moved - it was just a metadata swap above - we're putting it back in the partition that was just split back into 2 months
-- We did all this because you can't split a partition with data in it with a clustered columnstore index present
-- ...nor would you want to, due to massive logging churn. This is nearly instant, win-win.
Alter Table HAProxyLogsArchiveSwap
Switch Partition $PARTITION.PF_HAProxyLogs(@prevMonth)
To HAProxyLogs Partition $PARTITION.PF_HAProxyLogs(@prevMonth);
-- Step 9: Create the CCI in prep for the swap, this also **moves it to the archive filegroup/drive**, while compressing it - all at once
-- Since we merged and split the range above, the split was from a columnstore_archive partition, meaning that the monthly partition went like this:
-- TrafficLogs_D: ColumnStore
-- (Non-existant - merged into previous month)
-- TrafficLogs_E: ColumnStore_Archive (from previous month SPLIT)
Create Clustered Columnstore Index CCI_HAProxyLogsMover On HAProxyLogsMover
With (Drop_Existing = On, Data_Compression = Columnstore_Archive)
On TrafficLogs_E;
-- Step 10: Swap data back into the main table
Alter Table HAProxyLogsMover
Switch Partition $PARTITION.PF_HAProxyLogs(@month)
To HAProxyLogs Partition $PARTITION.PF_HAProxyLogs(@month);
-- Step 11: Drop our temp tables, if they're empty
If (Select Count(*) From HAProxyLogsMover) = 0
Begin
Drop Table HAProxyLogsMover;
End
If (Select Count(*) From HAProxyLogsArchiveSwap) = 0
Begin
Drop Table HAProxyLogsArchiveSwap;
End
End Try
Begin Catch
-- Mainly for SSMS behavior while working
Select Error_Number() ErrorNumber,
Error_Severity() ErrorSeverity,
Error_State() ErrorState,
Error_Procedure() ErrorProcedure,
Error_Line() ErrorLine,
Error_Message() ErrorMessage;
Throw;
End Catch
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment