Skip to content

Instantly share code, notes, and snippets.

@martin0258
Last active December 16, 2015 09:08
Show Gist options
  • Save martin0258/5410428 to your computer and use it in GitHub Desktop.
Save martin0258/5410428 to your computer and use it in GitHub Desktop.
Generate T-SQL for applying windowing transformation to time-series data.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2013 martinku <martinku@ss-martin-ku>
#
# Distributed under terms of the MIT license.
"""
Purpose:
Apply windowing transformation to time-series data.
That is, transform time-series data to regression cases used for MS neural network.
Input:
1. A time series DB table consists of two columns (time and value).
For examplae: DB table [ETC].[dbo].[F_SHB_Weekly_Passing]
| WeekNum | Passings |
| 1 | 12334 |
| 2 | 12335 |
2. two parameters
- n : # of input units
- m : # of output units
Output:
T-SQL that generates the following table.
| CurrentWeekNum | I1 | I2 | ... | In | O1 | O2 | ... | Om |
| n | 21 | 22 | ... | 67 | 34 | 43 | ... | 65 |
| n+1 | 22 | 23 | ... | 68 | 35 | 44 | ... | 66 |
"""
import argparse
from string import Template
# settings (adjusted by script users)
table_name = "[ETC].[dbo].[F_SHB_Weekly_Passing]"
time_column_name = "WeekNum"
value_column_name = "Passings"
max_time_column = 105
# input parameters (passed in via cmd arguments)
parser = argparse.ArgumentParser()
parser.add_argument("num_input", help="the number of input", type=int)
parser.add_argument("num_output", help="the number of output", type=int)
args = parser.parse_args()
num_input = args.num_input
num_output = args.num_output
output_file = "windowing_transformation.sql"
# generating sql
final_sql = ""
window_length = num_input + num_output
num_case = max_time_column - window_length + 1
for j in range(1,num_case+1):
# build sql for each case
pivot_column = []
template = Template("SELECT ${currentTime} AS Current${time_column}")
sql = template.substitute(
currentTime=str(num_input+j-1),
time_column=time_column_name
)
for i in range(1,num_input+1):
pivot_column.append("[%d]" % int(i+j-1))
template = Template("\n,[${In}] AS I${i}")
sql += template.substitute(
In=str(i+j-1),
i=str(i)
)
for i in range(1,num_output+1):
pivot_column.append("[%d]" % int(num_input+i+j-1))
template = Template("\n,[${On}] AS O${i}")
sql += template.substitute(
On=str(num_input+i+j-1),
i=str(i)
)
template = Template("""
FROM ${table}
PIVOT (
SUM(${value_column})
FOR ${time_column} IN (${pivot_column})
) AS t
""")
sql += template.substitute(
table=table_name,
value_column=value_column_name,
time_column=time_column_name,
pivot_column=",".join(pivot_column)
)
final_sql += sql
final_sql += "\nUNION\n" if j<num_case else ""
final_sql += "\nORDER BY Current%s" % time_column_name
print final_sql
output = open(output_file,'w')
output.write(final_sql)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment