Skip to content

Instantly share code, notes, and snippets.

@hfleitas
Last active February 15, 2024 01:30
Show Gist options
  • Save hfleitas/0916b2412a4f1a538df1735e51b7d6ca to your computer and use it in GitHub Desktop.
Save hfleitas/0916b2412a4f1a538df1735e51b7d6ca to your computer and use it in GitHub Desktop.
M06-Demo5-ML.kql
// #connect cluster('igniteadxsource.eastus2').database('Occupancy')
#connect cluster('adxpm10774.eastus').database('IoTAnalytics')
print("😍 ML 🤖")
// Python UnSupervised Learning
//Create a cusotm UDF to run K-Means clustering using Python plugin
.create-or-alter function with (folder = "Python") kmeans_sf_OccupDetc(tbl:(*),k:int,features:dynamic,cluster_col:string) {
let kwargs = pack('k', k, 'features', features, 'cluster_col', cluster_col);
let code =
'\n'
'from sklearn.cluster import KMeans\n'
'\n'
'k = kargs["k"]\n'
'features = kargs["features"]\n'
'cluster_col = kargs["cluster_col"]\n'
'\n'
'km = KMeans(n_clusters=k)\n'
'df1 = df[features]\n'
'km.fit(df1)\n'
'result = df\n'
'result[cluster_col] = km.labels_\n';
tbl
| evaluate python(typeof(*), code, kwargs)
}
// Invoke the custom UDF for KMeans clusters
Thermostats
| where EnqueuedTimeUTC > ago(7d)
| extend cluster_id=double(null)
| project EnqueuedTimeUTC, DeviceId, Temp, Humidity, cluster_id
| invoke kmeans_sf_OccupDetc(3, pack_array("Temp", "Humidity"), "cluster_id")
| sample 10
// Python Supervised Model
// Custom UDF to score based on pre-trained model
.create-or-alter function with (folder = "Python", skipvalidation = "true") classify_sf_OccupDetc(samples:(*), models_tbl:(name:string,timestamp:datetime,model:string), model_name:string, features_cols:dynamic, pred_col:string) {
let model_str = toscalar(ML_Models | where name == model_name | top 1 by timestamp desc | project model);
let kwargs = pack('smodel', model_str, 'features_cols', features_cols, 'pred_col', pred_col);
let code =
'\n'
'import pickle\n'
'import binascii\n'
'\n'
'smodel = kargs["smodel"]\n'
'features_cols = kargs["features_cols"]\n'
'pred_col = kargs["pred_col"]\n'
'bmodel = binascii.unhexlify(smodel)\n'
'clf1 = pickle.loads(bmodel)\n'
'df1 = df[features_cols]\n'
'predictions = clf1.predict(df1)\n'
'\n'
'result = df\n'
'result[pred_col] = pd.DataFrame(predictions, columns=[pred_col])'
'\n'
;
samples | evaluate python(typeof(*), code, kwargs)
}
.append ML_Models <| datatable (name: string, timestamp: datetime, model: string) [
"Occupancy",datetime(2019-11-05T15:28:53.010134Z),"800363736b6c6561726e2e6c696e6561725f6d6f64656c2e6c6f6769737469630a4c6f67697374696352656772657373696f6e0a7100298171017d710228580700000070656e616c7479710358020000006c32710458040000006475616c7105895803000000746f6c7106473f1a36e2eb1c432d5801000000437107473ff0000000000000580d0000006669745f696e746572636570747108885811000000696e746572636570745f7363616c696e6771094b01580c000000636c6173735f776569676874710a4e580c00000072616e646f6d5f7374617465710b4e5806000000736f6c766572710c58090000006c69626c696e656172710d58080000006d61785f69746572710e4b64580b0000006d756c74695f636c617373710f58040000007761726e71105807000000766572626f736571114b00580a0000007761726d5f737461727471128958060000006e5f6a6f627371134e5808000000636c61737365735f7114636e756d70792e636f72652e6d756c746961727261790a5f7265636f6e7374727563740a7115636e756d70790a6e6461727261790a71164b00857117430162711887711952711a284b014b0285711b636e756d70790a64747970650a711c58020000006231711d4b004b0187711e52711f284b0358010000007c71204e4e4e4affffffff4affffffff4b007471216289430200017122747123625805000000636f65665f7124681568164b008571256818877126527127284b014b014b05867128681c5802000000663871294b004b0187712a52712b284b0358010000003c712c4e4e4e4affffffff4affffffff4b0074712d6289432883ebdfd50687e0bf2cdaca74fa93a63fd3abc0080e6e943f650656defdad713f18f6a86bd73202bf712e74712f62580a000000696e746572636570745f7130681568164b008571316818877132527133284b014b01857134682b89430808459f57711290bf71357471366258070000006e5f697465725f7137681568164b00857138681887713952713a284b014b0185713b681c58020000006934713c4b004b0187713d52713e284b03682c4e4e4e4affffffff4affffffff4b0074713f628943040c00000071407471416258100000005f736b6c6561726e5f76657273696f6e71425806000000302e32302e33714375622e"
]
ML_Models
| take 10
//Based on the Temp and Humidity - Is the room occupied?
Thermostats
| where EnqueuedTimeUTC > ago(15m)
| extend pred_Occupancy=bool(0)
| extend CO2=0, HumidityRatio=0
| invoke classify_sf_OccupDetc(ML_Models, 'Occupancy', pack_array('Temp', 'Humidity', 'BatteryLevel', 'CO2', 'HumidityRatio'), 'pred_Occupancy')
// Built-in forecasting & anomaly detection
//Lets forcast out 12 hours
let start = now()-1d;
let end = now();
Thermostats
| where EnqueuedTimeUTC between (start .. end)
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6'
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end+12h step 1m
| extend NoGapsTemp=series_fill_linear(AvgTemp)
| project EnqueuedTimeUTC, NoGapsTemp
| extend forecast = series_decompose_forecast(NoGapsTemp, 720)
| render timechart with(title='Forecasting the next 15min by Time Series Decmposition')
//Are there any anomalies for this device?
let start = now()-24h;
let end = now();
Thermostats
| where EnqueuedTimeUTC between (start .. end)
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6'
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end step 1m
| extend NoGapsTemp=series_fill_linear(AvgTemp)
| project EnqueuedTimeUTC, NoGapsTemp
| extend anomalies = series_decompose_anomalies(NoGapsTemp,1)
| render anomalychart with(anomalycolumns=anomalies)
//Lets make it less sensative
Thermostats
| where EnqueuedTimeUTC > ago(3d)
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6'
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from ago(3d) to now() step 1m
| extend NoGapsTemp=series_fill_linear(AvgTemp)
| project EnqueuedTimeUTC, NoGapsTemp
| extend anomalies = series_decompose_anomalies(NoGapsTemp,1.2)
| render anomalychart with(anomalycolumns=anomalies)
// split stats into panels
let start = now()-3d;
let end = now();
Thermostats
| where EnqueuedTimeUTC between (start .. end)
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6'
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from ago(3d) to now() step 1m
| extend NoGapsTemp=series_fill_linear(AvgTemp)
| project EnqueuedTimeUTC, NoGapsTemp
| extend (anomaly, deviation, seasonal) = series_decompose_anomalies(NoGapsTemp,1.2)
| render timechart with(ysplit=panels)
//What the anomalies I should focus on across all devices?
let start = now()-3d;
let end = now();
Thermostats
| where EnqueuedTimeUTC between (start .. end)
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end step 1m by DeviceId
| extend NoGapsTemp=series_fill_linear(AvgTemp)
| project EnqueuedTimeUTC, DeviceId, NoGapsTemp
| extend anomalies = series_decompose_anomalies(NoGapsTemp, 1.5)
| mv-expand EnqueuedTimeUTC, anomalies, NoGapsTemp
| where anomalies == 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment