Created
April 12, 2024 01:51
-
-
Save bjulius/5b517db84f161626059c06eb1e3ee1a1 to your computer and use it in GitHub Desktop.
M and R Code to Accompany Brian Julius Video on Real-Time Natural Language Data Cleaning in Power Query
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The code below goes into a query named RegexFromAPI. This returns the Regex from GPT4 based on the user described cleaning task. | |
// #GiveCredit4Code, thanks to Oscar Martinez Valero for profviding the structure of how to call | |
// GTP4 from within Power Query and parse the results that come back | |
// Source article: https://www.bibb.pro/post/ai-driven-data-analysis-chatgpt-power-query-integration-in-bi | |
let | |
AnalyzeDataWithChatGPT = (RegexDescription as text) as text => | |
let | |
apikey = Text.FromBinary(File.Contents("C:\temp\PQ_API_KEY.txt")), | |
GPTPrompt = Json.FromValue( | |
[ | |
model = "gpt-4", | |
messages = { | |
[ | |
role = "system", | |
content | |
= "You are an analytics expert. Provide the requested regex with no introductory or explanatory text - just the regex code itself" | |
], | |
[role = "system", content = RegexDescription] | |
} | |
], | |
TextEncoding.Utf8 | |
), | |
APICallResult = Json.Document( | |
Web.Contents( | |
"https://api.openai.com/v1/chat/completions", | |
[ | |
Headers = [#"Content-Type" = "application/json", #"Authorization" = "Bearer " & apikey], | |
Content = GPTPrompt | |
] | |
) | |
), | |
ResponseContent = APICallResult[choices]{0}[message][content] | |
in | |
ResponseContent | |
in | |
AnalyzeDataWithChatGPT | |
// Enter the code below into the Data query in Power Query. | |
// This will invoke the R script that applies the Regex above to your data to be cleaned | |
let | |
Source = DataRaw, | |
RegexRequest = "Provide Regex to " & CleaningRequest, | |
GPTRegexCode = Table.AddColumn(Source, "Regex", each RegexFromAPI(RegexRequest)), | |
RunRScript = R.Execute( | |
"library(stringr)#(lf)df <- dataset#(lf)target <- max(df$Regex)#(lf)df$Cleaned <- sapply(str_extract_all(df$String, target ), function(x) paste(x, collapse = "", ""))#(lf)df", | |
[dataset = GPTRegexCode] | |
), | |
Result = RunRScript{[Name = "df"]}[Value] | |
in | |
Result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment