Skip to content

Instantly share code, notes, and snippets.

@bjulius
Created April 12, 2024 01:51
Show Gist options
  • Save bjulius/5b517db84f161626059c06eb1e3ee1a1 to your computer and use it in GitHub Desktop.
Save bjulius/5b517db84f161626059c06eb1e3ee1a1 to your computer and use it in GitHub Desktop.
M and R Code to Accompany Brian Julius Video on Real-Time Natural Language Data Cleaning in Power Query
// The code below goes into a query named RegexFromAPI. This returns the Regex from GPT4 based on the user described cleaning task.
// #GiveCredit4Code, thanks to Oscar Martinez Valero for profviding the structure of how to call
// GTP4 from within Power Query and parse the results that come back
// Source article: https://www.bibb.pro/post/ai-driven-data-analysis-chatgpt-power-query-integration-in-bi
let
AnalyzeDataWithChatGPT = (RegexDescription as text) as text =>
let
apikey = Text.FromBinary(File.Contents("C:\temp\PQ_API_KEY.txt")),
GPTPrompt = Json.FromValue(
[
model = "gpt-4",
messages = {
[
role = "system",
content
= "You are an analytics expert. Provide the requested regex with no introductory or explanatory text - just the regex code itself"
],
[role = "system", content = RegexDescription]
}
],
TextEncoding.Utf8
),
APICallResult = Json.Document(
Web.Contents(
"https://api.openai.com/v1/chat/completions",
[
Headers = [#"Content-Type" = "application/json", #"Authorization" = "Bearer " & apikey],
Content = GPTPrompt
]
)
),
ResponseContent = APICallResult[choices]{0}[message][content]
in
ResponseContent
in
AnalyzeDataWithChatGPT
// Enter the code below into the Data query in Power Query.
// This will invoke the R script that applies the Regex above to your data to be cleaned
let
Source = DataRaw,
RegexRequest = "Provide Regex to " & CleaningRequest,
GPTRegexCode = Table.AddColumn(Source, "Regex", each RegexFromAPI(RegexRequest)),
RunRScript = R.Execute(
"library(stringr)#(lf)df <- dataset#(lf)target <- max(df$Regex)#(lf)df$Cleaned <- sapply(str_extract_all(df$String, target ), function(x) paste(x, collapse = "", ""))#(lf)df",
[dataset = GPTRegexCode]
),
Result = RunRScript{[Name = "df"]}[Value]
in
Result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment