Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
section Section1;
// Detects the language of every text in `input` with a single POST to the
// Text Analytics v2.0 "languages" endpoint.
//
// input:   list of text values. The caller in this file passes batches of at
//          most 1000 items; the size is not enforced here.
// returns: a list with exactly one item per input item, in input order. Each
//          item is the first entry of that document's `detectedLanguages`
//          list, or null when the response has no document for that id or the
//          list is missing/empty.
//
// Results are matched back to the inputs by document id through a keyed
// lookup rather than by reading a column off a joined table, so the output
// order never depends on the row order a join happens to produce, and an
// empty `documents` array in the response yields nulls instead of a
// missing-column error.
//
// TODO: support nulls, truncate text to avoid service limits
Detect1000Languages = (input as list) as list =>
    let
        text = List.Buffer(input),
        data = Table.FromColumns({text}, type table [text=text]),
        // Document ids are the 1-based positions of the inputs, sent as text.
        indexed = Table.AddIndexColumn(data, "id", 1),
        textId = Table.TransformColumnTypes(indexed, {{"id", type text}}),
        body = [documents=Table.ReorderColumns(textId, {"id", "text"})],
        response = Web.Contents(
            "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/languages", [
                Headers=[
                    #"Content-Type"="application/json",
                    #"Ocp-Apim-Subscription-Key"="************************"
                ],
                Content=Json.FromValue(body)
            ]),
        documents = List.Buffer(Json.Document(response)[documents]),
        // Record keyed by document id -> that document's detectedLanguages value
        // (null when the field is absent from a returned document).
        languagesById = Record.FromList(
            List.Transform(documents, (doc) => Record.FieldOrDefault(doc, "detectedLanguages", null)),
            List.Transform(documents, (doc) => Text.From(doc[id]))),
        // Keep only the first candidate; anything that is not a non-empty list becomes null.
        firstLanguage = (t) => if t is list and List.Count(t) > 0 then t{0} else null,
        // Walk the request ids in request order so result i always belongs to input i.
        detected = List.Transform(
            textId[id],
            (requestId) => firstLanguage(Record.FieldOrDefault(languagesById, requestId, null)))
    in
        detected;
// Scalar-looking language detector: takes one text value and returns a nullable
// record [name, iso6391Name, score]. It is built with Function.ScalarVector, so
// the vector implementation below receives a table whose `text` column holds the
// argument values, splits that column into batches of 1000, hands each batch to
// Detect1000Languages, and concatenates the per-batch results into one list.
shared Text.DetectLanguage =
    let
        languageRecord = type [name=text, iso6391Name=text, score=number],
        scalarSignature = type function (text as text) as nullable languageRecord,
        detectInBatches = (rows) =>
            let
                batches = List.Split(rows[text], 1000),
                perBatch = List.Transform(batches, Detect1000Languages)
            in
                List.Combine(perBatch)
    in
        Function.ScalarVector(scalarSignature, detectInBatches);
// Sample query: builds a one-column table of four words, adds a "Language"
// column by calling Text.DetectLanguage on each row's [words] value, then
// expands the resulting record into name / iso6391Name / score columns.
shared Test =
    let
        samples = {"guten tag", "Hello", "multumesc", "asefjailsfasdfa"},
        wordTable = Table.FromColumns({samples}, {"words"}),
        withLanguage = Table.AddColumn(
            wordTable,
            "Language",
            each Text.DetectLanguage([words])),
        expanded = Table.ExpandRecordColumn(
            withLanguage,
            "Language",
            {"name", "iso6391Name", "score"})
    in
        expanded;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment