Skip to content

Instantly share code, notes, and snippets.

@joelonsql
Created May 22, 2021 15:08
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joelonsql/1b7afbf822c3e1c7cae43fec9d6c6fce to your computer and use it in GitHub Desktop.
Save joelonsql/1b7afbf822c3e1c7cae43fec9d6c6fce to your computer and use it in GitHub Desktop.
PostgreSQL guess_language(text) one-liner SQL-function
CREATE OR REPLACE FUNCTION guess_language(string text)
RETURNS text
LANGUAGE sql
AS
$$
SELECT cfgname::text FROM pg_ts_config WHERE cfgname <> 'simple' ORDER BY length(to_tsvector(cfgname::regconfig,string)) LIMIT 1
$$;
--
-- Demo testing some random sentences from Wikipedia:
--
SELECT guess_language('حسينية الحاج داوود العاشور هي حسينية تم تأسيسها عام');
guess_language
----------------
arabic
(1 row)
SELECT guess_language('Esbjerg Sharks FC er en dansk floorballklub fra Esbjerg stiftet i 1995.');
guess_language
----------------
danish
(1 row)
SELECT guess_language('De wetenschappelijke naam van deze soort is voor het eerst geldig gepubliceerd in 2001 door Nickle.');
guess_language
----------------
dutch
(1 row)
SELECT guess_language('It allows DOS programs to eliminate the 640 KB conventional memory limit by addressing up to 64 MB of extended memory on Intel 80386 and above machines.');
guess_language
----------------
english
(1 row)
SELECT guess_language('Hän saavutti Tšekkoslovakian maajoukkueessa MM-pronssia Tukholman kisoissa 1970.');
guess_language
----------------
finnish
(1 row)
SELECT guess_language('Vous pouvez améliorer la vérifiabilité en associant ces informations à des références à l''aide d''appels de notes.');
guess_language
----------------
french
(1 row)
SELECT guess_language('Benjamin Baltes (* 30. März 1984 in Saarbrücken) ist ein deutscher Fußballspieler.');
guess_language
----------------
german
(1 row)
SELECT guess_language('Flaminio Ponzio (Viggiù, 5 aprile 1560 – Roma, 6 aprile 1613) è stato un architetto italiano che operò principalmente sotto il pontefice Paolo V.');
guess_language
----------------
italian
(1 row)
SELECT guess_language('गाइने कीरा नेपालमा पाइने एक प्रकारको किरा हो । यो किरा पानी माथी नाच्ने भएकाले यसलाई पानी चेपुवा किरा पनि भनिन्छ');
guess_language
----------------
nepali
(1 row)
SELECT guess_language('Karpelaksfisker er en gruppe strålefinnede fisker. Noe over 200 arter finnes i Afrika, resten finnes i Amerika.');
guess_language
----------------
norwegian
(1 row)
SELECT guess_language('África do Sul participou dos Jogos Paralímpicos de Verão de 2008, que foram realizados na cidade de Pequim, na China, entre os dias 6 e 17 de setembro de 2008.');
guess_language
----------------
portuguese
(1 row)
SELECT guess_language('Витя Малеев в школе и дома»&#160;— повесть Николая Носова на школьную тему, написанная в 1951 году');
guess_language
----------------
russian
(1 row)
SELECT guess_language('Dicoelotrachelus cubensis es una especie de insecto coleóptero de la familia Chrysomelidae.');
guess_language
----------------
spanish
(1 row)
SELECT guess_language('Käringsjön är en sjö i Vansbro kommun i Dalarna och ingår i Dalälvens huvudavrinningsområde.');
guess_language
----------------
swedish
(1 row)
--
-- This method works OK for some languages, but fails for others, especially for short sentences.
-- Should not be used in production.
--
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment