Created
May 22, 2021 15:08
-
-
Save joelonsql/1b7afbf822c3e1c7cae43fec9d6c6fce to your computer and use it in GitHub Desktop.
PostgreSQL guess_language(text) one-liner SQL-function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION guess_language(string text) | |
RETURNS text | |
LANGUAGE sql | |
AS | |
$$ | |
SELECT cfgname::text FROM pg_ts_config WHERE cfgname <> 'simple' ORDER BY length(to_tsvector(cfgname::regconfig,string)) LIMIT 1 | |
$$; | |
-- | |
-- Demo testing some random sentences from Wikipedia: | |
-- | |
SELECT guess_language('حسينية الحاج داوود العاشور هي حسينية تم تأسيسها عام'); | |
guess_language | |
---------------- | |
arabic | |
(1 row) | |
SELECT guess_language('Esbjerg Sharks FC er en dansk floorballklub fra Esbjerg stiftet i 1995.'); | |
guess_language | |
---------------- | |
danish | |
(1 row) | |
SELECT guess_language('De wetenschappelijke naam van deze soort is voor het eerst geldig gepubliceerd in 2001 door Nickle.'); | |
guess_language | |
---------------- | |
dutch | |
(1 row) | |
SELECT guess_language('It allows DOS programs to eliminate the 640 KB conventional memory limit by addressing up to 64 MB of extended memory on Intel 80386 and above machines.'); | |
guess_language | |
---------------- | |
english | |
(1 row) | |
SELECT guess_language('Hän saavutti Tšekkoslovakian maajoukkueessa MM-pronssia Tukholman kisoissa 1970.'); | |
guess_language | |
---------------- | |
finnish | |
(1 row) | |
SELECT guess_language('Vous pouvez améliorer la vérifiabilité en associant ces informations à des références à l''aide d''appels de notes.'); | |
guess_language | |
---------------- | |
french | |
(1 row) | |
SELECT guess_language('Benjamin Baltes (* 30. März 1984 in Saarbrücken) ist ein deutscher Fußballspieler.'); | |
guess_language | |
---------------- | |
german | |
(1 row) | |
SELECT guess_language('Flaminio Ponzio (Viggiù, 5 aprile 1560 – Roma, 6 aprile 1613) è stato un architetto italiano che operò principalmente sotto il pontefice Paolo V.'); | |
guess_language | |
---------------- | |
italian | |
(1 row) | |
SELECT guess_language('गाइने कीरा नेपालमा पाइने एक प्रकारको किरा हो । यो किरा पानी माथी नाच्ने भएकाले यसलाई पानी चेपुवा किरा पनि भनिन्छ'); | |
guess_language | |
---------------- | |
nepali | |
(1 row) | |
SELECT guess_language('Karpelaksfisker er en gruppe strålefinnede fisker. Noe over 200 arter finnes i Afrika, resten finnes i Amerika.'); | |
guess_language | |
---------------- | |
norwegian | |
(1 row) | |
SELECT guess_language('África do Sul participou dos Jogos Paralímpicos de Verão de 2008, que foram realizados na cidade de Pequim, na China, entre os dias 6 e 17 de setembro de 2008.'); | |
guess_language | |
---------------- | |
portuguese | |
(1 row) | |
SELECT guess_language('Витя Малеев в школе и дома» — повесть Николая Носова на школьную тему, написанная в 1951 году'); | |
guess_language | |
---------------- | |
russian | |
(1 row) | |
SELECT guess_language('Dicoelotrachelus cubensis es una especie de insecto coleóptero de la familia Chrysomelidae.'); | |
guess_language | |
---------------- | |
spanish | |
(1 row) | |
SELECT guess_language('Käringsjön är en sjö i Vansbro kommun i Dalarna och ingår i Dalälvens huvudavrinningsområde.'); | |
guess_language | |
---------------- | |
swedish | |
(1 row) | |
-- | |
-- This method works OK for some languages, but fails for others, especially for short sentences. | |
-- Should not be used in production. | |
-- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment