Skip to content

Instantly share code, notes, and snippets.

@dankleiman
Created December 27, 2018 20:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dankleiman/2e00b967697e22836a76ad0e75023a9e to your computer and use it in GitHub Desktop.
Save dankleiman/2e00b967697e22836a76ad0e75023a9e to your computer and use it in GitHub Desktop.
-- First set up the relevant DB/tables
CREATE DATABASE words;
CREATE TABLE word_list (word text);
CREATE TABLE words (word text, len int, letters text);
-- Create a seed script for `word_list` and run the sql file into your db
-- cat /usr/share/dict/words | awk '{ print "INSERT INTO word_list VALUES('\''" $1 "'\'');" }' > seed.sql
-- psql words -f seed.sql
-- Now we need to build the `words` table from pg functions off the `word_list` table
-- First, let's do length
select word, length(word) from word_list limit 10;
word | length
----------+--------
A | 1
a | 1
aa | 2
aal | 3
aalii | 5
aam | 3
Aani | 4
aardvark | 8
aardwolf | 8
Aaron | 5
(10 rows)
-- Now, sorting
-- if we can break each word down into an array, sort the array, and recombine the letters into a word,
-- we now have an ordered `letters` column for each work
-- step 1: break the words apart into letter arrays
select word, string_to_array(word, NULL) from word_list limit 10;
word | string_to_array
----------+-------------------
A | {A}
a | {a}
aa | {a,a}
aal | {a,a,l}
aalii | {a,a,l,i,i}
aam | {a,a,m}
Aani | {A,a,n,i}
aardvark | {a,a,r,d,v,a,r,k}
aardwolf | {a,a,r,d,w,o,l,f}
Aaron | {A,a,r,o,n}
(10 rows)
-- step 2: unnest the arrays so we can order the letters
SELECT
word,
lower(l.*) as letter
FROM
word_list, unnest(string_to_array(word, NULL)) l
WHERE word like 'a%'
ORDER BY word, letter
word | letter
-------------------------+--------
a | a
aa | a
aa | a
aal | a
aal | a
aal | l
aalii | a
aalii | a
aalii | i
aalii | i
aalii | l
aam | a
aam | a
aam | m
-- step 3: using the ordered letters as a subquery, re-aggregate them into an array
SELECT
word,
array_agg(letter)
FROM (
SELECT
word,
lower(l.*) as letter
FROM
word_list, unnest(string_to_array(word, NULL)) l
WHERE word like 'quack%'
ORDER BY word, letter
) w
GROUP BY word
word | array_agg
--------------+---------------------------
quack | {a,c,k,q,u}
quackery | {a,c,e,k,q,r,u,y}
quackhood | {a,c,d,h,k,o,o,q,u}
quackish | {a,c,h,i,k,q,s,u}
quackishly | {a,c,h,i,k,l,q,s,u,y}
quackishness | {a,c,e,h,i,k,n,q,s,s,s,u}
quackism | {a,c,i,k,m,q,s,u}
quackle | {a,c,e,k,l,q,u}
quacksalver | {a,a,c,e,k,l,q,r,s,u,v}
quackster | {a,c,e,k,q,r,s,t,u}
quacky | {a,c,k,q,u,y}
(11 rows)
-- step 4: glue the words back together and `array_to_string`
SELECT
word,
array_to_string(array_agg(letter), '') as letters
FROM (
SELECT
word,
lower(l.*) as letter
FROM
word_list, unnest(string_to_array(word, NULL)) l
WHERE word like 'quack%'
ORDER BY word, letter
) w
GROUP BY word
word | letters
--------------+--------------
quack | ackqu
quackery | acekqruy
quackhood | acdhkooqu
quackish | achikqsu
quackishly | achiklqsuy
quackishness | acehiknqsssu
quackism | acikmqsu
quackle | aceklqu
quacksalver | aaceklqrsuv
quackster | acekqrstu
quacky | ackquy
(11 rows)
-- Now that we know we can generate each column in `words` from the `word_list` table, let's put them all back together
INSERT INTO words (
SELECT
word,
length(word) as len,
array_to_string(array_agg(letter), '') as letters
FROM (
SELECT
word,
lower(l.*) as letter
FROM
word_list, unnest(string_to_array(word, NULL)) l
ORDER BY word, letter
) w
GROUP BY word
);
words=# select * from words where word like 'quack%';
word | len | letters
--------------+-----+--------------
quack | 5 | ackqu
quackery | 8 | acekqruy
quackhood | 9 | acdhkooqu
quackish | 8 | achikqsu
quackishly | 10 | achiklqsuy
quackishness | 12 | acehiknqsssu
quackism | 8 | acikmqsu
quackle | 7 | aceklqu
quacksalver | 11 | aaceklqrsuv
quackster | 9 | acekqrstu
quacky | 6 | ackquy
(11 rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment