Skip to content

Instantly share code, notes, and snippets.

@millerjs
Last active August 23, 2021 19:57
Show Gist options
  • Save millerjs/1434a329c84f6d034f83d189b84d5576 to your computer and use it in GitHub Desktop.
Save millerjs/1434a329c84f6d034f83d189b84d5576 to your computer and use it in GitHub Desktop.
Burning Glass Queries

SUPPORT-2027-burning-glass-queries

final index attempt

CREATE INDEX CONCURRENTLY index_burning_glass_jobs_full_search
ON burning_glass_jobs
USING GIN ((
    setweight(
      to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')
     ), 'A')
     || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, '')
   )
));

explain SELECT COUNT(*) FROM "burning_glass_jobs" INNER JOIN (SELECT "burning_glass_jobs"."id" AS pg_search_id, (ts_rank((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))), (to_tsquery('simple', ''' ' || 'business' || ' ''' || ':*')), 0)) AS rank FROM "burning_glass_jobs" WHERE (((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))) @@ (to_tsquery('simple', ''' ' || 'business' || ' ''' || ':*'))))) AS pg_search_9049415c020507460935a8 ON "burning_glass_jobs"."id" = pg_search_9049415c020507460935a8.pg_search_id WHERE "burning_glass_jobs"."internship" = 'false';

--   Finalize Aggregate  (cost=198013.33..198013.34 rows=1 width=8)
--   ->  Gather  (cost=198012.91..198013.32 rows=4 width=8)
--         Workers Planned: 4
--         ->  Partial Aggregate  (cost=197012.91..197012.92 rows=1 width=8)                                                            ->  Nested Loop  (cost=1062.33..196932.84 rows=32027 width=0)
--                     ->  Parallel Bitmap Heap Scan on burning_glass_jobs burning_glass_jobs_1  (cost=1061.89..142994.58 rows=32196 width=8)                                                                                                                              Recheck Cond: ((setweight(to_tsvector('simple'::regconfig, COALESCE((title)::text, ''::text)), 'A'::"char") || to_tsvector('simple'::regconfig, COALESCE((description)::text, ''::text))) @@ '''business'':*'::tsquery)
--                           ->  Bitmap Index Scan on index_burning_glass_jobs_full_search  (cost=0.00..1029.70 rows=128786 width=0)                                                                                                                                             Index Cond: ((setweight(to_tsvector('simple'::regconfig, COALESCE((title)::text, ''::text)), 'A'::"char") || to_tsvector('simple'::regconfig, COALESCE((description)::text, ''::text))) @@ '''business'':*'::tsquery)
--                     ->  Index Scan using burning_glass_jobs_pkey on burning_glass_jobs  (cost=0.43..1.68 rows=1 width=8)
--                           Index Cond: (id = burning_glass_jobs_1.id)
--                           Filter: (NOT internship)
--(12 rows)

It’s doing an index scan, but it still timed out in staging.

triage

curl ‘https://studentplaybook.staging.benchprep.com/api/v2/user_manager/jobs/search?search_text=business&city=&state=&post_date=&include_all_locations=false&internship=false&per_page=10&country=USA&auth_token=X

ssh deploy@10.176.230.78
SELECT COUNT(*) FROM "burning_glass_jobs" INNER JOIN (
  SELECT "burning_glass_jobs"."id" AS pg_search_id,
  (
  ts_rank((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A')
  || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))), (to_tsquery('simple', ''' ' || 'information' || ' ''' || ':*') && to_tsquery('simple', ''' ' || 'technology' || ' ''' || ':*')), 0))


AS rank FROM "burning_glass_jobs" WHERE (((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))) @@ (to_tsquery('simple', ''' ' || 'information' || ' ''' || ':*') && to_tsquery('simple', ''' ' || 'technology' || ' ''' || ':*'))))) AS pg_search_9049415c020507460935a8 ON "burning_glass_jobs"."id" = pg_search_9049415c020507460935a8.pg_search_id WHERE "burning_glass_jobs"."internship" = 'false'

  SELECT "burning_glass_jobs".*
  FROM "burning_glass_jobs"
  WHERE (((to_tsvector('english', coalesce("burning_glass_jobs"."description"::text, ''))) @@ (to_tsquery('english', ''' ' || 'business' || ' ''' || ':*'))))
  ORDER BY "burning_glass_jobs"."id" ASC;
select (setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, '')))
from burning_glass_jobs limit 2;

CREATE INDEX burning_glass_jobs_description_search
ON burning_glass_jobs
USING GIN (to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, '')));

CREATE INDEX burning_glass_jobs_title_search
ON burning_glass_jobs
USING GIN (to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A');
  • no blocks

wmx_rails_api_staging=# SELECT COUNT(*) FROM “burning_glass_jobs”; count ───────── 6273040 (1 row)

Time: 993.082 ms

SELECT COUNT(*) FROM "burning_glass_jobs" INNER JOIN (
  SELECT "burning_glass_jobs"."id" AS pg_search_id,
  (
  ts_rank((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A')
  || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))), (to_tsquery('simple', ''' ' || 'information' || ' ''' || ':*') && to_tsquery('simple', ''' ' || 'technology' || ' ''' || ':*')), 0))


AS rank FROM "burning_glass_jobs" WHERE (((setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, ''))) @@ (to_tsquery('simple', ''' ' || 'information' || ' ''' || ':*') && to_tsquery('simple', ''' ' || 'technology' || ' ''' || ':*'))))) AS pg_search_9049415c020507460935a8 ON "burning_glass_jobs"."id" = pg_search_9049415c020507460935a8.pg_search_id WHERE "burning_glass_jobs"."internship" = 'false'

  SELECT "burning_glass_jobs".*
  FROM "burning_glass_jobs"
  WHERE (((to_tsvector('english', coalesce("burning_glass_jobs"."description"::text, ''))) @@ (to_tsquery('english', ''' ' || 'business' || ' ''' || ':*'))))
  ORDER BY "burning_glass_jobs"."id" ASC;
select (setweight(to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A') || to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, '')))
from burning_glass_jobs limit 2;

CREATE INDEX burning_glass_jobs_description_search
ON burning_glass_jobs
USING GIN (to_tsvector('simple', coalesce("burning_glass_jobs"."description"::text, '')));

CREATE INDEX burning_glass_jobs_title_search
ON burning_glass_jobs
USING GIN (to_tsvector('simple', coalesce("burning_glass_jobs"."title"::text, '')), 'A');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment