Skip to content

Instantly share code, notes, and snippets.

@jehoshua02
Created September 25, 2011 02:09
Show Gist options
  • Save jehoshua02/1240120 to your computer and use it in GitHub Desktop.
Save jehoshua02/1240120 to your computer and use it in GitHub Desktop.
Building the mba Search Query
/* Requirements:
I'm writing an SQL query for a search feature on a website in a PHP/MySQL application.
I have customer and ad tables. Customer may have many ads.
The user enters a keyword and sees the results in a web page. Here's the rules for the search results:
each item in the search result contains customer info and one ad for that customer
the customer, or the customer's ad must match the keyword
a customer may only appear once in the search results
search results ordered by relevance (how many times the keyword is found in customer and ad fields)
if the customer has more than one ad with equal relevance, then the ad with the greatest start date is chosen
The approach here is to break the problem down into smaller problems,
solve the smaller problems, then work the solutions together to build
the solution for the original problem.
*/
USE mba;
SET @keyword = LOWER('or');
SET @like = CONCAT('%', @keyword, '%');
-- Q1: select active ads --
SELECT ad.ad_id, ad.paid, ad.start_date, ad.end_date
FROM ad
WHERE ad.paid = 1
AND ad.start_date <= NOW()
AND ad.end_date >= NOW();
-- Q2: join customer and ad, selecting rows that match keyword in some way --
SELECT customer.customer_id, ad.ad_id
FROM customer
JOIN state ON (customer.state_id = state.state_id)
JOIN country ON (state.country_id = country.country_id)
JOIN ad ON (customer.customer_id = ad.customer_id)
WHERE (ad.description LIKE @like
OR ad.text LIKE @like
OR customer.title LIKE @like
OR customer.company LIKE @like
OR customer.city LIKE @like
OR state.name LIKE @like
OR customer.zip LIKE @like
OR country.name LIKE @like
OR customer.description LIKE @like
OR CONCAT(customer.first_name, " ", customer.last_name) LIKE @like
);
-- Q3: Q1 + Q2 --
SELECT customer.customer_id, ad.ad_id
FROM customer
JOIN state ON (customer.state_id = state.state_id)
JOIN country ON (state.country_id = country.country_id)
JOIN ad ON (customer.customer_id = ad.customer_id)
WHERE (
ad.description LIKE @like
OR ad.text LIKE @like
OR customer.title LIKE @like
OR customer.company LIKE @like
OR customer.city LIKE @like
OR state.name LIKE @like
OR customer.zip LIKE @like
OR country.name LIKE @like
OR customer.description LIKE @like
OR CONCAT(customer.first_name, " ", customer.last_name) LIKE @like
)
AND ad.ad_id IN (
SELECT ad.ad_id
FROM ad
WHERE ad.paid = 1
AND ad.start_date <= NOW()
AND ad.end_date >= NOW()
);
-- Q4: count customers --
SELECT COUNT(customer.customer_id) AS customers
FROM customer;
-- Q5: count customers with ads --
SELECT COUNT(DISTINCT ad.customer_id) AS customers_with_ads
FROM ad;
-- Q6: show start dates for all ads --
SELECT ad.customer_id, ad.ad_id, ad.start_date
FROM ad
ORDER BY ad.customer_id, ad.start_date DESC;
-- Q7: select max ad start date for each customer --
SELECT ad.customer_id, MAX(ad.start_date) AS start_date
FROM ad
GROUP BY ad.customer_id
ORDER BY ad.customer_id, ad.start_date DESC;
-- Q8: get latest ad for each customer --
SELECT ad.customer_id, ad.ad_id, ad.start_date
FROM ad
JOIN (
SELECT ad.customer_id, MAX(ad.start_date) AS start_date
FROM ad
GROUP BY ad.customer_id
) AS max USING (customer_id, start_date);
-- Q9: calculate relevance for active active ads --
SELECT
ad.ad_id,
customer.customer_id,
(
((LENGTH(ad.description) - LENGTH(REPLACE(LOWER(ad.description), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(ad.text) - LENGTH(REPLACE(LOWER(ad.text), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.title) - LENGTH(REPLACE(LOWER(customer.title), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.company) - LENGTH(REPLACE(LOWER(customer.company), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.city) - LENGTH(REPLACE(LOWER(customer.city), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(state.name) - LENGTH(REPLACE(LOWER(state.name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.zip) - LENGTH(REPLACE(LOWER(customer.zip), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(country.name) - LENGTH(REPLACE(LOWER(country.name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.description) - LENGTH(REPLACE(LOWER(customer.description), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.first_name) - LENGTH(REPLACE(LOWER(customer.first_name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.last_name) - LENGTH(REPLACE(LOWER(customer.last_name), @keyword, ''))) / LENGTH(@keyword))
) AS relevance,
ad.start_date
FROM customer
JOIN state USING (state_id)
JOIN country USING (country_id)
JOIN ad USING (customer_id)
WHERE ad.ad_id IN (
SELECT ad.ad_id
FROM ad
WHERE ad.paid = 1
AND ad.start_date <= NOW()
AND ad.end_date >= NOW()
)
ORDER BY customer.customer_id, relevance DESC, ad.start_date DESC;
-- Q10: get the most recent of the most relevant ads for each customer --
SELECT
customer.customer_id,
customer.first_name,
customer.middle_initial,
customer.last_name,
customer.title,
customer.email,
customer.mobile_phone,
customer.office_phone,
customer.tf_phone,
customer.fax,
customer.personal_url,
customer.company,
customer.description,
customer.website,
customer.street,
customer.city,
state.name AS state,
customer.zip,
country.name AS country,
ad.ad_id,
ad.ad_type_id,
ad.url,
ad.text,
ad.description AS ad_description,
ad.file_url
FROM customer
JOIN state USING (state_id)
JOIN country USING (country_id)
JOIN ad USING (customer_id)
JOIN (
SELECT ad.customer_id, MAX(r.relevance) AS relevance
FROM ad
JOIN (
SELECT
ad.ad_id,
(
((LENGTH(ad.description) - LENGTH(REPLACE(LOWER(ad.description), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(ad.text) - LENGTH(REPLACE(LOWER(ad.text), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.title) - LENGTH(REPLACE(LOWER(customer.title), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.company) - LENGTH(REPLACE(LOWER(customer.company), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.city) - LENGTH(REPLACE(LOWER(customer.city), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(state.name) - LENGTH(REPLACE(LOWER(state.name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.zip) - LENGTH(REPLACE(LOWER(customer.zip), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(country.name) - LENGTH(REPLACE(LOWER(country.name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.description) - LENGTH(REPLACE(LOWER(customer.description), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.first_name) - LENGTH(REPLACE(LOWER(customer.first_name), @keyword, ''))) / LENGTH(@keyword))
+ ((LENGTH(customer.last_name) - LENGTH(REPLACE(LOWER(customer.last_name), @keyword, ''))) / LENGTH(@keyword))
) AS relevance
FROM customer
JOIN state USING (state_id)
JOIN country USING (country_id)
JOIN ad USING (customer_id)
WHERE ad.ad_id IN (
SELECT ad.ad_id
FROM ad
WHERE ad.paid = 1
AND ad.start_date <= NOW()
AND ad.end_date >= NOW()
)
) r USING (ad_id)
GROUP BY ad.customer_id
) AS relevance USING (customer_id)
JOIN (
SELECT ad.customer_id, MAX(ad.start_date) AS start_date
FROM ad
GROUP BY ad.customer_id
) AS latest USING (customer_id, start_date)
ORDER BY relevance DESC, ad.start_date;
@jehoshua02
Copy link
Author

Okay, so I think I have it with Q10. It looks crazy, but it does it correctly.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment