Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
SQLアンチパターン:14章アンビギュアスグループ(曖昧なグループ)

14章 アンビギュアスグループ(曖昧なグループ)

準備

drop table Bugs;

CREATE TABLE `Bugs` (
      `bug_id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
      `reported_by` int,
      `date_reported` date,
      PRIMARY KEY (`bug_id`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8
;

insert into Bugs values (1234,1,'2009-12-19'), (2248,2,'2010-06-01'), (3456,2,'2010-02-16'), (4077,3,'2010-02-10'), (5150,5,'2010-02-16'), (5678,6,'2010-01-01'), (8063,4,'2009-11-09');

drop table Accounts;

create table Accounts (
    account_id int,
    account_name varchar(100),
    PRIMARY KEY (`account_id`)
);

insert into Accounts values (1 ,'Fran'), (2 ,'Ollie'), (3 ,'Kukla'), (4 ,'hironomiu'), (5 ,'hoge'), (6 ,'fuga');

drop table BugsProducts;

create table `BugsProducts`(
    `bug_id`bigint(20) unsigned NOT NULL,
    product_id int,
    product_name varchar(100)
);

insert into BugsProducts values
(1234,1,'Open RoundFile'), (2248,1,'Open RoundFile'), (3456,2,'Visual TurboBuilder'), (4077,2,'Visual TurboBuilder'), (5150,2,'Visual TurboBuilder'), (5678,3,'Reconsider'), (8063,3,'Reconsider');
  • データの確認
select * from Bugs JOIN BugsProducts USING(bug_id);

14.1 目的:グループ内で最大値を持つ行を取得する

  • 14.2.1も含む
SELECT product_id, MAX(date_reported) AS latest FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;
SELECT product_id, MAX(date_reported) AS latest,bug_id FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;
  • product_nameで同じことを試す
SELECT product_name, MAX(date_reported) AS latest FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_name;
SELECT product_name, MAX(date_reported) AS latest ,bug_id FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_name;

14.2.2 SQLがクエリの意図を汲んでくれるとは限らない

SELECT product_id, MAX(date_reported) AS latest, MIN(date_reported) AS earliest, bug_id FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;
SELECT product_name, MAX(date_reported) AS latest, MIN(date_reported) AS earliest, bug_id FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_name;

14.3 アンチパターンの見つけ方

MySQLの以下の設定

set sql_mode = 'ONLY_FULL_GROUP_BY';

14.4 アンチパターンを用いても良い場合

SELECT b.reported_by, a.account_name FROM Bugs b JOIN Accounts a ON (b.reported_by = a.account_id) GROUP BY b.reported_by;

SELECT b.reported_by, a.account_name FROM Bugs b JOIN Accounts a ON (b.reported_by = a.account_id) GROUP BY b.reported_by,a.account_name;
SELECT b.reported_by, a.account_name FROM Bugs b JOIN Accounts a ON (b.reported_by = a.account_id) ;

14.5.1 関数従属性のある列のみにクエリを実行する

SELECT product_id, MAX(date_reported) AS latest FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;

14.5.2 相関サブクエリを使用する

SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id) WHERE NOT EXISTS
  (SELECT * FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id) WHERE bp1.product_id = bp2.product_id AND b1.date_reported < b2.date_reported);

insert into Bugs values (8888,2,'2010-02-17');
insert into BugsProducts values (8888,2,'Visual TurboBuilder');

14.5.3 導出テーブルを使用する

SELECT m.product_id, m.latest, b1.bug_id FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id)
  JOIN (SELECT bp2.product_id, MAX(b2.date_reported) AS latest FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id) GROUP BY bp2.product_id) m
  ON (bp1.product_id = m.product_id AND b1.date_reported = m.latest);

SELECT m.product_id, m.latest, MAX(b1.bug_id) AS latest_bug_id FROM Bugs b1 JOIN
  (SELECT product_id, MAX(date_reported) AS latest FROM Bugs b2 JOIN BugsProducts USING (bug_id) GROUP BY product_id) m
  ON (b1.date_reported = m.latest) GROUP BY m.product_id, m.latest;

-- 余談 相関サブクエリ

SELECT bp1.product_id, b1.date_reported, b1.bug_id FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id)
where b1.date_reported = (SELECT MAX(b2.date_reported) AS latest FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id) where bp1.product_id = bp2.product_id GROUP BY bp2.product_id)

14.5.4 JOINを使用する

SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id
FROM Bugs b1 JOIN BugsProducts bp1 ON (b1.bug_id = bp1.bug_id)
LEFT OUTER JOIN (Bugs AS b2 JOIN BugsProducts AS bp2 ON (b2.bug_id = bp2.bug_id))
  ON (bp1.product_id = bp2.product_id AND (b1.date_reported < b2.date_reported
            OR b1.date_reported = b2.date_reported AND b1.bug_id < b2.bug_id))
WHERE b2.bug_id IS NULL;

SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id
FROM Bugs b1 JOIN BugsProducts bp1 USING(bug_id)
LEFT OUTER JOIN (Bugs AS b2 JOIN BugsProducts AS bp2 USING(bug_id))
  ON (bp1.product_id = bp2.product_id AND (b1.date_reported < b2.date_reported
            OR b1.date_reported = b2.date_reported AND b1.bug_id < b2.bug_id))
    WHERE b2.bug_id IS NULL;

SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id,b2.bug_id
FROM Bugs b1 JOIN BugsProducts bp1 USING(bug_id)
LEFT OUTER JOIN (Bugs AS b2 JOIN BugsProducts AS bp2 USING(bug_id))
  ON (bp1.product_id = bp2.product_id AND (b1.date_reported < b2.date_reported
            OR b1.date_reported = b2.date_reported AND b1.bug_id < b2.bug_id));

14.5.5 他の列に対しても集約関数を使用する

SELECT product_id, MAX(date_reported) AS latest, MAX(bug_id) AS latest_bug_id FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;

14.5.6 グループごとにすべての値を連結する

SELECT product_id, MAX(date_reported) AS latest, GROUP_CONCAT(bug_id) AS bug_id_list FROM Bugs JOIN BugsProducts USING (bug_id) GROUP BY product_id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.