-- 如果用 IN 谓词 SELECT course_name, CASEWHEN course_id IN (SELECT course_id FROM OpenCourses WHEREmonth=200706) THEN'⚪' ELSE'×'ENDAS "6月", CASEWHEN course_id IN (SELECT course_id FROM OpenCourses WHEREmonth=200707) THEN'⚪' ELSE'×'ENDAS "7月", CASEWHEN course_id IN (SELECT course_id FROM OpenCourses WHEREmonth=200708) THEN'⚪' ELSE'×'ENDAS "8月" FROM CourseMaster;
-- 使用EXISTS谓词 SELECT CM.course_name, CASEWHENEXISTS (SELECT course_id FROM OpenCourses OC WHERE OC.month=200706 AND OC.course_id=CM.course_id) THEN'O' ELSE'X'ENDAS'6月', CASEWHENEXISTS (SELECT course_id FROM OpenCourses OC WHERE OC.month=200707 AND OC.course_id=CM.course_id) THEN'O' ELSE'X'ENDAS'7月', CASEWHENEXISTS (SELECT course_id FROM OpenCourses OC WHERE OC.month=200708 AND OC.course_id=CM.course_id) THEN'O' ELSE'X'ENDAS'8月' FROM CourseMaster CM;
-- Greatests +------+---+---+---+ | key0 | x | y | z | +------+---+---+---+ | A |1|2|3| | B |5|5|2| | C |4|7|1| | D |3|3|8| +------+---+---+---+ 4rowsinset (0.00 sec)
-- 找出多列数据中的最大值 /* 找出x列和y列中的最大值 */ SELECT key0, CASEWHEN x < y THEN y ELSE x ENDAS greatest FROM Greatests;
-- MySQL自带函数 GREATEST 直接可实现上述过程 SELECT key0, GREATEST(x,y) AS greatest FROM Greatests;
+------+----------+ | key0 | greatest | +------+----------+ | A |2| | B |5| | C |7| | D |3| +------+----------+ 4rowsinset (0.00 sec)
SELECT sex AS'性别', SUM(population) AS'全国', SUM(CASEWHEN pref_name ='德岛'THEN population ELSE0END) AS "德岛", SUM(CASEWHEN pref_name ='香川'THEN population ELSE0END) AS "香川", SUM(CASEWHEN pref_name ='爱媛'THEN population ELSE0END) AS "爱媛", SUM(CASEWHEN pref_name ='高知'THEN population ELSE0END) AS "高知", SUM(CASEWHEN pref_name IN ('德岛', '香川', '爱媛', '高知') THEN population ELSE0END) AS zaijie FROM PopTbl2 GROUPBY sex;
-- 注意语句的顺序 SELECT CASEWHEN (A+B<=C OR A+C<=B OR B+C<=A) THEN'Not A Triangle' WHEN (A=B AND B=C) THEN'Equilateral' WHEN (A=B OR A=C OR B=C) THEN'Isosceles' ELSE'Scalene'ENDAS triangle FROM TRIANGLES;
SET@r1=0, @r2=0, @r3=0, @r4=0; SELECTMIN(Doctor), MIN(Professor), MIN(Singer), MIN(Actor) FROM ( SELECTCASE Occupation WHEN "Doctor" THEN (@r1:=@r1+1) WHEN "Professor" THEN (@r2:=@r2+1) WHEN "Singer" THEN (@r3:=@r3+1) WHEN "Actor" THEN (@r4:=@r4+1) ENDAS rownumber, CASEWHEN Occupation="Doctor" THEN Name ENDAS Doctor, CASEWHEN Occupation="Professor" THEN Name ENDAS Professor, CASEWHEN Occupation="Singer" THEN Name ENDAS Singer, CASEWHEN Occupation="Actor" THEN Name ENDAS Actor FROM OCCUPATIONS ORDERBY Name ) AS TEMP GROUPBY rownumber; -- min()/max() will return a name for specific index and specific occupation. If there is a name, it will return it, if not, return NULL.
排序
窗口函数:也称OLAP函数、分析函数。
RANK()
DENSE_RANK()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
SELECT name , price , RANK() OVER(ORDERBY price DESC) AS rank_1 , DENSE_RANK() OVER(ORDERBY price DESC) AS rank_2 FROM Products;
-- a = 2, b = 4, c = NULL a < b AND b > c -- true AND unknown = unknown
a > b OR b < c -- false OR unknown = unknown
a < b OR b < c -- true OR unknown = true
NOT (b <> c) -- NOT(unknown) = unknown
排中律(Law of Excluded Middle): 命题“把命题和它的否命题通过OR连接而成的命题全都是真命题”在二值逻辑中被称为排中律。
在SQL中,排中律不成立
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-- 查询年龄是25岁或不是25岁的学生 SELECT* FROM Students WHERE age =25 OR age <>25; -- 查询结果不包含年龄为空的行
SELECT* FROM Students WHERE age =NULL OR age <>NULL; -- 如此才能详尽 -- 或 SELECT* FROM Students WHERE age =25 OR age <>25 OR age ISNULL;
CASE 表达式与NULL
CASE表达式与NULL:
1 2 3 4 5 6 7 8 9 10 11 12
CASE col_1 WHEN1THEN'O' WHENNULLTHEN'X' END -- 这个CASE表达式一定不会返回X -- 因为第二个WHEN子句表示的是col_1=NULL,这个式子的真值永远是unknown -- CASE表达式只认可真值为true的条件 -- 正确的写法是 CASE WHEN col_1 =1THEN'O' WHEN col_2 ISNULLTHEN'X' END
-- 5. 用<>等价改写NOT 和 = SELECT* FROM class_a WHERE (age <>22) AND (age <>23) AND (age <>NULL); -- Empty set (0.00 sec) -- 对NULL使用<>后,结果为unknown -- unknown AND unknown = unknown
应该使用EXISTS谓词:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
SELECT* FROM class_a a WHERENOTEXISTS ( SELECT* FROM class_b b WHERE a.age=b.age AND b.city='东京' ); -- +--------+------+--------+ -- | name | age | city | -- +--------+------+--------+ -- | 拉里 | 19 | 埼玉 | -- | 伯杰 | 21 | 千叶 | -- +--------+------+--------+ -- 2 rows in set (0.00 sec)
-- 求众数 -- 众数:反映群体趋势 SELECT income , COUNT(*) FROM Graduates GROUPBY income HAVINGCOUNT(*) >=ALL( SELECTCOUNT(*) FROM Graduates GROUPBY income ); -- ALL用于NULL或空集时会出现问题,可用极值函数来代替 SELECT income , COUNT(*) AS cnt FROM Graduates GROUPBY income HAVINGCOUNT(*) >= ( SELECTMAX(cnt) FROM ( SELECTCOUNT(*) AS cnt FROM Graduates GROUPBY income ) tmp );
-- 购物篮分析 -- 找出shopitems表中同时包含items表中物品的商铺 -- 带余除法(division with a remainder) SELECT SI.shop FROM ShopItems SI, Items I WHERE SI.item = I.item GROUPBY SI.shop HAVINGCOUNT(SI.item) = ( SELECTCOUNT(item) FROM Items );
-- 找出ShopItems表中包含Items表中所有物品的商铺(物品完全一致) -- 精确关系除法(exact relational division) SELECT SI.shop FROM ShopItems SI LEFTOUTERJOIN Items I ON SI.item = I.item GROUPBY SI.shop HAVINGCOUNT(SI.item) = (SELECTCOUNT(item) FROM Items) ANDCOUNT(I.item) = (SELECTCOUNT(item) FROM Items);
-- 水平展开2:使用标量子查询 SELECT c0.name , ( SELECT'O' FROM Courses c1 WHERE course ='SQL入门' AND c1.name = c0.name ) AS `SQL入门` , ( SELECT'O' FROM Courses c2 WHERE course ='UNIX基础' AND c2.name = c0.name ) AS `UNIX基础` , ( SELECT'O' FROM Courses c3 WHERE course ='Java中级' AND c3.name = c0.name ) AS `Java中级` FROM ( SELECTDISTINCT name FROM Courses ) c0;
-- 列转行:使用UNION ALL SELECT employee , child_1 AS child FROM Personnel UNIONALL SELECT employee , child_2 AS child FROM Personnel UNIONALL SELECT employee , child_3 AS child FROM Personnel; -- UNION ALL不会排除掉重复的行,可能会出现child列为NULL的情况
-- MySQL全外连接 SELECT a.id AS id , a.name , b.name FROM Class_a a LEFTOUTERJOIN Class_b b ON a.id = b.id
UNION
SELECT b.id AS id , a.name , b.name FROM Class_a a RIGHTOUTERJOIN Class_b b ON a.id = b.id;
内连接相当于求集合的交集(intersect)/积
全外连接相当于求集合的并集(union)/和
差集
1 2 3 4 5 6 7 8 9 10 11 12 13 14
-- 求差集:A-B SELECT a.id AS id , a.name AS a_name FROM Class_a a LEFTOUTERJOIN Class_b b ON a.id = b.id WHERE b.name ISNULL; -- B-A SELECT b.id AS id , B.name AS b_name FROM Class_b b LEFTOUTERJOIN Class_a a ON b.id = a.id WHERE a.name ISNULL;
异或集
(A UNION B) EXCEPT (A INTERSECT B)
(A EXCEPT B) UNION (B EXCEPT A)
全外连接
1 2 3 4 5 6 7
-- 使用全外连接求异或集 SELECTCOALESCE(a.id, b.id) AS id , COALESCE(a.name, b.name) AS name FROM Class_a a FULLOUTERJOIN Class_b b ON a.id = b.id WHERE a.name ISNULLOR b.name ISNULL;
关联子查询
对同一行数据进行列间比较,只需在WHERE子句里写上比较条件
对不同行数据进行列间比较,需要使用关联子查询或窗口函数
比较过去
1 2 3 4 5 6 7 8 9 10 11 12 13 14
-- 求与上一年营业额一样的年份: -- (1)使用关联子查询 SELECTyear , sale FROM Sales s1 WHERE sale = (SELECT sale FROM Sales s2 WHERE s2.year = s1.year -1) ORDERBYyear; -- (2)使用自连接 SELECT s1.year , s1.sale FROM Sales s1, Sales s2 WHERE s2.sale = s1.sale AND s2.year = s1.year -1 ORDERBY s1.year;
-- 比较营业额相较于上一年的变化 -- (1)使用关联子查询 SELECT s1.year , s1.sale , CASEWHEN sale = (SELECT sale FROM Sales s2 WHERE s2.year = s1.year -1) THEN'→'-- 持平 WHEN sale > (SELECT sale FROM Sales s2 WHERE s2.year = s1.year -1) THEN'↑'-- 增长 WHEN sale < (SELECT sale FROM Sales s2 WHERE s2.year = s1.year -1) THEN'↓'-- 减少 ELSE'——'ENDAS var FROM Sales s1 ORDERBYyear; -- (2)使用自连接 SELECT s1.year , s1.sale , CASEWHEN s1.sale = s2.sale THEN'→' WHEN s1.sale > s2.sale THEN'↑' WHEN s1.sale < s2.sale THEN'↓' ELSE'——'ENDAS var FROM Sales s1, Sales s2 WHERE s2.year = s1.year -1 ORDERBYyear; -- 注意:使用自连接,最早的年份不会出现在结果里 -- 以上写法,year需连续才可比较
-- 若时间轴有间断 -- 与过去最邻近的年份进行比较 -- (1)使用关联子查询 SELECTyear , sale FROM Sales s1 WHERE sale = ( SELECT sale FROM Sales s2 WHERE s2.year = ( SELECTMAX(year) -- 过去的年份中,年份最新的那个 FROM Sales s3 WHERE s1.year > s3.year -- 与s1.year相比是过去的年份 ) ) ORDERBYyear; -- (2)使用自连接:可减少一层子查询的嵌套 SELECT s1.year , s1.sale FROM Sales s1, Sales s2 WHERE s1.sale = s2.sale AND s2.year = (SELECTMAX(year) FROM Sales s3 WHERE s1.year > s3.year) ORDERBYyear; -- 自连接的结果里不包括最早的年份
1 2 3 4 5 6 7 8 9 10
-- 求每一年与过去最邻近的年份之间的营业额之差 -- 使用自外连接,结果里包含最早的年份 SELECT s2.year AS pre_year , s1.year AS now_year , s2.sale AS pre_sale , s1.sale AS now_sale , s1.sale - s2.sale AS diff_sale FROM Sales s1 LEFTOUTERJOIN Sales s2 ON s2.year = (SELECTMAX(year) FROM Sales s3 WHERE s1.year > s3.year) ORDERBY now_year;
累计值
1 2 3 4 5 6 7 8 9 10 11 12
-- 求截止到某个日期的营业额累计值 -- (1)使用窗口函数 SELECTdate , amt , SUM(amt) OVER(ORDERBYdate) AS total_amt FROM Accounts; -- (2)使用冯·诺伊曼型递归集合(标准SQL-92下) SELECTdate , a1.amt , (SELECTSUM(amt) FROM Accounts a2 WHERE a1.date >= a2.date) AS total_amt FROM Accounts a1 ORDERBYdate;
-- 求前3行(包括本行)的累计值 SELECTdate , amt , SUM(amt) OVER(ORDERBYdateROWS2 PRECEDING) AS total_amt FROM Accounts ; -- 不满3行的时间区间也输出 SELECTdate , a1.amt , ( SELECTSUM(amt) FROM Accounts a2 WHERE a1.date >= a2.date AND ( SELECTCOUNT(*) FROM Accounts a3 WHERE a3.date BETWEEN a2.date AND a1.date ) <=3 ) AS mvg_sum FROM Accounts a1 ORDERBYdate; -- 不满3行的区间按无效处理 SELECTdate , a1.amt , ( SELECTSUM(amt) FROM Accounts a2 WHERE a1.date >= a2.date AND ( SELECTCOUNT(*) FROM Accounts a3 WHERE a3.date BETWEEN a2.date AND a1.date ) <=3 HAVINGCOUNT(*) =3-- 不满3行数据的不显示 ) AS mvg_sum FROM Accounts a1 ORDERBYdate;
-- 求重叠的住宿期间 SELECT reserver , start_date , end_date FROM tbl t1 WHEREEXISTS ( SELECT* FROM tbl t2 WHERE t1.reserver <> t2.reserver -- 与其他客人比较 AND ( t1.start_date BETWEEN t2.start_date AND t2.end_date -- 自己的入住日期在他人的住宿期间内 OR t1.end_date BETWEEN t2.start_date AND t2.end_date -- 自己的离店日期在他人的住宿期间内 ) ); -- 如果有个人的住宿期间完全包含他人的住宿期间,则上述结果并不会输出这样的区间 -- 如果要把“完全包含他人的住宿期间的情况”也输出 SELECT reserver , start_date , end_date FROM tbl t1 WHEREEXISTS ( SELECT* FROM tbl t2 WHERE t1.reserver <> t2.reserver AND ( ( t1.start_date BETWEEN t2.start_date AND t2.end_date OR t1.end_date BETWEEN t2.start_date AND t2.end_date ) OR ( t2.start_date BETWEEN t1.start_date AND t1.end_date AND-- 注意 t1.end_date BETWEEN t1.start_date AND t1.end_date ) ) );
集合运算
直接使用UNION或INTERSECT,结果里不会出现重复的行;若要在结果里留下重复的行,可以加上ALL,即UNION ALL