Answers:
一种选择是使用排名变量,例如:
SELECT first_name,
age,
gender,
@curRank := @curRank + 1 AS rank
FROM person p, (SELECT @curRank := 0) r
ORDER BY age;
该(SELECT @curRank := 0)
部分允许变量初始化,而无需单独的SET
命令。
测试用例:
CREATE TABLE person (id int, first_name varchar(20), age int, gender char(1));
INSERT INTO person VALUES (1, 'Bob', 25, 'M');
INSERT INTO person VALUES (2, 'Jane', 20, 'F');
INSERT INTO person VALUES (3, 'Jack', 30, 'M');
INSERT INTO person VALUES (4, 'Bill', 32, 'M');
INSERT INTO person VALUES (5, 'Nick', 22, 'M');
INSERT INTO person VALUES (6, 'Kathy', 18, 'F');
INSERT INTO person VALUES (7, 'Steve', 36, 'M');
INSERT INTO person VALUES (8, 'Anne', 25, 'F');
结果:
+------------+------+--------+------+
| first_name | age | gender | rank |
+------------+------+--------+------+
| Kathy | 18 | F | 1 |
| Jane | 20 | F | 2 |
| Nick | 22 | M | 3 |
| Bob | 25 | M | 4 |
| Anne | 25 | F | 5 |
| Jack | 30 | M | 6 |
| Bill | 32 | M | 7 |
| Steve | 36 | M | 8 |
+------------+------+--------+------+
8 rows in set (0.02 sec)
partition by gender
分析功能的一部分(该数字“ 按性别”而不是总体结果的“等级” )
这是一个通用解决方案,可将密集的分区分配给行。它使用用户变量:
CREATE TABLE person (
id INT NOT NULL PRIMARY KEY,
firstname VARCHAR(10),
gender VARCHAR(1),
age INT
);
INSERT INTO person (id, firstname, gender, age) VALUES
(1, 'Adams', 'M', 33),
(2, 'Matt', 'M', 31),
(3, 'Grace', 'F', 25),
(4, 'Harry', 'M', 20),
(5, 'Scott', 'M', 30),
(6, 'Sarah', 'F', 30),
(7, 'Tony', 'M', 30),
(8, 'Lucy', 'F', 27),
(9, 'Zoe', 'F', 30),
(10, 'Megan', 'F', 26),
(11, 'Emily', 'F', 20),
(12, 'Peter', 'M', 20),
(13, 'John', 'M', 21),
(14, 'Kate', 'F', 35),
(15, 'James', 'M', 32),
(16, 'Cole', 'M', 25),
(17, 'Dennis', 'M', 27),
(18, 'Smith', 'M', 35),
(19, 'Zack', 'M', 35),
(20, 'Jill', 'F', 25);
SELECT person.*, @rank := CASE
WHEN @partval = gender AND @rankval = age THEN @rank
WHEN @partval = gender AND (@rankval := age) IS NOT NULL THEN @rank + 1
WHEN (@partval := gender) IS NOT NULL AND (@rankval := age) IS NOT NULL THEN 1
END AS rnk
FROM person, (SELECT @rank := NULL, @partval := NULL, @rankval := NULL) AS x
ORDER BY gender, age;
请注意,变量分配位于CASE
表达式内。这(理论上)考虑了评估问题的顺序。的IS NOT NULL
被添加到处理数据类型转换和短路的问题。
PS:通过删除所有检查领带的条件,可以轻松地将其转换为分区上的行号。
| id | firstname | gender | age | rank |
|----|-----------|--------|-----|------|
| 11 | Emily | F | 20 | 1 |
| 20 | Jill | F | 25 | 2 |
| 3 | Grace | F | 25 | 2 |
| 10 | Megan | F | 26 | 3 |
| 8 | Lucy | F | 27 | 4 |
| 6 | Sarah | F | 30 | 5 |
| 9 | Zoe | F | 30 | 5 |
| 14 | Kate | F | 35 | 6 |
| 4 | Harry | M | 20 | 1 |
| 12 | Peter | M | 20 | 1 |
| 13 | John | M | 21 | 2 |
| 16 | Cole | M | 25 | 3 |
| 17 | Dennis | M | 27 | 4 |
| 7 | Tony | M | 30 | 5 |
| 5 | Scott | M | 30 | 5 |
| 2 | Matt | M | 31 | 6 |
| 15 | James | M | 32 | 7 |
| 1 | Adams | M | 33 | 8 |
| 18 | Smith | M | 35 | 9 |
| 19 | Zack | M | 35 | 9 |
ELSE @rank_count := @rank_count + 1
ORDER BY gender, age DESC
?
虽然最受好评的答案排名很高,但它不会分区,您也可以进行自我连接以将整个事物分区:
SELECT a.first_name,
a.age,
a.gender,
count(b.age)+1 as rank
FROM person a left join person b on a.age>b.age and a.gender=b.gender
group by a.first_name,
a.age,
a.gender
用例
CREATE TABLE person (id int, first_name varchar(20), age int, gender char(1));
INSERT INTO person VALUES (1, 'Bob', 25, 'M');
INSERT INTO person VALUES (2, 'Jane', 20, 'F');
INSERT INTO person VALUES (3, 'Jack', 30, 'M');
INSERT INTO person VALUES (4, 'Bill', 32, 'M');
INSERT INTO person VALUES (5, 'Nick', 22, 'M');
INSERT INTO person VALUES (6, 'Kathy', 18, 'F');
INSERT INTO person VALUES (7, 'Steve', 36, 'M');
INSERT INTO person VALUES (8, 'Anne', 25, 'F');
答:
Bill 32 M 4
Bob 25 M 2
Jack 30 M 3
Nick 22 M 1
Steve 36 M 5
Anne 25 F 3
Jane 20 F 2
Kathy 18 F 1
丹尼尔(Daniel)版本的调整,以计算百分位和排名。同样,具有相同标记的两个人将获得相同的排名。
set @totalStudents = 0;
select count(*) into @totalStudents from marksheets;
SELECT id, score, @curRank := IF(@prevVal=score, @curRank, @studentNumber) AS rank,
@percentile := IF(@prevVal=score, @percentile, (@totalStudents - @studentNumber + 1)/(@totalStudents)*100),
@studentNumber := @studentNumber + 1 as studentNumber,
@prevVal:=score
FROM marksheets, (
SELECT @curRank :=0, @prevVal:=null, @studentNumber:=1, @percentile:=100
) r
ORDER BY score DESC
查询样本数据的结果-
+----+-------+------+---------------+---------------+-----------------+
| id | score | rank | percentile | studentNumber | @prevVal:=score |
+----+-------+------+---------------+---------------+-----------------+
| 10 | 98 | 1 | 100.000000000 | 2 | 98 |
| 5 | 95 | 2 | 90.000000000 | 3 | 95 |
| 6 | 91 | 3 | 80.000000000 | 4 | 91 |
| 2 | 91 | 3 | 80.000000000 | 5 | 91 |
| 8 | 90 | 5 | 60.000000000 | 6 | 90 |
| 1 | 90 | 5 | 60.000000000 | 7 | 90 |
| 9 | 84 | 7 | 40.000000000 | 8 | 84 |
| 3 | 83 | 8 | 30.000000000 | 9 | 83 |
| 4 | 72 | 9 | 20.000000000 | 10 | 72 |
| 7 | 60 | 10 | 10.000000000 | 11 | 60 |
+----+-------+------+---------------+---------------+-----------------+
丹尼尔和萨尔曼的答案相结合。但是,随着存在并列关系的继续序列,等级将不会给出。而是将排名跳到下一个。因此最大值始终达到行数。
SELECT first_name,
age,
gender,
IF(age=@_last_age,@curRank:=@curRank,@curRank:=@_sequence) AS rank,
@_sequence:=@_sequence+1,@_last_age:=age
FROM person p, (SELECT @curRank := 1, @_sequence:=1, @_last_age:=0) r
ORDER BY age;
架构和测试用例:
CREATE TABLE person (id int, first_name varchar(20), age int, gender char(1));
INSERT INTO person VALUES (1, 'Bob', 25, 'M');
INSERT INTO person VALUES (2, 'Jane', 20, 'F');
INSERT INTO person VALUES (3, 'Jack', 30, 'M');
INSERT INTO person VALUES (4, 'Bill', 32, 'M');
INSERT INTO person VALUES (5, 'Nick', 22, 'M');
INSERT INTO person VALUES (6, 'Kathy', 18, 'F');
INSERT INTO person VALUES (7, 'Steve', 36, 'M');
INSERT INTO person VALUES (8, 'Anne', 25, 'F');
INSERT INTO person VALUES (9, 'Kamal', 25, 'M');
INSERT INTO person VALUES (10, 'Saman', 32, 'M');
输出:
+------------+------+--------+------+--------------------------+-----------------+
| first_name | age | gender | rank | @_sequence:=@_sequence+1 | @_last_age:=age |
+------------+------+--------+------+--------------------------+-----------------+
| Kathy | 18 | F | 1 | 2 | 18 |
| Jane | 20 | F | 2 | 3 | 20 |
| Nick | 22 | M | 3 | 4 | 22 |
| Kamal | 25 | M | 4 | 5 | 25 |
| Anne | 25 | F | 4 | 6 | 25 |
| Bob | 25 | M | 4 | 7 | 25 |
| Jack | 30 | M | 7 | 8 | 30 |
| Bill | 32 | M | 8 | 9 | 32 |
| Saman | 32 | M | 8 | 10 | 32 |
| Steve | 36 | M | 10 | 11 | 36 |
+------------+------+--------+------+--------------------------+-----------------+
从MySQL 8开始,您最终还可以在MySQL中使用窗口函数:https : //dev.mysql.com/doc/refman/8.0/en/window-functions.html
您的查询可以完全相同的方式编写:
SELECT RANK() OVER (PARTITION BY Gender ORDER BY Age) AS `Partition by Gender`,
FirstName,
Age,
Gender
FROM Person
@Sam,您的观点在概念上很出色,但是我认为您误解了MySQL文档在引用页面上所说的内容-或我误解了:-)-我只是想添加此内容,以便于某些人对@感到不适丹尼尔(Daniel)的回答是,他们会更加放心,或者至少会更加深入。
您会看到"@curRank := @curRank + 1 AS rank"
里面SELECT
不是“一个声明”,它是声明的一个“原子”部分,因此应该是安全的。
您参考的文档继续显示了示例,其中在语句的2个(原子)部分中使用了相同的用户定义变量,例如, "SELECT @curRank, @curRank := @curRank + 1 AS rank"
。
有人可能会争辩说@curRank
在@Daniel的答案中使用了两次:(1)"@curRank := @curRank + 1 AS rank"
和(2)the,"(SELECT @curRank := 0) r"
但是由于第二种用法是FROM
子句的我敢肯定,一定会首先对其进行评估;本质上使其成为第二条和前一条声明。
实际上,在您引用的同一MySQL文档页面上,您将在注释中看到相同的解决方案-可能是@Daniel的来源;是的,我知道这是评论,但它是官方文档页面上的评论,确实具有一定的分量。
确定给定值等级的最直接的方法是对在此值之前的值进行计数。假设我们具有以下值:
10 20 30 30 30 40
30
值均视为第三40
值都被认为是第六(排名)或第四(密集排名)现在回到原来的问题。这是一些示例数据,按照OP中的描述进行了排序(期望的等级添加在右侧):
+------+-----------+------+--------+ +------+------------+
| id | firstname | age | gender | | rank | dense_rank |
+------+-----------+------+--------+ +------+------------+
| 11 | Emily | 20 | F | | 1 | 1 |
| 3 | Grace | 25 | F | | 2 | 2 |
| 20 | Jill | 25 | F | | 2 | 2 |
| 10 | Megan | 26 | F | | 4 | 3 |
| 8 | Lucy | 27 | F | | 5 | 4 |
| 6 | Sarah | 30 | F | | 6 | 5 |
| 9 | Zoe | 30 | F | | 6 | 5 |
| 14 | Kate | 35 | F | | 8 | 6 |
| 4 | Harry | 20 | M | | 1 | 1 |
| 12 | Peter | 20 | M | | 1 | 1 |
| 13 | John | 21 | M | | 3 | 2 |
| 16 | Cole | 25 | M | | 4 | 3 |
| 17 | Dennis | 27 | M | | 5 | 4 |
| 5 | Scott | 30 | M | | 6 | 5 |
| 7 | Tony | 30 | M | | 6 | 5 |
| 2 | Matt | 31 | M | | 8 | 6 |
| 15 | James | 32 | M | | 9 | 7 |
| 1 | Adams | 33 | M | | 10 | 8 |
| 18 | Smith | 35 | M | | 11 | 9 |
| 19 | Zack | 35 | M | | 11 | 9 |
+------+-----------+------+--------+ +------+------------+
要计算RANK() OVER (PARTITION BY Gender ORDER BY Age)
的莎拉,您可以使用此查询:
SELECT COUNT(id) + 1 AS rank, COUNT(DISTINCT age) + 1 AS dense_rank
FROM testdata
WHERE gender = (SELECT gender FROM testdata WHERE id = 6)
AND age < (SELECT age FROM testdata WHERE id = 6)
+------+------------+
| rank | dense_rank |
+------+------------+
| 6 | 5 |
+------+------------+
要计算RANK() OVER (PARTITION BY Gender ORDER BY Age)
的所有行,你可以使用此查询:
SELECT testdata.id, COUNT(lesser.id) + 1 AS rank, COUNT(DISTINCT lesser.age) + 1 AS dense_rank
FROM testdata
LEFT JOIN testdata AS lesser ON lesser.age < testdata.age AND lesser.gender = testdata.gender
GROUP BY testdata.id
结果如下(连接值添加在右侧):
+------+------+------------+ +-----------+-----+--------+
| id | rank | dense_rank | | firstname | age | gender |
+------+------+------------+ +-----------+-----+--------+
| 11 | 1 | 1 | | Emily | 20 | F |
| 3 | 2 | 2 | | Grace | 25 | F |
| 20 | 2 | 2 | | Jill | 25 | F |
| 10 | 4 | 3 | | Megan | 26 | F |
| 8 | 5 | 4 | | Lucy | 27 | F |
| 6 | 6 | 5 | | Sarah | 30 | F |
| 9 | 6 | 5 | | Zoe | 30 | F |
| 14 | 8 | 6 | | Kate | 35 | F |
| 4 | 1 | 1 | | Harry | 20 | M |
| 12 | 1 | 1 | | Peter | 20 | M |
| 13 | 3 | 2 | | John | 21 | M |
| 16 | 4 | 3 | | Cole | 25 | M |
| 17 | 5 | 4 | | Dennis | 27 | M |
| 5 | 6 | 5 | | Scott | 30 | M |
| 7 | 6 | 5 | | Tony | 30 | M |
| 2 | 8 | 6 | | Matt | 31 | M |
| 15 | 9 | 7 | | James | 32 | M |
| 1 | 10 | 8 | | Adams | 33 | M |
| 18 | 11 | 9 | | Smith | 35 | M |
| 19 | 11 | 9 | | Zack | 35 | M |
+------+------+------------+ +-----------+-----+--------+
如果您只想对一个人进行排名,则可以执行以下操作:
SELECT COUNT(Age) + 1
FROM PERSON
WHERE(Age < age_to_rank)
该排名与oracle RANK函数相对应(如果您有相同年龄的人,他们将获得相同的排名,此后的排名将是不连续的)。
这比在子查询中使用上述解决方案之一并从中选择一个人的排名要快一点。
这可以用于对所有人进行排名,但比上述解决方案要慢。
SELECT
Age AS age_var,
(
SELECT COUNT(Age) + 1
FROM Person
WHERE (Age < age_var)
) AS rank
FROM Person
Person
表的增长。这是为O(n ^ 2) VS O(n)的慢。
为了避免“ 但是 ”,在Erandac在丹尼尔和萨尔曼的答案组合回答,可以使用下面的“分区的解决方法”之一
SELECT customerID, myDate
-- partition ranking works only with CTE / from MySQL 8.0 on
, RANK() OVER (PARTITION BY customerID ORDER BY dateFrom) AS rank,
-- Erandac's method in combination of Daniel's and Salman's
-- count all items in sequence, maximum reaches row count.
, IF(customerID=@_lastRank, @_curRank:=@_curRank, @_curRank:=@_sequence+1) AS sequenceRank
, @_sequence:=@_sequence+1 as sequenceOverAll
-- Dense partition ranking, works also with MySQL 5.7
-- remember to set offset values in from clause
, IF(customerID=@_lastRank, @_nxtRank:=@_nxtRank, @_nxtRank:=@_nxtRank+1 ) AS partitionRank
, IF(customerID=@_lastRank, @_overPart:=@_overPart+1, @_overPart:=1 ) AS partitionSequence
, @_lastRank:=customerID
FROM myCustomers,
(SELECT @_curRank:=0, @_sequence:=0, @_lastRank:=0, @_nxtRank:=0, @_overPart:=0 ) r
ORDER BY customerID, myDate
此代码段中第3个变体中的分区排名将返回连续的排名编号。这将导致类似于rank() over partition by
结果的数据结构。例如,请参见下文。特别是,对于每个新的partitionRank,partitionSequence将始终使用1开始,使用此方法:
customerID myDate sequenceRank (Erandac)
| sequenceOverAll
| | partitionRank
| | | partitionSequence
| | | | lastRank
... lines ommitted for clarity
40 09.11.2016 11:19 1 44 1 44 40
40 09.12.2016 12:08 1 45 1 45 40
40 09.12.2016 12:08 1 46 1 46 40
40 09.12.2016 12:11 1 47 1 47 40
40 09.12.2016 12:12 1 48 1 48 40
40 13.10.2017 16:31 1 49 1 49 40
40 15.10.2017 11:00 1 50 1 50 40
76 01.07.2015 00:24 51 51 2 1 76
77 04.08.2014 13:35 52 52 3 1 77
79 15.04.2015 20:25 53 53 4 1 79
79 24.04.2018 11:44 53 54 4 2 79
79 08.10.2018 17:37 53 55 4 3 79
117 09.07.2014 18:21 56 56 5 1 117
119 26.06.2014 13:55 57 57 6 1 119
119 02.03.2015 10:23 57 58 6 2 119
119 12.10.2015 10:16 57 59 6 3 119
119 08.04.2016 09:32 57 60 6 4 119
119 05.10.2016 12:41 57 61 6 5 119
119 05.10.2016 12:42 57 62 6 6 119
...
select id,first_name,gender,age,
rank() over(partition by gender order by age) rank_g
from person
CREATE TABLE person (id int, first_name varchar(20), age int, gender char(1));
INSERT INTO person VALUES (1, 'Bob', 25, 'M');
INSERT INTO person VALUES (2, 'Jane', 20, 'F');
INSERT INTO person VALUES (3, 'Jack', 30, 'M');
INSERT INTO person VALUES (4, 'Bill', 32, 'M');
INSERT INTO person VALUES (5, 'Nick', 22, 'M');
INSERT INTO person VALUES (6, 'Kathy', 18, 'F');
INSERT INTO person VALUES (7, 'Steve', 36, 'M');
INSERT INTO person VALUES (8, 'Anne', 25, 'F');
INSERT INTO person VALUES (9,'AKSH',32,'M');