MySQL-数据修复-删除重复线上考试数据-delete or update-以两个字段分组-保留小id行的数据-having等说明-按字段group b
-- 同时有两个字段的判重 exam_id,login_id
select * from exams_member m
inner JOIN (SELECT exam_id,login_id FROM exams_member GROUP BY exam_id,login_id HAVING COUNT(1)>1) t
on t. exam_id = m.exam_id and t.login_id = m.login_id
where m.id not in (SELECT MIN(id) AS id FROM exams_member GROUP BY exam_id,login_id HAVING COUNT(1)>1)
-- and m.exam_id = 3738 and m.login_id = 'hbwss';
DELETE from exams_member where id in (SELECT id from (select id from exams_member m
inner JOIN (SELECT exam_id,login_id FROM exams_member GROUP BY exam_id,login_id HAVING COUNT(1)>1) t
on t. exam_id = m.exam_id and t.login_id = m.login_id
where m.id not in (SELECT MIN(id) AS id FROM exams_member GROUP BY exam_id,login_id HAVING COUNT(1)>1)
-- and m.exam_id = 3738 and m.login_id = 'hbwss'
) a);
select * from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
-- and m.work_id = 3738 and m.user_id = 'hbwss';
select count(1) from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
-- and m.work_id = 3738 and m.user_id = 'hbwss';
-- 执行update 包了2层
update exam_marking set is_delete = 1 where id in (select id from (select m.id from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
-- and m.work_id = 3738 and m.user_id = 'hbwss'
) as a);
*********************如果要带上条件的话,FROM exam_marking GROUP BY work_id,user_id 中间是不是也可以加上条件*******************************
以上带and m.work_id = 3738 and m.user_id = 'hbwss'就是处理指定考试、人的数据。不带条件就是处理全表数据。文本还有按指定容器处理数据的。
-- 136条
SELECT exam_id,login_id,count(1) as c from exams_member GROUP BY exam_id,login_id HAVING c > 1 ;
-- 14360条
SELECT work_id,user_id,count(1) FROM exam_marking where is_delete = 0 GROUP BY work_id,user_id HAVING COUNT(1) > 1;
-- 手动处理
SELECT * from exam_marking WHERE work_id = 3738 and user_id = 'hbwss';
SELECT * from exams_member WHERE exam_id = 3738 and login_id = 'hbwss';
-- 删除大的id
DELETE from exams_member where id = 289199;
update exam_marking set is_delete = 1 where id = 475506
-- 验证
-- 136条
SELECT exam_id,login_id,count(1) as c from exams_member GROUP BY exam_id,login_id HAVING c > 1 ;
-- 14360条 ( create_time > 1646049600) 2022-02-28 20:00:00
SELECT work_id,user_id,count(1) FROM exam_marking where is_delete = 0 and create_time > UNIX_TIMESTAMP('2022-02-28 20:00:00') GROUP BY work_id,user_id HAVING COUNT(1)>1 ; 也有=1 来判断一下有没有创建过考试
SELECT work_id,user_id,count(1) FROM exam_marking where is_delete = 0 and enterprise_id = 'jiangsudipukejigufen' GROUP BY work_id,user_id HAVING COUNT(1)>1 ;
-- 培训的考试 修复完并发问题后
SELECT work_id,user_id,count(1),FROM_UNIXTIME(w.create_time) create_time FROM exam_marking m left join exams_work w on w.id = m.work_id where m.is_delete = 0 and m.create_time > UNIX_TIMESTAMP('2022-02-28 20:00:00') and w.exam_source = 1 GROUP BY m.work_id,m.user_id HAVING COUNT(1)>1 order by create_time desc ;
为空
-- 培训的考试 所有的数据
mysql> SELECT m.id, work_id,user_id,count(1),FROM_UNIXTIME(w.create_time) create_time FROM exam_marking m left join exams_work w on w.id = m.work_id where m.is_delete = 0 and w.exam_source = 1 GROUP BY m.work_id,m.user_id HAVING COUNT(1)>1 order by create_time desc ;
-- 还有11条22月2月17号数据
+--------+---------+-----------------+----------+---------------------+
| id | work_id | user_id | count(1) | create_time |
+--------+---------+-----------------+----------+---------------------+
| 472924 | 3690 | lecturer02 | 2 | 2022-02-17 20:39:21 |
| 472927 | 3690 | lecturer_two | 2 | 2022-02-17 20:39:21 |
| 472923 | 3690 | administrator02 | 2 | 2022-02-17 20:39:21 |
| 472926 | 3690 | lecturer_one | 2 | 2022-02-17 20:39:21 |
| 472925 | 3690 | lecturer1 | 2 | 2022-02-17 20:39:21 |
| 472762 | 3687 | admin_51 | 2 | 2022-02-17 19:35:11 |
| 472752 | 3686 | administrator02 | 2 | 2022-02-17 19:18:05 |
| 472755 | 3686 | lecturer_one | 2 | 2022-02-17 19:18:05 |
| 472754 | 3686 | lecturer1 | 2 | 2022-02-17 19:18:05 |
| 472753 | 3686 | lecturer02 | 2 | 2022-02-17 19:18:05 |
| 472756 | 3686 | lecturer_two | 2 | 2022-02-17 19:18:05 |
+--------+---------+-----------------+----------+---------------------+
11 rows in set (0.03 sec)
修复
update exam_marking set is_delete = 1 where id in (select id from (select m.id from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
and m.work_id in (3690,3687,3686)
) as a);
Query OK, 11 rows affected (4.45 sec)
Rows matched: 11 Changed: 11 Warnings: 0
另外发现的数据都是来自普通的
SELECT work_id,user_id,count(1),FROM_UNIXTIME(max(w.create_time)) maxCreate_time,FROM_UNIXTIME(min(w.create_time)) minCreate_time FROM exam_marking m left join exams_work w on w.id = m.work_id where m.is_delete = 0 and w.exam_source = 0 GROUP BY m.work_id,m.user_id HAVING COUNT(1)>1 order by maxCreate_time desc limit 10;
-- 2021-09-30 之前的数据重复
SELECT work_id,user_id,count(1),FROM_UNIXTIME(create_time) FROM exam_marking where is_delete = 0 and create_time < UNIX_TIMESTAMP('2021-09-30 00:00:00') GROUP BY work_id,user_id HAVING COUNT(1)>1 ;
14226 rows
SELECT work_id,user_id,FROM_UNIXTIME(max(create_time)) maxCreate_time,FROM_UNIXTIME(min(create_time)) FROM exam_marking where is_delete = 0 GROUP BY work_id,user_id HAVING COUNT(1)>1 ;
// 最早最晚的时间
SELECT FROM_UNIXTIME(max(create_time)) maxCreate_time,FROM_UNIXTIME(min(create_time)) FROM exam_marking where id in (SELECT id FROM exam_marking where is_delete = 0 GROUP BY work_id,user_id HAVING COUNT(1)>1);
+---------------------+---------------------------------+
| maxCreate_time | FROM_UNIXTIME(min(create_time)) |
+---------------------+---------------------------------+
| 2022-04-21 09:12:02 | 2019-09-26 17:32:03 |
+---------------------+---------------------------------+
我没有改过普通的考试的加人,我只搞过培训和课程的
select id, enterprise_id,work_id,user_id,FROM_UNIXTIME(create_time) from exam_marking WHERE FROM_UNIXTIME(create_time) = '2022-04-21 09:12:02';
+--------+---------------------+---------+-------------------+----------------------------+
| id | enterprise_id | work_id | user_id | FROM_UNIXTIME(create_time) |
+--------+---------------------+---------+-------------------+----------------------------+
| 490278 | qiyeyunkaoshixitong | 3974 | u1000002142634390 | 2022-04-21 09:12:02 |
| 490279 | qiyeyunkaoshixitong | 3974 | u6000000503085869 | 2022-04-21 09:12:02 |
| 490280 | qiyeyunkaoshixitong | 3974 | u1000002142634390 | 2022-04-21 09:12:02 |
| 490281 | qiyeyunkaoshixitong | 3974 | u6000000503085869 | 2022-04-21 09:12:02 |
+--------+---------------------+---------+-------------------+----------------------------+
4 rows in set (0.36 sec)
--给志强和建国的start-
select count(1) from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0;
-- 185928 条重复数据
-- 逻辑删除
update exam_marking set is_delete = 1 where id in (select id from (select m.id from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
) as a);
--给志强和建国的end-
-- 可以按指定的考试id来处理 (实测处理marking表就行)
select id from exams_work where enterprise_id ="jiangsudipukejigufen" and is_delete = 0\g;
+------+
| id |
+------+
| 2925 |
| 3646 |
| 3653 |
| 3654 |
| 3664 |
| 3738 |
| 3749 |
-- 由于需要同时按 work_id,user_id ,来限制范围,这里有个关键的 JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t。不能分别where in work_id and where in user_id,这会串到其他数据
-- 以下处理容器维度的数据
select DISTINCT(count(m.id)) from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
and m.work_id in (select id from exams_work where enterprise_id ="jiangsudipukejigufen" and is_delete = 0) ;
update exam_marking set is_delete = 1 where id in (select id from (select m.id from exam_marking m inner JOIN (SELECT work_id,user_id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1) t
on t. work_id = m.work_id and t.user_id = m.user_id
where m.id not in (SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1)
and m.is_delete = 0
and m.work_id in (select id from exams_work where enterprise_id ="jiangsudipukejigufen" and is_delete = 0 )
) as a);
线上这个容器只有这一条考试 ,日志从 2022-02-24 16:27:14,788 [pool-4-thread-8] DEBUG c.s.e.m.e.E.batchInsert [BaseJdbcLogger.java : 143] 开始看
synchronized (lockGenExamMemberAndMarking) { 如果事务未提交的事务,这里可能也不行 。从日志上,一个子线程,一个主线程,如果没有事务,这里也会按主线程执行
还需要特别注意 SELECT MIN(id) AS id FROM exam_marking GROUP BY work_id,user_id HAVING COUNT(1)>1 拿到的是不是最小id,另外参考 《SQL-按字段group by分组 并 返回最新的结果》
结论似乎就是min max 是取group by 的最小最大值的,而如果在 group by 后加ORDER BY,这也仅仅是最group by后的结果进行排序,拿到的不是最小最大值。
如果需要对group by 前的结果进行过滤,就在group by 前 加where,
如果需要对group by 后的结果再做筛选,就加having。
正文到此结束