MySQL-数据修复-人员数据重复- 基础表数据重复,被后续业务表关联使用
最终提交执行
1、备份safety51_inp库中的inp_course_task_member,inp_exam_member,inp_contr_member 3个表
2、Jenkins重新构建inp相关方(customized-lets分支)
3、修正数据SQL(务必依次按顺序执行完下面5条SQL)
inp_course_task_member表:
-- 添加同一课程任务课程同时勾选了两个重复的人,需要删除大id
DELETE from inp_course_task_member WHERE id in (select id from (select mdelete.id from inp_course_task_member mdelete inner join
(SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,
m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b
on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id = maxUserId) c ) ;
-- 添加课程任务时选人选到大id,需要把课程id修改成小id的
UPDATE inp_course_task_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId set m1.member_id = m2.minId ;
inp_exam_member表:
-- 添加考试任务时同一考试同时勾选了两个重复的人,需要删除大id
DELETE from inp_exam_member WHERE id in (select id from (select mdelte.id from inp_exam_member mdelte inner join
(SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,
m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.exam_id,a.or_id,a.phone,a.id_card_number)
b on b.exam_id = mdelte.exam_id and mdelte.member_id = maxUserId) c ) ;
-- 添加考试任务时选人选到大id,需要把考试id修改成小id的
UPDATE inp_exam_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId set m1.member_id = m2.minId ;
inp_contr_member表:
-- 删除重复人员数据,保留小id
DELETE from inp_contr_member where id in (SELECT id from (select id from inp_contr_member m
inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t
on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number
where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)) a);
公有云线上研究过程
-- 所有重复的行
select m1.id,m1.or_id, m1.name,m1.phone,m1.id_card_number from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number order by m1.id;
-- 大小写 136条
也可见 csv
{
"payload": {
"data": [
{
"id": "3750",
"id_card_number": "410923196806206639",
"name": "王振国",
"or_id": "219",
"phone": "13939347383"
},
{
"id": "3751",
"id_card_number": "410923196806206639",
"name": "王振国",
"or_id": "219",
"phone": "13939347383"
},
{
"id": "3755",
"id_card_number": "130425196902105510",
"name": "杨兵为",
"or_id": "219",
"phone": "18631055746"
},
{
"id": "3756",
"id_card_number": "130425196902105510",
"name": "杨兵为",
"or_id": "219",
"phone": "18631055746"
},
{
"id": "3757",
"id_card_number": "130425196712295536",
"name": "郑守强",
"or_id": "219",
"phone": "18639300853"
}
],
"status": false,
"time": 11,
"title": [
{
"fixed": "left",
"key": "id",
"title": "id",
"width": "200"
},
{
"key": "or_id",
"title": "or_id",
"width": "200"
},
{
"key": "name",
"title": "name",
"width": "200"
},
{
"key": "phone",
"title": "phone",
"width": "200"
},
{
"key": "id_card_number",
"title": "id_card_number",
"width": "200"
}
],
"total": 136
},
"code": 1200,
"text": ""
}
总体处理方向:
1条业务数据同时选择重复人员,则删除人员id大的
业务数据如果选择到重复人员大id时,这把业务表的关联id改成小id
企业表
SELECT container_id, NAME FROM inp_contractor GROUP BY container_id, NAME HAVING COUNT( 1 ) > 1
inp_contractor 无重复数据
研究各个表用到到id
SELECT * from inp_attachment WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by member_id
inp_attachment 无数据
SELECT * from inp_contr_member_cert WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by member_id
inp_contr_member_cert 无数据
SELECT * from inp_user_learning_course WHERE ur_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by ur_id
inp_user_learning_course 无数据
SELECT * from inp_course_task_member WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by member_id
inp_course_task_member 有数据 大小都有
---> 这就说明1、添加同一课程任务课程同时勾选了两个重复的人,需要删除大id【课程id1822同时有小id4449,大id4450】;2、课程id不同,添加课程任务时选人选到大id,需要把课程id修改成小id的【课程id:1822,1913都使用了大id4450】
情况1:
重复的选人
select m1.* from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)
得到的id不一定是大是小,总之就是这个人。
-- task id 都相同
select m1.* from inp_course_task_member m1 where course_id = 1822 and member_id in (4449,4450) 2
select m1.* from inp_course_task_member m1 where course_id = 1913 and member_id in (4506,4508,4513,4515,4497,4498) 6
select m1.* from inp_course_task_member m1 where course_id = 1917 and member_id in (4513,4515) 2
-- 找到每个课程任务下,重复的人id(大id)
SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number;
查询结果: 见截图1
执行删除:
待删除inp_course_task_member行
【线上执行1】
select mdelete.id,mdelete.task_id,mdelete.course_id,mdelete.member_id from inp_course_task_member mdelete inner join (SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id = maxUserId;
查询结果: 见截图2
【线上执行2】
DELETE from inp_course_task_member WHERE id in (select id from (select mdelete.id from inp_course_task_member mdelete inner join (SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id = maxUserId) c ) ;
情况2:
查询出所有使用大id的数据
【线上执行3】
SELECT * from inp_course_task_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId ;
见截图3、截图4
执行更新
【线上执行4】
UPDATE inp_course_task_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId set m1.member_id = m2.minId ;
SELECT * from inp_course_task_user_learning_record WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】 order by member_id
inp_course_task_user_learning_record 有数据 用的小id,不做处理
SELECT * from inp_exam_hand_in_paper WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】 order by member_id
inp_exam_hand_in_paper 有数据 用的小id ,不做处理
SELECT * from inp_exam_member WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】 order by member_id
inp_exam_member 有数据 4466,4467(考试id不同) 4492,4493(考试id不同) 4497,4498(考试id不同) 4503,4504 4506,4508 有2个的,也有小的
进一步验证 和 inp_course_task_member 是不是有关系
SELECT * from inp_course_task_member WHERE member_id
in (select m1.id from inp_contr_member m1 inner join
(select or_id,phone,id_card_number from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number )
and member_id in (4497,4498) group by member_id
order by member_id ;发现有1条,也有2条,所以没有关联
---> 这就说明1、添加考试任务时同一考试同时勾选了两个重复的人,需要删除大id【貌似没有,同一考试,通过sql查一下】;2、添加考试任务时选人选到大id,需要把考试id修改成小id的
情况1:
【线上执行5】
select mdelte.id,mdelte.member_id,mdelte.exam_id from inp_exam_member mdelte inner join (SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.exam_id,a.or_id,a.phone,a.id_card_number) b on b.exam_id = mdelte.exam_id and mdelte.member_id = maxUserId;
查询解决见 截图5
验证部分数据
examId memberId
1041 4515(大) -> 4513
1096 4544(大) -> 4543
1096 4536(大) -> 4535
1096 4567(大) -> 4566
1096 4549(大) -> 4546
1096 4493(大) -> 4492
1096 4530(大) -> 4528
SELECT*FROM inp_exam_member WHERE exam_id=1041 AND member_id IN (4515,4513) group by member_id ;2
SELECT * FROM inp_exam_member WHERE exam_id=1096 AND member_id IN (4543,4544,4535,4536,4566,4567,4546,4549,4492,4493,4528,4530) group by member_id ; 12
执行删除
【线上执行6】
DELETE from inp_exam_member WHERE id in (select id from (select mdelte.id from inp_exam_member mdelte inner join (SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id
GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY a.exam_id,a.or_id,a.phone,a.id_card_number) b on b.exam_id = mdelte.exam_id and mdelte.member_id = maxUserId) c ) ;
情况2:
查询出所有使用大id的数据
【线上执行7】
SELECT * from inp_exam_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId ;
结果为空
执行更新(上述结果为空)
【线上执行8,可不执行】
UPDATE inp_exam_member m1 inner join
(select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2
on m1.member_id = m2.maxId set m1.member_id = m2.minId ;
--> 删除inp_contr_member中的大id
可参考《220228-删除重复线上考试数据-delete or update-以两个字段分组-保留小id行的数据-having等说明-待同步到SQL-按字段group by分组》
【线上执行9】
select * from inp_contr_member m
inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t
on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number
where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)
-- and m.or_id = 18 and m.phone = '13212344321' and m.id_card_number = '341124199909080876';
查询结果大写一共68条 ,见《大id列表.txt》
【线上执行10】
DELETE from inp_contr_member where id in (SELECT id from (select id from inp_contr_member m
inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t
on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number
where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)
-- and m.or_id = 18 and m.phone = '13212344321' and m.id_card_number = '341124199909080876'
) a);
可以备份 inp_course_task_member,inp_exam_member,inp_contr_member 表
Jenkins重新构建customized-lets分支(controller层加锁,也要注意《Java-synchronized-Lock-锁-加锁-并发插入重复数据-service层加锁要注意》)
sql执行时务必按顺序执行
正文到此结束