原创

MySQL-数据修复-人员数据重复- 基础表数据重复,被后续业务表关联使用

最终提交执行


 1、备份safety51_inp库中的inp_course_task_member,inp_exam_member,inp_contr_member 3个表

 2、Jenkins重新构建inp相关方(customized-lets分支)

 3、修正数据SQL(务必依次按顺序执行完下面5条SQL)

    inp_course_task_member表:
         -- 添加同一课程任务课程同时勾选了两个重复的人,需要删除大id
         DELETE from inp_course_task_member WHERE id in (select id from (select mdelete.id from inp_course_task_member  mdelete inner join 
         (SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,
         m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
         GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
         and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b 
         on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id  = maxUserId) c ) ;

         -- 添加课程任务时选人选到大id,需要把课程id修改成小id的
         UPDATE inp_course_task_member m1 inner join 
         (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 
         on m1.member_id = m2.maxId set m1.member_id = m2.minId ;

    inp_exam_member表:

        -- 添加考试任务时同一考试同时勾选了两个重复的人,需要删除大id
         DELETE from inp_exam_member WHERE id in (select id from (select mdelte.id from inp_exam_member mdelte inner join 
        (SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,
        m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
        GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id 
         and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.exam_id,a.or_id,a.phone,a.id_card_number) 
         b on b.exam_id = mdelte.exam_id  and mdelte.member_id  = maxUserId) c ) ;

        -- 添加考试任务时选人选到大id,需要把考试id修改成小id的
         UPDATE inp_exam_member m1 inner join 
        (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 
         on m1.member_id = m2.maxId set m1.member_id = m2.minId ;

    inp_contr_member表:

        -- 删除重复人员数据,保留小id
        DELETE from inp_contr_member where id in (SELECT id from (select id from inp_contr_member m 
        inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t 
        on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number 
        where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)) a);

公有云线上研究过程


-- 所有重复的行
select m1.id,m1.or_id, m1.name,m1.phone,m1.id_card_number from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number order by m1.id;
-- 大小写 136条 
也可见 csv 
{
  "payload": {
    "data": [
      {
        "id": "3750",
        "id_card_number": "410923196806206639",
        "name": "王振国",
        "or_id": "219",
        "phone": "13939347383"
      },
      {
        "id": "3751",
        "id_card_number": "410923196806206639",
        "name": "王振国",
        "or_id": "219",
        "phone": "13939347383"
      },
      {
        "id": "3755",
        "id_card_number": "130425196902105510",
        "name": "杨兵为",
        "or_id": "219",
        "phone": "18631055746"
      },
      {
        "id": "3756",
        "id_card_number": "130425196902105510",
        "name": "杨兵为",
        "or_id": "219",
        "phone": "18631055746"
      },
      {
        "id": "3757",
        "id_card_number": "130425196712295536",
        "name": "郑守强",
        "or_id": "219",
        "phone": "18639300853"
      }
    ],
    "status": false,
    "time": 11,
    "title": [
      {
        "fixed": "left",
        "key": "id",
        "title": "id",
        "width": "200"
      },
      {
        "key": "or_id",
        "title": "or_id",
        "width": "200"
      },
      {
        "key": "name",
        "title": "name",
        "width": "200"
      },
      {
        "key": "phone",
        "title": "phone",
        "width": "200"
      },
      {
        "key": "id_card_number",
        "title": "id_card_number",
        "width": "200"
      }
    ],
    "total": 136
  },
  "code": 1200,
  "text": ""
}

总体处理方向:
1条业务数据同时选择重复人员,则删除人员id大的
业务数据如果选择到重复人员大id时,这把业务表的关联id改成小id

企业表
SELECT container_id, NAME FROM inp_contractor GROUP BY container_id, NAME HAVING COUNT( 1 ) > 1
inp_contractor 无重复数据

研究各个表用到到id
 SELECT * from inp_attachment WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number )  order by member_id 
 inp_attachment 无数据

 SELECT * from inp_contr_member_cert WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number )  order by member_id 
 inp_contr_member_cert 无数据

 SELECT * from inp_user_learning_course WHERE ur_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by ur_id 
 inp_user_learning_course 无数据

 SELECT * from inp_course_task_member WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) order by member_id 
 inp_course_task_member  有数据 大小都有 
---> 这就说明1、添加同一课程任务课程同时勾选了两个重复的人,需要删除大id【课程id1822同时有小id4449,大id4450】;2、课程id不同,添加课程任务时选人选到大id,需要把课程id修改成小id的【课程id:1822,1913都使用了大id4450】
     情况1:  
                重复的选人
                select m1.* from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
                 GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)
                得到的id不一定是大是小,总之就是这个人。

                -- task id 都相同 
                select m1.* from inp_course_task_member m1 where course_id = 1822 and member_id in (4449,4450) 2
                select m1.* from inp_course_task_member m1 where course_id = 1913 and member_id in (4506,4508,4513,4515,4497,4498) 6
                select m1.* from inp_course_task_member m1 where course_id = 1917 and member_id in (4513,4515) 2

                -- 找到每个课程任务下,重复的人id(大id)
                SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
                GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
                and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number;
                查询结果: 见截图1

                执行删除: 
                        待删除inp_course_task_member行
                 【线上执行1】    
                 select mdelete.id,mdelete.task_id,mdelete.course_id,mdelete.member_id from inp_course_task_member  mdelete inner join (SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
                    GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
                    and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id  = maxUserId;
                    查询结果: 见截图2

                 【线上执行2】
                 DELETE from inp_course_task_member WHERE id in (select id from (select mdelete.id from inp_course_task_member  mdelete inner join (SELECT a.task_id, a.course_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number from inp_course_task_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
                    GROUP BY m1.task_id, m1.course_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
                    and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.task_id, a.course_id,a.or_id,a.phone,a.id_card_number) b on b.task_id = mdelete.task_id and b.course_id = mdelete.course_id and mdelete.member_id  = maxUserId) c ) ;


     情况2:
            查询出所有使用大id的数据
            【线上执行3】
            SELECT * from inp_course_task_member m1 inner join 
            (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)  m2
            on m1.member_id = m2.maxId ;
            见截图3、截图4

            执行更新
            【线上执行4】 
            UPDATE inp_course_task_member m1 inner join 
            (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)  m2
            on m1.member_id = m2.maxId set m1.member_id = m2.minId ;


SELECT * from inp_course_task_user_learning_record WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】  order by member_id 
inp_course_task_user_learning_record 有数据 用的小id,不做处理

SELECT * from inp_exam_hand_in_paper WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】  order by member_id 
inp_exam_hand_in_paper 有数据  用的小id ,不做处理

SELECT * from inp_exam_member WHERE member_id in (select m1.id from inp_contr_member m1 inner join (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 【 group by member_id】 order by member_id 
inp_exam_member 有数据 4466,4467(考试id不同)  4492,4493(考试id不同) 4497,4498(考试id不同) 4503,4504 4506,4508 有2个的,也有小的
进一步验证 和 inp_course_task_member 是不是有关系
SELECT *  from inp_course_task_member WHERE member_id 
 in (select m1.id from inp_contr_member m1 inner join
 (select or_id,phone,id_card_number from inp_contr_member  GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) m2 
 on m1.or_id = m2.or_id and m1.phone = m2.phone and m1.id_card_number = m2.id_card_number ) 
 and member_id in (4497,4498) group by member_id
 order by member_id  ;发现有1条,也有2条,所以没有关联
---> 这就说明1、添加考试任务时同一考试同时勾选了两个重复的人,需要删除大id【貌似没有,同一考试,通过sql查一下】;2、添加考试任务时选人选到大id,需要把考试id修改成小id的

        情况1:
            【线上执行5】
            select mdelte.id,mdelte.member_id,mdelte.exam_id from inp_exam_member mdelte inner join (SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
                    GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
                    and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.exam_id,a.or_id,a.phone,a.id_card_number) b on b.exam_id = mdelte.exam_id  and mdelte.member_id  = maxUserId;
            查询解决见 截图5 

            验证部分数据
            examId  memberId
            1041    4515(大)   -> 4513
            1096    4544(大)   -> 4543
            1096    4536(大)   -> 4535
            1096    4567(大)   -> 4566
            1096    4549(大)   -> 4546
            1096    4493(大)   -> 4492
            1096    4530(大)   -> 4528

            SELECT*FROM inp_exam_member WHERE exam_id=1041 AND member_id IN (4515,4513) group by member_id ;2
            SELECT * FROM inp_exam_member WHERE exam_id=1096 AND member_id IN (4543,4544,4535,4536,4566,4567,4546,4549,4492,4493,4528,4530) group by member_id ; 12

            执行删除
            【线上执行6】
            DELETE from inp_exam_member WHERE id in (select id from (select mdelte.id from inp_exam_member mdelte inner join (SELECT a.exam_id,max(id) maxUserId from inp_contr_member m0 inner JOIN (select m1.exam_id,m2.or_id,m2.phone,m2.id_card_number from inp_exam_member m1 inner join inp_contr_member m2 on m1.member_id = m2.id 
            GROUP BY m1.exam_id,m2.or_id,m2.phone,m2.id_card_number HAVING (count(1) > 1)) a on m0.or_id = a.or_id
            and m0.phone = a.phone and m0.id_card_number = a.id_card_number GROUP BY  a.exam_id,a.or_id,a.phone,a.id_card_number) b on b.exam_id = mdelte.exam_id  and mdelte.member_id  = maxUserId) c ) ;

        情况2:
            查询出所有使用大id的数据
            【线上执行7】
            SELECT * from inp_exam_member m1 inner join 
            (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)  m2
            on m1.member_id = m2.maxId ;
            结果为空

            执行更新(上述结果为空)
            【线上执行8,可不执行】
            UPDATE inp_exam_member m1 inner join 
            (select min(id) minId,max(id) maxId from inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)  m2
            on m1.member_id = m2.maxId set m1.member_id = m2.minId ;

--> 删除inp_contr_member中的大id
    可参考《220228-删除重复线上考试数据-delete or update-以两个字段分组-保留小id行的数据-having等说明-待同步到SQL-按字段group by分组》

    【线上执行9】
    select * from inp_contr_member m 
    inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t
    on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number
    where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)
    -- and m.or_id = 18 and m.phone = '13212344321' and m.id_card_number = '341124199909080876';
    查询结果大写一共68条 ,见《大id列表.txt》

    【线上执行10】
    DELETE from inp_contr_member where id in (SELECT id from (select id from inp_contr_member m 
    inner JOIN (SELECT or_id,phone,id_card_number FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1) t
    on t. or_id = m.or_id and t.phone = m.phone and t.id_card_number = m.id_card_number
    where m.id not in (SELECT MIN(id) AS id FROM inp_contr_member GROUP BY or_id,phone,id_card_number HAVING COUNT(1)>1)
    -- and m.or_id = 18 and m.phone = '13212344321' and m.id_card_number = '341124199909080876'
    ) a);


 可以备份 inp_course_task_member,inp_exam_member,inp_contr_member 表

 Jenkins重新构建customized-lets分支(controller层加锁,也要注意《Java-synchronized-Lock-锁-加锁-并发插入重复数据-service层加锁要注意》)

 sql执行时务必按顺序执行
正文到此结束
本文目录