MySQL-group by-记录join两个无关联的业务表数据-分组-聚合-排序-分拆sql-按排序join-分页参数
SELECT
d.*,
ifnull( sum( IF ( ( t.finish_time IS NOT NULL ), 1, 0 ) ), 0 ) AS courseFinishCount,
(
SELECT
ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 )
FROM
exam_marking m
WHERE
enterprise_id = d.container_id
AND user_id = d.user_id
AND (
m.answer_time BETWEEN unix_timestamp(CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
OR m.supple_time BETWEEN unix_timestamp( CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
)
) AS passExamCount,
(
SELECT
ifnull( count( 1 ), 0 )
FROM
exam_marking m
WHERE
enterprise_id = d.container_id
AND user_id = d.user_id
AND (
m.answer_time BETWEEN unix_timestamp(CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
OR m.supple_time BETWEEN unix_timestamp( CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
)
) AS totalExamCount ,
(
SELECT
ROUND(case ifnull(count( 1 ), 0 )
when 0 then 0
else ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 ) * 100 / ifnull( count( 1 ), 0 ) END,2)
FROM
exam_marking m
WHERE
enterprise_id = d.container_id
AND user_id = d.user_id
AND (
m.answer_time BETWEEN unix_timestamp(CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
OR m.supple_time BETWEEN unix_timestamp( CONCAT("2022-03-31", ' 00:00:00') )
AND unix_timestamp( CONCAT("2022-05-31", ' 23:59:59') )
)
) AS examPassRate
FROM
(
SELECT
container_id,
user_id,
user_name,
sum( total_actual_learning_time ) AS total_actual_learning_time,
department_name,
department_parent_name,
post_name,
enterprise_name
FROM
edu_user_learning_course_record_every_day
WHERE
container_id = 'BaoXianFuWu'
AND department_parent_id LIKE CONCAT( '%"', 5, '"%' )
AND user_name LIKE CONCAT( '%', '无忧管理员', '%' )
AND user_status = 0
AND learning_day BETWEEN '2022-03-31'
AND '2022-05-31'
<!-- 如果要去掉 Using where; Using temporary; Using filesort,后期可以向着 GROUP BY user_id 去掉的方向优化,但是 finishedCourseNumber 数量乘以2倍了,根本原因应该就是这里没有了GROUP BY,导致数量倍数增加了,examPassRate的影响似乎不受到影响,
所以如果有必要,可以尝试查finishedCourseNumber的改成和examPassRate查询一样 ===> 测试改造完edu_user_learning_course_record_every_day有 Using temporary; Using filesort,如下的sql -->
GROUP BY user_id
) d
LEFT JOIN edu_user_learning_course_record_t t ON d.container_id = t.container_id
AND d.user_id = t.user_id
AND t.finish_time IS NOT NULL
AND t.finish_time BETWEEN '2022-03-31'
AND '2022-05-31'
GROUP BY d.user_id
ORDER BY
examPassRate asc;
-- todo
-- passExamCount ,totalExamCount 后期 可以去掉,加强性能,
-- 还要优化sql性能,目前还存在问题
如果主表edu_user_learning_course_record_every_day 直接和 edu_user_learning_course_record_t、 exam_marking 2个表left join,那edu_user_learning_course_record_t、 exam_marking
2个表直接会有交叉,数据数据会有sum或count重复,最多只能join一个表。方案是,课程表join,考试表子查询
都改成子查询的方式(测试改造完edu_user_learning_course_record_every_day有 Using temporary; Using filesort,如下的sql)
EXPLAIN SELECT
d.*,
sum( tmp_total_actual_learning_time ) AS total_actual_learning_time,
(SELECT
ifnull( sum( IF ( ( finish_time IS NOT NULL ), 1, 0 ) ), 0 )
FROM
edu_user_learning_course_record_t
WHERE
container_id = d.container_id
AND user_id = d.user_id
) AS finishedCourseNumber,
(
SELECT
ROUND(
CASE
ifnull( count( 1 ), 0 )
WHEN 0 THEN
0 ELSE ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 ) * 100 / ifnull( count( 1 ), 0 )
END,
2
)
FROM
exam_marking m
WHERE
enterprise_id = d.container_id
AND user_id = d.user_id
) AS examPassRate
FROM
(
SELECT
id,
container_id,
user_id,
user_name,
total_actual_learning_time as tmp_total_actual_learning_time,
-- sum( total_actual_learning_time ) AS total_actual_learning_time,
department_name,
department_parent_name,
post_name,
enterprise_name,
user_status
FROM
edu_user_learning_course_record_every_day FORCE index (idx_c_u_d_d_u)
WHERE
container_id = "BaoXianFuWu"
AND department_parent_id LIKE CONCAT( '%"', 1, '"%' )
AND user_status = 0
AND learning_day BETWEEN '2022-03-31'
AND '2022-05-31'
-- GROUP BY
-- user_id
) d
-- LEFT JOIN edu_user_learning_course_record_t t ON d.container_id = t.container_id
-- AND d.user_id = t.user_id
GROUP BY
d.user_id
ORDER BY
examPassRate DESC
LIMIT 10
成功版本一:分拆优化:
这个sql耗时,因为 GROUP BY d.user_id 瓶颈在此,所以按排序的字段查询,减少关联,主表是edu_user_learning_course_record_every_day,所以减少影响,
这个优化已经很难,有疑问查询的是阶段内的数据,不能提前处理好数据,所以用户数量增加实时查询就越慢,最好加上时间范围
同时不能使用pagehelper,这个工具count的时候也会查一遍 ,且查询user_id 时 自己带上分页
所以单独提供count的sql
<sql id="Base_Column_List_WithExam">
d.id, d.container_id, d.user_id, d.user_name,d.type, d.learning_requirement, d.learning_day, d.total_actual_learning_time, d.department_id,
d.department_name, d.department_parent_id, d.department_parent_name, d.enterprise_name, d.post_name,
d.user_status, d.creater_id, d.updater_id, d.deleter_id, d.create_time, d.update_time, d.delete_time,
d.is_delete,courseFinishCount,examPassRate
</sql>
<sql id="getUserIdList4UserLearningWhereSql">
d.container_id = #{containerId}
AND d.department_parent_id LIKE CONCAT( '%"', #{departmentParentId}, '"%' )
<if test="showForbiddenUser !=null and showForbiddenUser == 0">
AND d.user_status = 0
</if>
<if test="keyword !=null and keyword != ''">
AND d.user_name LIKE CONCAT( '%', #{keyword}, '%' )
</if>
<if test="startDate != null and startDate != '' and endDate !=null and endDate != ''">
<!-- 入参也是 年月的传入,这里必须格式 -->
AND DATE_FORMAT(d.learning_day, '%Y-%m' ) BETWEEN #{startDate} AND #{endDate}
</if>
</sql>
<sql id="getUserIdList4UserLearningOrderBySql">
<!-- 加上OrderBy有性能问题 -->
ORDER BY
<choose>
<when test="orderType == null or orderSort == null or orderSort == ''">
d.id desc
</when>
<when test="orderType == 1">
finishedCourseNumber ${orderSort}
</when>
<when test="orderType == 2">
<!-- 不要加d. -->
total_actual_learning_time ${orderSort}
</when>
<when test="orderType == 3">
examPassRate ${orderSort}
</when>
<otherwise>
d.id desc
</otherwise>
</choose>
</sql>
提供count的sql:
<select id="getUserIdList4UserLearningListNewTotal" resultType="long">
SELECT count(DISTINCT d.user_id) as total FROM edu_user_learning_course_record_every_day d force index (idx_c_u_d_d_u)
WHERE
<include refid="getUserIdList4UserLearningWhereSql" />
</select>
查userId的sql:(重点:关系到按哪个排序,就连哪个表;共用where 和 order by )
<select id="getUserIdList4UserLearningListNew" resultType="com.safety51.edu.entity.course.EduUserLearningCourseRecordEveryDay">
SELECT
d.user_id ,
<choose>
<when test="orderType != null and orderType == 1">
ifnull( sum( IF ( ( t.finish_time IS NOT NULL ), 1, 0 ) ), 0 ) AS finishedCourseNumber,
</when>
<when test="orderType != null and orderType == 2">
sum( d.total_actual_learning_time ) AS total_actual_learning_time,
</when>
<when test="orderType != null and orderType == 3">
ROUND(
CASE
ifnull( count( 1 ), 0 )
WHEN 0 THEN
0 ELSE ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 ) * 100 / ifnull( count( 1 ), 0 )
END,
2
) examPassRate,
</when>
</choose>
d.id
FROM
edu_user_learning_course_record_every_day d force index(idx_c_u_d_d_u)
<if test="orderType != null and orderType == 1" >
LEFT JOIN edu_user_learning_course_record_t t ON d.container_id = t.container_id
AND d.user_id = t.user_id
<if test="startDate != null and startDate != '' and endDate !=null and endDate != ''">
<!-- 入参也是 年月的传入,这里必须格式 -->
AND DATE_FORMAT( t.finish_time, '%Y-%m-%d' ) BETWEEN #{startDate} and #{endDate}
</if>
</if>
<if test="orderType != null and orderType == 3" >
LEFT JOIN exam_marking m ON m.enterprise_id = d.container_id AND m.user_id = d.user_id
<if test="startDate != null and startDate != '' and endDate !=null and endDate != ''">
AND (
m.answer_time BETWEEN unix_timestamp( CONCAT(#{startDate}, ' 00:00:00') )
AND unix_timestamp(CONCAT(#{endDate}, ' 23:59:59') )
OR m.supple_time BETWEEN unix_timestamp( CONCAT(#{startDate}, ' 00:00:00') )
AND unix_timestamp( CONCAT(#{endDate}, ' 23:59:59') )
)
</if>
</if>
WHERE
<include refid="getUserIdList4UserLearningWhereSql" />
GROUP BY d.user_id
<include refid="getUserIdList4UserLearningOrderBySql"/>
LIMIT ${(pageNum-1)*pageSize}, #{pageSize}
</select>
-- 以上先得到 user_id list ,上下两个sql共用相同的where 条件和ORDER BY ,这个必须的
<select id="getUserLearningListNew1" resultMap="BaseResultMapWithExam">
SELECT
d.id,
d.container_id,
d.user_id,
d.user_name,
sum( d.total_actual_learning_time ) AS total_actual_learning_time,
d.department_name,
d.department_parent_name,
d.post_name,
d.enterprise_name,
d.user_status,
(
SELECT
ifnull( sum( IF ( ( finish_time IS NOT NULL ), 1, 0 ) ), 0 ) from edu_user_learning_course_record_t
where container_id= d.container_id AND user_id = d.user_id
<if test="startDate != null and startDate != '' and endDate !=null and endDate != ''">
<!-- 入参也是 年月的传入,这里必须格式 -->
AND DATE_FORMAT( finish_time, '%Y-%m-%d' ) BETWEEN #{startDate} and #{endDate}
</if>
) AS finishedCourseNumber ,
(
SELECT
ROUND(case ifnull(count( 1 ), 0 )
when 0 then 0
else ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 ) * 100 / ifnull( count( 1 ), 0 ) END,2)
FROM
exam_marking m inner JOIN exams_work w on w.id = m.work_id and w.enterprise_id = m.enterprise_id
WHERE
m.enterprise_id = d.container_id
AND m.user_id = d.user_id
<if test="startDate != null and startDate != '' and endDate !=null and endDate != ''">
AND (
m.answer_time BETWEEN unix_timestamp( CONCAT(#{startDate}, ' 00:00:00') )
AND unix_timestamp(CONCAT(#{endDate}, ' 23:59:59') )
OR m.supple_time BETWEEN unix_timestamp( CONCAT(#{startDate}, ' 00:00:00') )
AND unix_timestamp( CONCAT(#{endDate}, ' 23:59:59') )
)
</if>
and w.exam_source = 0 and w.is_delete = 0
) AS examPassRate
FROM edu_user_learning_course_record_every_day d force index(idx_c_u_d_d_u)
WHERE d.user_id in
<foreach collection="userIdList" item="userId" index="index" open="(" close=")" separator=",">
#{userId}
</foreach>
AND
<include refid="getUserIdList4UserLearningWhereSql" />
GROUP BY d.user_id
<include refid="getUserIdList4UserLearningOrderBySql"/>
</select>
-- 原子查询中的group by主要的作用是找到userIdList,同时计算了total_actual_learning_time(另外:一般来说GROUP BY 比 distinct 去重效率要高)
-- 强制使用索引用法 force index(idx_c_u_d_d_u) ,
-- GROUP BY 在字段 要引用的索引的第一个位才会去掉 Using temporary; Using filesort
-- 比如 idx_c_u_d_d_u 的索引字段顺序是 `user_id`, `container_id`, `user_name`, `learning_day`, `department_parent_id`
代码部分:
long total = eduUserLearningCourseRecordEveryDayMapper.getUserIdList4UserLearningListNewTotal(userLearningDto);
if (total == 0) {
resultVo.setList(Collections.emptyList());
resultVo.setTotal(total);
return resultVo;
}
List<EduUserLearningCourseRecordEveryDay> userIdList = eduUserLearningCourseRecordEveryDayMapper.getUserIdList4UserLearningListNew(userLearningDto);
if (CollectionUtils.isEmpty(userIdList)) {
resultVo.setList(Collections.emptyList());
resultVo.setTotal(total);
return resultVo;
}
userLearningDto.setUserIdList(userIdList.stream().map(EduUserLearningCourseRecordEveryDay::getUserId).distinct().collect(Collectors.toList()));
List<EduUserLearningCourseRecordEveryDayVo> list = eduUserLearningCourseRecordEveryDayMapper.getUserLearningListNew1(userLearningDto);
PageInfo<EduUserLearningCourseRecordEveryDayVo> pageInfo = new PageInfo<>(list);
List<EduUserLearningCourseRecordEveryDayVo> pageInfoList = pageInfo.getList();
resultVo.setList(pageInfo.getList());
resultVo.setTotal(total);
以上成功版本一
成功版本二:一步到位:
-- 一行sql解决,edu_user_learning_course_record_t有一对多的问题,所以类似子查询
-- 还要各个表加上时间,不然只跟人关联了
EXPLAIN SELECT
d.id,
d.container_id,
d.user_id,
d.user_name,
sum( d.total_actual_learning_time ) AS total_actual_learning_time,
d.department_name,
d.department_parent_name,
d.post_name,
d.enterprise_name,
d.user_status,
(
SELECT
ifnull( sum( IF ( ( finish_time IS NOT NULL ), 1, 0 ) ), 0 )
FROM
edu_user_learning_course_record_t
WHERE
container_id = d.container_id
AND user_id = d.user_id
AND finish_time BETWEEN '2022-03-31'
AND '2022-05-31'
) AS finishedCourseNumber,
ROUND(
CASE
ifnull( count( 1 ), 0 )
WHEN 0 THEN
0 ELSE ifnull( sum( IF ( ( m.is_pass = 1 ), 1, 0 ) ), 0 ) * 100 / ifnull( count( 1 ), 0 )
END,
2
) AS examPassRate
FROM
edu_user_learning_course_record_every_day d FORCE INDEX ( idx_c_u_d_d_u )
LEFT JOIN exam_marking m ON m.enterprise_id = d.container_id
AND m.user_id = d.user_id
AND (
m.answer_time BETWEEN unix_timestamp( CONCAT( "2022-03-31", ' 00:00:00' ) )
AND unix_timestamp( CONCAT( "2022-05-31", ' 23:59:59' ) )
OR m.supple_time BETWEEN unix_timestamp( CONCAT( "2022-03-31", ' 00:00:00' ) )
AND unix_timestamp( CONCAT( "2022-05-31", ' 23:59:59' ) )
)
WHERE
d.container_id = "BaoXianFuWu"
AND d.department_parent_id LIKE CONCAT( '%"', 1, '"%' )
AND d.user_status = 0
AND DATE_FORMAT( d.learning_day, '%Y-%m' ) BETWEEN '2022-03'
AND '2022-05'
GROUP BY
d.user_id order by examPassRate;
另外 特别注意 , 不要加d.
<sql id="getUserIdList4UserLearningOrderBySql">
<!-- 加上OrderBy有性能问题 -->
ORDER BY
<choose>
<when test="orderType == null or orderSort == null or orderSort == ''">
d.id desc
</when>
<when test="orderType == 1">
finishedCourseNumber ${orderSort}
</when>
<when test="orderType == 2">
<!-- 不要加d. -->
total_actual_learning_time ${orderSort}
</when>
<when test="orderType == 3">
examPassRate ${orderSort}
</when>
<otherwise>
d.id desc
</otherwise>
</choose>
</sql>
方案一目前实测查询慢,如果是未选时间范围且考试排序会比较慢,其他都很快【已经很难优化,,实测1秒多,可接受】
方案二 目前性能很慢 如果加上order by 还是有 Using temporary; Using filesort,不管是不是有d.id,应该是按d.user_id 递增排序,total_actual_learning_time 数据重复累计