sql语句
做一些基本测试:
球队表
直播表
球员表
赛事的赛况表
聊天室表
// 球队表
CREATE TABLE `live_team`(
`id` tinyint(1) unsigned NOT NULL auto_increment,
`name` VARCHAR(20) NOT NULL DEFAULT '',
`image` VARCHAR(20) NOT NULL DEFAULT '',
`type` tinyint(1) unsigned NOT NULL DEFAULT 0,
`create_time` int(10) unsigned NOT NULL DEFAULT 0,
`update_time` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;
// 直播表
CREATE TABLE `live_game`(
`id` int(10) unsigned NOT NULL auto_increment,
`a_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
`b_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
`a_score` int(10) unsigned NOT NULL DEFAULT 0,
`b_score` int(10) unsigned NOT NULL DEFAULT 0,
`narrator` VARCHAR(20) NOT NULL DEFAULT '',
`image` VARCHAR(20) NOT NULL DEFAULT '',
`start_time` int(10) unsigned NOT NULL DEFAULT 0,
`status` tinyint(1) unsigned NOT NULL DEFAULT 0,
`create_time` int(10) unsigned NOT NULL DEFAULT 0,
`update_time` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;
// 球员表
CREATE TABLE `live_player`(
`id` int(10) unsigned NOT NULL auto_increment,
`name` VARCHAR(20) NOT NULL DEFAULT '',
`image` VARCHAR(20) NOT NULL DEFAULT '',
`age` tinyint(1) unsigned NOT NULL DEFAULT 0,
`position` tinyint(1) unsigned NOT NULL DEFAULT 0,
`status` tinyint(1) unsigned NOT NULL DEFAULT 0,
`create_time` int(10) unsigned NOT NULL DEFAULT 0,
`update_time` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;
//赛事的赛况表
CREATE TABLE `live_outs`(
`id` int(10) unsigned NOT NULL auto_increment,
`game_id` int(10) unsigned NOT NULL DEFAULT 0,
`team_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
`content` VARCHAR(200) NOT NULL DEFAULT '',
`image` VARCHAR(20) NOT NULL DEFAULT '',
`type` tinyint(1) unsigned NOT NULL DEFAULT 0,
`status` tinyint(1) unsigned NOT NULL DEFAULT 0,
`create_time` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;
//聊天室表
CREATE TABLE `live_chart`(
`id` int(10) unsigned NOT NULL auto_increment,
`game_id` int(10) unsigned NOT NULL DEFAULT 0,
`user_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
`content` VARCHAR(200) NOT NULL DEFAULT '',
`status` tinyint(1) unsigned NOT NULL DEFAULT 0,
`create_time` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;
1.海风教育
匹配合同编号对应的学生编号和学生姓名
#### 引用自:
#### 匹配合同编号对应的学生编号和学生姓名
#### 日期:2018-12-11
#### 需求方:张明杰
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
tc.contract_id 合同编号,
tc.student_name 学生姓名,
tc.student_no 学生编号
FROM
view_tms_contract tc
WHERE
tc.contract_id in ('X22011810013489','X29011809015318')
2018-12匹配正式课课量
#### 引用自:王涤非
#### 匹配正式课课量
#### 日期:2018-12-12
#### 需求方:黄露
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
select t.teacher_no 编号,(select name from view_user_info where user_id = t.teacher_id) 姓名,
(select case when quarters_type = 1 then '全职授课' when quarters_type = 2 then'全职教研' when quarters_type = 3 then'兼职' when quarters_type = 4 then'实习'
when quarters_type = 5 then'体验课老师' end from view_teacher where teacher_id = t.teacher_id) '岗位属性',
(select group_concat(subject_name) from subject where subject_id in
(select subject_id from teacher_subject_rel where teacher_id =t.teacher_id))'学科名称',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-01-01' and '2017-01-31') '17-01课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-02-01' and '2017-02-28') '17-02课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-03-01' and '2017-03-31') '17-03课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-04-01' and '2017-04-30') '17-04课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-05-01' and '2017-05-31') '17-05课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-06-01' and '2017-06-30') '17-06课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-07-01' and '2017-07-31') '17-07课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-08-01' and '2017-08-31') '17-08课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-09-01' and '2017-09-30') '17-09课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-10-01' and '2017-10-31') '17-10课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-11-01' and '2017-11-30') '17-11课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-12-01' and '2017-12-31') '17-12课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-01-01' and '2018-01-31') '18-01课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-02-01' and '2018-02-28') '18-02课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-03-01' and '2018-03-31') '18-03课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-04-01' and '2018-04-30') '18-04课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-05-01' and '2018-05-31') '18-05课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-06-01' and '2018-06-30') '18-06课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-07-01' and '2018-07-31') '18-07课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-08-01' and '2018-08-31') '18-08课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-09-01' and '2018-09-30') '18-09课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-10-01' and '2018-10-31') '18-10课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-11-01' and '2018-11-30') '18-11课时量'
from view_teacher t where quarters_type <> 5 and suspend = 0
根据表内学生编号,拉取下沟通详情
#### 引用自:王沛弘
#### 根据表内学生编号,拉取下沟通详情
#### 执行时间:0.12s,shape(274,3),新脱敏库hfjy
#### 日期:2018-12-13
#### 需求方:庄辉
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
s.student_no 学员编号,
s.name 学生姓名,
(select name from view_user_info u where u.user_id = cr.communication_person) 沟通人,
cr.role_code 沟通角色,
(case when sale_stage2 = 0 then '新名单' when sale_stage2 = 1 then '跟进中' when sale_stage2 = 2
then '无效名单' when sale_stage2 = 3 then '未接听' when sale_stage2 = 4 then '未试听失单'
when sale_stage2 = 5 then '工作台销售阶段' when sale_stage2 = 6 then '已发起设班单'
when sale_stage2 = 7 then '已驳回或撤回' when sale_stage2 = 8 then '已排待沟通'
when sale_stage2 = 9 then '试听后待反馈' when sale_stage2 = 10 then '反馈后跟进中'
when sale_stage2 = 11 then '试听已跳票或取消' when sale_stage2 = 12 then '试听环节失单'
when sale_stage2 = 13 then '待提交成单' when sale_stage2 = 14 then '成单签订中' when sale_stage2 = 15 then '已成单' end) '销售阶段',
cr.content 沟通内容,
cr.start_time 沟通开始时间,
s.create_time 注册时间,
ad.adid_no 'ADID',
(select value from ddic where type = 'TP056' and code =cr.refuse_reason) 拒绝原因
FROM
view_student s
left join view_communication_record cr on cr.student_intention_id = s.student_intention_id
left join adid ad on ad.adid_id = s.adid_id
left join lesson_plan lp on s.student_id = lp.student_id
WHERE
s.student_no in tc.contract_id in ()
扩展内推渠道
#### 引用自:
#### 扩展内推渠道,提高招聘效率
#### 执行时间:0.12s,shape(274,3),不脱敏
#### 日期:2018-12-19
#### 需求方:符仲豪
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
t.teacher_no 教师编号,
case
t.quarters_type
when 1 then '全职授课'
when 2 then '全职教研'
when 3 then '兼职'
when 4 then '实习'
end 职位,
(select
group_concat(subject_name)
from
subject
where
subject_id in (
select
subject_id
from
teacher_subject_rel
where
teacher_id = t.teacher_id)) 授课科目,
u.phone 手机号码
FROM
view_teacher t
left join view_user_info u on u.user_id = t.teacher_id
WHERE
t.teacher_no in('EN986428','MA895758','CM619710','EN973284','MA872422','MA510182','CH638099','MA832436','EN452180','EN605560','MA650143','EN431375','MA623555','CH672780','MA251350','CH279462','MA416408','MA894492','PH773706','MA106630','CH570976','CH351722','CH590160','CM877339','PH761791','CH965963','EN651501','EN406361','CH730883','MA827543','EN136134','CH271984','EN230456','BI562010','CM351760','MA351150','MA695954','CH209523','MA213790','HI462339','MA116751','MA541162','MA187609','MA156942','MA403235','EN362828','CH379783','MA518124','GE266846','CH506465','MA793934','MA329451','MA834375','PH486008','MA967601','MA169318','EN223844','CH234352','CH232263','CM841499','GE239029','CH148363','CH127614','PH732951','CH335506','CH879687','CM679366','MA992383','MA460990','CH935940','MA488461','HI591424','CH810088','PH584001','PH639570','EN155475','PH311187','PH527149','EN917008','CH579165','MA358390','CH458473','PH756906','PH425722','EN377041','MA483267','EN849121','MA583832','MA275023','PO774285','CH456357','MA783656','MA441030','MA920021','MA275992','CH410863','MA547833','BI135382','MA147963','CH381538','MA149169','MA900789','MA580845','PH889762','MA675440','MA289681','EN580408')
and t.quarters_type <> 5
陈佳的需求
#### 引用自: 赵淼
#### 学员编号 高考年份 最新剩余课时 最近一次消课时间 班主任 班主任所在部门 班主任所在组 6-12月总计消课次数 其中时长1小时的消课次数
#### 执行时间:0.12s,shape(274,3),不脱敏
#### 日期:2018-12-19
#### 需求方:陈佳
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
s.student_no 学员编号,
s.exam_year 高考年份,
tb.rest 最新剩余课时,
max(adjust_start_time) 最后一次上课日期,
csl.by_assistant 班主任,
csl.assistant_dept 班主任部门,
csl.assistant_group 班主任所在组,
sum(case when lp.adjust_start_time >='2018-06-01'and lp.adjust_start_time< curdate() then 1 else 0 end) '6-12月总计消课次数',
sum(case when lp.adjust_start_time >='2018-07-01'and lp.adjust_start_time<'2018-09-01' then 1 else 0 end) '7-8月总计消课次数',
sum(case when lp.adjust_start_time >='2018-06-01'and lp.adjust_start_time<= curdate() and timestampdiff(SECOND,lp.adjust_start_time,lp.adjust_end_time)/3600=1 then 1 else 0 end ) '6-12月1小时的消课次数',
sum(case when lp.adjust_start_time >='2018-07-01'and lp.adjust_start_time<'2018-09-01' and timestampdiff(SECOND,lp.adjust_start_time,lp.adjust_end_time)/3600=1 then 1 else 0 end ) '7-8月1小时的消课次数',
csl.status 学员当时状态
FROM
lesson_plan lp
left join view_student s on lp.student_id = s.student_id
left join
(
select student_no,status,remaining_period,by_assistant,assistant_dept,assistant_group from
bidata.yxy_curriculum_status_log where stat_date =date_sub(curdate(),interval 1 day)
)
csl on csl.student_no = s.student_no
left join
(
select
t.student_intention_id, sum(t.rest) as rest
from
(select
c.student_intention_id, c.contract_id,
c.period + ifnull(c.donate_period,0)-ifnull(sum(l.class_period),0) as rest, sum(l.class_period)
from
view_tms_contract c
left join lesson_plan l on l.contract_id = c.contract_id and l.status<>0 and l.solve_status = 5
where big_type_id = 1 and c.status in (4,5) and c.teacher_level is not null
group by c.contract_id) t
group by t.student_intention_id
) tb on tb.student_intention_id = s.student_intention_id
WHERE
lp.adjust_start_time>='2018-06-01'
and lp.adjust_start_time< curdate()
and lp.status in (3,5)
and lp.solve_status <> 6
and lp.lesson_type = 1
and lp.subject_id<>11
and s.exam_year <= '2022'
and (select name from view_user_info where user_id=lp.teacher_id) not like '%测试%'
and (select name from view_user_info where user_id=lp.teacher_id) not like '%授课支持%'
GROUP BY
s.student_id
拉取班主任黄群的学生明细
#### 引用自:
#### 拉取班主任黄群的学生明细
#### 执行时间:36.787s,shape(274,3),新脱敏库hfjy
#### 日期:2018-12-20
#### 需求方:高敏霞
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
sd.student_no 学生编号,
sd.name 学生姓名,
tb.rest 剩余课时数,
(case sd.curriculum_status
when 0 then '正常学生'
when 1 then '已结课'
when 2 then '停课'
when 3 then '结课预警'
when 4 then '停课预警'
when 5 then '结课预警,停课预警'
end) 停结课情况,
ui.name 班主任姓名
FROM
view_student sd
left join view_user_info ui on ui.user_id = sd.by_assistant
left join
(
select
t.student_intention_id, sum(t.rest) as rest
from
(select
c.student_intention_id, c.contract_id,
c.period + ifnull(c.donate_period,0)-ifnull(sum(l.class_period),0) as rest, sum(l.class_period)
from
view_tms_contract c
left join lesson_plan l on l.contract_id = c.contract_id and l.status<>0 and l.solve_status = 5
where big_type_id = 1 and c.status in (4,5) and c.teacher_level is not null
group by c.contract_id) t
group by t.student_intention_id
) tb on tb.student_intention_id = sd.student_intention_id
WHERE
ui.name = '黄群' and sd.student_no is not null
拉取学生编号 学生姓名
#### 引用自:
#### 拉取拉取学生编号 学生姓名
#### 执行时间:0.416s,shape(274,3),新脱敏库hfjy
#### 日期:2018-12-20
#### 需求方:张明杰
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
select
tc.contract_id 合同编号,
tc.student_name 学生姓名,
tc.student_no 学生编号
from
view_tms_contract tc
where
tc.contract_id in ('X29011812006367','X20011812001838')
教师姓名-编号-学历-院校-专业
#### 引用自:
#### 教师姓名,教师编号,最高学历,毕业院校,专业(教师工作台(新))(不是服务中心!)
#### 执行时间:2.740,新脱敏库hfjy
#### 日期:2018-12-24
#### 需求方:吴建
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
tea_name 教师姓名,
teacher_no 教师编号,
case t.education
when 1 then '大专'
when 2 then '本科'
when 3 then '研究生'
when 4 then '硕士'
when 5 then '博士'
end 最高学历,
graduate_college 毕业院校,
major 专业
FROM
bidata.batch_teacher_info
可授课学科为科学的教师
#### 引用自:
#### 可授课学科为科学的教师
#### 执行时间:3.275,新脱敏库hfjy
#### 日期:2018-12-25
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
u.name 教师姓名,
t.teacher_no 教师编号,
(select value from bidata.gen_dict where type ='teaching_group' and code = tt.teaching_group) 学科组,
(select group_concat(subject_name) from subject where subject_id in (select subject_id from teacher_subject_rel where teacher_id =t.teacher_id) having group_concat(subject_name) like '%科学%') 可教授学科
FROM
view_teacher t
left join view_user_info u on u.user_id = t.teacher_id
left join view_tms_teacher_ext tt on tt.id = t.teacher_id
WHERE
t.quarters_type <> 5
and u.name not like '%测试%' and u.name not like '%授课支持%'
and (select group_concat(subject_name) from subject where subject_id in (select subject_id from teacher_subject_rel where teacher_id =t.teacher_id) having group_concat(subject_name) like '%科学%') is not null
学生id匹配学生姓名和学生编号
#### 引用自:
#### 学生id匹配学生姓名和学生编号
#### 执行时间:0.823,新脱敏库hfjy
#### 日期:2018-12-27
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
s.student_id 学生id,
u.name 学生姓名,
s.student_no 学生编号
FROM
view_student s
left join view_user_info u on u.user_id = s.student_id
WHERE
s.student_id in ('943632','631213')
教师id匹配教师姓名和教师编号
#### 引用自:
#### 教师id匹配教师姓名和教师编号
#### 执行时间:0.823,新脱敏库hfjy
#### 日期:2018-12-27
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:
SELECT
t.teacher_id 教师id,
u.name 教师姓名,
t.teacher_no 教师编号
FROM
view_teacher t
left join view_user_info u on u.user_id = t.teacher_id
WHERE
t.teacher_id in ('80920','122395')
2019-8月zxj排课率
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# # 排课率 = 当月排课人数/上月底在读人数
# ### =============================月维度=============================
# In[2]:
sql1 = '''
SELECT
'201907' as '统计时间',
a.学段,
count(DISTINCT student_no) as '在读学生总数量'
FROM
(
SELECT
stat_date,
student_no,
(case when slm.exam_year >= 2025 and slm.exam_year < 2031 then '小学'
when slm.exam_year >= 2022 and slm.exam_year < 2025 then '初中'
when slm.exam_year >= 2020 and slm.exam_year < 2022 then '高中' else '无' end) '学段',
remaining_period as '剩余总课时',
status
FROM
yxy_curriculum_status_log_monthly slm
WHERE
slm.stat_date = '2019-06-30'
and remaining_period > 0
and slm.exam_year <> 2019
) a
WHERE
a.学段 <> '无'
GROUP BY
a.学段
'''
df1 = pd.read_sql(sql1,conn_dmart)
df1.head()
# In[3]:
sql2 = '''
SELECT
'201907' as '统计时间',
a.学段,
count(a.student_id) as '排课学生总数量'
FROM
(
SELECT
lp.student_id,
DATE(lp.adjust_start_time) as 'stat_date',
(case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '无' end) '学段'
FROM
tms_lesson_plan_history lph
LEFT JOIN
lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
view_student s on s.student_id=lp.student_id
WHERE
year(lp.adjust_start_time) = 2019
and month(lp.adjust_start_time)= 7
and lph.opt_type=1
and lp.lesson_type=1
and s.account_type=1
and s.exam_year <> 2019
GROUP BY
lp.student_id
) a
WHERE
a.学段 <> '无'
GROUP BY
a.学段
'''
df2 = pd.read_sql(sql2,conn_mysql)
df2.head()
# In[4]:
df_sub = pd.merge(df1,df2,on=['统计时间','学段'],how='left')
df_sub.head()
# In[5]:
df_sub['排课率'] = df_sub['排课学生总数量']/df_sub['在读学生总数量']
df_sub['排课率'] = df_sub['排课率'].apply(lambda x: format(x, '.2%'))
df_sub.head()
# ### =============================周维度=============================
# In[6]:
sql3 = '''
SELECT
a.周数,
a.学段,
count(DISTINCT a.student_id) as '排课总人数'
FROM
(
SELECT
lp.student_id,
DATE(lp.adjust_start_time) as 'stat_date',
(case when DAY(lp.adjust_start_time) in (1,2,3,4,5,6,7) then '第一周' when DAY(lp.adjust_start_time) in (8,9,10,11,12,13,14) then '第二周'
when DAY(lp.adjust_start_time) in (15,16,17,18,19,20,21) then '第三周' when DAY(lp.adjust_start_time) in (22,23,24,25,26,27,28) then '第四周'
when DAY(lp.adjust_start_time) in (29,30,31) then '第五周' else '' end) as '周数',
(case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '无' end) '学段'
FROM
tms_lesson_plan_history lph
LEFT JOIN
lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
view_student s on s.student_id=lp.student_id
WHERE
year(lp.adjust_start_time) = 2019
and month(lp.adjust_start_time)= 7
and lph.opt_type=1
and lp.lesson_type=1
and s.account_type=1
and s.exam_year <> 2019
) a
WHERE
a.学段 <> '无'
GROUP BY
a.周数,a.学段
ORDER BY
field(a.周数,'第一周','第二周','第三周','第四周','第五周'),
field(a.学段,'小学','初中','高中')
'''
df3 = pd.read_sql(sql3,conn_mysql)
df3.head()
# In[7]:
df4 = pd.merge(df3,df1,on='学段',how='inner')
df4
# In[8]:
df4.drop('统计时间',axis=1,inplace=True)
df4.head()
# In[9]:
df4['排课率'] = df4['排课总人数']/df4['在读学生总数量']
df4['排课率'] = df4['排课率'].apply(lambda x: format(x, '.2%'))
df4.head()
# ### =================================日维度======================================
# In[10]:
sql5 = '''
SELECT
a.天数,
a.学段,
count(DISTINCT a.student_id) as '排课学生总数量'
FROM
(
SELECT
lp.student_id,
DAY(lp.adjust_start_time) as '天数',
(case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '无' end) '学段'
FROM
tms_lesson_plan_history lph
LEFT JOIN
lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
view_student s on s.student_id=lp.student_id
WHERE
year(lp.adjust_start_time) = 2019
and month(lp.adjust_start_time)= 7
and lph.opt_type=1
and lp.lesson_type=1
and s.account_type=1
and s.exam_year <> 2019
) a
WHERE
a.学段 <> '无'
GROUP BY
a.天数,a.学段
'''
df5 = pd.read_sql(sql5,conn_mysql)
df5.head()
# In[11]:
df5 = pd.merge(df5,df1,on='学段',how='inner')
df5
# In[12]:
df5.drop('统计时间',axis=1,inplace=True)
# In[13]:
df5['排课率'] = df5['排课学生总数量']/df5['在读学生总数量']
df5['排课率'] = df5['排课率'].apply(lambda x: format(x, '.2%'))
df5.head()
# In[14]:
writer=pd.ExcelWriter(r'C:\Users\hp\Desktop\19年7月排课率004.xlsx')
df_sub.to_excel(writer,sheet_name='月维度')
df4.to_excel(writer,sheet_name='周维度')
df5.to_excel(writer,sheet_name='日维度')
writer.save()
# In[ ]:
统计讲义当月使用题目数
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[22]:
sql1 = '''
SELECT
lpqh.paperid,
count(lpqh.quizid) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub ON sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id
LEFT JOIN
dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
lp.status in (3,5)
AND lp.adjust_start_time >= '2019-07-01'
AND lp.adjust_end_time < '2019-08-01'
AND lp.solve_status <> 6
AND lp.lesson_type in (1,2)
GROUP BY
lpqh.paperid
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[35]:
sql3 = '''
SELECT
lpqh.paperid,
lesson_plan_id,
lpqh.quizid
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub ON sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id
LEFT JOIN
dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
lp.status in (3,5)
AND lp.adjust_start_time >= '2019-07-01'
AND lp.adjust_end_time < '2019-08-01'
AND lp.solve_status <> 6
AND lp.lesson_type in (1,2)
AND lpqh.paperid = '167972C540D54D4483A8D0B66CC36E55'
AND lpqh.lessonplanid=14570487
'''
df3 = pd.read_sql(sql3,conn_hive)
df3
# In[7]:
df1.dtypes
# In[16]:
print(df1[df1.paperid == 'BB03041048644CCBA1F3125F28097382'].quiz_count)
# In[19]:
df1[df1.paperid == '167972C540D54D4483A8D0B66CC36E55']
# In[14]:
import datetime
period = ('%d%02d' % (datetime.date.today().year - (datetime.date.today().month==1),datetime.date.today().month - 1 or 12))
period
# In[25]:
sql2 = '''SELECT * FROM dwd_db_hfjydb.lesson_plan_quiz_hwlinfo WHERE paperid = '167972C540D54D4483A8D0B66CC36E55' and lesson_plan_id=14145739 '''
df2 = pd.read_sql(sql2,conn_hive)
df2
课程使用讲义占比
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[2]:
sql1 = '''
SELECT
max(b.subject_name) subject_name,
count(b.lesson_plan_id) count_plan
FROM
(
SELECT
max(a.subject_name) subject_name,
a.lesson_plan_id
FROM
(
SELECT
max(sb.subject_name) subject_name,
lp.lesson_plan_id,
lpqh.paperid,
count(lpq.quiz_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
LEFT JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq ON lpq.lesson_plan_id = lp.lesson_plan_id
LEFT JOIN
dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
LEFT JOIN
dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
lp.adjust_start_time >= '2019-05-01'
AND lp.adjust_start_time < '2019-06-01'
AND lp.solve_status <> 6
AND lp.status in (3,5)
AND lp.lesson_type = 1
GROUP BY
lp.lesson_plan_id,lpqh.paperid
HAVING
count(lpq.quiz_id) >= 5
) a
GROUP BY
a.lesson_plan_id
HAVING
count(a.paperid) >= 2
) b
GROUP BY
b.subject_name
'''
df1 = pd.read_sql(sql1,conn_hive)
df1
# In[3]:
sql2 = '''
SELECT
sb.subject_name,
count(lp.lesson_plan_id) count_all
FROM
dwd_db_hfjydb.lesson_plan lp
LEFT JOIN
dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
lp.adjust_start_time >= '2019-05-01'
AND lp.adjust_start_time < '2019-06-01'
AND lp.solve_status <> 6
AND lp.status in (3,5)
AND lp.lesson_type in (1,2)
GROUP BY
sb.subject_name
'''
df2 = pd.read_sql(sql2,conn_hive)
df2.head()
# In[4]:
df_sub = pd.merge(df2,df1,how='left',on=['subject_name'])
df_sub
# In[5]:
df_sub['使用率'] = df_sub['count_plan']/df_sub['count_all']
df_sub['使用率'] = df_sub['使用率'].apply(lambda x: format(x, '.2%'))
df_sub.head()
# In[6]:
df_sub
# In[7]:
df_sub.to_excel(r'C:\Users\hp\Desktop\5月正式课讲义使用率.xlsx')
经典题目使用率
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[3]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-01-01'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[4]:
df1.dtypes
# In[5]:
df1.shape
# In[34]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-07-29'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[55]:
aa = df1.loc[0]
aa
# In[56]:
aa['quiz_id']
# In[57]:
aa['quiz_count']
# In[58]:
bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[3]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-01-01'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[4]:
df1.dtypes
# In[5]:
df1.shape
# In[34]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-07-29'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[55]:
aa = df1.loc[0]
aa
# In[56]:
aa['quiz_id']
# In[57]:
aa['quiz_count']
# In[58]:
bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[3]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-01-01'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[4]:
df1.dtypes
# In[5]:
df1.shape
# In[34]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-07-29'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[55]:
aa = df1.loc[0]
aa
# In[56]:
aa['quiz_id']
# In[57]:
aa['quiz_count']
# In[58]:
bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[3]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-01-01'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[4]:
df1.dtypes
# In[5]:
df1.shape
# In[34]:
sql1 = '''
SELECT
lpq.quiz_id,
COUNT(lp.lesson_plan_id) quiz_count
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
lp.adjust_start_time >= '2019-07-29'
and lp.adjust_start_time < '2019-08-01'
and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
lpq.quiz_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[55]:
aa = df1.loc[0]
aa
# In[56]:
aa['quiz_id']
# In[57]:
aa['quiz_count']
# In[58]:
bb = df1.loc[0]['quiz_id']
bb
讲义中的题目在一节课使用时常大于40%
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")
# In[2]:
#### 每节课实际上课时间和使用的各讲义时间
sql1 = '''
SELECT
max(sub.subject_name) subject_name,
lp.lesson_plan_id,
lpqh.paperid,
max(date_diff('second', (cast (substring(real_start_time,1,19) as TIMESTAMP)), (cast (substring(real_end_time,1,19) as TIMESTAMP)))) lesson_time,
sum(quiz.stay_time) paper_time
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
LEFT JOIN
dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
LEFT JOIN
(select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
LEFT JOIN
dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
lp.adjust_start_time >= '2019-07-01'
AND lp.adjust_start_time < '2019-08-01'
AND lp.solve_status <> 6
AND lp.status in (3,5)
AND lp.lesson_type = 2
GROUP BY
lp.lesson_plan_id,lpqh.paperid
HAVING
paperid is not null
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[3]:
df1.shape
# In[4]:
df1['pro'] = df1['paper_time']/df1['lesson_time']
df1.head()
# In[5]:
df1.dtypes
# In[6]:
df1.shape
# In[7]:
df4 = df1[df1['pro']>=0.4]
df4.head()
# In[8]:
df4.shape
# In[9]:
df4.drop_duplicates(subset='lesson_plan_id',inplace=True)
df4.head()
# In[10]:
df4.shape
# In[11]:
df5 = df4.groupby(['subject_name'])['lesson_plan_id'].count().reset_index()
df5.head()
# In[12]:
df5.rename(columns={'lesson_plan_id':'课程数'},inplace=True)
df5.head()
# In[13]:
#### 成单数量
sql3 = '''
SELECT
sub.subject_name,
sum((case when (select count(contract_id) from view_tms_contract tc
where tc.student_intention_id = s.student_intention_id
and status in (3,4,5,9) and submit_time >lp.adjust_start_time)>0 then 1
else 0 end)) '成单数',
case when opt_user=972038 then 'AI直排'
when opt_user=381322 and lpo.inform_status = 5 then 'app排课'
when opt_user=381322 and (select count(*) from hfjydb.data_teacher_requirements where order_id = lpo.order_id) > 0 then '指定老师排课'
when opt_user is null then '未排课' when opt_user not in (381322, 32547,972038) then '教务手动' else '系统未知' end 排课方式
FROM
lesson_plan_order lpo
LEFT JOIN
lesson_relation lr on lr.order_id = lpo.order_id
LEFT JOIN
lesson_plan lp on lp.lesson_plan_id = lr.plan_id
LEFT JOIN
tms_lesson_plan_history tlph on tlph.lesson_plan_id = lr.plan_id
LEFT JOIN
view_user_info u on u.user_id = lp.teacher_id
INNER JOIN
subject sub on sub.subject_id = lp.subject_id
LEFT JOIN
view_student s on s.student_id = lp.student_id
WHERE
lp.lesson_plan_id in {0}
GROUP BY
sub.subject_name
HAVING
排课方式 <> '指定老师排课'
'''.format(tuple(df4['lesson_plan_id']))
df33 = pd.read_sql(sql3,conn_mysql)
df33.head()
# In[14]:
df33.shape
# In[15]:
df_sub = pd.merge(df5,df33,on='subject_name',how='left')
df_sub
# In[16]:
df_sub['贡献率'] = df_sub['成单数']/df_sub['课程数']
df_sub['贡献率'] = df_sub['贡献率'].apply(lambda x: format(x, '.2%'))
df_sub.head()
# In[17]:
df_sub.drop('排课方式', axis=1,inplace=True)
df_sub
# In[ ]:
df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率006.xlsx')
# In[ ]:
#### 8-16
# In[ ]:
#### 成单数
# In[ ]:
sql04 = '''
select
lp.lesson_plan_id,
tc.student_intention_id,
tc.contract_id
from
view_tms_contract tc
LEFT JOIN
view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
lesson_plan lp on lp.student_id= s.student_id
where
lp.lesson_plan_id in {0}
and tc.status in (3,4,5,9)
and tc.submit_time >lp.adjust_start_time
GROUP BY
tc.student_intention_id
'''.format(tuple(df4['lesson_plan_id']))
df04 = pd.read_sql(sql04,conn_mysql)
df04.head()
# In[ ]:
df04.shape
# In[ ]:
df05 = pd.merge(df04,df4,on='lesson_plan_id',how='left')
df05.head()
# In[ ]:
df05.shape
# In[ ]:
df05.to_excel(r'C:\Users\hp\Desktop\df05.xlsx')
# In[ ]:
# In[ ]:
# In[ ]:
# In[ ]:
df_sub = pd.merge(df33,df4,on=['lesson_plan_id','subject_name'],how='left')
df_sub
# In[ ]:
df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率007.xlsx')
# In[ ]:
df4.to_excel(r'C:\Users\hp\Desktop\df4.xlsx')
# In[ ]:
sql01 = '''
SELECT
s.student_intention_id,
lp.lesson_plan_id
FROM
lesson_plan lp
LEFT JOIN
view_student s on s.student_id = lp.student_id
WHERE
lp.lesson_plan_id in {0}
'''.format(tuple(df4['lesson_plan_id']))
df01 = pd.read_sql(sql01,conn_mysql)
df01.head()
# In[ ]:
sql02 = '''
select
tc.student_intention_id,
tc.contract_id
from
view_tms_contract tc
LEFT JOIN
view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
lesson_plan lp on lp.student_id= s.student_id
where
tc.student_intention_id in {0}
and tc.status in (3,4,5,9)
and tc.submit_time >lp.adjust_start_time
GROUP BY
tc.student_intention_id
'''.format(tuple(df01['student_intention_id']))
df02 = pd.read_sql(sql02,conn_mysql)
df02.head()
# In[ ]:
df02.shape
教研题目使用率
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import pymysql
from pyhive import presto
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
# In[31]:
sql1 = '''
SELECT
a.lesson_plan_id,
max(a.lesson_type) lesson_type,
max(a.adjust_start_time) adjust_start_time,
max(a.subject_name) subject_name,
count(case when (a.stay_time >= 10 AND a.subject_two='小学') then a.quiz_id when (a.stay_time >= 30 AND (a.subject_two='初中' OR a.subject_two='高中')) then a.quiz_id end) quiz_count,
count(case when (a.type in (1,3,4) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (1,3,4) AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hf_quiz_count,
count(case when (a.type = 2 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 2 AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) homework_quiz_count,
count(case when (a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hwl_quiz_count,
count(case when (a.type = 6 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 6 AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) zdy_quiz_count,
count(case when (a.hwl_type='严选智能题库' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选智能题库' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hwl_zn_count,
count(case when (a.hwl_type='严选讲义' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选讲义' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hwl_jy_count,
count(case when (a.homework_type='作业中严选智能题库' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中严选智能题库' AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hw_zn_count,
count(case when (a.homework_type='作业中精品讲义题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中精品讲义题目' AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hw_jy_count,
count(case when (a.homework_type='作业中自定义的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中自定义的题目' AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hw_zdy_count,
count(case when (a.homework_type='作业中经典的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中经典的题目' AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hw_jd_count,
count(case when (a.homework_type='作业中的作业题目数' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中的作业题目数' AND (a.subject_two='初中' OR a.subject_two='高中') AND a.stay_time >= 30 ) THEN a.quiz_id end) hw_hw_count
FROM
(
SELECT
lp.lesson_plan_id,
lp.lesson_type,
lp.adjust_start_time,
sub.subject_name,
SUBSTR(sub.subject_name,1,2) subject_two,
lpq.quiz_id,
lpq.type,
(case when jh.id is null and lpq.type in (8,9) then '严选智能题库' when jh.id > 0 then '严选讲义' else '其他' end) hwl_type,
(case when lpq.type = 2 and hwdi.type = 1 AND hwdih.homeworkid IS NULL then '作业中经典的题目' when lpq.type = 2 and hwdi.type = 6 AND hwdih.homeworkid IS NULL then '作业中自定义的题目' when lpq.type = 2 and hwdi.type in (8,9) and hwdih.homeworkid IS NULL then '作业中严选智能题库' when lpq.type = 2 and hwdi.type = 8 and hwdih.homeworkid IS NOT NULL then '作业中精品讲义题目' WHEN lpq.type = 2 AND hwdi.type=2 AND hwdih.homeworkid IS NULL THEN '作业中的作业题目数' else '其他'end ) homework_type,
quiz.stay_time
FROM
dwd_db_hfjydb.lesson_plan lp
INNER JOIN
dwd_db_hfjydb.subject sub on sub.subject_id = lp.subject_id
INNER JOIN
dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
LEFT JOIN
dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh on lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
LEFT JOIN
dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid and jh.is_del = 0
LEFT JOIN
dwd_db_hfjydb.home_work_detail_info hwdi ON hwdi.Id = lpq.quiz_id and lpq.type = 2
LEFT JOIN
dwd_db_hfjydb.home_work_detail_info_hwlinfo hwdih ON hwdih.homeworkid = hwdi.home_work_id and hwdi.quiz_id = hwdih.quizid
LEFT JOIN
(select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz
ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
WHERE lp.status in (3,5)
AND lp.solve_status <> 6
AND lp.lesson_type in (1,2)
AND lp.adjust_start_time >= '2019-07-01'
AND lp.adjust_start_time < '2019-08-01'
) a
GROUP BY
a.lesson_plan_id
'''
df1 = pd.read_sql(sql1,conn_hive)
df1.head()
# In[32]:
df1.shape
# In[40]:
df1.dtypes
# In[33]:
df1.rename(columns={'lesson_plan_id':'课程id','lesson_type':'课程类型','adjust_start_time':'上课时间','subject_name':'学科','quiz_count':'题目总数','hf_quiz_count':'经典题目总数','homework_quiz_count':'作业题目总数','hwl_quiz_count':'好未来题目总数','zdy_quiz_count':'自定义题目总数','hwl_zn_count':'严选智能题库数','hwl_jy_count':'严选讲义','hw_zn_count':'作业中严选智能题库数','hw_zdy_count':'作业中自定义的题目数','hw_jy_count':'作业中精品讲义题目数','hw_jd_count':'作业中经典的题目','hw_hw_count':'作业中的作业题目数'},inplace=True)
df1.head(10)
# In[34]:
df1[df1['作业题目总数'] == (df1['作业中严选智能题库数']+df1['作业中精品讲义题目数']+df1['作业中自定义的题目数']+df1['作业中经典的题目']+df1['作业中的作业题目数'])].loc[:,['课程id','作业题目总数','作业中严选智能题库数','作业中精品讲义题目数','作业中自定义的题目数','作业中经典的题目','作业中的作业题目数']]
# In[37]:
df1[df1['好未来题目总数'] == (df1['严选智能题库数']+df1['严选讲义'])].loc[:,['课程id','好未来题目总数','严选智能题库数','严选讲义']]
# In[38]:
df1[df1['题目总数'] == (df1['经典题目总数']+df1['作业题目总数']+df1['好未来题目总数']+df1['自定义题目总数'])]
# In[39]:
df1.to_csv(r"C:\Users\hp\Desktop\题目使用率V4.csv",encoding="utf_8_sig")
# In[ ]:
2019-9豌豆思维
学生端思维导图
=IF(H2<5,"0-5分钟",IF(H2<10,"0-10分钟",IF(H2>10,"大于10分钟")))
=IF(S10<0.6,"0 <= 正确率 <= 60%",IF(S10<0.8,"60% <= 正确率 <= 80%",IF(S10<1,"80% <= 正确率 < 100%",IF(S10>=1,"正确率100%"))))
sql
SELECT
*
FROM
(
(
SELECT
a.`完成测评月份` as '完成测评月份',
-- a.`当前阶段` as '当前阶段_降阶',
a.`分数段_降阶`,
count(a.`当前阶段`) as '数量_降阶'
FROM
(
SELECT
dj.当前阶段,
dj.完成测评月份,
case when dj.平均分 < 60 then '0-60' when dj.平均分 > 60 and
dj.平均分 < 80 then '60-80' when dj.平均分 > 80 and dj.平均分 <=100 then '80-100'
end as '分数段_降阶'
FROM
wd_new_djcp_0927 dj
WHERE
dj.升降标识 = '降阶'
) a
WHERE
a.`完成测评月份` in (6,7,8)
AND a.`分数段_降阶` is not null
GROUP BY
a.`完成测评月份`,a.`分数段_降阶`
) x
RIGHT JOIN
(
SELECT
a.`完成测评月份` as '完成测评月份',
-- a.`当前阶段` as '当前阶段_不变',
a.`分数段_不变`,
count(a.`当前阶段`) as '数量_不变'
FROM
(
SELECT
dj.当前阶段,
dj.完成测评月份,
case when dj.平均分 < 60 then '0-60' when dj.平均分 > 60 and
dj.平均分 < 80 then '60-80' when dj.平均分 > 80 and dj.平均分 <=100 then '80-100'
end as '分数段_不变'
FROM
wd_new_djcp_0927 dj
WHERE
dj.升降标识 = '不变'
) a
WHERE
a.`完成测评月份` in (6,7,8)
AND a.`分数段_不变` is not null
GROUP BY
a.`完成测评月份`,a.`分数段_不变`
) y
ON x.完成测评月份 = y.完成测评月份 and x.分数段_降阶 = y.分数段_不变
)
参考文献
苏宝宝-
汪国强
http://subaobao.club:8181/