sql语句

sql语句

做一些基本测试:

球队表

直播表

球员表

赛事的赛况表

聊天室表

// 球队表
CREATE  TABLE `live_team`(
  `id` tinyint(1) unsigned NOT NULL auto_increment,
  `name` VARCHAR(20) NOT NULL DEFAULT '',
  `image` VARCHAR(20) NOT NULL DEFAULT '',
  `type` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `create_time` int(10) unsigned NOT NULL DEFAULT 0,
  `update_time` int(10) unsigned NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;

// 直播表
CREATE  TABLE `live_game`(
  `id` int(10) unsigned NOT NULL auto_increment,
  `a_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `b_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `a_score` int(10) unsigned NOT NULL DEFAULT 0,
  `b_score` int(10) unsigned NOT NULL DEFAULT 0,
  `narrator` VARCHAR(20) NOT NULL DEFAULT '',
  `image` VARCHAR(20) NOT NULL DEFAULT '',
  `start_time` int(10) unsigned NOT NULL DEFAULT 0,
  `status` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `create_time` int(10) unsigned NOT NULL DEFAULT 0,
  `update_time` int(10) unsigned NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;

// 球员表

CREATE  TABLE `live_player`(
  `id` int(10) unsigned NOT NULL auto_increment,
  `name` VARCHAR(20) NOT NULL DEFAULT '',
  `image` VARCHAR(20) NOT NULL DEFAULT '',
  `age` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `position` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `status` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `create_time` int(10) unsigned NOT NULL DEFAULT 0,
  `update_time` int(10) unsigned NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;

//赛事的赛况表
CREATE  TABLE `live_outs`(
  `id` int(10) unsigned NOT NULL auto_increment,
  `game_id` int(10) unsigned NOT NULL DEFAULT 0,
  `team_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `content` VARCHAR(200) NOT NULL DEFAULT '',
  `image` VARCHAR(20) NOT NULL DEFAULT '',
  `type` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `status` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `create_time` int(10) unsigned NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;

//聊天室表
CREATE  TABLE `live_chart`(
  `id` int(10) unsigned NOT NULL auto_increment,
  `game_id` int(10) unsigned NOT NULL DEFAULT 0,
  `user_id` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `content` VARCHAR(200) NOT NULL DEFAULT '',
  `status` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `create_time` int(10) unsigned NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`)
)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT charset=utf8;

1.海风教育

匹配合同编号对应的学生编号和学生姓名

#### 引用自:
#### 匹配合同编号对应的学生编号和学生姓名
#### 日期:2018-12-11
#### 需求方:张明杰
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by: 

SELECT
    tc.contract_id 合同编号,
    tc.student_name 学生姓名,
    tc.student_no 学生编号
FROM
    view_tms_contract tc
WHERE
    tc.contract_id in ('X22011810013489','X29011809015318')

2018-12匹配正式课课量

#### 引用自:王涤非
#### 匹配正式课课量
#### 日期:2018-12-12
#### 需求方:黄露
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by: 

select t.teacher_no 编号,(select name from view_user_info where user_id = t.teacher_id) 姓名,
(select case when quarters_type = 1 then '全职授课' when quarters_type = 2 then'全职教研' when quarters_type = 3 then'兼职' when quarters_type = 4 then'实习'
when quarters_type = 5 then'体验课老师' end from view_teacher where teacher_id = t.teacher_id) '岗位属性',
(select group_concat(subject_name) from subject where subject_id in
(select subject_id from teacher_subject_rel where teacher_id =t.teacher_id))'学科名称',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-01-01' and '2017-01-31') '17-01课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-02-01' and '2017-02-28') '17-02课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-03-01' and '2017-03-31') '17-03课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-04-01' and '2017-04-30') '17-04课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-05-01' and '2017-05-31') '17-05课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-06-01' and '2017-06-30') '17-06课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-07-01' and '2017-07-31') '17-07课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <>  0 and solve_status <> 6 and date(adjust_start_time)
between '2017-08-01' and '2017-08-31') '17-08课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-09-01' and '2017-09-30') '17-09课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-10-01' and '2017-10-31') '17-10课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-11-01' and '2017-11-30') '17-11课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2017-12-01' and '2017-12-31') '17-12课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-01-01' and '2018-01-31') '18-01课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-02-01' and '2018-02-28') '18-02课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-03-01' and '2018-03-31') '18-03课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-04-01' and '2018-04-30') '18-04课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-05-01' and '2018-05-31') '18-05课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-06-01' and '2018-06-30') '18-06课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-07-01' and '2018-07-31') '18-07课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-08-01' and '2018-08-31') '18-08课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-09-01' and '2018-09-30') '18-09课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-10-01' and '2018-10-31') '18-10课时量',
(select sum(timestampdiff(minute,adjust_start_time,adjust_end_time))/60 from lesson_plan where teacher_id=t.teacher_id and status <> 0 and solve_status <> 6 and date(adjust_start_time)
between '2018-11-01' and '2018-11-30') '18-11课时量'
from view_teacher t where  quarters_type <> 5 and suspend = 0

根据表内学生编号,拉取下沟通详情

#### 引用自:王沛弘
#### 根据表内学生编号,拉取下沟通详情
#### 执行时间:0.12s,shape274,3,新脱敏库hfjy
#### 日期:2018-12-13
#### 需求方:庄辉
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
    s.student_no 学员编号,
    s.name 学生姓名,
    (select name from view_user_info u where u.user_id = cr.communication_person) 沟通人,
    cr.role_code 沟通角色,
    (case when sale_stage2 = 0 then '新名单' when sale_stage2 = 1 then '跟进中' when sale_stage2 = 2 
    then '无效名单' when sale_stage2 = 3 then '未接听' when sale_stage2 = 4 then '未试听失单'
    when sale_stage2 = 5 then '工作台销售阶段' when sale_stage2 = 6 then '已发起设班单' 
    when sale_stage2 = 7 then '已驳回或撤回' when sale_stage2 = 8 then '已排待沟通'
    when sale_stage2 = 9 then '试听后待反馈' when sale_stage2 = 10 then '反馈后跟进中' 
    when sale_stage2 = 11 then '试听已跳票或取消' when sale_stage2 = 12 then '试听环节失单'
    when sale_stage2 = 13 then '待提交成单' when sale_stage2 = 14 then '成单签订中' when sale_stage2 = 15 then '已成单' end) '销售阶段',
    cr.content 沟通内容,
    cr.start_time 沟通开始时间,
    s.create_time 注册时间,
    ad.adid_no 'ADID',
    (select value from ddic where type = 'TP056' and code =cr.refuse_reason) 拒绝原因
FROM 
    view_student s
    left join view_communication_record cr on cr.student_intention_id = s.student_intention_id
    left join adid ad on ad.adid_id = s.adid_id
    left join lesson_plan lp on s.student_id = lp.student_id 
WHERE 
    s.student_no in    tc.contract_id in ()

扩展内推渠道

#### 引用自:
#### 扩展内推渠道,提高招聘效率
#### 执行时间:0.12s,shape274,3,不脱敏
#### 日期:2018-12-19
#### 需求方:符仲豪
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
    t.teacher_no 教师编号,
    case
        t.quarters_type
    when 1 then '全职授课'
    when 2 then '全职教研'
    when 3 then '兼职'
    when 4 then '实习'
    end 职位,
  (select 
      group_concat(subject_name) 
  from 
      subject 
  where 
      subject_id in (
                    select 
                        subject_id
                    from
                        teacher_subject_rel
                    where 
                        teacher_id = t.teacher_id)) 授课科目,
    u.phone 手机号码
FROM
    view_teacher     t
    left join view_user_info u on u.user_id = t.teacher_id
WHERE
    t.teacher_no in('EN986428','MA895758','CM619710','EN973284','MA872422','MA510182','CH638099','MA832436','EN452180','EN605560','MA650143','EN431375','MA623555','CH672780','MA251350','CH279462','MA416408','MA894492','PH773706','MA106630','CH570976','CH351722','CH590160','CM877339','PH761791','CH965963','EN651501','EN406361','CH730883','MA827543','EN136134','CH271984','EN230456','BI562010','CM351760','MA351150','MA695954','CH209523','MA213790','HI462339','MA116751','MA541162','MA187609','MA156942','MA403235','EN362828','CH379783','MA518124','GE266846','CH506465','MA793934','MA329451','MA834375','PH486008','MA967601','MA169318','EN223844','CH234352','CH232263','CM841499','GE239029','CH148363','CH127614','PH732951','CH335506','CH879687','CM679366','MA992383','MA460990','CH935940','MA488461','HI591424','CH810088','PH584001','PH639570','EN155475','PH311187','PH527149','EN917008','CH579165','MA358390','CH458473','PH756906','PH425722','EN377041','MA483267','EN849121','MA583832','MA275023','PO774285','CH456357','MA783656','MA441030','MA920021','MA275992','CH410863','MA547833','BI135382','MA147963','CH381538','MA149169','MA900789','MA580845','PH889762','MA675440','MA289681','EN580408')
    and t.quarters_type <> 5

陈佳的需求

#### 引用自: 赵淼
#### 学员编号   高考年份  最新剩余课时    最近一次消课时间   班主任  班主任所在部门 班主任所在组  6-12月总计消课次数   其中时长1小时的消课次数  
#### 执行时间:0.12s,shape274,3,不脱敏
#### 日期:2018-12-19
#### 需求方:陈佳
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
    s.student_no 学员编号,
    s.exam_year 高考年份,
    tb.rest 最新剩余课时,
    max(adjust_start_time) 最后一次上课日期,
    csl.by_assistant 班主任,
    csl.assistant_dept 班主任部门,
    csl.assistant_group 班主任所在组,
    sum(case when lp.adjust_start_time >='2018-06-01'and  lp.adjust_start_time< curdate() then 1 else  0 end) '6-12月总计消课次数',
    sum(case when lp.adjust_start_time >='2018-07-01'and  lp.adjust_start_time<'2018-09-01'  then 1 else  0 end) '7-8月总计消课次数',
    sum(case when lp.adjust_start_time >='2018-06-01'and  lp.adjust_start_time<= curdate() and timestampdiff(SECOND,lp.adjust_start_time,lp.adjust_end_time)/3600=1 then 1 else  0 end ) '6-12月1小时的消课次数',
    sum(case when lp.adjust_start_time >='2018-07-01'and  lp.adjust_start_time<'2018-09-01' and timestampdiff(SECOND,lp.adjust_start_time,lp.adjust_end_time)/3600=1 then 1 else  0 end ) '7-8月1小时的消课次数',
  csl.status 学员当时状态


FROM
    lesson_plan lp
  left join view_student s on lp.student_id = s.student_id
    left join 
    (
     select student_no,status,remaining_period,by_assistant,assistant_dept,assistant_group  from 
    bidata.yxy_curriculum_status_log where stat_date =date_sub(curdate(),interval 1 day)  
    )
  csl on  csl.student_no = s.student_no
    left join 
  (
  select 
    t.student_intention_id, sum(t.rest) as rest 
  from
    (select 
      c.student_intention_id, c.contract_id, 
      c.period + ifnull(c.donate_period,0)-ifnull(sum(l.class_period),0) as rest, sum(l.class_period)
    from 
      view_tms_contract c 
    left join lesson_plan l on l.contract_id = c.contract_id and l.status<>0 and l.solve_status = 5
    where big_type_id = 1 and c.status in (4,5) and c.teacher_level is not null 
    group by c.contract_id)  t 
  group by t.student_intention_id
  ) tb on tb.student_intention_id = s.student_intention_id

WHERE
  lp.adjust_start_time>='2018-06-01'
    and lp.adjust_start_time< curdate()
    and lp.status in (3,5)
    and lp.solve_status <> 6
    and lp.lesson_type = 1
  and    lp.subject_id<>11
    and s.exam_year <= '2022'
    and (select name from view_user_info where user_id=lp.teacher_id) not like '%测试%' 
  and (select name from view_user_info where user_id=lp.teacher_id) not like '%授课支持%'
GROUP BY
    s.student_id

拉取班主任黄群的学生明细

#### 引用自:
#### 拉取班主任黄群的学生明细
#### 执行时间:36.787s,shape274,3,新脱敏库hfjy
#### 日期:2018-12-20
#### 需求方:高敏霞
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
  sd.student_no 学生编号,
  sd.name 学生姓名,
  tb.rest 剩余课时数,
  (case sd.curriculum_status  
    when 0 then '正常学生' 
    when 1 then '已结课' 
    when 2 then '停课' 
    when 3 then '结课预警' 
    when 4 then '停课预警' 
    when 5 then '结课预警,停课预警' 
    end) 停结课情况,
  ui.name 班主任姓名
FROM
  view_student sd
    left join view_user_info ui on ui.user_id = sd.by_assistant
    left join 
  (
  select 
    t.student_intention_id, sum(t.rest) as rest 
  from
    (select 
      c.student_intention_id, c.contract_id, 
      c.period + ifnull(c.donate_period,0)-ifnull(sum(l.class_period),0) as rest, sum(l.class_period)
    from 
      view_tms_contract c 
    left join lesson_plan l on l.contract_id = c.contract_id and l.status<>0 and l.solve_status = 5
    where big_type_id = 1 and c.status in (4,5) and c.teacher_level is not null 
    group by c.contract_id)  t 
  group by t.student_intention_id
  ) tb on tb.student_intention_id = sd.student_intention_id

WHERE
    ui.name = '黄群' and sd.student_no is not null

拉取学生编号 学生姓名

#### 引用自:
#### 拉取拉取学生编号 学生姓名
#### 执行时间:0.416s,shape274,3,新脱敏库hfjy
#### 日期:2018-12-20
#### 需求方:张明杰
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

select
    tc.contract_id 合同编号,
    tc.student_name 学生姓名,
    tc.student_no 学生编号
from
    view_tms_contract tc
where
    tc.contract_id in ('X29011812006367','X20011812001838')

教师姓名-编号-学历-院校-专业

#### 引用自:
#### 教师姓名,教师编号,最高学历,毕业院校,专业(教师工作台(新))(不是服务中心!)
#### 执行时间:2.740,新脱敏库hfjy
#### 日期:2018-12-24
#### 需求方:吴建
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
  tea_name 教师姓名,
  teacher_no 教师编号,
  case t.education
  when 1 then '大专'
  when 2 then '本科'
  when 3 then '研究生'
  when 4 then '硕士'
  when 5 then '博士'
  end 最高学历,
  graduate_college 毕业院校,
  major 专业
FROM
  bidata.batch_teacher_info

可授课学科为科学的教师

#### 引用自:
#### 可授课学科为科学的教师
#### 执行时间:3.275,新脱敏库hfjy
#### 日期:2018-12-25
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
  u.name 教师姓名,
  t.teacher_no 教师编号,
  (select value from bidata.gen_dict where type ='teaching_group' and code = tt.teaching_group) 学科组,
  (select group_concat(subject_name) from subject where subject_id in (select subject_id from teacher_subject_rel where teacher_id =t.teacher_id) having group_concat(subject_name) like '%科学%') 可教授学科
FROM
  view_teacher t
  left join view_user_info u on u.user_id = t.teacher_id
  left join view_tms_teacher_ext tt  on tt.id = t.teacher_id
WHERE
  t.quarters_type <> 5
  and u.name not like '%测试%' and u.name not like '%授课支持%'
  and (select group_concat(subject_name) from subject where subject_id in (select subject_id from teacher_subject_rel where teacher_id =t.teacher_id) having group_concat(subject_name) like '%科学%') is not null

学生id匹配学生姓名和学生编号

#### 引用自:
#### 学生id匹配学生姓名和学生编号
#### 执行时间:0.823,新脱敏库hfjy
#### 日期:2018-12-27
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
    s.student_id 学生id,
    u.name 学生姓名,
    s.student_no 学生编号
FROM
    view_student s
    left join view_user_info u on u.user_id = s.student_id
WHERE
    s.student_id in ('943632','631213')

教师id匹配教师姓名和教师编号

#### 引用自:
#### 教师id匹配教师姓名和教师编号
#### 执行时间:0.823,新脱敏库hfjy
#### 日期:2018-12-27
#### 需求方:张瑾
#### 数据输出方式:excel
#### created by:汪国强
#### update by:汪国强
#### checked by:

SELECT
    t.teacher_id 教师id,
    u.name 教师姓名,
    t.teacher_no 教师编号
FROM
    view_teacher t
    left join view_user_info u on u.user_id = t.teacher_id
WHERE
    t.teacher_id in ('80920','122395'

2019-8月zxj排课率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# # 排课率 = 当月排课人数/上月底在读人数

# ### =============================月维度=============================

# In[2]:


sql1 = '''
SELECT
        '201907' as '统计时间',
        a.学段,
        count(DISTINCT student_no) as '在读学生总数量'
FROM
        (
        SELECT
                stat_date,
                student_no,
                (case when slm.exam_year >= 2025 and slm.exam_year < 2031 then '小学'
             when slm.exam_year >= 2022 and slm.exam_year < 2025 then '初中'
             when slm.exam_year >= 2020 and slm.exam_year < 2022 then '高中' else '' end) '学段',
                remaining_period as '剩余总课时',
                status
        FROM
                yxy_curriculum_status_log_monthly slm
        WHERE
                slm.stat_date = '2019-06-30'
                and remaining_period > 0
                and slm.exam_year <> 2019
        ) a
WHERE
        a.学段 <> ''
GROUP BY
        a.学段
'''

df1 = pd.read_sql(sql1,conn_dmart)

df1.head()


# In[3]:


sql2 = '''
SELECT
        '201907' as '统计时间',
        a.学段,
        count(a.student_id) as '排课学生总数量'
FROM
(
SELECT
        lp.student_id,
        DATE(lp.adjust_start_time) as 'stat_date',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'
FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
GROUP BY
        lp.student_id
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.学段
'''

df2 = pd.read_sql(sql2,conn_mysql)

df2.head()


# In[4]:


df_sub = pd.merge(df1,df2,on=['统计时间','学段'],how='left')

df_sub.head()


# In[5]:


df_sub['排课率'] = df_sub['排课学生总数量']/df_sub['在读学生总数量']
df_sub['排课率'] = df_sub['排课率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# ### =============================周维度=============================

# In[6]:


sql3 = '''
SELECT
        a.周数,
        a.学段,
        count(DISTINCT a.student_id) as '排课总人数'
FROM
(
SELECT
        lp.student_id,
        DATE(lp.adjust_start_time) as 'stat_date',
        (case when DAY(lp.adjust_start_time) in (1,2,3,4,5,6,7) then '第一周' when DAY(lp.adjust_start_time) in (8,9,10,11,12,13,14) then '第二周'
        when DAY(lp.adjust_start_time) in (15,16,17,18,19,20,21) then '第三周' when DAY(lp.adjust_start_time) in (22,23,24,25,26,27,28) then '第四周'
        when DAY(lp.adjust_start_time) in (29,30,31) then '第五周' else '' end) as '周数',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'

FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.周数,a.学段
ORDER BY
        field(a.周数,'第一周','第二周','第三周','第四周','第五周'),
        field(a.学段,'小学','初中','高中')
'''

df3 = pd.read_sql(sql3,conn_mysql)

df3.head()


# In[7]:


df4 = pd.merge(df3,df1,on='学段',how='inner')
df4


# In[8]:


df4.drop('统计时间',axis=1,inplace=True)
df4.head()


# In[9]:


df4['排课率'] = df4['排课总人数']/df4['在读学生总数量']
df4['排课率'] = df4['排课率'].apply(lambda x: format(x, '.2%')) 
df4.head()


# ### =================================日维度======================================

# In[10]:


sql5 = '''
SELECT
        a.天数,
        a.学段,
        count(DISTINCT a.student_id) as '排课学生总数量'
FROM
(
SELECT
        lp.student_id,
        DAY(lp.adjust_start_time) as '天数',
                (case when s.exam_year >= 2025 and s.exam_year < 2031 then '小学'
             when s.exam_year >= 2022 and s.exam_year < 2025 then '初中'
             when s.exam_year >= 2020 and s.exam_year < 2022 then '高中' else '' end) '学段'

FROM
        tms_lesson_plan_history lph
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id=lph.lesson_plan_id
LEFT JOIN
        view_student s on s.student_id=lp.student_id
WHERE
        year(lp.adjust_start_time) = 2019
        and month(lp.adjust_start_time)= 7
        and lph.opt_type=1
        and lp.lesson_type=1
        and s.account_type=1
        and s.exam_year <> 2019
) a
WHERE
        a.学段 <> ''
GROUP BY
        a.天数,a.学段
'''

df5 = pd.read_sql(sql5,conn_mysql)

df5.head()


# In[11]:


df5 = pd.merge(df5,df1,on='学段',how='inner')
df5


# In[12]:


df5.drop('统计时间',axis=1,inplace=True)


# In[13]:


df5['排课率'] = df5['排课学生总数量']/df5['在读学生总数量']
df5['排课率'] = df5['排课率'].apply(lambda x: format(x, '.2%')) 
df5.head()


# In[14]:


writer=pd.ExcelWriter(r'C:\Users\hp\Desktop\19年7月排课率004.xlsx')
df_sub.to_excel(writer,sheet_name='月维度')
df4.to_excel(writer,sheet_name='周维度')
df5.to_excel(writer,sheet_name='日维度')
writer.save()


# In[ ]:


统计讲义当月使用题目数

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[22]:


sql1 = '''
SELECT
        lpqh.paperid,
        count(lpqh.quizid) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub ON sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id
LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
        lp.status in (3,5)
        AND lp.adjust_start_time >= '2019-07-01'
        AND lp.adjust_end_time < '2019-08-01'
        AND lp.solve_status <> 6
        AND lp.lesson_type in (1,2)
GROUP BY
        lpqh.paperid
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[35]:


sql3 = '''
SELECT
        lpqh.paperid,
        lesson_plan_id,
        lpqh.quizid
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub ON sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id
LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
        lp.status in (3,5)
        AND lp.adjust_start_time >= '2019-07-01'
        AND lp.adjust_end_time < '2019-08-01'
        AND lp.solve_status <> 6
        AND lp.lesson_type in (1,2)
        AND lpqh.paperid = '167972C540D54D4483A8D0B66CC36E55'
        AND lpqh.lessonplanid=14570487
'''

df3 = pd.read_sql(sql3,conn_hive)

df3


# In[7]:


df1.dtypes


# In[16]:


print(df1[df1.paperid == 'BB03041048644CCBA1F3125F28097382'].quiz_count)


# In[19]:


df1[df1.paperid == '167972C540D54D4483A8D0B66CC36E55']


# In[14]:


import datetime
period = ('%d%02d' % (datetime.date.today().year - (datetime.date.today().month==1),datetime.date.today().month - 1 or 12))
period


# In[25]:


sql2 = '''SELECT * FROM dwd_db_hfjydb.lesson_plan_quiz_hwlinfo WHERE paperid  = '167972C540D54D4483A8D0B66CC36E55' and lesson_plan_id=14145739 ''' 

df2 = pd.read_sql(sql2,conn_hive)

df2

课程使用讲义占比

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[2]:


sql1 = '''
SELECT
        max(b.subject_name) subject_name,
        count(b.lesson_plan_id) count_plan
FROM
(
        SELECT
                max(a.subject_name) subject_name,
                a.lesson_plan_id
        FROM
        (
                SELECT
                        max(sb.subject_name) subject_name,
                        lp.lesson_plan_id,
                        lpqh.paperid,
                        count(lpq.quiz_id) quiz_count

                FROM
                        dwd_db_hfjydb.lesson_plan lp
                LEFT JOIN
                        dwd_db_hfjydb.lesson_plan_quiz_total lpq ON lpq.lesson_plan_id = lp.lesson_plan_id
                LEFT JOIN
                        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
                LEFT JOIN
                        dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
                WHERE
                        lp.adjust_start_time >= '2019-05-01'
                        AND lp.adjust_start_time < '2019-06-01'
                        AND lp.solve_status <> 6
                        AND lp.status in (3,5)
                        AND lp.lesson_type = 1
                GROUP BY
                        lp.lesson_plan_id,lpqh.paperid
                HAVING
                        count(lpq.quiz_id) >= 5
        ) a
        GROUP BY
                a.lesson_plan_id
        HAVING
                count(a.paperid) >= 2
) b
GROUP BY
         b.subject_name
'''

df1 = pd.read_sql(sql1,conn_hive)

df1


# In[3]:


sql2 = '''
SELECT
        sb.subject_name,
        count(lp.lesson_plan_id) count_all
FROM
        dwd_db_hfjydb.lesson_plan lp
LEFT JOIN
        dwd_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
        lp.adjust_start_time >= '2019-05-01'
        AND lp.adjust_start_time < '2019-06-01'
        AND lp.solve_status <> 6
        AND lp.status in (3,5)
        AND lp.lesson_type in (1,2)
GROUP BY
        sb.subject_name
'''
df2 = pd.read_sql(sql2,conn_hive)

df2.head()


# In[4]:


df_sub = pd.merge(df2,df1,how='left',on=['subject_name'])

df_sub


# In[5]:


df_sub['使用率'] = df_sub['count_plan']/df_sub['count_all']
df_sub['使用率'] = df_sub['使用率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# In[6]:


df_sub


# In[7]:


df_sub.to_excel(r'C:\Users\hp\Desktop\5月正式课讲义使用率.xlsx')

经典题目使用率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[3]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-01-01'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[4]:


df1.dtypes


# In[5]:


df1.shape



# In[34]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-07-29'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[55]:


aa = df1.loc[0]
aa


# In[56]:


aa['quiz_id']


# In[57]:


aa['quiz_count']


# In[58]:


bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[3]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-01-01'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[4]:


df1.dtypes


# In[5]:


df1.shape



# In[34]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-07-29'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[55]:


aa = df1.loc[0]
aa


# In[56]:


aa['quiz_id']


# In[57]:


aa['quiz_count']


# In[58]:


bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[3]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-01-01'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[4]:


df1.dtypes


# In[5]:


df1.shape



# In[34]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-07-29'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[55]:


aa = df1.loc[0]
aa


# In[56]:


aa['quiz_id']


# In[57]:


aa['quiz_count']


# In[58]:


bb = df1.loc[0]['quiz_id']
bb
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[3]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-01-01'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[4]:


df1.dtypes


# In[5]:


df1.shape



# In[34]:


sql1 = '''
SELECT
        lpq.quiz_id,
        COUNT(lp.lesson_plan_id) quiz_count
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
WHERE
        lp.adjust_start_time >= '2019-07-29'
        and lp.adjust_start_time < '2019-08-01'
        and sub.subject_id in (20,13,1,19,14,2,21,15,3,16,4,5,24,23)
GROUP BY
        lpq.quiz_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[55]:


aa = df1.loc[0]
aa


# In[56]:


aa['quiz_id']


# In[57]:


aa['quiz_count']


# In[58]:


bb = df1.loc[0]['quiz_id']
bb

讲义中的题目在一节课使用时常大于40%

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_mysql_bidata = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="bidata",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')
conn_dmart = pymysql.connect(host="rm-bp1303fj95u76622lmo.mysql.rds.aliyuncs.com",port=3306,user="wangguoqiang",passwd="rwpbDG8ahhJ5QlLfcOBV",db="bidata",charset="utf8")


# In[2]:


#### 每节课实际上课时间和使用的各讲义时间

sql1 = '''
SELECT
        max(sub.subject_name) subject_name,
        lp.lesson_plan_id,
        lpqh.paperid,
        max(date_diff('second', (cast (substring(real_start_time,1,19) as TIMESTAMP)), (cast (substring(real_end_time,1,19) as TIMESTAMP)))) lesson_time,
        sum(quiz.stay_time) paper_time
FROM
        dwd_db_hfjydb.lesson_plan lp
INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
LEFT JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
LEFT JOIN
        (select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid
WHERE
        lp.adjust_start_time >= '2019-07-01'
        AND lp.adjust_start_time < '2019-08-01'
        AND lp.solve_status <> 6
        AND lp.status in (3,5)
        AND lp.lesson_type = 2
GROUP BY
        lp.lesson_plan_id,lpqh.paperid
HAVING
        paperid is not null
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[3]:


df1.shape


# In[4]:


df1['pro'] = df1['paper_time']/df1['lesson_time']
df1.head()


# In[5]:


df1.dtypes


# In[6]:


df1.shape


# In[7]:


df4 = df1[df1['pro']>=0.4]
df4.head()


# In[8]:


df4.shape


# In[9]:


df4.drop_duplicates(subset='lesson_plan_id',inplace=True)
df4.head()


# In[10]:


df4.shape


# In[11]:


df5 = df4.groupby(['subject_name'])['lesson_plan_id'].count().reset_index()
df5.head()


# In[12]:


df5.rename(columns={'lesson_plan_id':'课程数'},inplace=True)
df5.head()


# In[13]:


#### 成单数量

sql3 = '''
SELECT
        sub.subject_name,
        sum((case when (select count(contract_id) from view_tms_contract tc 
        where tc.student_intention_id = s.student_intention_id 
        and status in (3,4,5,9) and submit_time >lp.adjust_start_time)>0 then 1 
        else 0 end)) '成单数',
            case when opt_user=972038 then 'AI直排' 
      when opt_user=381322 and lpo.inform_status = 5 then 'app排课' 
      when opt_user=381322 and (select count(*) from hfjydb.data_teacher_requirements where order_id = lpo.order_id) > 0 then '指定老师排课' 
      when opt_user is null then '未排课' when opt_user not in  (381322, 32547,972038) then '教务手动' else '系统未知'  end 排课方式
FROM
        lesson_plan_order lpo
LEFT JOIN
        lesson_relation lr on lr.order_id = lpo.order_id
LEFT JOIN
        lesson_plan lp on lp.lesson_plan_id = lr.plan_id
LEFT JOIN
        tms_lesson_plan_history tlph on tlph.lesson_plan_id = lr.plan_id
LEFT JOIN
        view_user_info u on u.user_id = lp.teacher_id
INNER JOIN
        subject sub on sub.subject_id = lp.subject_id
LEFT JOIN
        view_student s on s.student_id = lp.student_id
WHERE
        lp.lesson_plan_id in {0}
GROUP BY
        sub.subject_name
HAVING
        排课方式 <> '指定老师排课'
'''.format(tuple(df4['lesson_plan_id']))

df33 = pd.read_sql(sql3,conn_mysql)

df33.head()


# In[14]:


df33.shape


# In[15]:


df_sub = pd.merge(df5,df33,on='subject_name',how='left')
df_sub


# In[16]:


df_sub['贡献率'] = df_sub['成单数']/df_sub['课程数']
df_sub['贡献率'] = df_sub['贡献率'].apply(lambda x: format(x, '.2%')) 
df_sub.head()


# In[17]:


df_sub.drop('排课方式', axis=1,inplace=True)
df_sub


# In[ ]:


df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率006.xlsx')


# In[ ]:


#### 8-16


# In[ ]:


#### 成单数


# In[ ]:


sql04 = '''
select
        lp.lesson_plan_id,
        tc.student_intention_id,
        tc.contract_id
from
        view_tms_contract tc
LEFT JOIN
        view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
        lesson_plan lp on lp.student_id= s.student_id
where 
        lp.lesson_plan_id in {0}
        and tc.status in (3,4,5,9)
        and tc.submit_time >lp.adjust_start_time
GROUP BY
        tc.student_intention_id
'''.format(tuple(df4['lesson_plan_id']))

df04 = pd.read_sql(sql04,conn_mysql)

df04.head()


# In[ ]:


df04.shape


# In[ ]:


df05 = pd.merge(df04,df4,on='lesson_plan_id',how='left')
df05.head()


# In[ ]:


df05.shape


# In[ ]:


df05.to_excel(r'C:\Users\hp\Desktop\df05.xlsx')


# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:


df_sub = pd.merge(df33,df4,on=['lesson_plan_id','subject_name'],how='left')
df_sub


# In[ ]:


df_sub.to_excel(r'C:\Users\hp\Desktop\7月试听课贡献率007.xlsx')


# In[ ]:


df4.to_excel(r'C:\Users\hp\Desktop\df4.xlsx')


# In[ ]:


sql01 = '''
SELECT
        s.student_intention_id,
        lp.lesson_plan_id
FROM
        lesson_plan lp
LEFT JOIN
        view_student s on s.student_id = lp.student_id
WHERE
        lp.lesson_plan_id in {0}

'''.format(tuple(df4['lesson_plan_id']))

df01 = pd.read_sql(sql01,conn_mysql)

df01.head()


# In[ ]:


sql02 = '''
select
        tc.student_intention_id,
        tc.contract_id
from
        view_tms_contract tc
LEFT JOIN
        view_student s on s.student_intention_id = tc.student_intention_id
LEFT JOIN
        lesson_plan lp on lp.student_id= s.student_id
where 
        tc.student_intention_id in {0}
        and tc.status in (3,4,5,9)
        and tc.submit_time >lp.adjust_start_time
GROUP BY
        tc.student_intention_id
'''.format(tuple(df01['student_intention_id']))

df02 = pd.read_sql(sql02,conn_mysql)

df02.head()


# In[ ]:


df02.shape

教研题目使用率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[31]:


sql1 = '''
SELECT
    a.lesson_plan_id,
    max(a.lesson_type) lesson_type,
    max(a.adjust_start_time) adjust_start_time,
    max(a.subject_name) subject_name,
    count(case when (a.stay_time >= 10 AND a.subject_two='小学') then a.quiz_id when (a.stay_time >= 30 AND (a.subject_two='初中' OR a.subject_two='高中')) then a.quiz_id  end) quiz_count,
    count(case when (a.type in (1,3,4) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (1,3,4) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hf_quiz_count,
    count(case when (a.type = 2 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 2 AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  homework_quiz_count,
    count(case when (a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_quiz_count,
    count(case when (a.type = 6 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 6 AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  zdy_quiz_count,
    count(case when (a.hwl_type='严选智能题库' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选智能题库' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_zn_count,
    count(case when (a.hwl_type='严选讲义' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选讲义' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_jy_count,
    count(case when (a.homework_type='作业中严选智能题库' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中严选智能题库' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_zn_count,
    count(case when (a.homework_type='作业中精品讲义题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中精品讲义题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_jy_count,
    count(case when (a.homework_type='作业中自定义的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中自定义的题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_zdy_count,
    count(case when (a.homework_type='作业中经典的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中经典的题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_jd_count,
    count(case when (a.homework_type='作业中的作业题目数' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中的作业题目数' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_hw_count
FROM
(
    SELECT
        lp.lesson_plan_id,
        lp.lesson_type,
        lp.adjust_start_time,
        sub.subject_name,
        SUBSTR(sub.subject_name,1,2) subject_two,
        lpq.quiz_id,
        lpq.type,
        (case when jh.id is null and lpq.type in (8,9) then '严选智能题库' when jh.id > 0 then '严选讲义' else '其他' end) hwl_type,
        (case when lpq.type = 2 and hwdi.type = 1 AND hwdih.homeworkid IS NULL then '作业中经典的题目' when lpq.type = 2 and hwdi.type = 6 AND hwdih.homeworkid IS NULL then '作业中自定义的题目' when lpq.type = 2 and hwdi.type in (8,9) and hwdih.homeworkid IS NULL then '作业中严选智能题库' when lpq.type = 2 and hwdi.type = 8 and hwdih.homeworkid IS NOT NULL then '作业中精品讲义题目' WHEN lpq.type = 2 AND hwdi.type=2 AND hwdih.homeworkid IS NULL THEN '作业中的作业题目数' else '其他'end ) homework_type,  
        quiz.stay_time
    FROM
        dwd_db_hfjydb.lesson_plan lp
    INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
    INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
    LEFT JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh on lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
    LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid and jh.is_del = 0
    LEFT JOIN
        dwd_db_hfjydb.home_work_detail_info hwdi ON hwdi.Id = lpq.quiz_id and lpq.type = 2
    LEFT JOIN
        dwd_db_hfjydb.home_work_detail_info_hwlinfo hwdih ON hwdih.homeworkid = hwdi.home_work_id and hwdi.quiz_id = hwdih.quizid
    LEFT JOIN
        (select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz
        ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
    WHERE lp.status in (3,5)
            AND lp.solve_status <> 6
            AND lp.lesson_type in (1,2)
            AND lp.adjust_start_time >= '2019-07-01'
            AND lp.adjust_start_time < '2019-08-01'
) a
GROUP BY
        a.lesson_plan_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[32]:


df1.shape


# In[40]:


df1.dtypes


# In[33]:


df1.rename(columns={'lesson_plan_id':'课程id','lesson_type':'课程类型','adjust_start_time':'上课时间','subject_name':'学科','quiz_count':'题目总数','hf_quiz_count':'经典题目总数','homework_quiz_count':'作业题目总数','hwl_quiz_count':'好未来题目总数','zdy_quiz_count':'自定义题目总数','hwl_zn_count':'严选智能题库数','hwl_jy_count':'严选讲义','hw_zn_count':'作业中严选智能题库数','hw_zdy_count':'作业中自定义的题目数','hw_jy_count':'作业中精品讲义题目数','hw_jd_count':'作业中经典的题目','hw_hw_count':'作业中的作业题目数'},inplace=True)

df1.head(10)


# In[34]:


df1[df1['作业题目总数'] == (df1['作业中严选智能题库数']+df1['作业中精品讲义题目数']+df1['作业中自定义的题目数']+df1['作业中经典的题目']+df1['作业中的作业题目数'])].loc[:,['课程id','作业题目总数','作业中严选智能题库数','作业中精品讲义题目数','作业中自定义的题目数','作业中经典的题目','作业中的作业题目数']]


# In[37]:


df1[df1['好未来题目总数'] == (df1['严选智能题库数']+df1['严选讲义'])].loc[:,['课程id','好未来题目总数','严选智能题库数','严选讲义']]


# In[38]:


df1[df1['题目总数'] == (df1['经典题目总数']+df1['作业题目总数']+df1['好未来题目总数']+df1['自定义题目总数'])]


# In[39]:


df1.to_csv(r"C:\Users\hp\Desktop\题目使用率V4.csv",encoding="utf_8_sig")


# In[ ]:

2019-9豌豆思维

学生端思维导图
=IF(H2<5,"0-5分钟",IF(H2<10,"0-10分钟",IF(H2>10,"大于10分钟")))

=IF(S10<0.6,"0 <= 正确率 <= 60%",IF(S10<0.8,"60% <= 正确率 <= 80%",IF(S10<1,"80% <= 正确率 < 100%",IF(S10>=1,"正确率100%"))))


sql
SELECT
        *
FROM
(
(
SELECT
        a.`完成测评月份` as '完成测评月份',
        -- a.`当前阶段` as '当前阶段_降阶',
        a.`分数段_降阶`,
        count(a.`当前阶段`) as '数量_降阶'
FROM
(
SELECT
        dj.当前阶段,
        dj.完成测评月份,
        case when dj.平均分 < 60 then '0-60' when dj.平均分 > 60 and 
        dj.平均分 < 80 then '60-80' when dj.平均分 > 80 and dj.平均分 <=100 then '80-100'
        end as '分数段_降阶'
FROM
        wd_new_djcp_0927 dj
WHERE
        dj.升降标识 = '降阶'
) a
WHERE
        a.`完成测评月份` in (6,7,8)
        AND a.`分数段_降阶` is not null
GROUP BY
        a.`完成测评月份`,a.`分数段_降阶`
) x
RIGHT JOIN
(
SELECT
        a.`完成测评月份` as '完成测评月份',
        -- a.`当前阶段` as '当前阶段_不变',
        a.`分数段_不变`,
        count(a.`当前阶段`) as '数量_不变'
FROM
(
SELECT
        dj.当前阶段,
        dj.完成测评月份,
        case when dj.平均分 < 60 then '0-60' when dj.平均分 > 60 and 
        dj.平均分 < 80 then '60-80' when dj.平均分 > 80 and dj.平均分 <=100 then '80-100'
        end as '分数段_不变'
FROM
        wd_new_djcp_0927 dj
WHERE
        dj.升降标识 = '不变'
) a
WHERE
        a.`完成测评月份` in (6,7,8)
        AND a.`分数段_不变` is not null
GROUP BY
        a.`完成测评月份`,a.`分数段_不变`
) y
ON x.完成测评月份 = y.完成测评月份  and x.分数段_降阶 = y.分数段_不变

)

参考文献

苏宝宝-
汪国强
http://subaobao.club:8181/

打赏一个呗

取消

感谢您的支持,我会继续努力的!

扫码支持
扫码支持
扫码打赏,你说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦