此篇作为开端,即是引题又是总结,后续会在陆续发布不同角度优化的探索过程
整体来说,ClickHouse的性能,但这些都是基于优化后的结果,一条优化过的SQL与没有经过优化的SQL的性能可能会相差好几倍。
SQL慢查大部分主要体现在CPU负载过高,IO(磁盘IO/网络IO)过高,或者查询的列中无索引导致的。
对于ClickHouse的优化可以从表结构存储设计优化与查询SQL优化两个方向来进行。此篇先只针对查询SQL优化的方向进行整理,大致有以下几个方向
优化前每次查询在5S到6S之间,优化后每次查询在1S到2S之间。核心优化手段就两点。
优化点明细
--- /Users/dylanwu/Downloads/Diff/MP数据报表优化后.sql Thu Sep 9 15:23:01 2021
+++ /Users/dylanwu/Downloads/Diff/MP数据报表优化前.sql Thu Sep 9 14:39:17 2021
## 对所有MySQL映射表做子查询封装,并集合1,2两个方向的优化
@@ -79,29 +79,21 @@
, 0 as a_bid_uv
, 0 as a_imp_uv
- -- from (
- -- select dat, media_id, slot_id, consumer_type, forecast_earnings, c_imp_pv, c_click_pv
- -- from tracking_report.media_prime_consumer_report
- -- where dat between '2021-09-02' and '2021-09-08'
- -- ) a
- from tracking_report.media_prime_consumer_report a
- join (
- select dat, consumer_slot_id, cost_type, revenue_share, cost
- from mysql_tracking_report.consumer_slot_day
- where dat between '2021-09-02' and '2021-09-08' and cost_type <> 4
- ) d on a.dat = d.dat and a.consumer_slot_id = d.consumer_slot_id
- left join(
+ from tracking_report.media_prime_consumer_report a
+ join mysql_tracking_report.consumer_slot_day d on a.dat = d.dat and a.consumer_slot_id = d.consumer_slot_id
+ left join
+ (
select dsp_slot_id, dsp_id, is_assigned_to_customer
from mysql_media_prime_saas.consumer_slot
group by dsp_slot_id, dsp_id, is_assigned_to_customer
)f on a.consumer_slot_id = f.dsp_slot_id
- left join (
- select id from mysql_media_prime_saas.consumer
- ) g on f.dsp_id = toInt32(g.id)
- where a.dat between '2021-09-02' and '2021-09-08'
+ left join mysql_media_prime_saas.consumer g on f.dsp_id = toInt32(g.id)
+ where a.dat >= '2021-09-02'
+ and a.dat <= '2021-09-08'
+ and d.cost_type <> 4
group by date_date
, a_media_id
- , a_slot_id
+ , a_slot_id
union all
@@ -119,12 +111,13 @@
, 0 as income_click_pv
, 0 as a_bid_uv
, 0 as a_imp_uv
+
from tracking_report.media_prime_media_report
where dat >= '2021-09-02'
and dat <= '2021-09-08'
group by date_date
, a_media_id
- , a_slot_id
+ , a_slot_id
union all
## 减少无意义的中间表,降低IO,提高效率
@@ -140,33 +133,36 @@
, 0 as forecast_cost
, 0 as income_imp_pv
, 0 as income_click_pv
- , uniqCombinedMerge(user_id_bid) as a_bid_uv
- , uniqCombinedMerge(user_id_imp) as a_imp_uv
- from tracking_report.media_prime_uv_report
- where dat between '2021-09-02' and '2021-09-08'
+ , uniqCombinedMerge(bid_uv_state) as a_bid_uv
+ , uniqCombinedMerge(imp_uv_state) as a_imp_uv
+ from
+ (
+ select dat
+ , media_id
+ , slot_id
+
+ , uniqCombinedMergeState(user_id_bid) as bid_uv_state
+ , uniqCombinedMergeState(user_id_imp) as imp_uv_state
+ from tracking_report.media_prime_uv_report
+ where dat >= '2021-09-02'
+ and dat <= '2021-09-08'
+ group by dat
+ , media_id
+ , slot_id
+
+ ) uv
group by date_date
, a_media_id
- , a_slot_id
-
+ , a_slot_id
) data_table
## 对所有MySQL映射表做子查询封装,并集合1,2两个方向的优化
-left join (
- select app_slot_id, name, slot_access_type, slot_type
- from mysql_media_prime_saas.app_slot
-) b on toString(data_table.a_slot_id) = b.app_slot_id
-left join (
- select saas_mobile_app_id, name, package, account_id
- from media_prime.media
-) c on c.saas_mobile_app_id = data_table.a_media_id
-left join (
- select account_id, company_name
- from media_prime.user
-) e on e.account_id = c.account_id
-left join (
- select dat, app_slot_id, cost_type, cost, revenue_share
- from mysql_tracking_report.app_slot_day
- where dat between '2021-09-02' and '2021-09-08' and cost_type <> 4
-) d on data_table.date_date = d.dat and data_table.a_slot_id = d.app_slot_id
-where c.package not like '%debug%'
+left join mysql_media_prime_saas.app_slot b on toString(data_table.a_slot_id) = b.app_slot_id
+left join media_prime.media c on c.saas_mobile_app_id = data_table.a_media_id
+left join media_prime.user e on e.account_id = c.account_id
+left join mysql_tracking_report.app_slot_day d on data_table.date_date = d.dat and data_table.a_slot_id = d.app_slot_id
+where date_date >= '2021-09-02'
+ and date_date <= '2021-09-08'
+ and c.package not like '%debug%'
+ and d.cost_type <> 4
group by
`日期`
, `账号名称`
优化后的SQL
select
toString(date_date) as `日期`
, concat('(',ifNull(toString(e.account_id),''),')',ifNull(e.company_name,'')) as `账号名称`
, concat('(',ifNull(toString(saas_mobile_app_id),''),')',ifNull(c.name,c.package)) as `媒体名称`
, concat('(',b.app_slot_id,')',ifNull(b.name,'')) as `广告位名称`
, multiIf(b.slot_access_type=1, 'SDK', b.slot_access_type=2, 'API', b.slot_access_type=3, 'JS', b.slot_access_type=4, '小程序', ifNull(toString(b.slot_access_type),'SDK')) as `接入方式`
, multiIf(d.cost_type = 1,'-',d.cost_type = 2,'固定价格', d.cost_type = 3, '分成', d.cost_type = 4, '包断', d.cost_type = 5, '分成', '-') as `结算类型`
, ifNull( concat(toString(d.cost),'/', toString(d.revenue_share)) , '-') as `结算单价/分成比例`
, multiIf(b.slot_type=1, '视频贴片', b.slot_type=2, '视频暂停', b.slot_type=3, '浮层', b.slot_type=4, 'banner', b.slot_type=5, '开屏', b.slot_type=6, '插屏', b.slot_type=7, '信息流', b.slot_type=8, '文字链', b.slot_type=9, '动态创意', b.slot_type=10, '激励视频', b.slot_type=11, 'Draw信息流', b.slot_type=12, '互动广告', b.slot_type=13, '个性模板广告', '未知') as `广告形式`
, sum(ifNull(a_bid_pv,0)) as `广播总量`
, sum(ifNull(if(b.slot_access_type=4,income_imp_pv,a_imp_pv),0)) as `曝光总量`
, sum(ifNull(a_imp_uv,0)) as `UV总量`
, sum(ifNull(if(b.slot_access_type=4,income_click_pv,a_click_pv),0)) as `点击总量`
, ifNotFinite(sum(ifNull(if(b.slot_access_type=4,income_click_pv,a_click_pv),0)) / sum(ifNull(if(b.slot_access_type=4,income_imp_pv,a_imp_pv),0)), 0) as `点击率`
, sum(ifNull(forecast_revenue,0)) as `总预计收入`
, sum(ifNull(forecast_cost,0)) as `成本`
, sum(ifNull(forecast_revenue,0)) - sum(ifNull(forecast_cost,0)) as `利润`
, ifNotFinite((sum(ifNull(forecast_revenue,0)) - sum(ifNull(forecast_cost,0))) / sum(ifNull(forecast_revenue,0)) ,0) as `利润率`
, ifNotFinite(sum(ifNull(forecast_revenue,0)) / sum(ifNull(if(b.slot_access_type=4,income_imp_pv,a_imp_pv),0)), 0) * 1000.00 as `CPM单价`
, ifNotFinite(sum(ifNull(forecast_revenue,0)) / sum(ifNull(if(b.slot_access_type=4,income_click_pv,a_click_pv),0)), 0) as `CPC单价`
, ifNotFinite(sum(ifNull(forecast_revenue,0))/ sum(ifNull(a_bid_uv,0)), 0) as ARPU
, sum(a_bid_suc_pv) as `竞价成功数`
, sum(a_bid_selected_pv) as `竞价被选数`
, ifNotFinite(sum(a_bid_selected_pv) / sum(ifNull(a_bid_pv,0)) , 0) as `填充率`
, ifNotFinite(sum(ifNull(if(b.slot_access_type=4,income_imp_pv,a_imp_pv),0)) / sum(a_bid_selected_pv), 0) as `曝光率`
from
(
select a.dat as date_date
, media_id as a_media_id
, slot_id as a_slot_id
, 0 as a_bid_pv
, 0 as a_bid_suc_pv
, 0 as a_bid_selected_pv
, 0 as a_imp_pv
, 0 as a_click_pv
, sum(forecast_earnings) as forecast_revenue
, sum(
multiIf(
d.cost_type = 1, toFloat64(0)
, d.cost_type = 2, toFloat64(if(media_id = 0 and toString(slot_id) = '12' or(a.dat >= '2021-02-05' and a.dat <= '2021-03-07' and a.consumer_type = 'ptgapi'),0,toFloat64(c_imp_pv))* toFloat64(d.cost))
, d.cost_type = 3, toFloat64(if(a.dat >= '2021-02-05' and a.dat <= '2021-03-07' and a.consumer_type = 'ptgapi',toFloat64(0),forecast_earnings) * toFloat64(d.revenue_share) * 1000)
, d.cost_type = 5, toFloat64(if(a.dat >= '2021-02-05' and a.dat <= '2021-03-07' and a.consumer_type = 'ptgapi',toFloat64(0),forecast_earnings) * toFloat64(d.revenue_share) * 1000)
, 0)
)/1000 as forecast_cost
, sum(a.c_imp_pv) as income_imp_pv
, sum(a.c_click_pv) as income_click_pv
, 0 as a_bid_uv
, 0 as a_imp_uv
-- from (
-- select dat, media_id, slot_id, consumer_type, forecast_earnings, c_imp_pv, c_click_pv
-- from tracking_report.media_prime_consumer_report
-- where dat between '2021-09-02' and '2021-09-08'
-- ) a
from tracking_report.media_prime_consumer_report a
join (
select dat, consumer_slot_id, cost_type, revenue_share, cost
from mysql_tracking_report.consumer_slot_day
where dat between '2021-09-02' and '2021-09-08' and cost_type <> 4
) d on a.dat = d.dat and a.consumer_slot_id = d.consumer_slot_id
left join(
select dsp_slot_id, dsp_id, is_assigned_to_customer
from mysql_media_prime_saas.consumer_slot
group by dsp_slot_id, dsp_id, is_assigned_to_customer
)f on a.consumer_slot_id = f.dsp_slot_id
left join (
select id from mysql_media_prime_saas.consumer
) g on f.dsp_id = toInt32(g.id)
where a.dat between '2021-09-02' and '2021-09-08'
group by date_date
, a_media_id
, a_slot_id
union all
select dat as date_date
, media_id as a_media_id
, toString(slot_id) as a_slot_id
, sum(bid_pv) as a_bid_pv
, sum(bid_suc_pv) as a_bid_suc_pv
, sum(bid_selected_pv) as a_bid_selected_pv
, sum(if(v_imp_pv = 0, imp_pv, v_imp_pv)) as a_imp_pv
, sum(click_pv) as a_click_pv
, 0 as forecast_revenue
, 0 as forecast_cost
, 0 as income_imp_pv
, 0 as income_click_pv
, 0 as a_bid_uv
, 0 as a_imp_uv
from tracking_report.media_prime_media_report
where dat >= '2021-09-02'
and dat <= '2021-09-08'
group by date_date
, a_media_id
, a_slot_id
union all
select dat as date_date
, media_id as a_media_id
, slot_id as a_slot_id
, 0 as a_bid_pv
, 0 as a_bid_suc_pv
, 0 as a_bid_selected_pv
, 0 as a_imp_pv
, 0 as a_click_pv
, 0 as forecast_revenue
, 0 as forecast_cost
, 0 as income_imp_pv
, 0 as income_click_pv
, uniqCombinedMerge(user_id_bid) as a_bid_uv
, uniqCombinedMerge(user_id_imp) as a_imp_uv
from tracking_report.media_prime_uv_report
where dat between '2021-09-02' and '2021-09-08'
group by date_date
, a_media_id
, a_slot_id
) data_table
left join (
select app_slot_id, name, slot_access_type, slot_type
from mysql_media_prime_saas.app_slot
) b on toString(data_table.a_slot_id) = b.app_slot_id
left join (
select saas_mobile_app_id, name, package, account_id
from media_prime.media
) c on c.saas_mobile_app_id = data_table.a_media_id
left join (
select account_id, company_name
from media_prime.user
) e on e.account_id = c.account_id
left join (
select dat, app_slot_id, cost_type, cost, revenue_share
from mysql_tracking_report.app_slot_day
where dat between '2021-09-02' and '2021-09-08' and cost_type <> 4
) d on data_table.date_date = d.dat and data_table.a_slot_id = d.app_slot_id
where c.package not like '%debug%'
group by
`日期`, `账号名称` , `媒体名称` , `广告位名称` , `接入方式` , `结算类型` , `结算单价/分成比例` , `广告形式`
order by `日期` desc, `账号名称` , `媒体名称` , `广告位名称` , `接入方式` , `结算类型` , `广告形式`