商品订单数仓DWD层
原创大约 2 分钟
DWD层
初始化脚本,这个脚本和ODS层
的初始化脚本代码完全一致,只不过数据被保存到了DWD层
。
> cd /home/work/warehouse_goods_order
> vi dwd_shopmall_init.sh
#!/bin/bash
# dwd层数据库和表初始化,只需要执行一次
hive -e "
create database if not exists dwd_shopmall;
create external table if not exists dwd_shopmall.dwd_user (
id bigint,
username string,
gender tinyint,
birthday string,
email string,
mobile string,
createtime string,
disabled tinyint
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/user/';
create external table if not exists dwd_shopmall.dwd_user_addr (
id bigint,
userid bigint,
addrname string,
isdefault tinyint,
username string,
mobile string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/user_addr/';
create external table if not exists dwd_shopmall.dwd_category (
id int,
parentids string,
name string,
level int,
createtime string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/category/';
create external table if not exists dwd_shopmall.dwd_goods_info (
id bigint,
name string,
desc string,
price double,
categoryid int,
createtime string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/goods_info/';
create external table if not exists dwd_shopmall.dwd_payment (
id bigint,
orderid bigint,
tradeno string,
money double,
type int,
createtime string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/payment/';
create external table if not exists dwd_shopmall.dwd_order (
id bigint,
userid bigint,
money double,
type int,
status int,
payid bigint,
createtime string,
updatetime string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/order/';
create external table if not exists dwd_shopmall.dwd_orderitem (
orderid bigint,
goodsid bigint,
amount int,
price double,
createtime string
) partitioned by(dt string)
row format delimited
fields terminated by '\t'
location 'hdfs://server01:9000/data/dwd/orderitem/';
"
然后通过脚本给这些表按天来添加分区。
> cd /home/work/warehouse_goods_order
> vi dwd_shopmall_add_partition.sh
#!/bin/bash
# 基于ods层的表进行清洗,清洗之后的数据会被添加到dwd层对应表的对应分区,每天凌晨执行一次
if [ "d$1" = "d" ]
then
dt=`date +%Y%m%d --date="1 days ago"`
else
dt=$1
fi
hive -e "
insert overwrite table dwd_shopmall.dwd_user partition(dt = '${dt}') select
id,
username,
gender,
birthday,
email,
mobile,
createtime,
disabled
from ods_shopmall.ods_user
where dt = '${dt}' and id is not null;
insert overwrite table dwd_shopmall.dwd_user_addr partition(dt = '${dt}') select
id,
userid,
addrname,
isdefault,
username,
mobile
from ods_shopmall.ods_user_addr
where dt = '${dt}' and id is not null;
insert overwrite table dwd_shopmall.dwd_category partition(dt = '${dt}') select
id,
parentids,
name,
level,
createtime
from ods_shopmall.ods_category
where dt = '${dt}' and id is not null;
insert overwrite table dwd_shopmall.dwd_goods_info partition(dt = '${dt}') select
id,
name,
desc,
price,
categoryid,
createtime
from ods_shopmall.ods_goods_info
where dt = '${dt}' and id is not null;
insert overwrite table dwd_shopmall.dwd_payment partition(dt = '${dt}') select
id,
orderid,
tradeno,
money,
type,
createtime
from ods_shopmall.ods_payment_flow
where dt = '${dt}' and orderid is not null;
insert overwrite table dwd_shopmall.dwd_order partition(dt = '${dt}') select
id,
userid,
money,
type,
status,
payid,
createtime,
updatetime
from ods_shopmall.ods_order
where dt = '${dt}' and id is not null;
insert overwrite table dwd_shopmall.dwd_orderitem partition(dt = '${dt}') select
orderid,
goodsid,
amount,
price,
createtime
from ods_shopmall.ods_orderitem
where dt = '${dt}' and orderid is not null;
可以通过定时任务来执行它。
> crontab -e
# 每天凌晨0点1分执行
1 0 * * * /home/work/warehouse_goods_order/dwd_shopmall_add_partition.sh
感谢支持
更多内容,请移步《超级个体》。