关于 Hive 中开启 ACID(Atomicity 原子性、Consistency 一致性、Isolation 隔离性、Durability 持久性)事务表。Hive 中创建事务表的前置条件:1. 表的存储格式必须是 ORC;2. 需要开启两个核心参数:并发支持(hive.support.concurrency)以及事务管理器(hive.txn.manager);3. Hive 2 中要求表必须是分桶表;4. 注意设置表属性 'transactional'='true' 以开启事务。
-- Session settings required before working with transactional (ACID) tables.
-- Concurrency support and the DbTxnManager transaction manager are the two
-- core switches for Hive transactions.
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- Additional session options used by this script.
set hive.strict.checks.cartesian.product=false;
set hive.stats.fetch.column.stats=true;
set hive.materializedview.rewriting=true;
-- Start from a clean slate: remove both demo tables if they already exist.
DROP TABLE IF EXISTS emps;
DROP TABLE IF EXISTS emps_tmp;

-- Plain (non-transactional) staging table that holds the seed rows.
CREATE TABLE emps_tmp (
    empid     INT,
    deptno    INT,
    name      VARCHAR(256),
    salary    FLOAT,
    hire_date TIMESTAMP
);
-- Transactional (ACID) table.
--   * partitioned by deptno (string)
--   * bucketed on empid into 10 buckets (Hive 2 requires transactional
--     tables to be bucketed)
--   * stored as ORC
--   * 'transactional'='true' is the core table property that enables
--     transactions
CREATE TABLE emps (
    empid     INT,
    name      VARCHAR(256),
    salary    FLOAT,
    hire_date TIMESTAMP
)
PARTITIONED BY (deptno STRING)
CLUSTERED BY (empid) INTO 10 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
-- Seed rows for the staging table. Columns are listed explicitly so the
-- VALUES tuples do not depend on the table's physical column order.
-- NOTE(review): hire_date is declared TIMESTAMP, but several rows supply bare
-- integers (1000, 500, 250); how these are coerced may vary by Hive version.
-- Confirm the intended values.
INSERT INTO emps_tmp (empid, deptno, name, salary, hire_date)
VALUES
    (100, 10, 'Bill', 10000, 1000),
    (200, 20, 'Eric', 8000, 500),
    (150, 10, 'Sebastian', 7000, NULL),
    (110, 10, 'Theodore', 10000, 250),
    (120, 10, 'Bill', 10000, 250),
    (1330, 10, 'Bill', 10000, '2020-01-02');
-- Turn on dynamic partitioning in non-strict mode so the partition value
-- (deptno) can be taken from the query result.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- Load the transactional table from the staging table. The partition column
-- comes last in the SELECT list and is cast to match the STRING partition key.
-- Note from the original author: transactional tables do not support
-- INSERT OVERWRITE, hence INSERT INTO.
INSERT INTO emps PARTITION (deptno)
SELECT
    empid,
    name,
    salary,
    hire_date,
    CAST(deptno AS STRING) AS deptno
FROM emps_tmp;
待完善
参考链接1: https://cwiki.apache.org/confluence/display/Hive/Hive+Transactions 参考链接2: https://cwiki.apache.org/confluence/display/Hive/Scheduled+Queries
1