Cloudera CDH/CDP 및 Hadoop EcoSystem, Semantic IoT등의 개발/운영 기술을 정리합니다. gooper@gooper.com로 문의 주세요.
Hive query for creating table
CREATE TABLE page_view ( viewTime INT, userid BIGINT, page_url STRING, ip STRING COMMENT 'IP Address of the User', referrer_url STRING ) COMMENT 'This is the page view table' PARTITIONED BY (dt STRING, country STRING) CLUSTERED BY (userid) SORTED BY (viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ' 01' COLLECTION ITEMS TERMINATED BY ' 02' MAP KEYS TERMINATED BY ' 03' LINES TERMINATED BY 'n' STORED AS SEQUENCEFILE;
Hive query for adding partitions
CREATE TABLE add_part_test (key STRING, value STRING) PARTITIONED BY (ds STRING); ALTER TABLE add_part_test ADD PARTITION (ds='2010-01-01'); ALTER TABLE add_part_test ADD IF NOT EXISTS PARTITION (ds='2010-01-01'); ALTER TABLE add_part_test ADD IF NOT EXISTS PARTITION (ds='2010-01-01') PARTITION (ds='2010-01-02') PARTITION (ds='2010-01-03'); ALTER TABLE alter2 ADD PARTITION (insertdate='2008-01-01') LOCATION '2008/01/01';
Hive query for showing partitions
show partitions alter2;
Hive query for changing table properties
ALTER TABLE alter1 SET TBLPROPERTIES ('a'='1', 'c'='4', 'd'='3');
Hive query for changing SerDe properties
ALTER TABLE alter1 SET SERDEPROPERTIES('s1'='10', 's2' ='20');
Hive query for changing SerDe
ADD JAR ../data/files/TestSerDe.jar; ALTER TABLE alter1 SET SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' WITH SERDEPROPERTIES ('s1'='9');
Hive query for changing columns
ALTER TABLE alter1 REPLACE COLUMNS (a int, b int, c string);
Hive query for changing table name
ALTER TABLE alter3 RENAME TO alter3_renamed;
Hive queries for loading data
CREATE TABLE alter3_src (col1 STRING) STORED AS TEXTFILE ; LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE alter3_src ;
-
load data local inpath '../data/files/kv1.txt' into table load_overwrite; load data inpath '${system:test.tmp.dir}/load_overwrite/kv*.txt' overwrite into table load_overwrite2; load data inpath '${system:test.tmp.dir}/load2_*' overwrite into table load_overwrite; LOAD DATA LOCAL INPATH '../data1/files/kv1.txt' INTO TABLE loadpart1 PARTITION(ds='2009-05-05'); -- 파티션 로드
Hive query for overwriting data
INSERT OVERWRITE TABLE alter3_like PARTITION (pCol1='test_part:', pcol2='test_part:') SELECT col1 FROM alter3_src;
Hive query for creating bucketed table
CREATE TABLE set_bucketing_test (key INT, value STRING) CLUSTERED BY (key) INTO 10 BUCKETS;
Hive query for changing bucketed table
ALTER TABLE set_bucketing_test NOT CLUSTERED;
Hive query for creating index
CREATE INDEX src_rc_concatenate_test_index ON TABLE src_rc_concatenate_test(key) AS 'compact' WITH DEFERRED REBUILD IDXPROPERTIES ("prop1"="val1", "prop2"="val2");
Hive query for showing index
SHOW INDEXES ON src_rc_concatenate_test;
Hive query for removing index from specific table
DROP INDEX src_rc_concatenate_test_index ON src_rc_concatenate_test;
Hive query for calculating statistics
analyze table src_rc_merge_test_part_stat partition(ds='2011') compute statistics; desc extended src_rc_merge_test_part_stat;
Hive query for changing # of buckets from specific table
create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets;
Hive query for changing fileformat
alter table alter_partition_format_test set fileformat rcfile; alter table alter_partition_format_test partition(ds='2010') set fileformat rcfile;
Hive query for changing where the data is located.
alter table alter_partition_format_test set location "file:/test/test/"; alter table alter_partition_format_test partition(ds='2010') set location "file:/test/test/ds=2010";
Hive query for protect mode
alter table alter_part_protect_mode partition (year='1996') enable no_drop; alter table alter_part_protect_mode partition (year='1995') disable no_drop; alter table tbl1 enable no_drop cascade; alter table tbl1 enable no_drop; alter table tbl1 disable no_drop cascade;
Hive query for changing partition names
CREATE TABLE alter_rename_partition (col1 STRING) PARTITIONED BY (pcol1 STRING, pcol2 STRING) STORED AS SEQUENCEFILE; ALTER TABLE alter_rename_partition PARTITION (pCol1='old_part1:', pcol2='old_part2:') RENAME TO PARTITION (pCol1='new_part1:', pcol2='new_part2:');
Hive query for authorization
create table authorization_part (key int, value string) partitioned by (ds string); -- 테이블 생성 ALTER TABLE authorization_part SET TBLPROPERTIES ("PARTITION_LEVEL_PRIVILEGE"="TRUE"); -- 프로퍼티 설정 set hive.security.authorization.enabled=true; -- 권한 사용 grant select on table src_auth_tmp to user hive_test_user; grant Create on table authorization_part to user hive_test_user; grant Update on table authorization_part to user hive_test_user; grant Drop on table authorization_part to user hive_test_user; show grant user hive_test_user on table authorization_part;
Hive query for creating a skewed table
set hive.mapred.supports.subdirectories=true; set hive.internal.ddl.list.bucketing.enable=true; create table original3 (key STRING, value STRING) SKEWED BY (key, value) ON ((1,1),(5,6)); alter table original3 not skewed;
Hive query for changing SerDe’s delimiter
alter table test_table set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; alter table test_table set serdeproperties ('field.delim' = ',');
Hive query for changing view name
CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING); CREATE VIEW view1 as SELECT * FROM invites; ALTER VIEW view1 RENAME TO view2;
Hive queries about archive
아카이브에 관한 자세한 내용응 여기를 참고.
If you want to get more information about achive, click this.
set hive.archive.enabled = true; set hive.enforce.bucketing = true; create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets; ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); CREATE TABLE harbucket(key INT) PARTITIONED by (ds STRING) CLUSTERED BY (key) INTO 10 BUCKETS; ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
Hive queries for authorization (grant, revoke)
권한 부여에 대한 자세한 내용은 여기를 참고.
set hive.security.authorization.enabled=true; --table grant to user grant select on table src_autho_test to user hive_test_user; revoke select on table src_autho_test from user hive_test_user; --column grant to user grant select(key) on table src_autho_test to user hive_test_user; revoke select(key) on table src_autho_test from user hive_test_user; --table grant to group grant select on table src_autho_test to group hive_test_group1; revoke select on table src_autho_test from group hive_test_group1; --column grant to group grant select(key) on table src_autho_test to group hive_test_group1; revoke select(key) on table src_autho_test from group hive_test_group1;
Hive query for creating roles
--role create role src_role; grant role src_role to user hive_test_user; show role grant user hive_test_user;
Hive query for granting authority to specific role
--column grant to role grant select(key) on table src_autho_test to role src_role; revoke select(key) on table src_autho_test from role src_role; --table grant to role grant select on table src_autho_test to role src_role; revoke select on table src_autho_test from role src_role;
Hive query for deleting role
drop role src_role;
Hive query for granting all authoritie to specific user
set hive.security.authorization.enabled=true; grant All on table src_autho_test to user hive_test_user;
Hive examples for granting table authority to specific user or role
CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database'; SHOW DATABASES; GRANT drop ON DATABASE test_db TO USER hive_test_user; -- 사용자에게 drop 권한 부여 GRANT select ON DATABASE test_db TO USER hive_test_user; -- 사용자에게 select 권한 부여 SHOW GRANT USER hive_test_user ON DATABASE test_db; -- 사용자 권한 보기 CREATE ROLE db_test_role; -- 역할 생성 GRANT ROLE db_test_role TO USER hive_test_user; -- 사용자에게 역할 부여 SHOW ROLE GRANT USER hive_test_user; -- 역할 보기 GRANT drop ON DATABASE test_db TO ROLE db_test_role; -- 역할에 drop 권한 부여 GRANT select ON DATABASE test_db TO ROLE db_test_role; -- 역할에 select 권한 부여 SHOW GRANT ROLE db_test_role ON DATABASE test_db; -- 역할 권한 보기
Hive example for revoking its all authorities for specific user
GRANT ALL TO USER hive_test_user; SET hive.security.authorization.enabled=true; CREATE TABLE src_authorization_7 (key int, value string); REVOKE ALL FROM USER hive_test_user;
Hive example for revoking its all authorities for specific group
GRANT ALL TO GROUP hive_test_group1; SET hive.security.authorization.enabled=true; CREATE TABLE src_authorization_7 (key int, value string); REVOKE ALL FROM GROUP hive_test_group1;
Hive example for setting prehook class
set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables,org.apache.hadoop.hive.ql.hooks.UpdateInputAccessTimeHook$PreExec;
Hive uniqujoin example
CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1; LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2; LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3; FROM UNIQUEJOIN PRESERVE T1 a (a.key), PRESERVE T2 b (b.key), PRESERVE T3 c (c.key) SELECT a.key, b.key, c.key; FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key), T3 c (c.key) SELECT a.key, b.key, c.key;
Source command in Hive CLI
항이브 퀴리 스크립트 파일을 실행한다.
../data/files/source.txt 에 있는 내용 EXPLAIN SELECT x.* FROM SRC x; SELECT x.* FROM SRC x;
실제 source 커맨드
source ../data/files/source.txt;
Hive examples for variable substitution
set hivevar:key1=value1; EXPLAIN SELECT * FROM src where key="${key1}"; EXPLAIN SELECT * FROM src where key="${hivevar:key1}"; set hivevar:a=1; set hivevar:b=a; set hivevar:c=${hivevar:${hivevar:b}}; -- 서브 변수 치환 EXPLAIN SELECT * FROM src where key="${hivevar:c}"; set hivevar:a; set hivevar:b; set hivevar:c; set hivevar:key1;