#修改分隔符
alter table DB.TABLE set SERDEPROPERTIES('field.delim'='\001');#查看建表语句
show CREATE TABLE DB.TABLE#修改字段类型
ALTER TABLE DB.TABLE CHANGE hive_count_num hive_count_num string#更改表名
ALTER TABLE DB.TABLE RENAME TO DB.RETABLE;#添加表字段
ALTER TABLE DB.TABLE ADD COLUMNS (is_black string);#删除表字段
ALTER TABLE DB.TABLE drop is_black;#清除表数据
TRUNCATE DB.TABLE;#刷新表
REFRESH DB.TABLE;#刷新表元数据
INVALIDATE METADATA DB.TABLE;#重新shuffle数据
INSERT OVERWRITE DB.TABLE SELECT * FROM DB.TABLE;#复制表结果和数据
create table NEWDB.TABLE as select * from DB.TABLE;#复制表结构
create table NEWDB.TABLE like DB.TABLE;#修改表名
ALTER TABLE DB.TABLE TO DB.RETABLE#修改表字段类型
alter TABLE DB.TABLE CHANGE dt dt string#显示所有函数
show functions;#查看函数用法(hive)
describe function substr;#导出查询结果到文件
impala-shell -q "select * from DB.TABLE limit 10" -B --output_delimiter="\t" -o output.txt#收集表的统计信息,让Impala 基于每一个表的大小、每一个列不同值的个数、等等信息自动的优化查询。
compute stats DB.TABLE;#导入和导出
export table DB.TABLE to '/user/hive/test'; import from '/user/hive/test';#分区表导出和导入
export table DB.TABLE partition (etl_dt="2017-12-14") to '/user/hive/test'; import from '/user/hive/test';#import重命名表
import table DB.TABLE from '/user/hive/test';#导出表并且导入到分区表分区
import table DB.TABLE partition (etl_dt="2017-12-14") from '/user/hive/test';#指定导入位置
import table DB.TABLE from '/user/hive/test' location '/user/hive/test';#导入作为一个外部表
import external TABLE DB.TABLE from '/user/hive/test';#强制删除数据库
DROP DATABASE DB cascade#数据类型转换
SELECT cast(substr(createdate,1,10) as int)/86400 created FROM DB.TABLE#分组排序
row_number() over (PARTITION BY t4.extractcashbillid,t1.ze_checktime ORDER BY t4.overduedateb DESC) flag 除Row_number外还有rank,dense_rank rank() over([partition by col1] order by col2) dense_rank() over([partition by col1] order by col2) row_number() over([partition by col1] order by col2) rank排序时出现相等的值时会有并列,即值相等的两条数据会有相同的序列值 row_number的排序不允许并列,即使两条记录的值相等也不会出现相等的排序值 dense_rank排序的值允许并列,但会跳跃的排序,像这样:1,1,3,4,5,5,7. #创建分区表 CREATE TABLE IF NOT EXISTS DB.TABLE( autoindex INT, vindicatorteamid STRING, createtime STRING) PARTITIONED BY (dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS TEXTFILE; #添加分区 alter table DB.TABLE add partition (dt='2017-12-12'); #删除分区 ALTER TABLE DB.TABLE DROP IF EXISTS PARTITION(dt='2017-12-12'); #加载数据 LOAD DATA INPATH ‘/user/hive/warehouse/db/table/2017-12-12/000000_0’ INTO TABLE DB.TABLE PARTITION(dt='2017-12-12'); #加载数据到hdfs(hive) insert overwrite directory '/user/hive/warehouse/db/table/dt=2017-12-12' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' select * from DB.TABLE where queryday = '2017-12-12' #加载数据到本地(hive) insert overwrite local directory '/data/2017-12-12' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' select * from DB.TABLE where queryday = '2017-12-12' #插入数据到表分区 insert into table DB.TABLE1 PARTITION(dt='2017-12-10') select * from DB.TABLE where queryday = '2017-12-10' #展示表中有多少分区 show partitions DB.TABLE; #刷新分区 REFRESH DB.TABLE PARTITION(dt='2017-12-12');