Hive

    xiaoxiao2025-06-06  14

    hive命令行操作

    1.创建hive的数据库

    $hive>hive --version // $hive>hive --help // $hive>create database mydb2 ; // $hive>show databases ; $hive>use mydb2 ; $hive>create table mydb2.t(id int,name string,age int); $hive>drop table t ; $hive>drop table mydb2.t ; $hive>select * from mydb2.t ; //查看指定库的表 $hive>exit ; //退出 $>hive //hive --service cli $>hive //hive --service cli

    通过远程jdbc方式连接到hive数据仓库

    1.启动hiveserver2服务器,监听端口10000 $>hive --service hiveserver2 & 2.通过beeline命令行连接到hiveserver2 $>beeline //进入beeline命令行(于hive --service beeline) $beeline>!help //查看帮助 $beeline>!quit //退出 $beeline>!connect jdbc:hive2://localhost:10000/mydb2//连接到hibve数据 $beeline>show databases ; $beeline>use mydb2 ; $beeline>show tables; //显式表

    使用Hive-jdbc驱动程序采用jdbc方式访问远程数据仓库

    1.创建java模块 2.引入maven 3.添加hive-jdbc依赖

    <?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.it18zhang</groupId> <artifactId>HiveDemo</artifactId> <version>1.0-SNAPSHOT</version> <dependencies> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>2.1.0</version> </dependency> </dependencies> </project> 4.App package com.hivedemo; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; /** * 使用jdbc方式连接到hive数据仓库,数据仓库需要开启hiveserver2服务。 */ public class App { public static void main(String[] args) throws Exception { Class.forName("org.apache.hive.jdbc.HiveDriver"); Connection conn = DriverManager.getConnection("jdbc:hive2://192.168.231.201:10000/mydb2"); Statement st = conn.createStatement(); ResultSet rs = st.executeQuery("select id , name ,age from t"); while(rs.next()){ System.out.println(rs.getInt(1) + "," + rs.getString(2)) ; } rs.close(); st.close(); conn.close(); } }

    hive中表

    1.managed table 托管表。 删除表时,数据也删除了。 2.external table 外部表。 删除表时,数据不删。 hive命令

    //创建表,external 外部表 $hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int) COMMENT ‘xx’ ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ STORED AS TEXTFILE ; //查看表数据 $hive>desc t2 ; $hive>desc formatted t2 ; //加载数据到hive表 $hive>load data local inpath ‘/home/centos/customers.txt’ into table t2 ; //local上传文件 $hive>load data inpath ‘/user/centos/customers.txt’ [overwrite] into table t2 ; //移动文件 //复制表 mysql>create table tt as select * from users ; //携带数据和表结构 mysql>create table tt like users ; //不带数据,只有表结构 hive>create table tt as select * from users ; hive>create table tt like users ;

    //count()查询要转成mr

    $hive>select count(*) from t2 ; $hive>select id,name from t2 ;

    // $hive>select * from t2 order by id desc ; //MR

    //启用/禁用表 $hive>ALTER TABLE t2 ENABLE NO_DROP; //不允许删除 $hive>ALTER TABLE t2 DISABLE NO_DROP; //允许删除

    //分区表,优化手段之一,从目录的层面控制搜索数据的范围。 //创建分区表. $hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ ; //显式表的分区信息 $hive>SHOW PARTITIONS t3; //添加分区,创建目录 $hive>alter table t3 add partition add PARTITION (year=2014, month=11) PARTITION (year=2014, month=12);

    //删除分区 hive>ALTER TABLE employee_partitioned DROP IF EXISTS PARTITION (year=2014, month=11); //分区结构 hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=11 hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=12

    //加载数据到分区表 hive>load data local inpath ‘/home/centos/customers.txt’ into table t3 partition(year=2014,month=11); //创建桶表 $hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ ;

    //加载数据不会进行分桶操作 $hive>load data local inpath ‘/home/centos/customers.txt’ into table t4 ;

    //查询t3表数据插入到t4中。 $hive>insert into t4 select id,name,age from t3 ; //桶表的数量如何设置? //评估数据量,保证每个桶的数据量block的2倍大小。

    //连接查询 $hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ ; $hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ ; //加载数据到表 //内连接查询 hive>select a.,b. from customers a , orders b where a.id = b.cid ; //左外 hive>select a.,b. from customers a left outer join orders b on a.id = b.cid ; hive>select a.,b. from customers a right outer join orders b on a.id = b.cid ; hive>select a.,b. from customers a full outer join orders b on a.id = b.cid ; //explode,炸裂,表生成函数。 //使用hive实现单词统计 //1.建表 $hive>CREATE TABLE doc(line string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ ;

    最新回复(0)