Hive Extra Commands
View
Order by
Group by
Join
hive> show databases;
hive> use emp_db;
Create sample.txt using Tab space
$nano sample.txt
GNU nano 6.2 sample.txt *
1 Alen 50000 devloper
2 Tom 45000 jr.devloper
3 Harry 20000 sales
4 Vicky 50000 devloper
4 Vicky 50000 devloper
5 Ajay 20000 sales
6 Vishal 25000 sales
hive> create table employee(id int,name string, salary int, role string) row format delimited fields terminated by ‘\t’;
hive> load data local inpath ‘/home/cse7/sample.txt’ overwrite into table employee;
View (Command using filter data)
hive> select * from employee;
1 Alen 50000 devloper
2 Tom 45000 jr.devloper
3 Harry 20000 sales
4 Vicky 50000 devloper
4 Vicky 50000 devloper
5 Ajay 20000 sales
6 Vishal 25000 sales
hive> create view new_employee as select * from employee where salary > 45000;
hive> select * from new_employee;
1 Alen 50000 devloper
4 Vicky 50000 devloper
4 Vicky 50000 devloper
Order by
hive> select id,name,salary from employee order by salary;
5 Ajay 20000
3 Harry 20000
6 Vishal 25000
2 Tom 45000
4 Vicky 50000
4 Vicky 50000
1 Alen 50000
Group by
hive> select role,count(*) from employee group by role;
devloper 3
jr.devloper 1
sales 3
For Join
Create two table customer table and orders table
hive> create table customer(cid int,name string,city string) row format delimited fields terminated by ‘\t’;
customer.txt
1 Ajay Mumbai
2 Harry Banglore
3 Aakash Pune
4 Rahul Delhi
Customer id, customer name, customer city
cid int,name string,city string
hive> load data local inpath ‘/home/cse7/customer.txt’ overwrite into table customer;
hive> create table orders (oid int,cid int,amt int) row format delimited fields terminated by ‘\t’;
orders.txt
001 1 4000
002 4 5000
003 5 2000
004 2 8000
005 6 9000
order id, customer id, amount
oid int,cid int,amt int
hive> load data local inpath ‘/home/cse7/orders.txt’ overwrite into table orders;
Join
hive> select c.cid,c.name,o.amt from customer c join orders o on(c.cid=o.cid);
1 Ajay 4000
2 Harry 8000
4 Rahul 5000
Left Outer Join
hive> select c.cid,c.name,o.amt from customer c left outer join orders o on(c.cid=o.cid);
1 Ajay 4000
2 Harry 8000
3 Aakash NULL
4 Rahul 5000
Right Outer Join
hive> select c.cid,c.name,o.amt from customer c right outer join orders o on(c.cid=o.cid);
1 Ajay 4000
4 Rahul 5000
NULL NULL 2000
2 Harry 8000
NULL NULL 9000
hive> EXIT;
ORC File Setting to: conf/hive-site.xml
cd $HIVE_HOME
nano conf/hive-site.xml
Add below properties to file
<configuration>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>2</value>
</property>
<property>
<name>hive.in.test</name>
<value>true</value>
</property>
</configuration>
hive>create table OrcTable(id int ,name string ) clustered by (id) into 2 buckets stored as ORC TBLPROPERTIES(‘transactional’=’true’);
Insert:
hive>insert into table OrcTable values (1,’Amit’),(2,’Gyancs’),(3,’Arsalan’);
Update:
hive>update OrcTable set name = ‘Riyaz’ where id = 2;
Delete:
hive>delete from OrcTable where id = 1;
Test:
hive>select * from OrcTable;
Install Apache Pig on Hadoop Complete Setup
