Hive Extra Commands with Practical Examples

Hive Extra Commands

View
Order by
Group by
Join
hive> show databases;
hive> use emp_db;
Create sample.txt using Tab space
$nano sample.txt
GNU nano 6.2 sample.txt *
1 Alen    50000  devloper
2 Tom     45000  jr.devloper
3 Harry   20000  sales
4 Vicky   50000  devloper
4 Vicky   50000  devloper
5 Ajay    20000  sales
6 Vishal  25000  sales
hive> create table employee(id int,name string, salary int, role string) row format delimited fields terminated by ‘\t’;
hive> load data local inpath ‘/home/cse7/sample.txt’ overwrite into table employee;
View (Command using filter data)
hive> select * from employee;
1 Alen   50000  devloper
2 Tom    45000  jr.devloper
3 Harry  20000  sales
4 Vicky  50000  devloper
4 Vicky  50000  devloper
5 Ajay   20000  sales
6 Vishal 25000  sales
hive> create view new_employee as select * from employee where salary > 45000;
hive> select * from new_employee;
1 Alen    50000   devloper
4 Vicky   50000   devloper
4 Vicky   50000   devloper
Order by
hive> select id,name,salary from employee order by salary;
5 Ajay   20000
3 Harry  20000
6 Vishal 25000
2 Tom    45000
4 Vicky  50000
4 Vicky  50000
1 Alen   50000
Group by
hive> select role,count(*) from employee group by role;
devloper    3
jr.devloper 1
sales       3
For Join
Create two table customer table and orders table
hive> create table customer(cid int,name string,city string) row format delimited fields terminated by ‘\t’;
customer.txt
1 Ajay    Mumbai
2 Harry   Banglore
3 Aakash  Pune
4 Rahul   Delhi
Customer id, customer name, customer city
cid int,name string,city string
hive> load data local inpath ‘/home/cse7/customer.txt’ overwrite into table customer;
hive> create table orders (oid int,cid int,amt int) row format delimited fields terminated by ‘\t’;
orders.txt
001 1 4000
002 4 5000
003 5 2000
004 2 8000
005 6 9000
order id, customer id, amount
oid int,cid int,amt int
hive> load data local inpath ‘/home/cse7/orders.txt’ overwrite into table orders;
Join
hive> select c.cid,c.name,o.amt from customer c join orders o on(c.cid=o.cid);
1 Ajay    4000
2 Harry   8000
4 Rahul   5000
Left Outer Join
hive> select c.cid,c.name,o.amt from customer c left outer join orders o on(c.cid=o.cid);
1 Ajay    4000
2 Harry   8000
3 Aakash  NULL
4 Rahul   5000
Right Outer Join
hive> select c.cid,c.name,o.amt from customer c right outer join orders o on(c.cid=o.cid);
1 Ajay    4000
4 Rahul   5000
NULL NULL 2000
2 Harry   8000
NULL NULL 9000
hive> EXIT;
ORC File Setting to: conf/hive-site.xml
cd $HIVE_HOME
nano conf/hive-site.xml
Add below properties to file
<configuration>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>2</value>
</property>
<property>
<name>hive.in.test</name>
<value>true</value>
</property>
</configuration>
hive>create table OrcTable(id int ,name string ) clustered by (id) into 2 buckets stored as ORC TBLPROPERTIES(‘transactional’=’true’);
Insert:
hive>insert into table OrcTable values (1,’Amit’),(2,’Gyancs’),(3,’Arsalan’);
Update:
hive>update OrcTable set name = ‘Riyaz’ where id = 2;
Delete:
hive>delete from OrcTable where id = 1;
Test:
hive>select * from OrcTable;

Install Apache Pig on Hadoop Complete Setup