WordCount.java
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Usage: WordCount <input path> <output path>");
            return -1;
        }
        // Passing the driver class lets Hadoop locate the containing jar,
        // instead of hard-coding a jar file name.
        JobConf conf = new JobConf(WordCount.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        conf.setMapperClass(WordMapper.class);
        conf.setReducerClass(WordReducer.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        JobClient.runJob(conf);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new WordCount(), args);
        System.exit(exitCode);
    }
}
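Since summing counts is associative and commutative, the reducer can also serve as a combiner to cut shuffle traffic. An optional one-line addition (not in the original listing), placed next to the other conf.set* calls:

conf.setCombinerClass(WordReducer.class);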
WordMapper.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class WordMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        String s = value.toString();
        // Emit (word, 1) for every non-empty token on the line.
        for (String word : s.split(" ")) {
            if (word.length() > 0) {
                output.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}
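Splitting on a single space relies on the length check to discard the empty tokens produced by consecutive spaces, and it never splits on tabs. A slightly more robust variant (a sketch, not required by this exercise) splits on any run of whitespace:

for (String word : s.split("\\s+")) { ... }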
WordReducer.java
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class WordReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        // Sum the counts emitted for this word.
        int count = 0;
        while (values.hasNext()) {
            count += values.next().get();
        }
        output.collect(key, new IntWritable(count));
    }
}
hadoop fs -mkdir /input
hadoop fs -put /home/training/Desktop/sample.txt /input/sample.txt
hadoop jar /home/training/Desktop/wc.jar WordCount /input/sample.txt /output
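Assuming a single reducer, the word counts land in one part file that can be checked directly:

hadoop fs -cat /output/part-00000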
----------------------------------------
MaxClosePrice.java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MaxClosePrice extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Usage: MaxClosePrice <input path> <output path>");
            return -1;
        }
        JobConf conf = new JobConf(MaxClosePrice.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        conf.setMapperClass(MaxClosePriceMapper.class);
        conf.setReducerClass(MaxClosePriceReducer.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(FloatWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(FloatWritable.class);
        JobClient.runJob(conf);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new MaxClosePrice(), args);
        System.exit(exitCode);
    }
}
----------------------------------------
MaxClosePriceMapper.java
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.Mapper;
public class MaxClosePriceMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, FloatWritable> {
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, FloatWritable> output, Reporter r)
            throws IOException {
        // Input format: exchange,symbol,date,open,high,low,close,volume,adjClose
        String line = value.toString();
        String[] items = line.split(",");
        String stock = items[1];                        // stock symbol
        float closePrice = Float.parseFloat(items[6]);  // closing price
        output.collect(new Text(stock), new FloatWritable(closePrice));
    }
}
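Float.parseFloat will throw on a header row or any malformed line and fail the task. A defensive variant of the map body (a sketch, assuming bad rows should be skipped rather than crash the job):

String[] items = value.toString().split(",");
if (items.length >= 7) {
    try {
        output.collect(new Text(items[1]), new FloatWritable(Float.parseFloat(items[6])));
    } catch (NumberFormatException e) {
        // skip malformed rows instead of failing the task
    }
}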
----------------------------------------
MaxClosePriceReducer.java
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class MaxClosePriceReducer extends MapReduceBase implements Reducer<Text, FloatWritable, Text, FloatWritable> {
    @Override
    public void reduce(Text key, Iterator<FloatWritable> values,
                       OutputCollector<Text, FloatWritable> output, Reporter r)
            throws IOException {
        float maxClosePrice = 0;
        // Iterate over all close prices and keep the maximum
        while (values.hasNext()) {
            FloatWritable i = values.next();
            maxClosePrice = Math.max(maxClosePrice, i.get());
        }
        // Write output
        output.collect(key, new FloatWritable(maxClosePrice));
    }
}
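Seeding the maximum with 0 is safe here because close prices are never negative. If the values could be negative, the seed should be the lowest possible float (note that in Java this is Float.NEGATIVE_INFINITY or -Float.MAX_VALUE, not Float.MIN_VALUE, which is the smallest positive value):

float maxClosePrice = Float.NEGATIVE_INFINITY;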
----------------------------------------
ABCSE,B7J,2010-02-08,8.63,8.70,8.57,8.64,78900,8.64
ABCSE,B7J,2010-02-05,8.63,8.71,8.31,8.58,218700,8.58
ABCSE,B7J,2010-02-04,8.88,8.88,8.59,8.66,89900,8.66
ABCSE,B7J,2010-02-03,8.83,8.92,8.80,8.89,119000,8.89
ABCSE,B7J,2010-02-02,8.77,8.90,8.73,8.87,51900,8.87
ABCSE,B7J,2010-02-01,8.69,8.77,8.66,8.75,38600,8.75
ABCSE,B7J,2010-01-29,8.81,8.81,8.56,8.57,91700,8.57
ABCSE,B7J,2010-01-28,8.90,8.90,8.60,8.69,92100,8.69
ABCSE,B7J,2010-01-27,8.87,8.87,8.68,8.79,82400,8.79
ABCSE,B7J,2010-01-26,8.83,8.92,8.71,8.82,106000,8.82
ABCSE,BYX,1983-01-04,50.00,50.00,46.25,50.00,1014000,1.99
ABCSE,BYX,1983-01-03,51.13,51.50,50.00,50.00,920400,1.99
ABCSE,BYX,1982-12-31,51.63,52.00,51.00,51.13,330000,2.03
ABCSE,BYX,1982-12-30,52.25,52.75,51.63,51.63,410400,2.05
ABCSE,BYX,1982-12-29,51.88,52.63,51.75,52.25,1017600,2.08
ABCSE,BYX,1982-12-28,52.00,52.38,51.63,51.88,906000,2.06
ABCSE,BYX,1982-12-27,51.63,52.38,51.63,52.00,1072800,2.07
ABCSE,BYX,1982-12-23,50.00,51.50,50.00,51.50,867600,2.05
ABCSE,BYX,1982-12-22,50.13,50.75,49.75,50.00,1395600,1.99
ABCSE,BYX,1982-12-21,49.50,50.25,49.50,50.13,1363200,1.99
ABCSE,BYX,1982-12-20,49.75,50.00,49.00,49.25,903600,1.96
ABCSE,BYX,1982-12-17,49.25,49.88,48.88,49.75,1329600,1.98
ABCSE,BYX,1982-12-16,50.25,50.38,49.25,49.25,975600,1.96
ABCSE,BYX,1982-12-15,49.88,50.50,49.63,50.25,1104000,2.00
ABCSE,BYX,1982-12-14,51.38,52.13,49.75,49.88,1309200,1.98
ABCSE,BYX,1982-12-13,50.88,51.38,50.63,51.38,502800,2.04
ABCSE,BYX,1982-12-10,50.75,51.00,50.50,50.88,780000,2.02
ABCSE,BYX,1982-12-09,52.00,52.00,50.50,50.75,1753200,2.02
ABCSE,BYX,1982-12-08,54.25,54.25,53.00,53.00,3024000,2.11
ABCSE,BYX,1982-12-07,55.00,55.00,54.00,54.25,1142400,2.16
ABCSE,BYX,1982-12-06,55.13,55.50,54.75,55.00,1249200,2.19
ABCSE,BYX,1982-12-03,54.38,55.50,54.38,55.13,1095600,2.19
ABCSE,BYX,1982-12-02,54.75,54.75,54.00,54.25,489600,2.16
ABCSE,BYX,1982-12-01,52.63,54.75,52.63,54.75,984000,2.18
ABCSE,BYX,1982-11-30,50.75,52.63,50.13,52.25,1442400,2.08
ABCSE,BYX,1982-11-29,51.63,51.75,50.00,50.75,1640400,2.02
ABCSE,BYX,1982-11-26,51.50,52.25,51.50,51.63,499200,2.05
ABCSE,BYX,1982-11-24,50.63,51.13,50.63,51.00,1622400,2.03
ABCSE,BYX,1982-11-23,51.13,51.13,50.00,50.63,866400,2.01
ABCSE,BYX,1982-11-22,53.25,53.63,51.25,51.25,657600,2.04
ABCSE,BYX,1982-11-19,53.75,54.25,53.25,53.25,472800,2.12
ABCSE,BYX,1982-11-18,54.25,54.38,53.00,53.75,622800,2.14
ABCSE,BYX,1982-11-17,53.00,54.25,53.00,54.25,783600,2.13
ABCSE,BYX,1982-11-16,53.25,53.25,52.25,53.00,962400,2.08
ABCSE,BYX,1982-11-15,53.75,53.88,52.50,53.50,500400,2.10
ABCSE,BYX,1982-11-12,53.50,54.25,53.50,53.75,1256400,2.11
ABCSE,BYX,1982-11-11,53.00,53.75,52.88,53.50,1027200,2.10
ABCSE,BYX,1982-11-10,54.25,54.88,52.00,53.00,2032800,2.08
ABCSE,BYX,1982-11-09,55.25,55.25,54.00,54.25,1650000,2.13
ABCSE,BYX,1982-11-08,56.88,56.88,55.25,55.25,1017600,2.17
ABCSE,BYX,1982-11-05,57.00,57.00,56.25,56.88,1273200,2.23
----------------------------------------
hadoop fs -mkdir /stockinput
hadoop fs -put /home/training/Desktop/stock.txt /stockinput/stock.txt
hadoop jar /home/training/Desktop/stock.jar MaxClosePrice /stockinput/stock.txt /stockoutput/
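For the sample rows above, the job should emit one line per stock symbol with its highest close price (the seventh field), roughly:

B7J 8.89
BYX 56.88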
----------------------------------------
LogMapper.java
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class LogMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        // The client IP is the first space-separated token of a log line;
        // the rest of the line (including the quoted request, which itself
        // contains spaces) is ignored, so splitting on a single space is safe.
        String[] s = value.toString().split(" ");
        String ip = s[0];
        output.collect(new Text(ip), new IntWritable(1));
    }
}
----------------------------------------
LogReducer.java
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class LogReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        // Sum the hit count for this IP address.
        int count = 0;
        while (values.hasNext()) {
            count += values.next().get();
        }
        output.collect(key, new IntWritable(count));
    }
}
----------------------------------------
ProcessLogs.java
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class ProcessLogs extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Usage: ProcessLogs <input path> <output path>");
            return -1;
        }
        JobConf conf = new JobConf(ProcessLogs.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
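        // Optional sketch, an assumption rather than part of the original
        // listing: the FileSystem import above suggests clearing a
        // pre-existing output directory so re-runs do not fail with
        // "output directory already exists".
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path(args[1]);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }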
        conf.setMapperClass(LogMapper.class);
        conf.setReducerClass(LogReducer.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        JobClient.runJob(conf);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new ProcessLogs(), args);
        System.exit(exitCode);
    }
}
----------------------------------------
webLog.txt
96.7.8.17 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
96.7.1.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
96.7.2.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
192.168.1.1 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
96.7.4.16 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
91.75.5.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
98.21.6.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
162.15.16.1 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
8.8.8.8 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
10.99.99.247 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
96.7.1.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
96.7.1.14 - - [24/Apr/2011:04:20:11 -0400] "GET /cat.jpg HTTP/1.1" 200 12433
hadoop fs -mkdir /loginput
hadoop fs -put /home/training/Desktop/webLog.txt /loginput/webLog.txt
hadoop jar /home/training/Desktop/log.jar ProcessLogs /loginput/webLog.txt /logoutput1
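Assuming a single reducer, every IP in the twelve sample lines should come out with a count of 1, except 96.7.1.14, which appears three times:

10.99.99.247 1
162.15.16.1 1
192.168.1.1 1
8.8.8.8 1
91.75.5.14 1
96.7.1.14 3
96.7.2.14 1
96.7.4.16 1
96.7.8.17 1
98.21.6.14 1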
-----
hive
create database demo;
use demo;
create table student(name string, rollno string, age int) row format delimited fields terminated by ',';
load data local inpath '/home/training/Desktop/student1' into table student;
select * from student;
load data local inpath '/home/training/Desktop/student2' into table student;
select * from student;
describe student;
describe extended student;
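Both load commands assume comma-delimited files that match the table schema; a hypothetical student1 could contain rows such as:

rahul,101,21
priya,102,22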
-----
hive
create database part;
use part;
create table student(name string, rollno int, percentage float) partitioned by(state string, city string) row format delimited fields terminated by ',';
load data local inpath '/home/training/Desktop/maharastra' into table student partition(state='maharastra', city='mumbai');
select * from student;
load data local inpath '/home/training/Desktop/karnataka' into table student partition(state='karnataka', city='banglore');
select * from student;
select * from student limit 6;
select * from student where state='maharastra';
dynamic partitioning:
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
create table stu(name string, rollno int, percentage float, state string, city string) row format delimited fields terminated by ',';
load data local inpath '/home/training/Desktop/result1' into table stu;
create table stud_part(name string, rollno int, percentage float) partitioned by(state string, city string) row format delimited fields terminated by ',';
insert overwrite table stud_part partition(state, city) select name, rollno, percentage, state, city from stu;
select * from stud_part where city='tumkur';
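To verify that the insert created one partition per distinct (state, city) pair in the source table:

show partitions stud_part;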
----------------------------------------
hbase shell
status 'simple'
status 'detailed'
create 'emp', 'personal data', 'professional data'
list
describe 'emp'
disable 'emp'
enable 'emp'
disable 'emp'
drop 'emp'
create 'emp', 'personal data', 'professional data'
scan 'emp'
count 'emp'
put 'emp', '1', 'personal data:name', 'raju'
put 'emp', '1', 'personal data:city', 'hyderabad'
put 'emp', '1', 'professional data:designation', 'manager'
put 'emp', '1', 'professional data:salary', '50000'
scan 'emp'
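To read back a single row instead of scanning the whole table:

get 'emp', '1'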
-----