
Hadoop accessing a MySQL database (1): a simple access example

Question: when running a jar you compiled yourself, do you first have to copy the jar to the same path on every node (whether via HDFS or via scp)?

A few notes:

1. To avoid problems with Chinese text, configure UTF-8 as the default character set first.

2. The JDBC driver jar can either be placed on every node, or put under /lib on HDFS.

3. If necessary, grant the MySQL user brian (password 123456) access from the other nodes' IP addresses:

mysql> GRANT ALL ON *.* TO 'brian'@'172.19.32.108' IDENTIFIED BY '123456';

-------------------------

This post first reproduces the example from the referenced 《hadoop开发者》 (Hadoop Developer) article on a two-node Hadoop cluster.

On the .12 machine (172.19.102.12):

1. Configure UTF-8 as the default encoding

In my.cnf:

under [mysqld] add

character_set_server=utf8    (for newer MySQL versions)

under [mysql] add

default-character-set=utf8

Restart the MySQL service.

You can then verify that, after logging in to MySQL, Chinese text displays correctly without first running set names utf8;.

2. Create the table in the tmp database:

use tmp;

set names utf8;    (optional, but running it is safer)

DROP TABLE IF EXISTS `tmp`.`teacher`;

CREATE TABLE `tmp`.`teacher` (

`id` int(11) default NULL,

`name` char(20) default NULL,

`age` int(11) default NULL,

`departmentID` int(11) default NULL

) ENGINE=InnoDB DEFAULT CHARSET=utf8;

insert into teacher(id, name, age, departmentID)

values('101', 'tom', '29', '10');

insert into teacher(id, name, age, departmentID)

values('102', 'tony', '39', '20');

insert into teacher(id, name, age, departmentID)

values('103', 'lily', '26', '20');

insert into teacher(id, name, age, departmentID)

values('104', '张三', '25', '30');

3. Create a mytest directory under ~/hadoop-1.0.3:

mkdir mytest

mkdir mytest/src

mkdir mytest/classes

4. Create the source file:

nano mytest/src/DBAccess2.java

The code:

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.*;                                        // ResultSet, PreparedStatement
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;      // IdentityReducer.class
import org.apache.hadoop.mapred.lib.db.*;                 // DBConfiguration, DBInputFormat, DBWritable
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mysql.jdbc.Driver;

public class DBAccess2 extends Configured implements Tool {

    // One row of the teacher table; implements Writable (for Hadoop I/O)
    // and DBWritable (for reading from / writing to the database).
    public static class TeacherRecord implements Writable, DBWritable {   // add "static" ??
        int id;
        String name;
        int age;
        int departmentID;

        public TeacherRecord() {                                          // add by brian
            System.out.println("TeacherRecord()");
        }

        public TeacherRecord(TeacherRecord t) {                           // add by brian
            System.out.println("TeacherRecord(TeacherRecord t)");
            this.id = t.id;
            this.name = t.name;
            this.age = t.age;
            this.departmentID = t.departmentID;
        }

        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt();
            this.name = Text.readString(in);
            this.age = in.readInt();
            this.departmentID = in.readInt();
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
            out.writeInt(this.age);
            out.writeInt(this.departmentID);
        }

        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
            this.age = result.getInt(3);
            this.departmentID = result.getInt(4);
        }

        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
            stmt.setInt(3, this.age);
            stmt.setInt(4, this.departmentID);
        }

        public String toString() {
            return new String(this.name + "," + this.age + "," + this.departmentID);
        }
    }

    // Emits (id, "name,age,departmentID") for every row read by DBInputFormat.
    public static class DBAccessMapper extends MapReduceBase
            implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

        private final static IntWritable uno = new IntWritable(1);        // not used
        private IntWritable citationCount = new IntWritable();            // not used

        public void map(LongWritable key, TeacherRecord value,
                OutputCollector<LongWritable, Text> collector, Reporter reporter)
                throws IOException {
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DBAccess2.class);
        job.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");
        String[] fields = {"id", "name", "age", "departmentID"};
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);
        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DBAccess2(), args);
        System.exit(res);
    }
}

5. Copy mysql-connector-java-5.1.18-bin.jar into the hadoop-1.0.3/lib directory.
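As an extra sanity check (not part of the original walkthrough), the driver and the brian grant can be tested from this node with a tiny standalone JDBC program before involving MapReduce at all. This is only a sketch: the class name JdbcCheck and the useUnicode/characterEncoding URL parameters are my own additions.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Minimal standalone check: connect to the tmp database with the same driver,
// URL and credentials the MapReduce job will use, and dump the teacher table.
public class JdbcCheck {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");            // same driver class as the job
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://172.19.102.12/tmp?useUnicode=true&characterEncoding=UTF-8",
                "brian", "123456");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT id, name, age, departmentID FROM teacher");
        while (rs.next()) {
            System.out.println(rs.getInt(1) + "\t" + rs.getString(2)
                    + "," + rs.getInt(3) + "," + rs.getInt(4));
        }
        rs.close();
        st.close();
        conn.close();
    }
}

Run it with the driver jar on the classpath, e.g. java -cp .:lib/mysql-connector-java-5.1.18-bin.jar JdbcCheck; if the 张三 row prints correctly, both the grant and the UTF-8 setup are fine.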

6. Compile and run:

javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java

jar -cvf mytest/DBAccess2.jar -C mytest/classes/ .

bin/hadoop fs -rmr dboutput    # the output path is specified in the code

time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

But the run failed with the following error:

huangshaobin@backtest12:~/hadoop-1.0.3$ time bin/hadoop jar mytest/DBAccess2.jar DBAccess2
12/09/21 17:20:25 INFO mapred.JobClient: Running job: job_201209201824_0008
12/09/21 17:20:26 INFO mapred.JobClient:  map 0% reduce 0%

12/09/21 17:20:38 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000000_0, Status : FAILED

java.lang.RuntimeException: Error in configuring object

at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)

at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)

at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)

at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)

at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:197)

at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:418)

at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)

at org.apache.hadoop.mapred.Child$4.run(Child.java:255)

at java.security.AccessController.doPrivileged(Native Method)

at javax.security.auth.Subject.doAs(Subject.java:416)

at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)

at org.apache.hadoop.mapred.Child.main(Child.java:249)

Caused by: java.lang.reflect.InvocationTargetException

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)

at java.lang.reflect.Method.invoke(Method.java:616)

at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)

... 11 more

Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)

... 16 more

Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

at java.net.URLClassLoader$1.run(URLClassLoader.java:217)

at java.security.AccessController.doPrivileged(Native Method)

at java.net.URLClassLoader.findClass(URLClassLoader.java:205)

at java.lang.ClassLoader.loadClass(ClassLoader.java:321)

at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:294)

at java.lang.ClassLoader.loadClass(ClassLoader.java:266)

at java.lang.Class.forName0(Native Method)

at java.lang.Class.forName(Class.java:186)

at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)

at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)

...16 more

12/09/21 17:20:41 INFO mapred.JobClient: Task Id : attempt_201209201824_0008_m_000001_0, Status : FAILED

java.lang.RuntimeException: Error in configuring object

at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93)

at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)

at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)

at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)

at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:197)

at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:418)

at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)

at org.apache.hadoop.mapred.Child$4.run(Child.java:255)

at java.security.AccessController.doPrivileged(Native Method)

at javax.security.auth.Subject.doAs(Subject.java:396)

at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)

at org.apache.hadoop.mapred.Child.main(Child.java:249)

Caused by: java.lang.reflect.InvocationTargetException

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)

at java.lang.reflect.Method.invoke(Method.java:597)

at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)

... 11 more

Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:271)

... 16 more

Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

at java.net.URLClassLoader$1.run(URLClassLoader.java:202)

at java.security.AccessController.doPrivileged(Native Method)

at java.net.URLClassLoader.findClass(URLClassLoader.java:190)

at java.lang.ClassLoader.loadClass(ClassLoader.java:307)

at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)

at java.lang.ClassLoader.loadClass(ClassLoader.java:248)

at java.lang.Class.forName0(Native Method)

at java.lang.Class.forName(Class.java:169)

at org.apache.hadoop.mapred.lib.db.DBConfiguration.getConnection(DBConfiguration.java:123)

at org.apache.hadoop.mapred.lib.db.DBInputFormat.configure(DBInputFormat.java:266)

...16 more

7. Copy the JDBC driver to the other node:

huangshaobin@backtest12:~/hadoop-1.0.3$ scp lib/mysql-connector-java-5.1.18-bin.jar backtest11:/home/huangshaobin/hadoop-1.0.3/lib

mysql-connector-java-5.1.18-bin.jar 100% 771KB 771.4KB/s 00:00

Run again:

bin/hadoop fs -rmr dboutput

time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

The same error still appears, even though everything worked in pseudo-distributed mode??? (Presumably because there the tasks all run on this one machine, which already had the driver in lib/ on the classpath when the daemons were started.)

Copy the job jar to the other node as well:

huangshaobin@backtest12:~/hadoop-1.0.3$ scp -r mytest/ backtest11:/home/huangshaobin/hadoop-1.0.3

Run again:

bin/hadoop fs -rmr dboutput

time bin/hadoop jar mytest/DBAccess2.jar DBAccess2

Still the same error! Could the first failure have been a Java version issue? Either way, the JDBC driver still cannot be found!

Delete the JDBC driver under lib/ on node 11, and delete mytest/DBAccess2.jar there as well. Then, in the code, add the following two lines right after JobConf job = new JobConf(conf, DBAccess2.class);:

job.set("mapred.job.tracker", "172.19.102.12:9001");

DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);

and add the import org.apache.hadoop.filecache.DistributedCache;.

Copy the JDBC driver to HDFS:

bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar

Recompile:

huangshaobin@backtest12:~/hadoop-1.0.3$ javac -classpath hadoop-core-1.0.3.jar:lib/commons-cli-1.2.jar:lib/mysql-connector-java-5.1.18-bin.jar -d mytest/classes/ mytest/src/DBAccess2.java

Note: mytest/src/DBAccess2.java uses or overrides a deprecated API.

Note: Recompile with -Xlint:deprecation for details.

What do these two notes mean? They just say the code uses an API that is marked deprecated; ignore them for now.

Run again, and this time it succeeds!!

So,

job.set("mapred.job.tracker", "172.19.102.12:9001");

DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);

these two lines are crucial!! But there is no need to copy DBAccess2.jar to the other nodes: the job jar is shipped to each node automatically (the JobClient uploads it to the job's staging area on HDFS, and each task node fetches it from there).

The imports at the top of the file were also messy; after tidying those up as well, the final code looks like this:

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

public class DBAccess2 extends Configured implements Tool {

    public static class TeacherRecord implements Writable, DBWritable {   // add "static" ??
        int id;
        String name;
        int age;
        int departmentID;

        public TeacherRecord() {                                          // add by brian
            System.out.println("TeacherRecord()");
        }

        public TeacherRecord(TeacherRecord t) {                           // add by brian
            System.out.println("TeacherRecord(TeacherRecord t)");
            this.id = t.id;
            this.name = t.name;
            this.age = t.age;
            this.departmentID = t.departmentID;
        }

        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt();
            this.name = Text.readString(in);
            this.age = in.readInt();
            this.departmentID = in.readInt();
        }

        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
            out.writeInt(this.age);
            out.writeInt(this.departmentID);
        }

        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
            this.age = result.getInt(3);
            this.departmentID = result.getInt(4);
        }

        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
            stmt.setInt(3, this.age);
            stmt.setInt(4, this.departmentID);
        }

        public String toString() {
            return new String(this.name + "," + this.age + "," + this.departmentID);
        }
    }

    public static class DBAccessMapper extends MapReduceBase
            implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

        private final static IntWritable uno = new IntWritable(1);        // not used
        private IntWritable citationCount = new IntWritable();            // not used

        public void map(LongWritable key, TeacherRecord value,
                OutputCollector<LongWritable, Text> collector, Reporter reporter)
                throws IOException {
            collector.collect(new LongWritable(value.id), new Text(value.toString()));
        }
    }

    public int run(String[] args) throws Exception {

        Configuration conf = getConf();
        JobConf job = new JobConf(conf, DBAccess2.class);
        job.set("mapred.job.tracker", "172.19.102.12:9001");
        DistributedCache.addFileToClassPath(
                new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
        job.setInputFormat(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("dboutput"));
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.19.102.12/tmp", "brian", "123456");

        String[] fields = {"id", "name", "age", "departmentID"};
        DBInputFormat.setInput(job, TeacherRecord.class, "teacher", null, "id", fields);

        job.setMapperClass(DBAccessMapper.class);
        job.setReducerClass(IdentityReducer.class);
        JobClient.runJob(job);

        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new DBAccess2(), args);
        System.exit(res);
    }
}

The output is:

huangshaobin@backtest12:~/hadoop-1.0.3$ bin/hadoop fs -cat dboutput/*

101 tom,29,10

102 tony,39,20

103 lily,26,20

104 张三,25,30

Summary of the steps:

1. In the code, specify the jobtracker and the extra JDBC driver jar:

job.set("mapred.job.tracker", "172.19.102.12:9001");

DistributedCache.addFileToClassPath(new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);

2. Compile the code.

3. Upload the extra JDBC jar to the /lib directory on HDFS:

bin/hadoop fs -put lib/mysql-connector-java-5.1.18-bin.jar /lib/mysql-connector-java-5.1.18-bin.jar

4. Run the job; it succeeds.

In addition, the DistributedCache class documentation at http://hadoop.apache.org/docs/r1.0.3/api/index.html describes its other methods and gives usage examples; a small sketch of the common patterns follows.
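For reference, here is a minimal sketch of the two DistributedCache patterns relevant here, using the old mapred API. The /cache/stopwords.txt path and the class names are made-up placeholders; the addFileToClassPath call mirrors the one used in the job above.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Sketch of the two common DistributedCache patterns (illustrative paths only).
public class CacheSketch {

    // Called while building the JobConf, before JobClient.runJob(job).
    public static void attachSideData(JobConf job) throws IOException {
        // 1) a plain side file: localized onto every task node
        DistributedCache.addCacheFile(URI.create("/cache/stopwords.txt"), job);

        // 2) a jar dependency: localized AND added to the task classpath --
        //    this is the call that fixed the ClassNotFoundException above
        DistributedCache.addFileToClassPath(
                new Path("/lib/mysql-connector-java-5.1.18-bin.jar"), job);
    }

    // Inside a mapper or reducer, the localized copies of pattern (1) can be found like this.
    public static class SideDataMapper extends MapReduceBase {
        private Path[] localFiles;

        @Override
        public void configure(JobConf job) {
            try {
                // each entry is a local filesystem path on the task node
                localFiles = DistributedCache.getLocalCacheFiles(job);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }
}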

References:

《hadoop开发者》第二期: 访问数据库 (Hadoop Developer, Issue 2: Accessing Databases)
