Spring集成Hadoop和Hbase

Java框架

浏览数:104

2019-8-22

AD:资源代下载服务

Hadoop是大数据环境下必备的一套系统,使用Hadoop集群可以充分地共享服务器资源,在离线处理上已经有了多年的应用。

Spring Hadoop简化了Apache Hadoop,提供了一个统一的配置模型以及简单易用的API来使用HDFS、MapReduce、Pig以及Hive。还集成了其它Spring生态系统项目,如Spring Integration和Spring Batch。

Spring Hadoop2.5的官方文档及API地址:

spring-hadoop文档

spring-hadoop API

Spring Hadoop

  1. 添加仓库,配置依赖
<!-- Maven POM fragment for the Spring Hadoop example: extra repository, shared properties, dependencies. -->
<repositories>
    <!-- Cloudera artifact repository: release artifacts enabled, snapshots disabled. -->
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
  </repositories>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Referenced below as ${hadoop.version} by the hadoop-client dependency. -->
    <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
  </properties>

  <dependencies>
    <!-- Hadoop client libraries; "provided" scope keeps them out of the packaged artifact. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
      <scope>provided</scope>
    </dependency>
    <!-- Dependency for User-Agent parsing -->
    <dependency>
      <groupId>com.kumkee</groupId>
      <artifactId>UserAgentParser</artifactId>
      <version>0.0.1</version>
    </dependency>
    <!-- JUnit, test scope only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <!-- Spring Hadoop dependency -->
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-hadoop</artifactId>
      <version>2.5.0.RELEASE</version>
    </dependency>
  </dependencies>
  2. 在Spring的配置文件中添加hadoop配置
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:hdp="http://www.springframework.org/schema/hadoop"
      xmlns:context="http://www.springframework.org/schema/context"
      xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
       http://www.springframework.org/schema/hadoop
       http://www.springframework.org/schema/hadoop/spring-hadoop.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd">
   <!-- Spring context for the HDFS example: declares the Hadoop configuration and the "fileSystem" bean. -->
   ......
   
   <!-- Load the properties file so that ${...} placeholders below can be resolved -->
   <context:property-placeholder location="application.properties"/>
   <hdp:configuration id="hadoopConfiguration">
       <!-- Default file system URI (the Hadoop server URL), taken from the spring.hadoop.fsUri property -->
       fs.defaultFS=${spring.hadoop.fsUri}
   </hdp:configuration>
   <!-- Wire the file system bean against the configuration above and set the user it operates as -->
   <hdp:file-system id="fileSystem" configuration-ref="hadoopConfiguration" user="root"/>
</beans>

这里只需配置Hadoop服务器的URL(fs.defaultFS)并加载属性文件。

然后再创建一个属性文件application.properties,添加hadoop配置信息,例如:spring.hadoop.fsUri=hdfs://<NameNode主机>:<端口>。

  3. test
/**
 * JUnit test that boots the Spring context described by
 * {@code applicationContext.xml}, looks up the {@code fileSystem} bean and
 * uses it to create a directory.
 */
public class SpringHadoopApp {

    // Held as the concrete context type so that close() is reachable in tearDown().
    private ClassPathXmlApplicationContext ctx;
    private FileSystem fileSystem;

    /**
     * Starts the application context and fetches the {@code fileSystem} bean.
     */
    @Before
    public void setUp() {
        ctx = new ClassPathXmlApplicationContext("applicationContext.xml");
        fileSystem = (FileSystem) ctx.getBean("fileSystem");
    }

    /**
     * Releases the file system and the application context.
     *
     * <p>Both references are null-checked because JUnit still runs this method
     * when {@link #setUp()} failed part-way, and a NullPointerException here
     * would obscure the original failure.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void tearDown() throws IOException {
        try {
            if (fileSystem != null) {
                fileSystem.close();
            }
        } finally {
            fileSystem = null;
            // Dropping the reference alone would leak the context; close it
            // explicitly, even when closing the file system threw.
            if (ctx != null) {
                ctx.close();
                ctx = null;
            }
        }
    }

    /**
     * Creates the directory {@code /SpringHDFS/} and fails if the file system
     * reports that it could not be created.
     *
     * @throws Exception if the file system call fails
     */
    @Test
    public void testMkdirs() throws Exception {
        Path dir = new Path("/SpringHDFS/");
        boolean created = fileSystem.mkdirs(dir);
        if (!created) {
            throw new AssertionError("mkdirs returned false for " + dir);
        }
    }
}

或者可以采用直接加载hadoop的配置文件的方式进行配置
将<HADOOP_DIR>/etc/hadoop/core-site.xml和<HADOOP_DIR>/etc/hadoop/hdfs-site.xml拷贝过来进行配置

Spring Data Hbase

  1. 添加依赖
<!-- Maven dependencies for the HBase example. -->
<!-- NOTE(review): hadoop-auth declares no <version>; it has to be supplied by dependencyManagement or a parent POM. Confirm. -->
<dependency> 
        <groupId>org.apache.hadoop</groupId> 
        <artifactId>hadoop-auth</artifactId> 
    </dependency> 
    <!-- HBase client, with log4j and the slf4j-log4j12 binding excluded (presumably so the application's own logging setup is used; confirm). -->
    <dependency> 
        <groupId>org.apache.hbase</groupId> 
        <artifactId>hbase-client</artifactId> 
        <version>1.2.3</version> 
        <scope>compile</scope> 
        <exclusions> 
            <exclusion> 
                <groupId>log4j</groupId> 
                <artifactId>log4j</artifactId> 
            </exclusion> 
            <exclusion> 
                <groupId>org.slf4j</groupId> 
                <artifactId>slf4j-log4j12</artifactId> 
            </exclusion> 
        </exclusions> 
    </dependency> 
  2. 拷贝Hbase配置文件,整合applicationContext.xml

将HBase的配置文件hbase-site.xml复制到resources下,新建Spring配置文件applicationContext.xml

<?xml version="1.0" encoding="UTF-8"?> 
<beans xmlns="http://www.springframework.org/schema/beans" 
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
       xmlns:context="http://www.springframework.org/schema/context" 
       xmlns:hdp="http://www.springframework.org/schema/hadoop" 
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd 
    http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd 
    http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd"> 
 
    <!-- Spring context for the HBase example: Hadoop/HBase configuration plus an HbaseTemplate bean. -->
    <!-- Enable annotation-driven configuration and scan com.sample.hbase for components. -->
    <context:annotation-config/> 
    <context:component-scan base-package="com.sample.hbase"/> 
    <!-- Hadoop configuration built from hbase-site.xml. No id is declared here; the element below refers to it as
         "hadoopConfiguration", which presumably is the namespace's default bean id. Confirm against the Spring Hadoop reference. -->
    <hdp:configuration resources="hbase-site.xml"/> 
    <!-- HBase configuration derived from the Hadoop configuration above. -->
    <hdp:hbase-configuration configuration-ref="hadoopConfiguration"/> 
    <!-- HbaseTemplate wired with the "hbaseConfiguration" bean (no explicit id above; presumably the default id of hdp:hbase-configuration). -->
    <bean id="hbaseTemplate" class="org.springframework.data.hadoop.hbase.HbaseTemplate"> 
        <property name="configuration" ref="hbaseConfiguration"/> 
    </bean> 
</beans> 

配置HbaseTemplate,和hbase配置文件位置

  3. test
/**
 * Spring-driven JUnit test that reads from and writes to an HBase table
 * through the injected {@link HbaseTemplate}.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath*:applicationContext.xml"})
public class BaseTest {

    // Table coordinates shared by both tests.
    private static final String TABLE = "user";
    private static final String FAMILY = "cf";
    private static final String QUALIFIER = "name";

    @Autowired
    private HbaseTemplate template;

    /**
     * Queries the table for the family/qualifier pair, maps every result to
     * its string form and checks that a list comes back.
     */
    @Test
    public void testFind() {
        RowMapper<String> asText = new RowMapper<String>() {
            public String mapRow(Result row, int rowNum) throws Exception {
                return row.toString();
            }
        };

        List<String> found = template.find(TABLE, FAMILY, QUALIFIER, asText);

        Assert.assertNotNull(found);
    }

    /**
     * Writes the value "Alice" into row "xiaogao" of the table.
     */
    @Test
    public void testPut() {
        byte[] value = Bytes.toBytes("Alice");
        template.put(TABLE, "xiaogao", FAMILY, QUALIFIER, value);
    }
}

作者:张晓天a