博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
HBase批量导入数据应用
阅读量:6227 次
发布时间:2019-06-21

本文共 5318 字,大约阅读时间需要 17 分钟。

1.创建一个hadoop项目(导入hadoop相关包,hbase-0.90.5.jar,zookeeper-3.3.2.jar)
2.创建测试数据(2.txt 上传至hdfs://127.0.0.1:9000/tmp/2.txt)
1150,content,email,xx@gmail.com
1152,content,email,xx@cc.com
3.创建Hbase表(input_tb)
hbase shell>create 'input_tb','content'      ---input_tb为表名,content为列族(column family)名
4.导入数据代码
SampleUploader.java
/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Sample Uploader MapReduce
 * <p>
 * This is EXAMPLE code.  You will need to change it to work for your context.
 * <p>
 * Uses TableReducer} to put the data into HBase. Change the InputFormat
 * to suit your data.  In this example, we are importing a CSV file.
 * <p>
 * <pre>row,family,qualifier,value</pre>
 * <p>
 * The table and columnfamily we're to insert into must preexist.
 * <p>
 * There is no reducer in this example as it is not necessary and adds
 * significant overhead.  If you need to do any massaging of data before
 * inserting into HBase, you can do this in the map as well.
 * <p>Do the following to start the MR job:
 * <pre>
 * ./bin/hadoop org.apache.hadoop.hbase.mapreduce.SampleUploader /tmp/input.csv TABLE_NAME
 * </pre>
 * <p>
 * This code was written against HBase 0.21 trunk.
 */
public class SampleUploader {
  private static final String NAME = "SampleUploader";
 
  static class Uploader
  extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    private long checkpoint = 100;
    private long count = 0;
   
    @Override
    public void map(LongWritable key, Text line, Context context)
    throws IOException {
     
      // Input is a CSV file
      // Each map() is a single line, where the key is the line number
      // Each line is comma-delimited; row,family,qualifier,value
           
      // Split CSV line
       java.util.Random r=new java.util.Random();
      
      String [] values = line.toString().split(",");
      System.out.println(r.nextInt());
     System.out.println(line.toString());
      if(values.length != 4) {
        return;
      }
     
      // Extract each value
      byte [] row = Bytes.toBytes(values[0]);
      byte [] family = Bytes.toBytes(values[1]);
      byte [] qualifier = Bytes.toBytes(values[2]);
      byte [] value = Bytes.toBytes(values[3]);
     
      // Create Put
      Put put = new Put(row);
      put.add(family, qualifier, value);
     
      // Uncomment below to disable WAL. This will improve performance but means
      // you will experience data loss in the case of a RegionServer crash.
      // put.setWriteToWAL(false);
     
      try {
        context.write(new ImmutableBytesWritable(row), put);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
     
      // Set status every checkpoint lines
      if(++count % checkpoint == 0) {
        context.setStatus("Emitting Put " + count);
      }
    }
  }
 
  /**
   * Job configuration.
   */
  public static Job configureJob(Configuration conf, String [] args)
  throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);    
    job.setMapperClass(Uploader.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
  }
  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if(otherArgs.length != 2) {
      System.err.println("Wrong number of arguments: " + otherArgs.length);
      System.err.println("Usage: " + NAME + " <input> <tablename>");
      System.exit(-1);
    }
    Job job = configureJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
 
5.hbase shell >scan 'input_tb'
即可看到所有导入的记录(2.txt)

转载于:https://www.cnblogs.com/bobsoft/archive/2012/10/07/2714484.html

你可能感兴趣的文章
PHP 反射机制
查看>>
jQuery手风琴效果
查看>>
oracle调度中使用schedule管理调度
查看>>
Ubuntu 14.04 Remmina远程桌面连接Windows计算机
查看>>
php 在linux系统下写出文件问题
查看>>
将EXCEL转为HTML有什么好办法?
查看>>
了解一下Elasticsearch的基本概念
查看>>
二、let变量声明方式介绍
查看>>
iOS逆向:在任意app上开启malloc stack追踪内存来源
查看>>
【BZOJ】4033: [HAOI2015]树上染色 树上背包
查看>>
python学习三:列表、元组、字典、集合
查看>>
iOS中使用UISegmentControl进行UITableView切换
查看>>
自适应响应式,手机,平板,PC,java企业网站源码
查看>>
【CodeForces】835F Roads in the Kingdom
查看>>
2014.4.17—openflow代码流程
查看>>
leetcode-414-Third Maximum Number
查看>>
最新Android开源库、工具、开源项目整理分享
查看>>
Sql 获取当前日期没有时分秒
查看>>
mybatis_mapper动态代理
查看>>
CoreData一些基本概念
查看>>