This article explains how to add test data to an HBase table, a step that trips up many people in practice. We will create a table, insert test data into it, and then write that data out to a file on HDFS with a MapReduce job. Read through carefully and you should be able to reproduce every step.
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HBaseAndMapReduce1 {

    public static void main(String[] args) throws Exception {
        // Prepare the test data first, then run the export job.
        testData();
        System.exit(run());
    }
    public static int run() throws IOException, ClassNotFoundException, InterruptedException {
        // Initialize the HBase configuration.
        Configuration conf = new Configuration();
        conf = HBaseConfiguration.create(conf);
        conf.set("hbase.zookeeper.quorum", "192.168.226.129");

        Job job = Job.getInstance(conf, "searchPerson");
        job.setJarByClass(HBaseAndMapReduce1.class);

        // Instantiate the Scan object.
        Scan scan = new Scan();
        // Restrict the scan to the columns we actually need: article:tags and author:nickname.
        scan.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"));
        scan.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
        /*
         * For reference, the relevant overload in TableMapReduceUtil
         * (TableMapper is the base class for mappers whose input comes from HBase):
         *
         * public static void initTableMapperJob(String table, Scan scan,
         *         Class<? extends TableMapper> mapper,
         *         Class<?> outputKeyClass,
         *         Class<?> outputValueClass, Job job) throws IOException {
         *     initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job, true);
         * }
         */

        /*
         * Result has an instance method getFamilyMap(byte[] family) that returns a map
         * keyed by column qualifier with the cell value as its value -- the same
         * key/value shape that a Hadoop map task emits.
         */
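        /*
         * Illustrative sketch only (not part of this job): reading one column family
         * out of a Result via getFamilyMap.
         *
         * NavigableMap<byte[], byte[]> articleCols = result.getFamilyMap(Bytes.toBytes("article"));
         * for (Map.Entry<byte[], byte[]> e : articleCols.entrySet()) {
         *     System.out.println(Bytes.toString(e.getKey()) + " = " + Bytes.toString(e.getValue()));
         * }
         */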
        TableMapReduceUtil.initTableMapperJob("blog", scan, FindFriendMapper.class,
                ImmutableBytesWritable.class, Result.class, job);
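        // initTableMapperJob wires up the HBase input format, serializes the Scan into
        // the job configuration, and registers the map output key/value classes given
        // above. No reducer is set, so the identity reduce phase runs by default;
        // calling job.setNumReduceTasks(0) would make this a pure map-only job
        // (an optional tweak, not in the original code).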
        // Write the output to a timestamped HDFS directory so repeated runs do not collide.
        DateFormat df = new SimpleDateFormat("yyyyMMddHHmmssS");
        FileOutputFormat.setOutputPath(job, new Path(
                "hdfs://192.168.226.129:9000/hbasemapreduce/" + df.format(new Date()) + "/"));

        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static class FindFriendMapper extends TableMapper<ImmutableBytesWritable, Result> {

        // key:   the HBase row key
        // value: all cells the scan returned for that row key
        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context)
                throws IOException, InterruptedException {
            // System.out.println("key-->" + Bytes.toString(key.get()) + "---> " + key);
            /*
             * A Cell is the storage unit uniquely identified by
             * {row key, column (= <family> + <qualifier>), version}.
             * Cell data is untyped: everything is stored as raw bytes.
             */
            List<Cell> cs = value.listCells(); // equivalent to Arrays.asList(value.rawCells())
            for (Cell cell : cs) {
                System.out.println("Cell--->" + cell);
                // Decompose each cell into its parts:
                String rowKey = Bytes.toString(CellUtil.cloneRow(cell));
                long timestamp = cell.getTimestamp();
                String family = Bytes.toString(CellUtil.cloneFamily(cell));
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                String val = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.println("RowKey=" + rowKey + ", Timestamp=" + timestamp +
                        ", Family=" + family + ", Qualifier=" + qualifier + ", Value=" + val + "\n");
            }
            System.out.println("key---> " + key + "\nvalue---> " + value);
            // The default Mapper.map() writes (key, value) straight to the context,
            // so this emits the row unchanged.
            super.map(key, value, context);
        }
    }
    public static void testData() {
        try {
            // Initialize the HBase configuration.
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "192.168.226.129");
            Connection con = ConnectionFactory.createConnection(conf);
            Admin admin = con.getAdmin();

            // Drop and recreate the table so each run starts from a clean slate.
            TableName tn = TableName.valueOf("blog");
            if (admin.tableExists(tn)) {
                admin.disableTable(tn);
                admin.deleteTable(tn);
            }

            // Build the table descriptor: the table name plus its column families.
            HTableDescriptor htd = new HTableDescriptor(tn);
            HColumnDescriptor hcd1 = new HColumnDescriptor("article");
            HColumnDescriptor hcd2 = new HColumnDescriptor("author");
            htd.addFamily(hcd1);
            htd.addFamily(hcd2);

            // Create the table.
            admin.createTable(htd);
            Table table = con.getTable(tn);
            // The Put constructor takes the row key.
            Put put = new Put(Bytes.toBytes("rowkey1"));
            // Put.addColumn(byte[] family, byte[] qualifier, byte[] value)
            put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("content"), Bytes.toBytes("HBase is the Hadoop database"));
            put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"), Bytes.toBytes("Hadoop,Zookeeper,HBase"));
            put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("title"), Bytes.toBytes("Head First Hbase"));
            put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("name"), Bytes.toBytes("Berg"));
            put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("BergBerg"));

            Put put2 = new Put(Bytes.toBytes("rowkey2"));
            put2.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"), Bytes.toBytes("Hadoop"));
            put2.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("Berg-OSChina"));

            Put put3 = new Put(Bytes.toBytes("rowkey3"));
            put3.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"), Bytes.toBytes("Zookeeper,HBase"));
            put3.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"), Bytes.toBytes("OSChina"));

            // Table.put(List<Put>) sends all three rows in a single batch.
            List<Put> puts = Arrays.asList(put, put2, put3);
            table.put(puts);
            System.out.println("\n\n*************** Test data ready ****************\n");
            // Release resources (the original omitted table.close(), which leaks a connection).
            table.close();
            if (admin != null) {
                admin.close();
            }
            if (con != null) {
                con.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
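After the job finishes, you can confirm everything worked by listing the timestamped output directory on HDFS, or by scanning the table directly. Below is a minimal verification sketch; it assumes the same ZooKeeper quorum address as above, and the scan-and-print loop is illustrative rather than part of the original program (it also needs org.apache.hadoop.hbase.client.ResultScanner on top of the imports already listed):

// Scan the blog table and print every cell, to confirm the test rows exist.
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.226.129");
try (Connection con = ConnectionFactory.createConnection(conf);
        Table table = con.getTable(TableName.valueOf("blog"));
        ResultScanner scanner = table.getScanner(new Scan())) {
    for (Result r : scanner) {
        for (Cell cell : r.rawCells()) {
            System.out.println(Bytes.toString(CellUtil.cloneRow(cell)) + " -> "
                    + Bytes.toString(CellUtil.cloneFamily(cell)) + ":"
                    + Bytes.toString(CellUtil.cloneQualifier(cell)) + " = "
                    + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}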
That covers how to add test data to an HBase table. Thanks for reading.