YDB使用快速入门(QuickStart)

YDB使用快速入门(QuickStart)

第一,方便测试 先伪造一些数据
hadoop fs -rm -r /data/example/ydb/
hadoop fs -mkdir -p /data/example/ydb/
hadoop jar ./lib/ydb-1.1.8-pg.jar cn.net.ycloud.ydb.server.reader.kafka.MrMakeShuData 5000000  /data/example/ydb/1.txt

第二:现在ydb中创建ydb的表
--bin/conn.sh 里面创建表
create ydbtable ydb_example_shu(
phonenum y_charlike_isp,
usernick string,
ydb_sex string,
ydb_province string,
ydb_grade string,
ydb_age string,
ydb_blood string,
ydb_zhiye string,
ydb_earn string,
ydb_prefer string,
ydb_consume string,
ydb_day string,
amtdouble tdouble,
amtlong tlong,
content y_charlike_two_four_isp
)
;

第三:向ydb中导入数据
--直接追加数据
./bin/ydbimport.sh -t ydb_example_shu -p 3000w -tp txt  -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content

--按照cond给的条件进行删除
./bin/ydbimport.sh -cond "ydb_sex='' and ydb_blood='A'" -t ydb_example_shu -p 3000w -tp txt  -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content

--覆盖数据
./bin/ydbimport.sh -overwrite -t ydb_example_shu -p 3000w -tp txt  -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content

参数说明
-t -table    Ydb中的表名
-p -partion    (可选)YDB中的分区名,如果不写,则导入到YDB的默认分区
-local    (可选)表示数据是存储在本地,而非默认存储在HDFS
-tp -type    文件类型  目前支持 txt json
-f -file    文件路径
-fl -fields     (txt格式必选)txt文件对应的YDB列的名字
-sp  -split     (txt格式必选)txt文件对应分隔符,支持正则,不可见字符,可以通过urlencode编码
-ov  -overwrite    (可选)导入前是否清空当前分区的数据
-cond  -condition    (可选)在追加数据前,先根据条件删除旧的数据

第四:查询示例

----count(*)计数
select count(*) from ydb_example_shu where ydbpartion = '3000w'  ;

----多个ydb分区数据一起检索###
select count(*) as cnt from ydb_example_shu where ydbpartion in ('3000w','300winsert','300winsert2','test1','test3') ;

----数据预览
select * from ydb_example_shu where ydbpartion = '3000w' limit 10 ;

----全文检索
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content='王老吉' limit 10 ;

----高性能like匹配(注意:该字段类型为y_charlike类型,所以能保证匹配顺序)
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content like '170*王老吉' limit 10 ;

----高性能not like匹配(注意:该字段类型为y_charlike类型,所以能保证匹配顺序)
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content not like '170*王老吉' and content='王老吉' limit 10 ;

----普通string 类型的like匹配(注意:改字段类型为string类型,本质是暴力扫描,如果该列重复值很低,那么性能并不好)
select ydb_province,count(*) from ydb_example_shu where ydbpartion = '3000w' and ydb_province like '**' group by ydb_province limit 10 ;
----普通string 类型的not like匹配(注意:改字段类型为string类型,本质是暴力扫描,如果该列重复值很低,那么性能并不好)
select ydb_province,count(*) from ydb_example_shu where ydbpartion = '3000w' and ydb_province not like '**' and ydb_province like '**' group by ydb_province limit 10 ;

----多个条件组合过滤
select ydb_sex,ydb_grade,ydb_age,ydb_blood,amtlong from ydb_example_shu where ydbpartion = '3000w' and ydb_sex='' and ydb_grade='本科' and (ydb_age='2030' or ydb_blood='O') and  (amtlong like '([3000 TO 4000] )') limit 10 ;

----sum求和
select sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' ;

----avg求平均数
select avg(amtdouble) as avgamt from ydb_example_shu where ydbpartion = '3000w' ;

----更复杂点的统计
select count(*),count(amtdouble),avg(amtdouble),sum(amtdouble),min(amtdouble),max(amtdouble),min(ydb_province),max(ydb_province) from ydb_example_shu where ydbpartion = '3000w' ;

----单列group by
select ydb_sex,count(*),count(amtdouble),sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' group by ydb_sex limit 10 ;

----多列group by
select ydb_sex,ydb_province,count(*) as cnt,count(amtdouble),sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' group by ydb_sex,ydb_province order by cnt desc limit 10 ;

----top N 排序
select ydb_sex, phonenum,amtlong,amtdouble from ydb_example_shu where ydbpartion='3000w'  order by amtdouble desc ,amtlong limit 10 ;

--查看YDB
show tables ;
@hiveonly show tables;
show tables detail;
--查看表结构
desc ydb_example_shu;
@hiveonly desc ydb_example_shu ;
--查看表的分区
show partitions ydb_example_shu ;
@hiveonly show partitions ydb_example_shu;

--按条件删除
delete from ydb_example_shu where ydbpartion='3000w' and ydb_sex='' and ydb_blood='A'
--整个分区清理,数据清空,但是分区还在
truncate table ydb_example_shu partions 3000w ;
truncate table ydb_example_shu partions 3000a4,3000a5,3000a6
--删除一个表的所有分区-保留表结构
truncate table ydb_example_shu  ;
--物理清理掉整个分区的数据(清理后分区也跟着删掉)
drop table ydb_example_shu partions 3000a4 ;
drop table ydb_example_shu partions 3000a4,3000a5,3000a6   ;
--删除一个表,表结构也删除掉
drop table ydb_example_shu ;
--暂停kafka的消费3600
pause kafka 3600 ;
--恢复 暂停的kafka的消费,kafka继续消费数据
pause kafka 0 ;
--导出到hdfs
export text into /data/example/export_text select YConcat_ws('#',usernick,ydb_sex) from (select usernick,ydb_sex from ydb_example_shu where ydbpartion = '3000w' and ydb_sex='') tmp
export json overwrite /data/example/export_json  select usernick,ydb_sex from ydb_example_shu where ydbpartion = '3000w' and ydb_sex=''