YDB Quick Start (QuickStart)
Step 1: To make testing easier, first generate some fake data
hadoop fs -rm -r /data/example/ydb/
hadoop fs -mkdir -p /data/example/ydb/
hadoop jar ./lib/ydb-1.1.8-pg.jar cn.net.ycloud.ydb.server.reader.kafka.MrMakeShuData 5000000 /data/example/ydb/1.txt
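(Optional) A quick sanity check of the generated file before importing; this assumes, as the import commands in Step 3 imply, that the generator writes one comma-separated record per line:
hadoop fs -cat /data/example/ydb/1.txt | head -n 3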
Step 2: Create the table in YDB
--Create the table inside bin/conn.sh
create ydbtable ydb_example_shu(
phonenum y_charlike_isp,
usernick string,
ydb_sex string,
ydb_province string,
ydb_grade string,
ydb_age string,
ydb_blood string,
ydb_zhiye string,
ydb_earn string,
ydb_prefer string,
ydb_consume string,
ydb_day string,
amtdouble tdouble,
amtlong tlong,
content y_charlike_two_four_isp
)
;
Step 3: Import data into YDB
--Append data directly
./bin/ydbimport.sh -t ydb_example_shu -p 3000w -tp txt -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content
--Delete old data matching the condition given by -cond before appending
./bin/ydbimport.sh -cond "ydb_sex='男' and ydb_blood='A'" -t ydb_example_shu -p 3000w -tp txt -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content
--Overwrite the data
./bin/ydbimport.sh -overwrite -t ydb_example_shu -p 3000w -tp txt -f /data/example/ydb -sp , -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content
Parameter description (a combined example follows this list)
-t -table The table name in YDB
-p -partion (optional) The partition name in YDB; if omitted, data is imported into YDB's default partition
-local (optional) Indicates the data is stored locally rather than on HDFS (the default)
-tp -type File type; currently txt and json are supported
-f -file File path
-fl -fields (required for txt) The names of the YDB columns corresponding to the txt file
-sp -split (required for txt) The field delimiter of the txt file; regular expressions and invisible characters are supported, and the delimiter can be passed url-encoded
-ov -overwrite (optional) Whether to clear the data of the current partition before importing
-cond -condition (optional) Delete the old data matching the condition before appending new data
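A minimal sketch combining several of the optional flags above. The local file path /tmp/example/local.txt and the partition name 3000w_local are hypothetical, and passing a tab delimiter url-encoded as %09 is an assumption based on the -sp description:
./bin/ydbimport.sh -local -overwrite -t ydb_example_shu -p 3000w_local -tp txt -f /tmp/example/local.txt -sp %09 -fl phonenum,usernick,ydb_sex,ydb_province,ydb_grade,ydb_age,ydb_blood,ydb_zhiye,ydb_earn,ydb_prefer,ydb_consume,ydb_day,amtdouble,amtlong,content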
Step 4: Query examples
----Counting with count(*)
select count(*) from ydb_example_shu where ydbpartion = '3000w' ;
----Query multiple YDB partitions together
select count(*) as cnt from ydb_example_shu where ydbpartion in ('3000w','300winsert','300winsert2','test1','test3') ;
----Data preview
select * from ydb_example_shu where ydbpartion = '3000w' limit 10 ;
----Full-text search
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content='王老吉' limit 10 ;
----High-performance like match (note: this column is of the y_charlike type, so the match order is guaranteed)
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content like '170*王老吉' limit 10 ;
----High-performance not like match (note: this column is of the y_charlike type, so the match order is guaranteed)
select content,usernick from ydb_example_shu where ydbpartion = '3000w' and content not like '170*王老吉' and content='王老吉' limit 10 ;
----Like match on an ordinary string column (note: this column is of type string, so the match is essentially a brute-force scan; if the column has few repeated values, performance is poor)
select ydb_province,count(*) from ydb_example_shu where ydbpartion = '3000w' and ydb_province like '*北*' group by ydb_province limit 10 ;
----Not like match on an ordinary string column (note: this column is of type string, so the match is essentially a brute-force scan; if the column has few repeated values, performance is poor)
select ydb_province,count(*) from ydb_example_shu where ydbpartion = '3000w' and ydb_province not like '*京*' and ydb_province like '*北*' group by ydb_province limit 10 ;
----Combined filtering with multiple conditions (the amtlong condition below is a numeric range filter written in [min TO max] syntax)
select ydb_sex,ydb_grade,ydb_age,ydb_blood,amtlong from ydb_example_shu where ydbpartion = '3000w' and ydb_sex='女' and ydb_grade='本科' and (ydb_age='20到30岁' or ydb_blood='O') and (amtlong like '([3000 TO 4000] )') limit 10 ;
----Sum with sum()
select sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' ;
----Average with avg()
select avg(amtdouble) as avgamt from ydb_example_shu where ydbpartion = '3000w' ;
----Slightly more complex statistics
select count(*),count(amtdouble),avg(amtdouble),sum(amtdouble),min(amtdouble),max(amtdouble),min(ydb_province),max(ydb_province) from ydb_example_shu where ydbpartion = '3000w' ;
----Single-column group by
select ydb_sex,count(*),count(amtdouble),sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' group by ydb_sex limit 10 ;
----Multi-column group by
select ydb_sex,ydb_province,count(*) as cnt,count(amtdouble),sum(amtdouble) from ydb_example_shu where ydbpartion = '3000w' group by ydb_sex,ydb_province order by cnt desc limit 10 ;
----Top N ordering
select ydb_sex, phonenum,amtlong,amtdouble from ydb_example_shu where ydbpartion='3000w' order by amtdouble desc ,amtlong limit 10 ;
--List YDB tables
show tables ;
@hiveonly show tables;
show tables detail;
--Show the table structure
desc ydb_example_shu;
@hiveonly desc ydb_example_shu ;
--Show the partitions of a table
show partitions ydb_example_shu ;
@hiveonly show partitions ydb_example_shu;
--Delete by condition
delete from ydb_example_shu where ydbpartion='3000w' and ydb_sex='男' and ydb_blood='A'
--Clear an entire partition: the data is emptied, but the partition itself remains
truncate table ydb_example_shu partions 3000w ;
truncate table ydb_example_shu partions 3000a4,3000a5,3000a6
--Delete all partitions of a table while keeping the table structure
truncate table ydb_example_shu ;
--Physically remove the data of entire partitions (the partitions themselves are deleted as well)
drop table ydb_example_shu partions 3000a4 ;
drop table ydb_example_shu partions 3000a4,3000a5,3000a6 ;
--Drop a table, removing the table structure as well
drop table ydb_example_shu ;
--Pause Kafka consumption for 3600 seconds
pause kafka 3600 ;
--Resume the paused Kafka consumption so that Kafka continues consuming data
pause kafka 0 ;
--Export to HDFS
export text into /data/example/export_text select YConcat_ws('#',usernick,ydb_sex) from (select usernick,ydb_sex from ydb_example_shu where ydbpartion = '3000w' and ydb_sex='女') tmp
export json overwrite /data/example/export_json select usernick,ydb_sex from ydb_example_shu where ydbpartion = '3000w' and ydb_sex='女'
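A quick way to inspect the exported results on HDFS, assuming the export writes its output files under the target directories given above:
hadoop fs -ls /data/example/export_text
hadoop fs -cat /data/example/export_json/* | head -n 3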