概述
本文主要介绍通过DataX实现Hive数据迁移到崖山分布式。
环境
源Hive版本:3.1.3
目标YashanDB版本:23.2.3.100
建表脚本
-- hive
-- Hive source table for the migration example.
-- Rows are stored as delimited text using \001 (Ctrl-A) as the field
-- separator; the DataX hdfsreader settings in the job file must agree with
-- this ("fileType": "text", "fieldDelimiter": "\u0001").
-- Column order matters: the reader addresses columns by index 0..3.
-- NOTE(review): the reader path points at sales.db/product, so this is
-- presumably created inside the `sales` database -- confirm.
CREATE TABLE IF NOT EXISTS product (
    product_no   char(5),
    product_name varchar(30),
    cost         double,
    price        double
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS textfile;
-- YashanDB target table.
-- Columns are declared in the same order as the Hive source table.
-- NOTE(review): the DataX writer targets SALES.PRODUCT, so this is presumably
-- run while connected as the SALES user -- confirm.
CREATE TABLE product (
    product_no   CHAR(5),
    product_name VARCHAR2(30),
    cost         NUMBER,
    price        NUMBER
);
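hive表和DataX数据类型映射
DataX的hdfsreader按如下规则将Hive列类型映射为DataX内部类型:
- Long:TINYINT、SMALLINT、INT、BIGINT
- Double:FLOAT、DOUBLE
- String:STRING、CHAR、VARCHAR、STRUCT、MAP、ARRAY、UNION、BINARY
- Boolean:BOOLEAN
- Date:DATE、TIMESTAMP
本例中char/varchar列在reader中配置为string,double列配置为double。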
hive同步到崖山job配置
{
"job": {
"content": [
{
"reader": {
"name":"hdfsreader",
"parameter":{
"column":[
{
"index":0,
"type":"string"
},
{
"index":1,
"type":"string"
},
{
"index":2,
"type":"double"
},
{
"index":3,
"type":"double"
}
],
"defaultFS":"hdfs://127.0.0.1:8020",
"encoding":"UTF-8",
"fieldDelimiter":"\u0001",
"fileType":"text",
"path":"/usr/hive/warehouse/sales.db/product"
}
},
"writer": {
"name": "yashandbwriter",
"parallel": {
"binder": 6
},
"parameter": {
"batchError": true,
"column":[
"PRODUCT_NO",
"PRODUCT_NAME",
"COST",
"PRICE"
],
"connection": [
{
"jdbcUrl": "jdbc:yasdb://127.0.0.1:1688/yashandb",
"table": [
"SALES.PRODUCT"
]
}
],
"batchSize": 4096,
"batchesPerTxn": 1000,
"password": "sales",
"preSql": ["truncate table SALES.PRODUCT"],
"session": [],
"username": "sales",
"writeMode": "bulkinsert"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
执行同步
python bin/datax.py job/hive2yashandb.json
本文系转载,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文系转载,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。