github地址
https://github.com/alibaba/DataX/blob/master/introduction.md
下载软件
https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/20220530/datax.tar.gz
上传到服务器的指定目录(hadoop02)
/bigdata/soft
解压到指定目录
tar -zxvf datax.tar.gz -C /bigdata/server/
运行示例程序(在解压得到的 /bigdata/server/datax 目录下执行)
python bin/datax.py job/job.json
用户城市分布
-- Target table for the ads_user_city DataX job: one row per
-- (city, province, area, dt) with its aggregated count.
-- The job's mysqlwriter lists exactly these five columns, in this order.
-- IF NOT EXISTS keeps the statement re-runnable; the trailing ';' lets it be
-- executed from a script together with other statements.
-- NOTE(review): dt is stored as text because the job reads it as a string
-- from HDFS; confirm the format before switching it to DATE.
CREATE TABLE IF NOT EXISTS `ads_user_city` (
    city     VARCHAR(80) DEFAULT NULL COMMENT '城市',
    province VARCHAR(80) DEFAULT NULL COMMENT '省份',
    area     VARCHAR(80) DEFAULT NULL COMMENT '区域',
    dt       VARCHAR(80) DEFAULT NULL COMMENT '日期',
    count    BIGINT      DEFAULT NULL COMMENT '统计数量'
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='用户城市分布';
网站访问的上网模式分布
-- Target table for the ads_visit_type DataX job: one row per
-- (url, type, dt, month, quarter) with its aggregated count.
-- The job's mysqlwriter lists exactly these six columns, in this order.
-- ENGINE and DEFAULT CHARSET are spelled out to match ads_user_city, so the
-- table does not silently inherit server defaults for its text columns.
-- IF NOT EXISTS keeps the statement re-runnable; the trailing ';' lets it be
-- executed from a script together with other statements.
CREATE TABLE IF NOT EXISTS ads_visit_type (
    url     VARCHAR(80) COMMENT '访问地址',
    type    VARCHAR(80) COMMENT '访问模式',
    dt      VARCHAR(80) COMMENT '日期',
    month   VARCHAR(80) COMMENT '月度',
    quarter VARCHAR(80) COMMENT '季度',
    count   BIGINT      COMMENT '统计数量'
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='网站访问的上网模式分布';
ads_user_city.json
{
    "job": {
        "setting": {
            "speed": {"channel": 1}
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "path": "/behavior/ads/ads_user_city/*",
                        "defaultFS": "hdfs://hadoop01:8020",
                        "column": [
                            {"index": 0, "type": "string"},
                            {"index": 1, "type": "string"},
                            {"index": 2, "type": "string"},
                            {"index": 3, "type": "string"},
                            {"index": 4, "type": "long"}
                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\t"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "insert",
                        "username": "root",
                        "password": "123456",
                        "column": ["city", "province", "area", "dt", "count"],
                        "session": ["set session sql_mode='ANSI'"],
                        "preSql": ["delete from ads_user_city"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://192.168.3.129:3306/behavior?useUnicode=true&characterEncoding=utf-8&useSSL=false",
                                "table": ["ads_user_city"]
                            }
                        ]
                    }
                }
            }
        ]
    }
}
结果:我这里显示 3041(样本数据为 7000 条)
ads_visit_type.json
{
    "job": {
        "setting": {
            "speed": {"channel": 1}
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "path": "/behavior/ads/ads_visit_type/*",
                        "defaultFS": "hdfs://hadoop01:8020",
                        "column": [
                            {"index": 0, "type": "string"},
                            {"index": 1, "type": "string"},
                            {"index": 2, "type": "string"},
                            {"index": 3, "type": "string"},
                            {"index": 4, "type": "string"},
                            {"index": 5, "type": "long"}
                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\t"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "replace",
                        "username": "root",
                        "password": "123456",
                        "column": ["url", "type", "dt", "month", "quarter", "count"],
                        "session": ["set session sql_mode='ANSI'"],
                        "preSql": ["delete from ads_visit_type"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://192.168.3.129:3306/behavior?useUnicode=true&characterEncoding=utf-8&useSSL=false",
                                "table": ["ads_visit_type"]
                            }
                        ]
                    }
                }
            }
        ]
    }
}
结果:3410
ads_visit_mode.json
{
    "job": {
        "setting": {
            "speed": {"channel": 1}
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "path": "/behavior/ads/ads_visit_mode/*",
                        "defaultFS": "hdfs://hadoop01:8020",
                        "column": [
                            {"index": 0, "type": "string"},
                            {"index": 1, "type": "string"},
                            {"index": 2, "type": "string"},
                            {"index": 3, "type": "long"}
                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\t"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "replace",
                        "username": "root",
                        "password": "123456",
                        "column": ["url", "device_type", "dt", "count"],
                        "session": ["set session sql_mode='ANSI'"],
                        "preSql": ["delete from ads_visit_mode"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://192.168.113.144:3306/behavior?useUnicode=true&characterEncoding=utf-8&useSSL=false",
                                "table": ["ads_visit_mode"]
                            }
                        ]
                    }
                }
            }
        ]
    }
}