首先先下载一个叫"elasticsearch-hadoop-hive"的JAR包,放到相应路径下:https://jar-download.com/artifacts/org.elasticsearch/elasticsearch-hadoop-hive
PUT /index_name
{
"mappings": {
"_doc": {
"properties": {
"field1": {"type": "keyword"},
"field2": {"type": "integer"}
}
}
},
"settings": {
"number_of_replicas": 0
}
}
建index的API可以参考https://www.elastic.co/guide/en/elasticsearch/reference/7.2/indices-create-index.html
定义ES中数据格式可以参考https://www.elastic.co/guide/en/elasticsearch/reference/7.2/mapping-types.html
add jar path/to/elasticsearch-hadoop-6.4.2.jar;
CREATE EXTERNAL TABLE index_name_to_es (
field1 string,
field2 int
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'index_name/_doc',
'es.index.auto.create' = 'false',
'es.nodes' = 'http://XXX.XXX.XXX.XXX',
'es.port'='9200'
);
也可以不用做第一步提前在ES中建index,让ES自动解析数据类型,那么这里建外部表时设置属性'es.index.auto.create' = 'true'
add jar path/to/elasticsearch-hadoop-6.4.2.jar;
insert overwrite table index_name_to_es
select field1, field2
from index_name;
与上面类似,不过步骤反过来
create table index_name (
field1 string,
field2 int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LINES TERMINATED BY '\n';
add jar path/to/elasticsearch-hadoop-6.4.2.jar;
CREATE EXTERNAL TABLE index_name_from_es (
field1 string,
field2 int
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'index_name/_doc',
'es.query' = '?q=field2:[100 TO 1000]',
'es.nodes' = 'http://XXX.XXX.XXX.XXX',
'es.port'='9200'
);
这里可以把ES的过滤条件写在es.query
的属性中,例如上例中只查询filed2
字段在范围100到1000以内的数据。
query的写法可以参考https://www.elastic.co/guide/en/elasticsearch/reference/7.2/search-uri-request.html
add jar path/to/elasticsearch-hadoop-6.4.2.jar;
insert overwrite table index_name
select filed1, field2
from index_name_from_es;
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。