yum install maven git -y
cd ~
# 远程下载java工程或者使用maven创建一个新的工程:
# mvn archetype:generate -DgroupId=com.z3.examples -DartifactId=MyUDF -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false
git clone https://git.code.tencent.com/lacus_w/hive-labs.git
cd hive-labs/MyUDF
vi src/main/java/com/z3/examples/MyUDF.java
使用官方 Wiki 中的 UDF 示例代码(将输入字符串转换为小写):
package com.z3.examples;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that converts a string value to lower case.
 */
public final class MyUDF extends UDF {

    /**
     * Produces a lower-cased copy of the given value.
     *
     * @param s the input text; may be {@code null}
     * @return a new {@code Text} containing the lower-cased string, or
     *         {@code null} when {@code s} is {@code null}
     */
    public Text evaluate(final Text s) {
        if (s != null) {
            final String lowered = s.toString().toLowerCase();
            return new Text(lowered);
        }
        // A null input yields a null result.
        return null;
    }
}
编译并打包:
mvn compile package
查看jar程序:
ll target
# 查询jar程序的绝对路径:
readlink -f target/*.jar
新建 SignUDF 类:导入日历类解析出生日期,并仅根据其中的月份返回星座(简化版,不考虑具体日期边界):
package com.z3.examples;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * Hive UDF that maps a birth date to a zodiac sign name using only the month
 * of the date; day-of-month boundaries between signs are not considered.
 */
public class SignUDF extends UDF {

    /** Date pattern accepted by {@link #evaluate(Text)}. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /** Sign names indexed by zero-based month (index 0 is January). */
    private static final String[] SIGNS = {
        "摩羯座", "水瓶座", "双鱼座", "白羊座", "金牛座", "双子座",
        "巨蟹座", "狮子座", "处女座", "天秤座", "天蝎座", "射手座"
    };

    /**
     * Returns the sign name associated with the month of {@code birthDate}.
     *
     * @param birthDate a date written as {@code yyyy-MM-dd}; may be {@code null}
     * @return the sign name for the month, or {@code null} when the input is
     *         {@code null}, empty, or not a valid calendar date
     * @throws ParseException never thrown by this implementation; the clause
     *         is retained so the method signature stays source-compatible
     */
    public Text evaluate(final Text birthDate) throws ParseException {
        if (birthDate == null || birthDate.toString().isEmpty()) {
            return null;
        }

        // SimpleDateFormat is not thread-safe, so each call gets its own instance.
        final SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);
        // Strict parsing: without this, impossible dates such as month 13 are
        // silently rolled over into the following year instead of being rejected.
        sdf.setLenient(false);

        final Date date;
        try {
            date = sdf.parse(birthDate.toString());
        } catch (ParseException ignored) {
            // Unparsable input yields null instead of aborting the whole query.
            return null;
        }

        final Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        final int monthIndex = calendar.get(Calendar.MONTH); // Calendar months are zero-based
        if (monthIndex < 0 || monthIndex >= SIGNS.length) {
            return null; // defensive guard against an out-of-range month value
        }
        return new Text(SIGNS[monthIndex]);
    }
}
编译并打包jar程序:
mvn compile package
ll target
readlink -f target/*.jar
# /root/hive-labs/SignUDF/target/SignUDF-1.0-jar-with-dependencies.jar
在Hive中添加jar程序,根据类名创建函数:
add jar /root/hive-labs/SignUDF/target/SignUDF-1.0-jar-with-dependencies.jar;
create temporary function sign_udf as 'com.z3.examples.SignUDF';
调用函数:
select sign_udf('2020-1-1');
尝试使用枚举类型计算星座:
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * Zodiac signs, each tagged with a display name and the month/day on which
 * the sign starts.
 *
 * <p>The constants are declared in order of start date through the year,
 * except that {@code CAPRICORN} (which starts in December and continues into
 * January) is declared first. {@link #getZodiacSign(int, int)} depends on
 * this declaration order.
 */
enum ZodiacSign {
    CAPRICORN("摩羯座", 12, 22),
    AQUARIUS("水瓶座", 1, 20),
    PISCES("双鱼座", 2, 19),
    ARIES("白羊座", 3, 21),
    TAURUS("金牛座", 4, 20),
    GEMINI("双子座", 5, 21),
    CANCER("巨蟹座", 6, 22),
    LEO("狮子座", 7, 23),
    VIRGO("处女座", 8, 23),
    LIBRA("天秤座", 9, 23),
    SCORPIO("天蝎座", 10, 24),
    SAGITTARIUS("射手座", 11, 22);

    /** Display name of the sign. */
    private final String name;
    /** Month (1-12) in which the sign starts. */
    private final int startMonth;
    /** Day of {@link #startMonth} on which the sign starts. */
    private final int startDay;

    ZodiacSign(String name, int startMonth, int startDay) {
        this.name = name;
        this.startMonth = startMonth;
        this.startDay = startDay;
    }

    /**
     * Looks up the sign for a month and day.
     *
     * @param month month of the year, 1-12
     * @param day day of the month; not range-checked
     * @return the sign that starts in {@code month} when {@code day} is on or
     *         after its start day, otherwise the sign declared before it
     *         (wrapping around to the last constant); {@code null} when no
     *         sign starts in {@code month}
     */
    public static ZodiacSign getZodiacSign(int month, int day) {
        final ZodiacSign[] signs = ZodiacSign.values();
        // The predecessor of the first declared sign is the last declared one:
        // days in December before CAPRICORN's start belong to SAGITTARIUS.
        ZodiacSign previousSign = signs[signs.length - 1];
        for (ZodiacSign sign : signs) {
            if (month == sign.startMonth) {
                return day >= sign.startDay ? sign : previousSign;
            }
            previousSign = sign;
        }
        return null;
    }

    /**
     * @return the display name of this sign
     */
    public String getName() {
        return name;
    }
}
/**
 * Command-line demo that prints zodiac sign names for a few sample dates.
 */
public class ZodiacSignCalculator {

    /**
     * Prints the sign names for January 19 and January 20, then parses a
     * hard-coded date string and prints the sign name for it. If anything
     * fails in the parsing step, the text {@code error} is printed instead.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        System.out.println(ZodiacSign.getZodiacSign(1, 19).getName());
        System.out.println(ZodiacSign.getZodiacSign(1, 20).getName());

        try {
            final String rawDate = "1999-1-20";
            final Date parsed = new SimpleDateFormat("yyyy-MM-dd").parse(rawDate);

            final Calendar cal = Calendar.getInstance();
            cal.setTime(parsed);
            // Calendar months are zero-based, so shift into the 1-12 range.
            final int monthOfYear = 1 + cal.get(Calendar.MONTH);
            final int dayOfMonth = cal.get(Calendar.DATE);

            System.out.println(ZodiacSign.getZodiacSign(monthOfYear, dayOfMonth).getName());
        } catch (Exception e) {
            System.out.println("error");
        }
    }
}
编译运行java程序:
vi ZodiacSignCalculator.java
javac ZodiacSignCalculator.java
java ZodiacSignCalculator
实现UDF中的evaluate方法:
package com.z3.examples;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * Zodiac signs, each tagged with a display name and the month/day on which
 * the sign starts.
 *
 * <p>The constants are declared in order of start date through the year,
 * except that {@code CAPRICORN} (which starts in December and continues into
 * January) is declared first. {@link #getZodiacSign(int, int)} depends on
 * this declaration order.
 */
enum ZodiacSign {
    CAPRICORN("摩羯座", 12, 22),
    AQUARIUS("水瓶座", 1, 20),
    PISCES("双鱼座", 2, 19),
    ARIES("白羊座", 3, 21),
    TAURUS("金牛座", 4, 20),
    GEMINI("双子座", 5, 21),
    CANCER("巨蟹座", 6, 22),
    LEO("狮子座", 7, 23),
    VIRGO("处女座", 8, 23),
    LIBRA("天秤座", 9, 23),
    SCORPIO("天蝎座", 10, 24),
    SAGITTARIUS("射手座", 11, 22);

    /** Display name of the sign. */
    private final String name;
    /** Month (1-12) in which the sign starts. */
    private final int startMonth;
    /** Day of {@link #startMonth} on which the sign starts. */
    private final int startDay;

    ZodiacSign(String name, int startMonth, int startDay) {
        this.name = name;
        this.startMonth = startMonth;
        this.startDay = startDay;
    }

    /**
     * Looks up the sign for a month and day.
     *
     * @param month month of the year, 1-12
     * @param day day of the month; not range-checked
     * @return the sign that starts in {@code month} when {@code day} is on or
     *         after its start day, otherwise the sign declared before it
     *         (wrapping around to the last constant); {@code null} when no
     *         sign starts in {@code month}
     */
    public static ZodiacSign getZodiacSign(int month, int day) {
        final ZodiacSign[] signs = ZodiacSign.values();
        // The predecessor of the first declared sign is the last declared one:
        // days in December before CAPRICORN's start belong to SAGITTARIUS.
        ZodiacSign previousSign = signs[signs.length - 1];
        for (ZodiacSign sign : signs) {
            if (month == sign.startMonth) {
                return day >= sign.startDay ? sign : previousSign;
            }
            previousSign = sign;
        }
        return null;
    }

    /**
     * @return the display name of this sign
     */
    public String getName() {
        return name;
    }
}
/**
 * Hive UDF that returns the zodiac sign name for a birth date, taking both
 * the month and the day of the month into account.
 */
public class ZodiacSignCalculatorUDF extends UDF {

    /** Date pattern accepted by {@link #evaluate(Text)}. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /**
     * Returns the zodiac sign name for {@code birthDate}.
     *
     * @param birthDate a date written as {@code yyyy-MM-dd}; may be {@code null}
     * @return the sign name, or {@code null} when the input is {@code null},
     *         empty, not a valid calendar date, or has no matching sign
     * @throws ParseException never thrown by this implementation; the clause
     *         is retained so the method signature stays source-compatible
     */
    public Text evaluate(final Text birthDate) throws ParseException {
        if (birthDate == null || birthDate.toString().isEmpty()) {
            return null;
        }

        // SimpleDateFormat is not thread-safe, so each call gets its own instance.
        final SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);
        // Strict parsing: without this, impossible dates such as month 13 are
        // silently rolled over and would be reported under the wrong sign.
        sdf.setLenient(false);

        try {
            final Date date = sdf.parse(birthDate.toString());
            final Calendar calendar = Calendar.getInstance();
            calendar.setTime(date);
            final int month = calendar.get(Calendar.MONTH) + 1; // Calendar months are zero-based
            final int day = calendar.get(Calendar.DATE);

            final ZodiacSign sign = ZodiacSign.getZodiacSign(month, day);
            // Explicit check rather than relying on a caught NullPointerException.
            if (sign == null) {
                return null;
            }
            return new Text(sign.getName());
        } catch (ParseException e) {
            // Unparsable input yields null; the message is kept as originally emitted.
            System.out.println("error");
            return null;
        }
    }
}
编译打包:
mvn compile package
# 查看jar程序的绝对路径:
readlink -f target/*.jar
# /root/hive-labs/ZodiacSignCalculatorUDF/target/ZodiacSignCalculatorUDF-1.0-jar-with-dependencies.jar
根据完整类名注册UDF:
-- beeline连接:beeline -u jdbc:hive2:// -n scott -p tiger
add jar /root/hive-labs/ZodiacSignCalculatorUDF/target/ZodiacSignCalculatorUDF-1.0-jar-with-dependencies.jar;
create temporary function zsc_udf as 'com.z3.examples.ZodiacSignCalculatorUDF';
调用函数:
select zsc_udf('2020-1-1');
vi /tmp/mate.txt
输入简单的测试数据:
batman,2009-7-7,果汁|薯片|饼干
joker,2008-8-8,巧克力|饼干
在Hive中建表并加载数据:
create database z3;
use z3;
create table mate (
name String, birthday date, gift array<String>
)
row format delimited
fields terminated by ','
collection items terminated by '|'
map keys terminated by ':'
stored as textfile;
-- 导入数据:
load data local inpath '/tmp/mate.txt' into table z3.mate;
尝试调用函数:
-- 查询所有列:
select * from mate;
-- 查询所有列以及在birthday列上调用zsc_udf函数:
select *, zsc_udf(birthday) from mate;
参考效果:
附1:Hive依赖:
<!-- Dependency section for the UDF project's pom.xml. Both artifacts use the
     "provided" scope: they are on the compile classpath but are expected to be
     supplied by the runtime environment instead of being packaged.
     NOTE(review): confirm that these versions match the target cluster. -->
<dependencies>
<!-- Hadoop client libraries. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.10.2</version>
<scope>provided</scope>
</dependency>
<!-- Hive execution library. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.3</version>
<scope>provided</scope>
</dependency>
</dependencies>
附2:maven使用阿里云仓库:
mvn
vi ~/.m2/settings.xml
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
                              http://maven.apache.org/xsd/settings-1.0.0.xsd">
  <!-- User-level Maven settings that route artifact downloads through the
       Aliyun mirrors. Empty elements keep Maven's defaults. -->
  <localRepository/>
  <interactiveMode/>
  <usePluginRegistry/>
  <offline/>
  <pluginGroups/>
  <servers/>
  <mirrors>
    <!-- Mirror ids must be unique. Maven picks the first mirror whose
         mirrorOf matches a repository id, so this entry serves "central". -->
    <mirror>
      <id>aliyunmaven</id>
      <mirrorOf>central</mirrorOf>
      <name>阿里云公共仓库</name>
      <url>https://maven.aliyun.com/repository/central</url>
    </mirror>
    <!-- Inert while the mirror above is present, because only the first
         match for "central" is used. HTTPS is used because Maven Central
         no longer serves plain HTTP. -->
    <mirror>
      <id>repo1</id>
      <mirrorOf>central</mirrorOf>
      <name>central repo</name>
      <url>https://repo1.maven.org/maven2/</url>
    </mirror>
    <!-- Given its own id (it previously reused "aliyunmaven"). mirrorOf
         must equal the id of the repository being mirrored; the Apache
         snapshot repository is conventionally declared as
         "apache.snapshots". -->
    <mirror>
      <id>aliyunmaven-apache-snapshots</id>
      <mirrorOf>apache.snapshots</mirrorOf>
      <name>阿里云阿帕奇仓库</name>
      <url>https://maven.aliyun.com/repository/apache-snapshots</url>
    </mirror>
  </mirrors>
  <proxies/>
  <!-- The profile below only takes effect when it is activated. -->
  <activeProfiles>
    <activeProfile>aliyun</activeProfile>
  </activeProfiles>
  <profiles>
    <profile>
      <!-- An explicit id is needed so the profile can be listed in
           activeProfiles. -->
      <id>aliyun</id>
      <repositories>
        <repository>
          <id>aliyunmaven</id>
          <name>aliyunmaven</name>
          <url>https://maven.aliyun.com/repository/public</url>
          <layout>default</layout>
          <releases>
            <enabled>true</enabled>
          </releases>
          <snapshots>
            <enabled>true</enabled>
          </snapshots>
        </repository>
        <repository>
          <id>MavenCentral</id>
          <url>https://repo1.maven.org/maven2/</url>
        </repository>
        <repository>
          <id>aliyunmavenApache</id>
          <url>https://maven.aliyun.com/repository/apache-snapshots</url>
        </repository>
      </repositories>
    </profile>
  </profiles>
</settings>
参考:
https://cwiki.apache.org/confluence/display/Hive/HivePlugins
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateFunction
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Cli#LanguageManualCli-HiveResources
https://learn.microsoft.com/en-us/azure/hdinsight/hadoop/apache-hadoop-hive-java-udf
https://docs.ksqldb.io/en/0.7.1-ksqldb/developer-guide/implement-a-udf/
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。