Hive自定义函数UDF编写
[toc]
# Hive UDF 开发
# pom.xml依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 代码
/**
 * Hive UDF that converts a string argument to upper case.
 *
 * <p>Registered in Hive via {@code create [temporary] function ... as '<fully.qualified.TestUDF>'}.
 */
public class TestUDF extends UDF {
    /**
     * Upper-cases the given text.
     *
     * <p>The parameter list of {@code evaluate} may declare more than one parameter.
     *
     * @param s1 the value to convert; {@code null} when the SQL argument is NULL
     * @return a new {@code Text} containing the upper-cased value, or {@code null}
     *         when {@code s1} is {@code null}
     */
    public Text evaluate(Text s1) {
        // A NULL argument would otherwise raise a NullPointerException on
        // s1.toString() and fail the query; propagate NULL instead.
        if (s1 == null) {
            return null;
        }
        String result = s1.toString().toUpperCase();
        return new Text(result);
    }
}
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 函数安装
# 临时函数
# 添加jar包到hive
add jar /home/hadoop/xxx-udf-1.0.0.jar;
hive> add jar /home/hadoop/xxx-udf-1.0.0.jar;
Added [/home/hadoop/xxx-udf-1.0.0.jar] to class path
Added resources: [/home/hadoop/xxx-udf-1.0.0.jar]
1
2
3
2
3
# 将自己写的类创建为函数
create temporary function mytest as 'com.xxx.udf.TestUDF';
# 查看所有可用函数
show functions;
1
# 使用UDF函数
hive> select mytest('Aa');
OK
AA
1
2
3
2
3
# 删除函数
drop temporary function mytest;
1
# 验证函数
select mytest("1","2");
1
# UDF执行了多次问题
注意:像上面这样只传入常量参数进行验证时,函数会被调用三次,这与 Hive 的执行流程有关。
也就是说,参数全部为固定值时会执行多次;参数中包含表字段的 SQL 查询则没有这个问题。
例如 select mytest("1",word) from keywords limit 1; 这种写法只会执行一次。
https://blog.csdn.net/xiao_jun_0820/article/details/53258414
# 永久函数
# 将jar包上传HDFS
hdfs dfs -put udf-1.0.0.jar /
1
# 在Hive命令行中创建永久函数
create function testUDF as 'com.xxx.udf.xxxUDF' using jar 'hdfs:/udf-1.0.0.jar';
1
# 查看函数
在 Hive 元数据库(MySQL)中查询 FUNCS 表
MySQL [hivemetastore]> select * from FUNCS;
+---------+------------------------+-------------+-------+-----------+-----------+------------+------------+
| FUNC_ID | CLASS_NAME | CREATE_TIME | DB_ID | FUNC_NAME | FUNC_TYPE | OWNER_NAME | OWNER_TYPE |
+---------+------------------------+-------------+-------+-----------+-----------+------------+------------+
| 1 | com.xxx.udf.xxxUDF | 1670403856 | 1 | testUDF | 1 | NULL | USER |
+---------+------------------------+-------------+-------+-----------+-----------+------------+------------+
1
2
3
4
5
6
2
3
4
5
6
# 查看函数详细信息
hive> desc function extended oneid;
Added [/data/emr/hive/tmp/54b84fca-bc3f-4dd5-9a80-13577e3979cf_resources/udf-1.0.0.jar] to class path
Added resources: [hdfs:/udf-1.0.0.jar]
OK
There is no documentation for function 'oneid'
Function class:com.xxx.udf.UDF
Function type:PERSISTENT
Resource:hdfs:/udf-1.0.0.jar
Time taken: 0.113 seconds, Fetched: 4 row(s)
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 删除函数
drop function testUDF;
1
https://zhuanlan.zhihu.com/p/401569063
上次更新: 2023/11/08, 21:00:35