使用
MyBatis
框架时,让你写一个批量插入,是不是只会在mapper.xml文件中使用forEach
标签循环呢?那你知道使用forEach
标签存在的问题吗?
创建数据表,并设置22个字段。也许你会好奇为什么创建如此多字段呢?因为只有在多字段且数据量较大时,才能体现BATCH
的优势。也就是说在数据表字段较少,且保存的数据量不多的情况呀,forEach
实现的批量插入还是有优势的,但是却有一个隐含的风险,这里先按下不表。
/*
Source Server Type : MySQL
Source Server Version : 80027
*/
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for t_user
-- ----------------------------
DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name2` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name3` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name4` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name5` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name6` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name7` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name8` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name9` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name10` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name11` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name12` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name13` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name14` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name15` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name16` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name17` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name18` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name19` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
`user_name20` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_bin ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
关于因为MyBatis
框架jar的pom信息,则可在maven
仓库查找,或者mybatis
官网粘贴,这里就不再赘述,因为笔者是在MyBatis源码
中测试的。
public class User {
// ID标识
private Integer id;
private String userId;
private String userName;
private String userName2;
private String userName3;
private String userName4;
private String userName5;
private String userName6;
private String userName7;
private String userName8;
public User() {
}
public User(Integer id, String userId, String userName, String userName2,
String userName3, String userName4, String userName5, String userName6,
String userName7, String userName8) {
this.id = id;
this.userId = userId;
this.userName = userName;
this.userName2 = userName2;
this.userName3 = userName3;
this.userName4 = userName4;
this.userName5 = userName5;
this.userName6 = userName6;
this.userName7 = userName7;
this.userName8 = userName8;
}
/************* 此处省略各个属性的getter和setter方法 **************/
}
注: 这里为了测试,所以随意编写数据表字段,实际项目中请遵循字段命名规则。
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tjau.mapper.UserMapper">
<insert id="insert" parameterType="com.tjau.pojo.User">
insert into t_user(user_id, user_name, user_name2, user_name3, user_name4,
user_name5, user_name6, user_name7, user_name8, user_name9, user_name10,
user_name11, user_name12, user_name13, user_name14, user_name15,
user_name16, user_name17, user_name18, user_name19, user_name20)
values (#{userId, jdbcType=VARCHAR},
#{userName, jdbcType=VARCHAR},
#{userName2, jdbcType=VARCHAR},
#{userName3, jdbcType=VARCHAR},
#{userName4, jdbcType=VARCHAR},
#{userName5, jdbcType=VARCHAR},
#{userName6, jdbcType=VARCHAR},
#{userName7, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR}
)
</insert>
<insert id="insertBatch" parameterType="java.util.List">
insert into t_user(user_id, user_name, user_name2, user_name3, user_name4,
user_name5, user_name6, user_name7, user_name8, user_name9, user_name10,
user_name11, user_name12, user_name13, user_name14, user_name15,
user_name16, user_name17, user_name18, user_name19, user_name20)
values
<foreach collection="list" item="item" separator=",">
(
#{item.userId, jdbcType=VARCHAR},
#{item.userName, jdbcType=VARCHAR},
#{item.userName2, jdbcType=VARCHAR},
#{item.userName3, jdbcType=VARCHAR},
#{item.userName4, jdbcType=VARCHAR},
#{item.userName5, jdbcType=VARCHAR},
#{item.userName6, jdbcType=VARCHAR},
#{item.userName7, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR}
)
</foreach>
</insert>
</mapper>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE configuration
PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-config.dtd">
<configuration>
<!--第一部分:数据源配置-->
<environments default="development">
<environment id="development">
<!--使用jdbc事务管理 -->
<transactionManager type="JDBC"/>
<!-- 数据库连接池 -->
<dataSource type="POOLED">
<property name="driver" value="com.mysql.cj.jdbc.Driver"/>
<property name="url"
value="jdbc:mysql:///mybatis?useSSL=false&characterEncoding=UTF-8&serverTimezone=UTC"/>
<property name="username" value="root"/>
<property name="password" value="root"/>
</dataSource>
</environment>
</environments>
<!--第二部分:引入映射配置文件-->
<mappers>
<mapper resource="mapper/UserMapper.xml"/>
</mappers>
</configuration>
public class MyBatisBatchTest {
public static void main(String[] args) throws IOException {
// 1、读取配置文件
InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
// 2、创建插入数据
List<User> list = new ArrayList<>();
for (int i = 0; i < 5000; i++) {
list.add(new User(null, "userId-"+i, "userName-" + i, "userName2-" + i,
"userName3-" + i, "userName4-" + i, "userName5-" + i,
"userName6-" + i, "userName7-" + i, "userName8-" + i));
}
// 3、不同的插入 TODO
}
}
到这里前置的基本工作就完成了,那么下面就要开始面对forEach
和Batch
的抉择了。
forEach
的隐含的风险在前面提及使用forEach
会有一个隐含的风险,那么就是用代码复现一下。
编写测试代码:
public static void main(String[] args) throws IOException {
// 1、读取配置文件
InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
// 2、创建插入数据
List<User> list = new ArrayList<>();
for (int i = 0; i < 6000; i++) {
list.add(new User(null, "userId-"+i, "userName-" + i, "userName2-" + i,
"userName3-" + i, "userName4-" + i, "userName5-" + i,
"userName6-" + i, "userName7-" + i, "userName8-" + i));
}
// 3、forEach插入
insertForEach(sqlSessionFactory, list);
}
/**
* forEach批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertForEach(SqlSessionFactory sqlSessionFactory,
List<User> list){
// 1、获取mapper代理类 -这里默认是SIMPLE模式
SqlSession sqlSession = sqlSessionFactory.openSession();
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
int count = userMapper.insertBatch(list);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(count);
System.out.println("ForEach时间:" + (end - start));
sqlSession.close();
}
执行结果:
没错报错了,因为forEach
循环实质是将插入语句拼凑在一起,一并发送给数据库并执行。
这个方法提升批量插入速度的原理是,将传统的:
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
转化为:
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2"),
("data1", "data2"),
("data1", "data2"),
("data1", "data2"),
("data1", "data2");
这样却会导致一次性插入的数据包过大,超过数据库的默认值。数据库默认的max_allowed_packet
默认为4M
,可以通过修改max_allowed_packet
的大小来避免这个报错:
set global max_allowed_packet = 2*1024*1024*10
重启MySQL数据库后,
这样本次forEach
的批量插入问题就解决了,但是在实际项目开发中,随意修改数据库参数不太现实。
因此,如果项目设计可以保证数据的批量插入数据量不大,则可以选择forEach
为批量插入的方案,如果存在数据量激增的情况下,使用forEach
则会存在埋雷的风险。
BATCH
和forEach
之多字段批量保存由于使用forEach
实现批量插入,数据库存在接收数据量瓶颈,接下来只能通过调低数据量来测试两者的时间差距。经过测试将数据量定为5600条数据。
public static void main(String[] args) throws IOException {
// 1、读取配置文件
InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
// 2、创建插入数据
List<User> list = new ArrayList<>();
for (int i = 0; i < 5600; i++) {
list.add(new User(null, "userId-"+i, "userName-" + i, "userName2-" + i,
"userName3-" + i, "userName4-" + i, "userName5-" + i,
"userName6-" + i, "userName7-" + i, "userName8-" + i));
}
// 3、插入规则
// insertBatch(sqlSessionFactory, list);
insertForEach(sqlSessionFactory, list);
}
/**
* BATCH批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){
// 1、获取mapper代理类
SqlSession sqlSession = sqlSessionFactory.openSession(ExecutorType.BATCH);
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
list.forEach(userMapper::insert);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(list.size());
System.out.println("BATCH时间:" + (end - start));
sqlSession.close();
}
/**
* forEach批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertForEach(SqlSessionFactory sqlSessionFactory, List<User> list){
// 1、获取mapper代理类
SqlSession sqlSession = sqlSessionFactory.openSession();
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
int count = userMapper.insertBatch(list);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(count);
System.out.println("ForEach时间:" + (end - start));
sqlSession.close();
}
下面执行结果如下:
forEach
插入耗时:
BATCH
插入耗时:
通过比较我们会发现,在max_allowed_packet
默认为4M
的临界点,forEach
只能保存5600条数据的情况下,BATCH
在时间较于forEach
已有略微的领先,更何况数据量远大于5600时。
尝试如下:
public static void main(String[] args) throws IOException {
// 1、读取配置文件
InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
// 2、创建插入数据
List<User> list = new ArrayList<>();
for (int i = 0; i < 16000; i++) {
list.add(new User(null, "userId-"+i, "userName-" + i, "userName2-" + i,
"userName3-" + i, "userName4-" + i, "userName5-" + i,
"userName6-" + i, "userName7-" + i, "userName8-" + i));
}
// 3、插入规则
insertBatch(sqlSessionFactory, list);
}
/**
* BATCH批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){
// 1、获取mapper代理类
SqlSession sqlSession = sqlSessionFactory.openSession(ExecutorType.BATCH);
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
list.forEach(userMapper::insert);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(list.size());
System.out.println("BATCH时间:" + (end - start));
sqlSession.close();
}
插入结果:耗时2秒
BATCH
和forEach
之少字段批量保存修改mapper.xml文件,将插入字段介绍为10个字段。
<insert id="insert" parameterType="com.tjau.pojo.User">
insert into t_user(user_id, user_name, user_name2, user_name3, user_name4, user_name5, user_name6, user_name7, user_name8)
values (#{userId, jdbcType=VARCHAR},
#{userName, jdbcType=VARCHAR},
#{userName2, jdbcType=VARCHAR},
#{userName3, jdbcType=VARCHAR},
#{userName4, jdbcType=VARCHAR},
#{userName5, jdbcType=VARCHAR},
#{userName6, jdbcType=VARCHAR},
#{userName7, jdbcType=VARCHAR},
#{userName8, jdbcType=VARCHAR}
)
</insert>
<insert id="insertBatch" parameterType="java.util.List">
insert into t_user(user_id, user_name, user_name2, user_name3, user_name4, user_name5, user_name6, user_name7, user_name8)
values
<foreach collection="list" item="item" separator=",">
(
#{item.userId, jdbcType=VARCHAR},
#{item.userName, jdbcType=VARCHAR},
#{item.userName2, jdbcType=VARCHAR},
#{item.userName3, jdbcType=VARCHAR},
#{item.userName4, jdbcType=VARCHAR},
#{item.userName5, jdbcType=VARCHAR},
#{item.userName6, jdbcType=VARCHAR},
#{item.userName7, jdbcType=VARCHAR},
#{item.userName8, jdbcType=VARCHAR}
)
</foreach>
</insert>
将需要批量保存的数据数量调为12000,可能会好奇为什么是这个值,因为这是试出来forEach
批量插入的临界值。
public static void main(String[] args) throws IOException {
// 1、读取配置文件
InputStream resourceAsStream = Resources.getResourceAsStream("MybatisConfig.xml");
SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(resourceAsStream);
// 2、创建插入数据
List<User> list = new ArrayList<>();
for (int i = 0; i < 12000; i++) {
list.add(new User(null, "userId-"+i, "userName-" + i, "userName2-" + i,
"userName3-" + i, "userName4-" + i, "userName5-" + i,
"userName6-" + i, "userName7-" + i, "userName8-" + i));
}
// 3、插入规则
// insertBatch(sqlSessionFactory, list);
insertForEach(sqlSessionFactory, list);
}
/**
* BATCH批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertBatch(SqlSessionFactory sqlSessionFactory, List<User> list){
// 1、获取mapper代理类
SqlSession sqlSession = sqlSessionFactory.openSession(ExecutorType.BATCH);
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
list.forEach(userMapper::insert);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(list.size());
System.out.println("BATCH时间:" + (end - start));
sqlSession.close();
}
/**
* forEach批量插入
* @param sqlSessionFactory sqlSession工厂
* @param list 批量插入数据
*/
public static void insertForEach(SqlSessionFactory sqlSessionFactory, List<User> list){
// 1、获取mapper代理类
SqlSession sqlSession = sqlSessionFactory.openSession();
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
// 2、批量插入
long start = System.currentTimeMillis();
int count = userMapper.insertBatch(list);
sqlSession.commit();
long end = System.currentTimeMillis();
System.out.println(count);
System.out.println("ForEach时间:" + (end - start));
sqlSession.close();
}
forEach
插入耗时:
BATCH
插入耗时:
这里可以很明显发现,当批量插入少量字段表的数据时,使用forEach
在不超过MySQL默认的4M接收包的情况下,性能比起BATCH
更胜一筹。
在SpringBoot整合MyBatis项目中,如何更改MyBatis
的默认执行模式呢?
@Autowired
private SqlSessionFactory sqlSessionFactroy;
@Transactional
public void insertBatch(List<User> list){
SqlSession sqlSession = sqlSessionFactory.openSession(ExecutorType.BATCH);
UserMapper userMapper = sqlSession.getMapper(UserMapper.class);
list.forEach(userMapper::insert);
// 重点:最后别忘了commit
sqlSession.commit();
// sqlSession.close();
}
在方法上加上@Transactional
注解,可以避免重复创建不同的sqlSession
,让这个方法类的所有mapper都是用同一个sqlSession
,而不是每执行一个方法开启一个sqlSession
。
如果不是使用@Transactional
注解,记得关闭sqlSession
。
在选择批量插入方式时,需要考虑以下三点:
当保存数据字段较多或者数据条数较多时,慎重选择forEach
,优先考虑BATCH
;
反之优先选择forEach
。