本文目录导读:

我来为您介绍几种常见的Java数据拆分案例实现。
按固定大小拆分列表
import java.util.ArrayList;
import java.util.List;
public class ListSplitter {
    /**
     * Splits a list into consecutive chunks holding at most {@code batchSize} elements each.
     *
     * <p>Every chunk is an independent {@link ArrayList} copy, so later changes to
     * {@code list} do not affect the returned chunks.
     *
     * @param list      the list to split; may be {@code null}
     * @param batchSize the maximum number of elements per chunk
     * @param <T>       the element type
     * @return the chunks in original order; an empty list when {@code list} is
     *         {@code null} or empty, or when {@code batchSize} is not positive
     */
    public static <T> List<List<T>> splitList(List<T> list, int batchSize) {
        List<List<T>> chunks = new ArrayList<>();
        boolean nothingToSplit = list == null || list.isEmpty() || batchSize <= 0;
        if (nothingToSplit) {
            return chunks;
        }
        int from = 0;
        while (from < list.size()) {
            // The last chunk may be shorter than batchSize.
            int to = Math.min(from + batchSize, list.size());
            List<T> chunk = new ArrayList<>(list.subList(from, to));
            chunks.add(chunk);
            from += batchSize;
        }
        return chunks;
    }

    /**
     * Demo: splits the numbers 1..10 into batches of three and prints both lists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Integer> values = new ArrayList<>();
        int next = 1;
        while (next <= 10) {
            values.add(next);
            next++;
        }
        // Three elements per batch.
        List<List<Integer>> groups = splitList(values, 3);
        System.out.println("原始列表: " + values);
        System.out.println("拆分批次: " + groups);
        // Prints: [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]]
    }
}
使用Stream API拆分
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class StreamListSplitter {
    /**
     * Splits a list into consecutive batches of at most {@code size} elements using
     * the Stream API.
     *
     * <p>The stream iterates over batch indices rather than over every element index,
     * so the work done is proportional to the number of batches plus the copying cost.
     * Each batch is an independent {@link ArrayList} copy of the corresponding range.
     *
     * @param list the list to split; must not be {@code null}
     * @param size the maximum number of elements per batch; must be positive
     * @param <T>  the element type
     * @return the batches in original order; empty when {@code list} is empty
     * @throws NullPointerException     if {@code list} is {@code null}
     * @throws IllegalArgumentException if {@code size} is zero or negative
     */
    public static <T> List<List<T>> splitWithStream(List<T> list, int size) {
        Objects.requireNonNull(list, "list must not be null");
        if (size <= 0) {
            throw new IllegalArgumentException("size must be positive, got " + size);
        }
        int total = list.size();
        // Ceiling division done in long arithmetic so a very large size cannot overflow.
        int batchCount = (int) ((total + (long) size - 1) / size);
        return IntStream.range(0, batchCount)
                .mapToObj(batchIndex -> copyBatch(list, batchIndex, size))
                .collect(Collectors.toList());
    }

    /** Copies the elements belonging to the batch at {@code batchIndex} into a new list. */
    private static <T> List<T> copyBatch(List<T> list, int batchIndex, int size) {
        int start = batchIndex * size;
        // The last batch may be shorter; long arithmetic guards against int overflow.
        int end = (int) Math.min((long) start + size, list.size());
        return new ArrayList<>(list.subList(start, end));
    }

    /**
     * Demo: splits a list of fruit names into pairs and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> fruits = Arrays.asList("苹果", "香蕉", "橘子", "西瓜", "葡萄", "草莓", "芒果");
        List<List<String>> batches = splitWithStream(fruits, 2);
        System.out.println("拆分结果: " + batches);
        // Prints: [[苹果, 香蕉], [橘子, 西瓜], [葡萄, 草莓], [芒果]]
    }
}
文件按行拆分为多个小文件
import java.io.*;
import java.nio.file.*;
import java.util.*;
public class FileSplitter {
    /**
     * Splits a text file into numbered part files of at most {@code linesPerFile}
     * lines each.
     *
     * <p>The input is read line by line, so only one batch of lines is held in memory
     * at a time. Part files are named {@code part_001.txt}, {@code part_002.txt}, ...
     * and are written relative to the current working directory, replacing any
     * existing file of the same name. A summary line is printed to standard output
     * when the split finishes.
     *
     * @param inputFile    path of the text file to split (decoded as UTF-8)
     * @param linesPerFile maximum number of lines per part file; must be positive
     * @throws IllegalArgumentException if {@code linesPerFile} is zero or negative
     * @throws IOException              if reading or writing fails, or if a part file
     *                                  would overwrite the input file while it is
     *                                  still being read
     */
    public static void splitTextFile(String inputFile, int linesPerFile)
            throws IOException {
        // Validate before touching the file system: a zero step would otherwise never finish.
        if (linesPerFile <= 0) {
            throw new IllegalArgumentException(
                    "linesPerFile must be positive, got " + linesPerFile);
        }
        Path source = Paths.get(inputFile);
        int filesWritten = 0;
        try (BufferedReader reader = Files.newBufferedReader(source)) {
            List<String> batch = new ArrayList<>();
            String line;
            while ((line = reader.readLine()) != null) {
                batch.add(line);
                if (batch.size() == linesPerFile) {
                    writePart(source, batch, ++filesWritten);
                    batch.clear();
                }
            }
            // Flush the final, possibly shorter, batch.
            if (!batch.isEmpty()) {
                writePart(source, batch, ++filesWritten);
            }
        }
        System.out.println("文件拆分完成,共生成 " + filesWritten + " 个文件");
    }

    /** Writes one batch of lines to the part file with the given 1-based number. */
    private static void writePart(Path source, List<String> lines, int partNumber)
            throws IOException {
        Path target = Paths.get(String.format("part_%03d.txt", partNumber));
        // The input is still open for reading; overwriting it would corrupt the split.
        if (Files.exists(target) && Files.isSameFile(source, target)) {
            throw new IOException("Refusing to overwrite the input file: " + target);
        }
        Files.write(target, lines);
    }

    /**
     * Demo: creates a 100-line test file and splits it into files of 20 lines.
     *
     * @param args unused
     * @throws IOException if the test file cannot be written or split
     */
    public static void main(String[] args) throws IOException {
        // Create the test file.
        List<String> testData = new ArrayList<>();
        for (int i = 1; i <= 100; i++) {
            testData.add("这是第" + i + "行内容");
        }
        Files.write(Paths.get("test.txt"), testData);
        // Split into one file per 20 lines.
        splitTextFile("test.txt", 20);
    }
}
字符串拆分处理
import java.util.*;
import java.util.regex.Pattern;
public class StringSplitter {
    /**
     * Splits a string on a literal delimiter.
     *
     * <p>The delimiter is quoted, so regex metacharacters such as {@code .} or
     * {@code |} are matched literally. As with {@link String#split(String)}, trailing
     * empty strings are not included in the result. The returned list is a fixed-size
     * view over the split array.
     *
     * @param input     the string to split; must not be {@code null}
     * @param delimiter the literal delimiter; must not be {@code null}
     * @return the parts in order of appearance
     * @throws NullPointerException if {@code input} or {@code delimiter} is {@code null}
     */
    public static List<String> splitString(String input, String delimiter) {
        Objects.requireNonNull(input, "input must not be null");
        Objects.requireNonNull(delimiter, "delimiter must not be null");
        return Arrays.asList(input.split(Pattern.quote(delimiter)));
    }

    /**
     * Splits a string on a literal delimiter, passing {@code limit} through to
     * {@link String#split(String, int)}.
     *
     * <p>A positive {@code limit} caps the number of parts, with the last part holding
     * the unsplit remainder; zero behaves like {@link #splitString}; a negative value
     * keeps trailing empty strings.
     *
     * @param input     the string to split; must not be {@code null}
     * @param delimiter the literal delimiter; must not be {@code null}
     * @param limit     the limit as defined by {@link String#split(String, int)}
     * @return the parts in order of appearance, as a fixed-size list
     * @throws NullPointerException if {@code input} or {@code delimiter} is {@code null}
     */
    public static List<String> splitStringWithLimit(String input,
                                                    String delimiter,
                                                    int limit) {
        Objects.requireNonNull(input, "input must not be null");
        Objects.requireNonNull(delimiter, "delimiter must not be null");
        return Arrays.asList(input.split(Pattern.quote(delimiter), limit));
    }

    /**
     * Splits a string into consecutive chunks of at most {@code chunkSize} characters.
     *
     * <p>Lengths are counted in {@code char} units, exactly as {@link String#length()}
     * and {@link String#substring(int, int)} count them, so a surrogate pair that
     * straddles a chunk boundary is split across two chunks.
     *
     * @param input     the string to split; must not be {@code null}
     * @param chunkSize the maximum chunk length; must be positive
     * @return the chunks in order; empty when {@code input} is empty
     * @throws NullPointerException     if {@code input} is {@code null}
     * @throws IllegalArgumentException if {@code chunkSize} is zero or negative
     */
    public static List<String> splitByLength(String input, int chunkSize) {
        Objects.requireNonNull(input, "input must not be null");
        // A zero step would never advance and a negative one yields invalid ranges.
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive, got " + chunkSize);
        }
        int length = input.length();
        List<String> chunks = new ArrayList<>();
        int start = 0;
        while (start < length) {
            // Long arithmetic keeps start + chunkSize from overflowing int.
            int end = (int) Math.min((long) start + chunkSize, length);
            chunks.add(input.substring(start, end));
            start = end;
        }
        return chunks;
    }

    /**
     * Demo: exercises the three splitting helpers and prints their results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // 1. Split on commas.
        String data = "Java,Python,C++,JavaScript,Go";
        System.out.println("按逗号拆分: " + splitString(data, ","));
        // 2. Split with a limit on the number of parts.
        System.out.println("限制拆分数量: " +
                splitStringWithLimit(data, ",", 3));
        // 3. Split into fixed-length chunks.
        String text = "HelloWorldJava";
        System.out.println("按长度拆分: " + splitByLength(text, 4));
    }
}
大数据批量处理拆分
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.*;
public class BatchProcessor {
    /** Batch size used by the overloads that do not take an explicit size. */
    private static final int BATCH_SIZE = 1000;

    /**
     * Callback invoked once per batch.
     *
     * <p>Named {@code BatchHandler} because a nested type may not share the simple
     * name of its enclosing class.
     *
     * @param <T> the element type
     */
    @FunctionalInterface
    public interface BatchHandler<T> {
        /**
         * Handles one batch.
         *
         * @param batch        a {@link List#subList} view of the source data for this batch
         * @param currentBatch the 1-based number of this batch
         * @param totalBatch   the total number of batches
         */
        void process(List<T> batch, int currentBatch, int totalBatch);
    }

    /**
     * Processes {@code data} sequentially in batches of {@value #BATCH_SIZE} elements.
     *
     * @param data      the data to process; must not be {@code null}
     * @param processor the callback invoked for each batch; must not be {@code null}
     * @param <T>       the element type
     * @throws NullPointerException if {@code data} or {@code processor} is {@code null}
     */
    public static <T> void processInBatches(List<T> data,
                                            BatchHandler<T> processor) {
        processInBatches(data, BATCH_SIZE, processor);
    }

    /**
     * Processes {@code data} sequentially, in order, in batches of {@code batchSize}
     * elements, printing a one-line summary before the first batch.
     *
     * @param data      the data to process; must not be {@code null}
     * @param batchSize the maximum number of elements per batch; must be positive
     * @param processor the callback invoked for each batch; must not be {@code null}
     * @param <T>       the element type
     * @throws NullPointerException     if {@code data} or {@code processor} is {@code null}
     * @throws IllegalArgumentException if {@code batchSize} is zero or negative
     */
    public static <T> void processInBatches(List<T> data,
                                            int batchSize,
                                            BatchHandler<T> processor) {
        checkArguments(data, batchSize, processor);
        int totalSize = data.size();
        int batchCount = countBatches(totalSize, batchSize);
        System.out.println("总数据量: " + totalSize + ", 批次数量: " + batchCount);
        for (int batchIndex = 0; batchIndex < batchCount; batchIndex++) {
            processor.process(batchAt(data, batchIndex, batchSize), batchIndex + 1, batchCount);
        }
    }

    /**
     * Processes {@code data} in batches of {@value #BATCH_SIZE} elements using a
     * parallel stream.
     *
     * @param data      the data to process; must not be {@code null}
     * @param processor the callback invoked for each batch; must not be {@code null}
     * @param <T>       the element type
     * @throws NullPointerException if {@code data} or {@code processor} is {@code null}
     */
    public static <T> void processInBatchesParallel(List<T> data,
                                                    BatchHandler<T> processor) {
        processInBatchesParallel(data, BATCH_SIZE, processor);
    }

    /**
     * Processes {@code data} in batches of {@code batchSize} elements using a parallel
     * stream over the batch indices.
     *
     * <p>Batches may be handled concurrently and in any order, so {@code processor}
     * must be safe to call from several threads at once.
     *
     * @param data      the data to process; must not be {@code null}
     * @param batchSize the maximum number of elements per batch; must be positive
     * @param processor the callback invoked for each batch; must not be {@code null}
     * @param <T>       the element type
     * @throws NullPointerException     if {@code data} or {@code processor} is {@code null}
     * @throws IllegalArgumentException if {@code batchSize} is zero or negative
     */
    public static <T> void processInBatchesParallel(List<T> data,
                                                    int batchSize,
                                                    BatchHandler<T> processor) {
        checkArguments(data, batchSize, processor);
        int batchCount = countBatches(data.size(), batchSize);
        IntStream.range(0, batchCount)
                .parallel()
                .forEach(batchIndex ->
                        processor.process(batchAt(data, batchIndex, batchSize),
                                batchIndex + 1, batchCount));
    }

    /** Rejects null arguments and non-positive batch sizes. */
    private static <T> void checkArguments(List<T> data, int batchSize,
                                           BatchHandler<T> processor) {
        Objects.requireNonNull(data, "data must not be null");
        Objects.requireNonNull(processor, "processor must not be null");
        if (batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
        }
    }

    /** Returns ceil(totalSize / batchSize), computed in long arithmetic to avoid overflow. */
    private static int countBatches(int totalSize, int batchSize) {
        return (int) ((totalSize + (long) batchSize - 1) / batchSize);
    }

    /** Returns the sub-list view covering the batch at the given 0-based index. */
    private static <T> List<T> batchAt(List<T> data, int batchIndex, int batchSize) {
        int start = batchIndex * batchSize;
        int end = (int) Math.min((long) start + batchSize, data.size());
        return data.subList(start, end);
    }

    /**
     * Demo: processes 5000 integers first sequentially and then in parallel.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Simulate a large data set.
        List<Integer> largeData = new ArrayList<>();
        for (int i = 1; i <= 5000; i++) {
            largeData.add(i);
        }
        // Sequential processing.
        processInBatches(largeData, (batch, current, total) -> {
            System.out.println("处理第 " + current + "/" + total +
                    " 批,数据量: " + batch.size());
            // Handle each batch here.
        });
        System.out.println("--- 并行处理 ---");
        // Parallel processing.
        processInBatchesParallel(largeData, (batch, current, total) -> {
            System.out.println(Thread.currentThread().getName() +
                    " 处理第 " + current + "/" + total + " 批");
        });
    }
}
使用第三方库(Apache Commons Collections)
// 需要添加依赖: commons-collections4
// Maven: org.apache.commons:commons-collections4:4.4
import org.apache.commons.collections4.ListUtils;
import java.util.*;
public class ApacheSplitter {
    /**
     * Demo: partitions the numbers 1..20 into groups of five with Apache Commons
     * Collections and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Integer> values = new ArrayList<>();
        int next = 1;
        while (next <= 20) {
            values.add(next);
            next++;
        }
        // Delegate the splitting to ListUtils.partition from Apache Commons Collections.
        List<List<Integer>> groups = ListUtils.partition(values, 5);
        System.out.println("使用Apache Commons分割: " + groups);
        // Prints: [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15], [16, 17, 18, 19, 20]]
    }
}
常见应用场景
- 数据库批量操作: 将大量数据分批插入或更新
- 文件处理: 大文件分成多个小文件处理
- 网络传输: 大数据包分成小数据包
- 日志处理: 按时间或大小分割日志文件
- 数据导出: 导出大量数据时分页处理
性能优化建议
// 使用预分配容量优化性能
/**
 * Splits a list into consecutive batches of at most {@code batchSize} elements,
 * pre-sizing the outer list to the exact number of batches.
 *
 * <p>Each batch is an independent {@link ArrayList} copy of its range.
 *
 * @param data      the list to split; must not be {@code null}
 * @param batchSize the maximum number of elements per batch; must be positive
 * @return the batches in original order; empty when {@code data} is empty
 * @throws IllegalArgumentException if {@code batchSize} is zero or negative
 */
List<List<Integer>> optimizedSplit(List<Integer> data, int batchSize) {
    // A zero size would make the capacity estimate blow up and the loop never advance.
    if (batchSize <= 0) {
        throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
    }
    int totalSize = data.size();
    // Integer ceiling division; long arithmetic avoids overflow for huge batch sizes.
    int batchCount = (int) ((totalSize + (long) batchSize - 1) / batchSize);
    List<List<Integer>> result = new ArrayList<>(batchCount);
    int start = 0;
    while (start < totalSize) {
        int end = (int) Math.min((long) start + batchSize, totalSize);
        result.add(new ArrayList<>(data.subList(start, end)));
        start = end;
    }
    return result;
}
以上就是Java中实现数据拆分的多种方式,您可以根据具体需求选择合适的方法。