本文目录导读:

我来介绍几种在Java中实现图片OCR识别的主流方案:
Tesseract OCR (最常用)
依赖配置 (Maven)
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.6.0</version>
</dependency>
基本实现代码
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.File;
/**
 * Minimal Tess4J example: recognizes the text in {@code test.png} and prints it.
 */
public class OcrDemo {
    /**
     * Configures a Tesseract engine, runs OCR on {@code test.png} and writes the
     * recognized text to standard output. A recognition failure is reported on
     * standard error instead of being rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ITesseract engine = new Tesseract();
        // Directory that holds the downloaded language data files.
        engine.setDatapath("D:/tessdata");
        // Simplified Chinese combined with English.
        engine.setLanguage("chi_sim+eng");

        File input = new File("test.png");
        String text;
        try {
            text = engine.doOCR(input);
        } catch (TesseractException ex) {
            System.err.println("OCR识别失败:" + ex.getMessage());
            return;
        }

        System.out.println("识别结果:");
        System.out.println(text);
    }
}
图片预处理优化
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
/**
 * Image clean-up helpers (grayscale conversion and fixed-threshold binarization)
 * applied before handing an image to Tesseract.
 */
public class ImagePreProcessor {
    /** Gray level (0-255); pixels strictly brighter than this become white. */
    private static final int BINARY_THRESHOLD = 128;

    /**
     * Loads an image, converts it to grayscale and then to a black/white image.
     *
     * @param imageFile the image to load
     * @return a {@code TYPE_BYTE_BINARY} image with the same dimensions as the input
     * @throws IOException if the file cannot be read, or if no registered image
     *     reader can decode it
     */
    public static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage originalImage = ImageIO.read(imageFile);
        // ImageIO.read signals "no decoder for this content" by returning null
        // rather than throwing; fail with a clear message instead of an NPE below.
        if (originalImage == null) {
            throw new IOException("无法解码图片(格式不受支持或文件已损坏):" + imageFile);
        }

        // 1. Grayscale: draw the source onto a single-channel canvas.
        BufferedImage grayImage = new BufferedImage(
                originalImage.getWidth(),
                originalImage.getHeight(),
                BufferedImage.TYPE_BYTE_GRAY);
        Graphics2D g2d = grayImage.createGraphics();
        try {
            g2d.drawImage(originalImage, 0, 0, null);
        } finally {
            g2d.dispose();
        }

        // 2. Binarize to raise contrast.
        return binarizeImage(grayImage);
    }

    /**
     * Maps every pixel to pure white or pure black using {@link #BINARY_THRESHOLD}.
     *
     * @param image grayscale source image
     * @return a new {@code TYPE_BYTE_BINARY} image of the same size
     */
    private static BufferedImage binarizeImage(BufferedImage image) {
        final int width = image.getWidth();
        final int height = image.getHeight();
        final int white = Color.WHITE.getRGB();
        final int black = Color.BLACK.getRGB();

        BufferedImage binaryImage =
                new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                // The red channel of the packed ARGB value is used as the gray level.
                int gray = (image.getRGB(x, y) >> 16) & 0xFF;
                binaryImage.setRGB(x, y, gray > BINARY_THRESHOLD ? white : black);
            }
        }
        return binaryImage;
    }

    /**
     * Runs OCR on a preprocessed (grayscale + binarized) copy of the image.
     *
     * @param imageFile the image to recognize
     * @return the text returned by Tesseract
     * @throws IOException if the image cannot be read or decoded
     * @throws TesseractException if recognition fails
     */
    public static String enhancedOcr(File imageFile) throws IOException, TesseractException {
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath("D:/tessdata");
        tesseract.setLanguage("chi_sim+eng");

        BufferedImage processedImage = preprocessImage(imageFile);
        return tesseract.doOCR(processedImage);
    }
}
百度AI OCR (云服务)
依赖配置 (Maven)
<dependency>
<groupId>com.baidu.aip</groupId>
<artifactId>java-sdk</artifactId>
<version>4.16.19</version>
</dependency>
实现代码
import com.baidu.aip.ocr.AipOcr;
import org.json.JSONObject;
import java.util.HashMap;
/**
 * Example of calling the Baidu AI OCR cloud service through its Java SDK.
 */
public class BaiduOcrDemo {
    // Credentials issued for the application: APPID / API key / secret key.
    private static final String APP_ID = "你的 App ID";
    private static final String API_KEY = "你的 API Key";
    private static final String SECRET_KEY = "你的 Secret Key";

    /**
     * Sends {@code test.png} to the general text recognition endpoint and prints
     * the JSON response, indented by two spaces.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        AipOcr ocrClient = new AipOcr(APP_ID, API_KEY, SECRET_KEY);

        // Network limits, both in milliseconds.
        ocrClient.setConnectionTimeoutInMillis(5000);
        ocrClient.setSocketTimeoutInMillis(30000);

        // No optional request parameters are supplied.
        HashMap<String, String> options = new HashMap<>();
        JSONObject response = ocrClient.basicGeneral("test.png", options);

        System.out.println("识别结果:");
        System.out.println(response.toString(2));
    }
}
EasyOCR (通过Java调用Python脚本)
使用ProcessBuilder调用Python
import java.io.BufferedReader;
import java.io.InputStreamReader;
/**
 * Runs the {@code ocr_script.py} Python script as a child process and collects
 * what it prints to standard output.
 */
public class PythonOcrCaller {
    /**
     * Invokes {@code python ocr_script.py <imagePath>} and returns its standard
     * output, one {@code '\n'}-terminated line per line printed by the script.
     *
     * <p>This method is best-effort: failures to start or read from the process
     * are reported on standard error and whatever output was collected so far
     * (possibly empty) is returned. A non-zero exit code is also only reported on
     * standard error.
     *
     * @param imagePath path of the image, passed to the script as its only argument
     * @return the script's standard output; empty if nothing could be read
     */
    public static String callPythonOcr(String imagePath) {
        StringBuilder result = new StringBuilder();
        Process process = null;
        try {
            ProcessBuilder pb = new ProcessBuilder("python", "ocr_script.py", imagePath);
            // Nobody reads the child's stderr here; left as a pipe it can fill up
            // and block the child forever. Forward it to this process's stderr.
            pb.redirectError(ProcessBuilder.Redirect.INHERIT);
            // Ask Python to emit UTF-8 so the decoding below does not depend on
            // the platform default charset (the script prints Chinese text).
            pb.environment().put("PYTHONIOENCODING", "utf-8");

            process = pb.start();

            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    process.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }

            int exitCode = process.waitFor();
            if (exitCode != 0) {
                System.err.println("Python脚本执行失败,退出码:" + exitCode);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can observe it.
            Thread.currentThread().interrupt();
            System.err.println("等待Python脚本结束时被中断");
        } catch (java.io.IOException | RuntimeException e) {
            // Deliberately best-effort: report and fall through to return what we have.
            e.printStackTrace();
        } finally {
            // Do not leave the child running if we bailed out early.
            if (process != null && process.isAlive()) {
                process.destroy();
            }
        }
        return result.toString();
    }
}
Python脚本 (ocr_script.py):
import easyocr
import sys
def ocr_image(image_path):
    """Run EasyOCR on one image and print each recognized text fragment.

    One line is printed per detection, in the form
    ``<text> (置信度: <confidence with two decimals>)``.

    Args:
        image_path: Path of the image, passed straight to ``Reader.readtext``.
    """
    # Reader configured for Simplified Chinese and English.
    reader = easyocr.Reader(['ch_sim', 'en'])
    # Each detection is unpacked as a (bounding box, text, confidence) triple.
    detections = reader.readtext(image_path)
    for _bbox, text, confidence in detections:
        print(f"{text} (置信度: {confidence:.2f})")
# Script entry point: expects the image path as the first command-line argument.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        ocr_image(sys.argv[1])
    else:
        # Report the usage error on stderr with a non-zero exit status so that a
        # process capturing stdout does not mistake this message for OCR output.
        print("请提供图片路径", file=sys.stderr)
        sys.exit(1)
完整示例:图片识别工具类
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
/**
 * Convenience wrapper around Tess4J offering plain OCR and OCR with image
 * preprocessing (grayscale conversion plus 2x upscaling).
 *
 * <p>Both recognition methods report failure by returning a message string
 * rather than throwing, so a caller cannot tell an error message from
 * recognized text by type alone.
 */
public class OcrUtil {
    private static final String TESSDATA_PATH = "D:/tessdata";
    private static final String LANGUAGE = "chi_sim+eng";
    /** Enlargement factor applied before OCR to help with small text. */
    private static final int SCALE_FACTOR = 2;

    /**
     * Runs OCR directly on the image file.
     *
     * @param imageFile the image to recognize
     * @return the recognized text, or {@code "OCR识别失败:"} followed by the
     *     exception message if recognition fails
     */
    public static String recognizeText(File imageFile) {
        ITesseract tesseract = newEngine();
        try {
            return tesseract.doOCR(imageFile);
        } catch (Exception e) {
            return "OCR识别失败:" + e.getMessage();
        }
    }

    /**
     * Runs OCR on a grayscale, upscaled copy of the image.
     *
     * @param imageFile the image to recognize
     * @return the recognized text, or {@code "高级OCR识别失败:"} followed by the
     *     exception message if loading, preprocessing or recognition fails
     */
    public static String advancedRecognize(File imageFile) {
        try {
            BufferedImage processedImage = preprocessImage(imageFile);
            return newEngine().doOCR(processedImage);
        } catch (Exception e) {
            return "高级OCR识别失败:" + e.getMessage();
        }
    }

    /** Creates a Tesseract engine configured with the shared data path and language. */
    private static ITesseract newEngine() {
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath(TESSDATA_PATH);
        tesseract.setLanguage(LANGUAGE);
        return tesseract;
    }

    /**
     * Loads the image, converts it to grayscale and enlarges it by
     * {@link #SCALE_FACTOR} using bilinear interpolation.
     *
     * @param imageFile the image to load
     * @return the grayscale, enlarged image
     * @throws IOException if the file cannot be read, or if no registered image
     *     reader can decode it
     */
    private static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage original = ImageIO.read(imageFile);
        // ImageIO.read returns null (it does not throw) when no reader can decode
        // the content; without this check the caller would only see a message
        // ending in "null" from the resulting NullPointerException.
        if (original == null) {
            throw new IOException("无法解码图片(格式不受支持或文件已损坏):" + imageFile);
        }

        // Convert to grayscale.
        BufferedImage gray = new BufferedImage(
                original.getWidth(),
                original.getHeight(),
                BufferedImage.TYPE_BYTE_GRAY);
        Graphics2D g2d = gray.createGraphics();
        try {
            g2d.drawImage(original, 0, 0, null);
        } finally {
            g2d.dispose();
        }

        // Enlarge to improve recognition of small text.
        int newWidth = gray.getWidth() * SCALE_FACTOR;
        int newHeight = gray.getHeight() * SCALE_FACTOR;
        BufferedImage scaled =
                new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_BYTE_GRAY);
        Graphics2D g2dScaled = scaled.createGraphics();
        try {
            g2dScaled.setRenderingHint(
                    RenderingHints.KEY_INTERPOLATION,
                    RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            g2dScaled.drawImage(gray, 0, 0, newWidth, newHeight, null);
        } finally {
            g2dScaled.dispose();
        }
        return scaled;
    }

    /**
     * Manual smoke test: runs both recognition modes on {@code test.png} and
     * prints the results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File imageFile = new File("test.png");

        System.out.println("===== 基本识别结果 =====");
        String result1 = recognizeText(imageFile);
        System.out.println(result1);

        System.out.println("\n===== 高级识别结果 =====");
        String result2 = advancedRecognize(imageFile);
        System.out.println(result2);
    }
}
安装配置说明
Tesseract安装:
- Windows:下载安装包安装
- Linux:`sudo apt-get install tesseract-ocr`
- 下载语言包:`chi_sim.traineddata`(中文简体)
注意事项:
- 图片质量影响识别效果
- 复杂背景需要预处理
- 中文识别需要下载对应语言包
- 商业使用考虑云服务API
建议根据识别精度、预算和使用场景选择合适的方案。