Java案例如何实现图片OCR识别?

wen java案例 2

本文目录导读:

Java案例如何实现图片OCR识别?

  1. Tesseract OCR (最常用)
  2. 百度AI OCR (云服务)
  3. EasyOCR (基于Python的Java调用)
  4. 完整示例:图片识别工具类
  5. 安装配置说明

我来介绍几种在Java中实现图片OCR识别的主流方案:

Tesseract OCR (最常用)

依赖配置 (Maven)

<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>5.6.0</version>
</dependency>

基本实现代码

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.File;
/**
 * Minimal Tess4J example: runs OCR on one image file and prints the recognized text.
 */
public class OcrDemo {
    public static void main(String[] args) {
        final File image = new File("test.png");
        final ITesseract ocr = new Tesseract();
        // Directory that holds the downloaded language data files.
        ocr.setDatapath("D:/tessdata");
        // Recognition languages: Simplified Chinese plus English.
        ocr.setLanguage("chi_sim+eng");

        final String text;
        try {
            text = ocr.doOCR(image);
        } catch (TesseractException e) {
            System.err.println("OCR识别失败:" + e.getMessage());
            return;
        }
        System.out.println("识别结果:");
        System.out.println(text);
    }
}

图片预处理优化

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
/**
 * Image clean-up helpers that run before OCR to improve the recognition rate.
 */
public class ImagePreProcessor {
    /** Gray level (0-255): samples above it become white, all others become black. */
    private static final int BINARY_THRESHOLD = 128;

    /**
     * Loads an image, converts it to grayscale and then to a black-and-white image.
     *
     * @param imageFile image file to load
     * @return a {@code TYPE_BYTE_BINARY} image of the same size as the input
     * @throws IOException if the file cannot be read or no ImageIO reader can decode it
     */
    public static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage originalImage = ImageIO.read(imageFile);
        if (originalImage == null) {
            // ImageIO.read returns null instead of throwing when no registered reader
            // understands the file; fail here with a clear message rather than with an
            // NullPointerException a few lines further down.
            throw new IOException("无法解码图片(格式不支持或文件损坏):" + imageFile);
        }
        // Step 1: grayscale, by drawing the source onto a single-band gray canvas.
        BufferedImage grayImage = new BufferedImage(
            originalImage.getWidth(),
            originalImage.getHeight(),
            BufferedImage.TYPE_BYTE_GRAY
        );
        Graphics2D g2d = grayImage.createGraphics();
        try {
            g2d.drawImage(originalImage, 0, 0, null);
        } finally {
            g2d.dispose();
        }
        // Step 2: binarize to raise the contrast.
        return binarizeImage(grayImage);
    }

    /**
     * Thresholds an image into pure black and white.
     *
     * @param image source image; band 0 of its raster is used as the gray level
     * @return a new {@code TYPE_BYTE_BINARY} image of the same size
     */
    private static BufferedImage binarizeImage(BufferedImage image) {
        final int width = image.getWidth();
        final int height = image.getHeight();
        final int white = Color.WHITE.getRGB();
        final int black = Color.BLACK.getRGB();
        BufferedImage binaryImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                // Read the stored sample directly. getRGB() on a TYPE_BYTE_GRAY image
                // goes through a colour-space conversion to sRGB, so the value it
                // returns is not the stored gray level and the threshold would be
                // applied to shifted values.
                int gray = image.getRaster().getSample(x, y, 0);
                binaryImage.setRGB(x, y, gray > BINARY_THRESHOLD ? white : black);
            }
        }
        return binaryImage;
    }

    /**
     * Runs OCR on a pre-processed (grayscale + binarized) copy of the image.
     *
     * @param imageFile image file to recognize
     * @return the text returned by the OCR engine
     * @throws IOException if the image cannot be read or decoded
     * @throws TesseractException if the OCR engine fails
     */
    public static String enhancedOcr(File imageFile) throws IOException, TesseractException {
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath("D:/tessdata");
        tesseract.setLanguage("chi_sim+eng");
        // Pre-process first, then hand the in-memory image to the engine.
        BufferedImage processedImage = preprocessImage(imageFile);
        return tesseract.doOCR(processedImage);
    }
}

百度AI OCR (云服务)

依赖配置

<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.16.19</version>
</dependency>

实现代码

import com.baidu.aip.ocr.AipOcr;
import org.json.JSONObject;
import java.util.HashMap;
/**
 * Example of calling the Baidu AI general text recognition service on a local image.
 */
public class BaiduOcrDemo {
    // Placeholders for the APPID / API key / secret key; replace them with real
    // values before running.
    private static final String APP_ID = "你的 App ID";
    private static final String API_KEY = "你的 API Key";
    private static final String SECRET_KEY = "你的 Secret Key";

    public static void main(String[] args) {
        // Create the client.
        AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);
        // Network timeouts, in milliseconds.
        client.setConnectionTimeoutInMillis(5000);
        client.setSocketTimeoutInMillis(30000);
        // Call general text recognition on a local file path.
        String imagePath = "test.png";
        JSONObject res = client.basicGeneral(imagePath, new HashMap<String, String>());
        // A failed call still yields a JSON object, carrying error_code / error_msg;
        // report it as a failure instead of printing it as a recognition result.
        if (res.has("error_code")) {
            System.err.println("OCR识别失败:" + res.optString("error_msg")
                + " (error_code=" + res.opt("error_code") + ")");
            return;
        }
        System.out.println("识别结果:");
        System.out.println(res.toString(2));
    }
}

EasyOCR (基于Python的Java调用)

使用ProcessBuilder调用Python

import java.io.BufferedReader;
import java.io.InputStreamReader;
/**
 * Runs the {@code ocr_script.py} Python script as a child process and collects its output.
 */
public class PythonOcrCaller {
    /**
     * Invokes {@code python ocr_script.py <imagePath>} and returns what it writes to stdout.
     *
     * <p>This is best-effort: failures are reported on stderr and whatever output was
     * collected so far (possibly an empty string) is returned.
     *
     * @param imagePath path of the image, passed to the script as its only argument
     * @return the script's standard output, each line terminated by {@code "\n"}
     */
    public static String callPythonOcr(String imagePath) {
        StringBuilder result = new StringBuilder();
        Process process = null;
        try {
            ProcessBuilder pb = new ProcessBuilder("python", "ocr_script.py", imagePath);
            // Send the child's stderr to our own stderr. If that pipe is left unread,
            // a script that logs a lot can fill the pipe buffer and block forever
            // while we are waiting on its stdout.
            pb.redirectError(ProcessBuilder.Redirect.INHERIT);
            // Make Python encode its output as UTF-8 so it matches the decoder below
            // instead of depending on each side's platform default charset.
            pb.environment().put("PYTHONIOENCODING", "utf-8");
            process = pb.start();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(),
                            java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }
            // stdout is closed at this point; wait for the exit status.
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                System.err.println("Python脚本执行失败,退出码:" + exitCode);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag for the caller and do not leave the child running.
            Thread.currentThread().interrupt();
            if (process != null) {
                process.destroy();
            }
            e.printStackTrace();
        } catch (Exception e) {
            // Deliberately broad: this method never throws, it reports and returns.
            e.printStackTrace();
        }
        return result.toString();
    }
}

Python脚本 (ocr_script.py):

import easyocr
import sys
def ocr_image(image_path):
    """Run EasyOCR on ``image_path`` and print every recognized text.

    One line is written to stdout per detection, in the order returned by
    ``readtext``: the text followed by its confidence with two decimals.
    """
    # Reader set up for the 'ch_sim' and 'en' language codes.
    ocr_reader = easyocr.Reader(['ch_sim', 'en'])
    detections = ocr_reader.readtext(image_path)
    # Each detection is a (bounding box, text, confidence) triple; the box is not used.
    for _bbox, text, confidence in detections:
        print(f"{text} (置信度: {confidence:.2f})")
if __name__ == "__main__":
    if len(sys.argv) > 1:
        ocr_image(sys.argv[1])
    else:
        # Report the usage error on stderr and exit non-zero, so a caller that
        # captures stdout and checks the exit status does not mistake this
        # message for OCR output.
        print("请提供图片路径", file=sys.stderr)
        sys.exit(1)

完整示例:图片识别工具类

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
/**
 * Small OCR helper built on Tess4J, with a plain and a pre-processed recognition mode.
 *
 * <p>Both recognition methods return the failure description as the result string
 * instead of throwing.
 */
public class OcrUtil {
    private static final String TESSDATA_PATH = "D:/tessdata";
    private static final String LANGUAGE = "chi_sim+eng";

    /**
     * Runs OCR directly on the image file.
     *
     * @param imageFile image file to recognize
     * @return the recognized text, or a message starting with "OCR识别失败" on failure
     */
    public static String recognizeText(File imageFile) {
        ITesseract tesseract = newTesseract();
        try {
            return tesseract.doOCR(imageFile);
        } catch (Exception e) {
            return "OCR识别失败:" + e.getMessage();
        }
    }

    /**
     * Runs OCR on a grayscale, 2x-enlarged copy of the image.
     *
     * @param imageFile image file to recognize
     * @return the recognized text, or a message starting with "高级OCR识别失败" on failure
     */
    public static String advancedRecognize(File imageFile) {
        try {
            BufferedImage processedImage = preprocessImage(imageFile);
            return newTesseract().doOCR(processedImage);
        } catch (Exception e) {
            return "高级OCR识别失败:" + e.getMessage();
        }
    }

    /** Creates an engine configured with the shared data path and language. */
    private static ITesseract newTesseract() {
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath(TESSDATA_PATH);
        tesseract.setLanguage(LANGUAGE);
        return tesseract;
    }

    /**
     * Converts the image to grayscale and doubles its size with bilinear interpolation.
     *
     * @throws IOException if the file cannot be read or no ImageIO reader can decode it
     */
    private static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage original = ImageIO.read(imageFile);
        if (original == null) {
            // ImageIO.read returns null when no reader supports the file. Without this
            // check the caller would only see an NullPointerException whose message is null.
            throw new IOException("无法解码图片(格式不支持或文件损坏):" + imageFile);
        }
        // Grayscale: draw the source onto a single-band gray canvas.
        BufferedImage gray = new BufferedImage(
            original.getWidth(),
            original.getHeight(),
            BufferedImage.TYPE_BYTE_GRAY
        );
        Graphics2D g2d = gray.createGraphics();
        try {
            g2d.drawImage(original, 0, 0, null);
        } finally {
            g2d.dispose();
        }
        // Enlarge 2x; intended to help with small text.
        int newWidth = gray.getWidth() * 2;
        int newHeight = gray.getHeight() * 2;
        BufferedImage scaled = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_BYTE_GRAY);
        Graphics2D g2dScaled = scaled.createGraphics();
        try {
            g2dScaled.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
                                       RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            g2dScaled.drawImage(gray, 0, 0, newWidth, newHeight, null);
        } finally {
            g2dScaled.dispose();
        }
        return scaled;
    }

    // Manual test entry point.
    public static void main(String[] args) {
        File imageFile = new File("test.png");
        System.out.println("===== 基本识别结果 =====");
        String result1 = recognizeText(imageFile);
        System.out.println(result1);
        System.out.println("\n===== 高级识别结果 =====");
        String result2 = advancedRecognize(imageFile);
        System.out.println(result2);
    }
}

安装配置说明

Tesseract安装:

  1. Windows:下载安装包安装
  2. Linux:sudo apt-get install tesseract-ocr
  3. 下载语言包:chi_sim.traineddata (中文简体) 和 eng.traineddata (英文),并放入代码中 setDatapath 指定的 tessdata 目录

注意事项:

  1. 图片质量影响识别效果
  2. 复杂背景需要预处理
  3. 中文识别需要下载对应语言包
  4. 商业使用考虑云服务API

建议根据识别精度、预算和使用场景选择合适的方案。

抱歉,评论功能暂时关闭!