RapidOCR引擎使用PP-OCRv6 onnx模型识别图片-马育民老师

# 安装依赖

```bash
pip install rapidocr-onnxruntime
```

### uv

```bash
uv add rapidocr-onnxruntime
```

# 下载模型

https://www.modelscope.cn/models/RapidAI/RapidOCR/files

[![](https://www.malaoshi.top/upload/0/0/1GW3aGjoVyqm.png)](https://www.malaoshi.top/upload/0/0/1GW3aGjoVyqm.png)

[![](https://www.malaoshi.top/upload/0/0/1GW3aGrjs2pO.png)](https://www.malaoshi.top/upload/0/0/1GW3aGrjs2pO.png)

[![](https://www.malaoshi.top/upload/0/0/1GW3aGsD7gRU.png)](https://www.malaoshi.top/upload/0/0/1GW3aGsD7gRU.png)

### 保存

将下载的检测模型、识别模型分别改名为 `PP-OCRv6_det_small.onnx`、`PP-OCRv6_rec_small.onnx`

保存到：`models` 目录

# 下载字典文件

https://github.com/PaddlePaddle/PaddleOCR/blob/main/ppocr/utils/ppocr_keys_v1.txt

保存到：`models` 目录

# 代码

```python
from rapidocr_onnxruntime import RapidOCR

# Local model paths (text-detection and text-recognition ONNX models)
det_model_path = "./models/PP-OCRv6_det_small.onnx"
rec_model_path = "./models/PP-OCRv6_rec_small.onnx"
# Character dictionary used by the recognition model
# (per the original author, the file ships with the upstream repository)
char_dict_path = "./models/ppocr_keys_v1.txt"

def group_same_line(ocr_result, y_tolerance=12):
    """
    Group OCR text boxes that sit on the same visual line.

    Boxes are ordered top-to-bottom by their vertical centre, then greedily
    merged into the first existing line whose running average centre is
    within ``y_tolerance``. Inside each line, texts are ordered left-to-right.

    :param ocr_result: iterable of ``[box, text, score]`` entries, where
        ``box`` is an iterable of ``(x, y)`` points (four corner points in the
        original author's description). ``None`` is treated as "no text".
    :param y_tolerance: maximum allowed difference, in pixels, between a box's
        vertical centre and a line's average centre; increase for larger
        images.
    :return: ``[[text, text, ...], ...]`` — one inner list per line, lines
        ordered top-to-bottom, texts ordered left-to-right.
    """
    # An OCR call that finds nothing may hand back None instead of a list.
    if ocr_result is None:
        return []

    line_list = []
    for box, text, _score in ocr_result:
        y_coords = [p[1] for p in box]
        # Vertical centre of the box's bounding extent.
        y_center = (min(y_coords) + max(y_coords)) / 2
        # Left edge, used later to order texts within a line.
        x_left = min(p[0] for p in box)
        line_list.append({"y_center": y_center, "x_left": x_left, "text": text})

    # Process boxes from top to bottom so lines are created in reading order.
    line_list.sort(key=lambda d: d["y_center"])

    groups = []
    for data in line_list:
        added = False
        # Join the first existing line that is close enough vertically.
        for group in groups:
            if abs(data["y_center"] - group["avg_y"]) <= y_tolerance:
                group["items"].append(data)
                # Keep the line's reference centre as the mean of its members.
                all_y = [i["y_center"] for i in group["items"]]
                group["avg_y"] = sum(all_y) / len(all_y)
                added = True
                break
        if not added:
            # No matching line: start a new one seeded with this box.
            groups.append({
                "avg_y": data["y_center"],
                "items": [data],
            })

    # Within each line, order the texts from left to right.
    final_lines = []
    for g in groups:
        items = sorted(g["items"], key=lambda d: d["x_left"])
        final_lines.append([i["text"] for i in items])
    return final_lines

def print_cost_seconds(ls: list) -> None:
    """
    Print the per-stage and total OCR timings in milliseconds.

    :param ls: three durations in seconds, in the order
        ``[detection, direction classification, recognition]``.
    """
    det_cost, cls_cost, rec_cost = ls

    print(f"文本检测耗时：{det_cost * 1000:.2f} ms")
    print(f"方向分类耗时：{cls_cost * 1000:.2f} ms")
    print(f"文字识别耗时：{rec_cost * 1000:.2f} ms")
    # Sum the argument itself rather than a module-level variable, so the
    # function works regardless of what the caller named its timing list.
    total_ms = sum(ls) * 1000
    print(f"总耗时：{total_ms:.2f} ms")
    print("-"*50)

# Build the OCR engine; all inference and business parameters are passed in
# a single dictionary.
# NOTE(review): the dotted-key `params={...}` form looks like the configuration
# style of the newer `rapidocr` package — confirm that the `RapidOCR` class
# imported from `rapidocr_onnxruntime` accepts it.
ocr = RapidOCR(
    params={
        # ONNX Runtime thread limits (the original author marks these as
        # required for production use)
        "ort.intra_op_num_threads": 4,
        "ort.inter_op_num_threads": 2,
        # Confidence threshold used to filter results
        "score_thresh": 0.5,
        # Enable the rotated-text direction classifier
        "use_cls": True,
        # Limit on the image's longer side, to bound memory usage
        "max_side_len": 960,
        # Local offline model paths (adjust for PP-OCRv5/v6 as needed)
        "Det.model_path": det_model_path,
        "Rec.model_path": rec_model_path,
        "Rec.char_dict_path": char_dict_path,
        "use_auto_download": False
    }
)

# Run OCR on the sample image. The call is unpacked as a
# (results, per-stage timings) pair.
# NOTE(review): verify this return shape against the installed package version.
res, cost_seconds = ocr("test1.png")

print_cost_seconds(cost_seconds)
print(res)

if not res:
    print("未识别到文字")
else:
    # Group boxes into lines with a 12-pixel tolerance; for high-resolution
    # images the original author suggests 20~30.
    line_groups = group_same_line(res, y_tolerance=12)
    print("===== 按行合并输出 =====")
    for idx, texts in enumerate(line_groups, 1):
        line_str = "".join(texts)
        # Alternative output that prefixes each line with its line number:
        # print(f"第{idx}行：{line_str}")
        print(line_str)
```

原文出处：http://malaoshi.top/show_1GW3aLAl1g19.html