"""Recognize text in an image with the RapidOCR engine and PP-OCRv6 ONNX models.

Article by 马育民, 2026-06-29 10:22.
Original source: http://malaoshi.top/show_1GW3aLAl1g19.html

Setup
-----
1. Download the models from
   https://www.modelscope.cn/models/RapidAI/RapidOCR/files
   Screenshots of the download page:
   - https://www.malaoshi.top/upload/0/0/1GW3aGjoVyqm.png
   - https://www.malaoshi.top/upload/0/0/1GW3aGrjs2pO.png
   - https://www.malaoshi.top/upload/0/0/1GW3aGsD7gRU.png
   Rename the models to ``PP-OCRv6_det_small.onnx`` and
   ``PP-OCRv6_rec_small.onnx`` and save them in the ``models`` directory.
2. Download the dictionary file from
   https://github.com/PaddlePaddle/PaddleOCR/blob/main/ppocr/utils/ppocr_keys_v1.txt
   and save it in the ``models`` directory as well.

Running this file as a script performs OCR on ``test1.png`` and prints the
recognized text merged line by line.
"""

# Local model paths.
det_model_path = "./models/PP-OCRv6_det_small.onnx"
rec_model_path = "./models/PP-OCRv6_rec_small.onnx"
# Character dictionary (ships with the PaddleOCR repository).
char_dict_path = "./models/ppocr_keys_v1.txt"


def group_same_line(ocr_result, y_tolerance=12):
    """Group OCR text boxes that sit on the same visual line.

    :param ocr_result: raw OCR result list, ``[[four_corner_points, text,
        score], ...]`` where each corner point is an ``[x, y]`` pair.
    :param y_tolerance: maximum difference, in pixels, between a box's
        vertical center and a line's average vertical center for the box to
        join that line. Use a larger value for larger images.
    :return: ``[[text, text, ...], ...]`` — lines ordered top to bottom, the
        texts inside each line ordered left to right.
    """
    line_list = []
    for item in ocr_result:
        box, text, _score = item
        # Corner points look like [[x1, y1], [x2, y1], [x2, y2], [x1, y2]].
        y_coords = [p[1] for p in box]
        # Vertical center of the text box.
        y_center = (min(y_coords) + max(y_coords)) / 2
        # Left edge of the box, used to order texts within a line.
        x_left = min(p[0] for p in box)
        line_list.append(
            {"y_center": y_center, "x_left": x_left, "text": text}
        )

    # Process boxes from top to bottom so groups are created in reading order.
    line_list.sort(key=lambda d: d["y_center"])

    groups = []
    for data in line_list:
        # Join the first existing line whose average center is close enough.
        for group in groups:
            if abs(data["y_center"] - group["avg_y"]) <= y_tolerance:
                group["items"].append(data)
                # Refresh the line's average center with the new member.
                all_y = [i["y_center"] for i in group["items"]]
                group["avg_y"] = sum(all_y) / len(all_y)
                break
        else:
            # No existing line matched: start a new one.
            groups.append({"avg_y": data["y_center"], "items": [data]})

    # Order the texts of every line from left to right.
    final_lines = []
    for g in groups:
        items = sorted(g["items"], key=lambda d: d["x_left"])
        final_lines.append([i["text"] for i in items])
    return final_lines


def print_cost_seconds(ls: list):
    """Print the per-stage and total OCR timings in milliseconds.

    :param ls: three durations in seconds, in the order
        ``[detection, direction classification, recognition]``.
    """
    det_cost, cls_cost, rec_cost = ls
    print(f"文本检测耗时:{det_cost * 1000:.2f} ms")
    print(f"方向分类耗时:{cls_cost * 1000:.2f} ms")
    print(f"文字识别耗时:{rec_cost * 1000:.2f} ms")
    # Sum the argument itself; the original summed a module-level variable,
    # which only worked when the caller happened to define that global.
    total_ms = sum(ls) * 1000
    print(f"总耗时:{total_ms:.2f} ms")
    print("-" * 50)


def main(image_path="test1.png"):
    """Run OCR on *image_path* and print timings, raw result and merged lines.

    :param image_path: path of the image to recognize.
    """
    # Imported here so the helpers above stay importable without the OCR
    # runtime installed.
    from rapidocr_onnxruntime import RapidOCR

    # All inference and business parameters go into a single dict.
    # NOTE(review): keys are taken verbatim from the article; confirm they
    # match the configuration schema of the installed RapidOCR version.
    ocr = RapidOCR(
        params={
            # ONNX Runtime thread limits (the article calls these a must
            # for production).
            "ort.intra_op_num_threads": 4,
            "ort.inter_op_num_threads": 2,
            # Confidence filter.
            "score_thresh": 0.5,
            # Enable classification of rotated text.
            "use_cls": True,
            # Limit on the image's long side, to bound memory use.
            "max_side_len": 960,
            # Local offline model paths (adjust for PP-OCRv5/v6 as needed).
            "Det.model_path": det_model_path,
            "Rec.model_path": rec_model_path,
            "Rec.char_dict_path": char_dict_path,
            "use_auto_download": False,
        }
    )

    res, cost_seconds = ocr(image_path)

    # NOTE(review): the engine is understood to return no timings when
    # nothing is detected — confirm. Guarding keeps the "no text" message
    # below reachable instead of failing while unpacking the timings.
    if cost_seconds is not None:
        print_cost_seconds(cost_seconds)
    print(res)

    if not res:
        print("未识别到文字")
        return

    # Tolerance of 12 pixels; for high-resolution images 20-30 works better.
    line_groups = group_same_line(res, y_tolerance=12)
    print("===== 按行合并输出 =====")
    for texts in line_groups:
        print("".join(texts))


if __name__ == "__main__":
    main()