pipeline in linea single thread
This commit is contained in:
340
flywms_paddleocr_worker.py
Normal file
340
flywms_paddleocr_worker.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import site
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# Default ``min_pixels`` threshold for cuda_resize / cuda_cvt_color: inputs
# smaller than this are processed on the CPU instead of being uploaded to the GPU.
CUDA_MIN_PIXELS = 640 * 360
|
||||
|
||||
|
||||
def opencv_cuda_available() -> bool:
    """Report whether this OpenCV build exposes at least one CUDA device.

    Returns:
        True when ``cv2`` has a ``cuda`` submodule and it reports a positive
        device count; False otherwise, including when the query raises
        ``cv2.error``.
    """
    if not hasattr(cv2, "cuda"):
        return False
    try:
        device_count = cv2.cuda.getCudaEnabledDeviceCount()
    except cv2.error:
        # The device query itself failed; treat that as "no CUDA".
        return False
    return device_count > 0
|
||||
|
||||
|
||||
# Probed once at import time; the cuda_* helpers read this flag rather than
# querying OpenCV on every call.
OPENCV_CUDA_AVAILABLE = opencv_cuda_available()
|
||||
|
||||
|
||||
def cuda_resize(
    image: np.ndarray,
    size: tuple[int, int],
    interpolation: int = cv2.INTER_LINEAR,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Resize ``image`` to ``size``, using OpenCV's CUDA module when worthwhile.

    Args:
        image: Source image array.
        size: Target size, passed unchanged to ``cv2.resize`` /
            ``cv2.cuda.resize``.
        interpolation: OpenCV interpolation flag.
        min_pixels: Images with fewer pixels (rows * columns) than this are
            resized on the CPU.

    Returns:
        The resized image as a host array. The CPU path is used when CUDA is
        unavailable, when the image is below ``min_pixels``, or when the GPU
        path raises ``cv2.error``.
    """
    # ndarray.size counts every element (rows * cols * channels), which made a
    # BGR image cross the threshold at a third of the intended pixel count.
    # Count rows * cols so the limit means the same for any channel count.
    if not OPENCV_CUDA_AVAILABLE or int(np.prod(image.shape[:2])) < min_pixels:
        return cv2.resize(image, size, interpolation=interpolation)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.resize(gpu, size, interpolation=interpolation).download()
    except cv2.error:
        # Best-effort acceleration: any GPU failure falls back to the CPU result.
        return cv2.resize(image, size, interpolation=interpolation)
|
||||
|
||||
|
||||
def cuda_cvt_color(
    image: np.ndarray,
    code: int,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Convert ``image`` between colour spaces, on the GPU when worthwhile.

    Args:
        image: Source image array.
        code: OpenCV colour-conversion code (a ``cv2.COLOR_*`` value).
        min_pixels: Images with fewer pixels (rows * columns) than this are
            converted on the CPU.

    Returns:
        The converted image as a host array. The CPU path is used when CUDA is
        unavailable, when the image is below ``min_pixels``, or when the GPU
        path raises ``cv2.error``.
    """
    # ndarray.size counts every element (rows * cols * channels), so a BGR and
    # a grayscale image of identical dimensions were treated differently.
    # Count rows * cols so the threshold is a true pixel count.
    if not OPENCV_CUDA_AVAILABLE or int(np.prod(image.shape[:2])) < min_pixels:
        return cv2.cvtColor(image, code)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.cvtColor(gpu, code).download()
    except cv2.error:
        # Best-effort acceleration: any GPU failure falls back to the CPU result.
        return cv2.cvtColor(image, code)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Candidate:
    """One OCR reading: the text, its score, and the image variant it came from."""

    text: str  # digit string as returned by extract_candidates
    score: float  # recognition score reported for this reading
    variant: str  # name of the preprocessed variant, e.g. "h96_clahe"
|
||||
|
||||
|
||||
def add_nvidia_dll_dirs() -> None:
    """Register every ``nvidia/*/bin`` folder in site-packages as a DLL directory.

    Does nothing unless ``os.name`` is ``"nt"``. Each directory returned by
    ``site.getsitepackages()`` is checked for an ``nvidia`` folder, and every
    ``bin`` directory one level below it is passed to
    ``os.add_dll_directory``.
    """
    if os.name != "nt":
        return
    nvidia_roots = (Path(entry) / "nvidia" for entry in site.getsitepackages())
    for root in nvidia_roots:
        if not root.exists():
            continue
        for dll_dir in root.glob("*/bin"):
            if dll_dir.exists():
                os.add_dll_directory(str(dll_dir))
|
||||
|
||||
|
||||
def resize_to_height(image: np.ndarray, target_height: int) -> np.ndarray:
    """Scale ``image`` to ``target_height`` rows, preserving the aspect ratio.

    Args:
        image: Source image; only its first two shape entries are read.
        target_height: Desired number of rows.

    Returns:
        ``image`` itself when it already has the requested height, otherwise a
        bicubic-resized copy whose width is the truncated scaled width, at
        least 1.
    """
    src_height, src_width = image.shape[:2]
    if src_height != target_height:
        factor = target_height / max(1, src_height)  # max() guards a zero-row input
        new_width = max(1, int(src_width * factor))
        return cuda_resize(image, (new_width, target_height), interpolation=cv2.INTER_CUBIC)
    return image
|
||||
|
||||
|
||||
def add_border(image: np.ndarray, ratio: float = 0.08) -> np.ndarray:
    """Surround ``image`` with a constant white margin.

    Args:
        image: Source image.
        ratio: Margin size as a fraction of the width (left/right) and of the
            height (top/bottom); each margin is at least 2 pixels.

    Returns:
        A new image padded on all four sides with the value (255, 255, 255).
    """
    rows, cols = image.shape[:2]
    margin_y = max(2, int(rows * ratio))
    margin_x = max(2, int(cols * ratio))
    white = (255, 255, 255)
    # Argument order is top, bottom, left, right.
    return cv2.copyMakeBorder(
        image, margin_y, margin_y, margin_x, margin_x, cv2.BORDER_CONSTANT, value=white
    )
|
||||
|
||||
|
||||
def clahe_bgr(image: np.ndarray) -> np.ndarray:
    """Apply CLAHE to the lightness channel of a BGR image.

    The image is converted to LAB, the first channel is equalised with
    CLAHE (clip limit 2.0, 4x4 tiles), and the result is converted back to BGR.
    """
    lab_image = cuda_cvt_color(image, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab_image)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    lightness = equalizer.apply(lightness)
    recombined = cv2.merge([lightness, chroma_a, chroma_b])
    return cuda_cvt_color(recombined, cv2.COLOR_LAB2BGR)
|
||||
|
||||
|
||||
def sharpen(image: np.ndarray, strength: float = 0.7) -> np.ndarray:
    """Sharpen ``image`` by subtracting a Gaussian-blurred copy of itself.

    Args:
        image: Source image.
        strength: Weight of the blurred copy that is subtracted; the original
            is weighted by ``1.0 + strength``.

    Returns:
        ``image * (1 + strength) - blur(image) * strength`` as computed by
        ``cv2.addWeighted``.
    """
    # A (0, 0) kernel size lets OpenCV derive the kernel from sigmaX.
    soft = cv2.GaussianBlur(image, (0, 0), sigmaX=1.0)
    image_weight = 1.0 + strength
    blur_weight = -strength
    return cv2.addWeighted(image, image_weight, soft, blur_weight, 0)
|
||||
|
||||
|
||||
def variants_for_height(image: np.ndarray, target_height: int) -> dict[str, np.ndarray]:
    """Build the preprocessed variants of ``image`` at one target height.

    Args:
        image: Source BGR image.
        target_height: Height every variant is derived from.

    Returns:
        A dict, in fixed key order, mapping variant name to a BGR image:
        ``orig``, ``orig_border``, ``gray``, ``clahe``, ``clahe_sharp``,
        ``adaptive``, ``otsu``, ``otsu_close`` and ``denoise``. Single-channel
        results are converted back to three channels.
    """

    def to_bgr(single_channel: np.ndarray) -> np.ndarray:
        return cuda_cvt_color(single_channel, cv2.COLOR_GRAY2BGR)

    resized = resize_to_height(image, target_height)
    with_margin = add_border(resized)
    grayscale = cuda_cvt_color(resized, cv2.COLOR_BGR2GRAY)
    equalized = clahe_bgr(resized)
    equalized_sharp = sharpen(equalized)

    # Gaussian adaptive threshold with block size 17 and constant 5.
    binary_adaptive = cv2.adaptiveThreshold(
        grayscale, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 17, 5
    )
    _, binary_otsu = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    square_2x2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    closed = cv2.morphologyEx(binary_otsu, cv2.MORPH_CLOSE, square_2x2, iterations=1)
    smoothed = cv2.fastNlMeansDenoising(grayscale, h=7, templateWindowSize=7, searchWindowSize=21)

    # Key order is part of the contract: callers iterate this dict in order.
    return {
        "orig": resized,
        "orig_border": with_margin,
        "gray": to_bgr(grayscale),
        "clahe": equalized,
        "clahe_sharp": equalized_sharp,
        "adaptive": to_bgr(binary_adaptive),
        "otsu": to_bgr(binary_otsu),
        "otsu_close": to_bgr(closed),
        "denoise": to_bgr(smoothed),
    }
|
||||
|
||||
|
||||
def build_variants(image: np.ndarray, target_heights: list[int]) -> dict[str, np.ndarray]:
    """Build every variant of ``image`` for each requested height.

    Args:
        image: Source image.
        target_heights: Heights to generate variants for, in order.

    Returns:
        A dict keyed ``"h{height}_{variant}"`` holding the images produced by
        ``variants_for_height`` for each height.
    """
    return {
        f"h{height}_{label}": picture
        for height in target_heights
        for label, picture in variants_for_height(image, height).items()
    }
|
||||
|
||||
|
||||
def filter_variants(all_variants: dict[str, np.ndarray], variant_set: str) -> dict[str, np.ndarray]:
    """Keep only the variants that belong to the named preset.

    Args:
        all_variants: Mapping of variant name to image.
        variant_set: ``"full"`` keeps everything, ``"balanced"`` keeps five
            variant kinds, and any other value selects the three-kind fast set.

    Returns:
        ``all_variants`` itself for ``"full"``; otherwise a new dict, in the
        original order, of the entries whose name ends with ``_<kind>`` for an
        allowed kind.
    """
    if variant_set == "full":
        return all_variants
    if variant_set == "balanced":
        kept_kinds = ("orig", "orig_border", "clahe", "clahe_sharp", "adaptive")
    else:
        kept_kinds = ("orig", "clahe", "clahe_sharp")
    # str.endswith accepts a tuple, so one call tests all allowed suffixes.
    suffixes = tuple(f"_{kind}" for kind in kept_kinds)
    return {key: picture for key, picture in all_variants.items() if key.endswith(suffixes)}
|
||||
|
||||
|
||||
def extract_candidates(result: Any) -> list[tuple[str, float]]:
    """Collect digit-only recognition candidates from an OCR result.

    The result is walked recursively and text/score pairs are picked up from
    every shape this function knows about:

    * dicts with parallel ``rec_texts`` / ``rec_scores`` sequences;
    * dicts with a single ``rec_text``/``text`` and ``rec_score``/``score``;
    * ``[box, (text, score)]`` entries;
    * bare ``(text, score)`` pairs.

    Args:
        result: Arbitrarily nested dicts, lists and tuples; anything else is
            ignored.

    Returns:
        ``(digits, score)`` tuples sorted by descending score. ``digits`` is
        the text with every non-digit removed; texts without digits are
        dropped and duplicates keep their highest score (never below 0.0).
    """
    found: list[tuple[str, float]] = []

    def as_score(raw: Any) -> Any:
        """Return ``raw`` as a float, or None when it is not numeric."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    def walk(value: Any) -> None:
        if value is None:
            return
        if isinstance(value, dict):
            rec_texts = value.get("rec_texts")
            rec_scores = value.get("rec_scores")
            if isinstance(rec_texts, list):
                # Scores may arrive as a list, tuple or ndarray; a missing or
                # unusable entry counts as 0.0 rather than aborting the request.
                indexable = isinstance(rec_scores, (list, tuple, np.ndarray))
                for idx, text in enumerate(rec_texts):
                    score = as_score(rec_scores[idx]) if indexable and idx < len(rec_scores) else None
                    if isinstance(text, str):
                        found.append((text, 0.0 if score is None else score))
            text = value.get("rec_text") or value.get("text")
            # Fall back to "score" only when "rec_score" is absent, so a
            # legitimate 0.0 is not discarded by truthiness.
            raw_score = value.get("rec_score")
            if raw_score is None:
                raw_score = value.get("score")
            if isinstance(text, str):
                score = as_score(raw_score)
                found.append((text, 0.0 if score is None else score))
            for child in value.values():
                walk(child)
            return
        if isinstance(value, (list, tuple)):
            if len(value) >= 2:
                first, second = value[0], value[1]
                if isinstance(second, (list, tuple)) and len(second) >= 2 and isinstance(second[0], str):
                    # Detection + recognition entry: [box, (text, score)].
                    score = as_score(second[1])
                    if score is not None:
                        found.append((second[0], score))
                elif len(value) == 2 and isinstance(first, str) and not isinstance(second, (str, bytes, bool)):
                    # Recognition-only entry: a bare (text, score) pair, as
                    # returned when detection is disabled.
                    score = as_score(second)
                    if score is not None:
                        found.append((first, score))
            for child in value:
                walk(child)

    walk(result)

    # The same reading can be reached through more than one shape; keeping the
    # best score per digit string removes those duplicates.
    best_by_digits: dict[str, float] = {}
    for text, score in found:
        digits = re.sub(r"\D+", "", text)
        if digits:
            best_by_digits[digits] = max(score, best_by_digits.get(digits, 0.0))
    return sorted(best_by_digits.items(), key=lambda item: item[1], reverse=True)
|
||||
|
||||
|
||||
def choose_best(candidates: list[Candidate], expected_digits: int) -> tuple[str, float, int, str, float]:
    """Pick the most plausible reading among all OCR candidates.

    Candidates are grouped by text and each group is ranked as::

        max score + exact-length bonus + consensus bonus - length penalty

    The exact-length bonus is 0.35; the length penalty is 0.35 per digit of
    difference from ``expected_digits``. The consensus bonus adds 0.035 per
    vote (capped at 0.30) and 0.025 per distinct variant (capped at 0.20).

    Args:
        candidates: Every reading gathered across the image variants.
        expected_digits: Length the final text is expected to have.

    Returns:
        ``(text, score, votes, variant, rank)`` of the best group, where
        ``score`` and ``variant`` come from the group's highest-scoring
        member. ``("", 0.0, 0, "", 0.0)`` is returned for no candidates. The
        first group wins ties, and a group must rank above -999.0 to win.
    """
    if not candidates:
        return "", 0.0, 0, "", 0.0

    by_text: dict[str, list[Candidate]] = {}
    for entry in candidates:
        if entry.text not in by_text:
            by_text[entry.text] = []
        by_text[entry.text].append(entry)

    winner = ("", 0.0, 0, "", -999.0)
    for digits, members in by_text.items():
        strongest = max(members, key=lambda member: member.score)
        vote_count = len(members)
        variant_count = len({member.variant for member in members})
        length_penalty = abs(len(digits) - expected_digits) * 0.35
        exact_bonus = 0.35 if len(digits) == expected_digits else 0.0
        consensus_bonus = min(0.30, vote_count * 0.035) + min(0.20, variant_count * 0.025)
        rank = strongest.score + exact_bonus + consensus_bonus - length_penalty
        # Strict comparison keeps the earliest group when ranks tie.
        if rank > winner[4]:
            winner = (digits, strongest.score, vote_count, strongest.variant, rank)
    return winner
|
||||
|
||||
|
||||
def parse_target_heights(raw: str) -> list[int]:
    """Parse a comma-separated list of heights.

    Args:
        raw: Text such as ``"96, 128"``; blank entries are skipped.

    Returns:
        The parsed integers in order, or ``[96]`` when nothing was given.

    Raises:
        ValueError: If a non-blank entry is not a valid integer.
    """
    tokens = (piece.strip() for piece in raw.split(","))
    heights = [int(token) for token in tokens if token]
    return heights if heights else [96]
|
||||
|
||||
|
||||
def make_ocr():
    """Create the PaddleOCR engine used by the worker.

    ``add_nvidia_dll_dirs()`` runs before ``paddleocr`` is imported --
    presumably so the NVIDIA DLL directories are registered before Paddle
    loads its native libraries (TODO confirm).

    Returns:
        A ``PaddleOCR`` instance for English with document orientation
        classification, unwarping and text-line orientation disabled and a
        recognition score threshold of 0.0.
    """
    add_nvidia_dll_dirs()
    from paddleocr import PaddleOCR

    settings = {
        "lang": "en",
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
        "text_rec_score_thresh": 0.0,
    }
    return PaddleOCR(**settings)
|
||||
|
||||
|
||||
def predict_one(ocr: Any, image: np.ndarray) -> list[tuple[str, float]]:
    """Run the OCR engine on one image and return its digit candidates.

    Args:
        ocr: Engine object. ``predict(image)`` is used when it exists,
            otherwise ``ocr(image, det=False, cls=False)``.
        image: Image to recognise.

    Returns:
        The ``(digits, score)`` list produced by ``extract_candidates``.
    """
    if not hasattr(ocr, "predict"):
        # Engines without predict(): recognition only, no detection/classifier.
        raw_output = ocr.ocr(image, det=False, cls=False)
    else:
        raw_output = ocr.predict(image)
    return extract_candidates(raw_output)
|
||||
|
||||
|
||||
def handle_request(ocr: Any, request: dict[str, Any]) -> dict[str, Any]:
    """Process one OCR request and build its JSON-serialisable response.

    Args:
        ocr: Engine passed through to ``predict_one``.
        request: Must contain ``image_path``. Optional keys and defaults:
            ``target_heights`` ("96"), ``variant_set`` ("fast"),
            ``expected_digits`` (6), ``min_votes`` (2) and
            ``min_confidence`` (0.70).

    Returns:
        A response dict. ``ok`` is True only when the best text is non-empty,
        has exactly ``expected_digits`` characters, and meets both the vote
        and confidence minimums; ``text`` is empty otherwise. At most 18
        candidates, highest score first, are reported. An unreadable image
        yields ``ok=False`` with reason ``unreadable_image:<path>``.

    Raises:
        KeyError: If ``image_path`` is missing.
        ValueError: If a numeric option cannot be converted.
    """
    source_path = Path(str(request["image_path"]))
    heights = parse_target_heights(str(request.get("target_heights", "96")))
    preset = str(request.get("variant_set", "fast")).strip().lower()
    digit_count = int(request.get("expected_digits", 6))
    vote_floor = int(request.get("min_votes", 2))
    confidence_floor = float(request.get("min_confidence", 0.70))

    frame = cv2.imread(str(source_path), cv2.IMREAD_COLOR)
    if frame is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        return {
            "ok": False,
            "text": "",
            "raw_text": "",
            "confidence": 0.0,
            "votes": 0,
            "variant": "",
            "reason": f"unreadable_image:{source_path}",
            "candidates": [],
        }

    selected = filter_variants(build_variants(frame, heights), preset)
    readings: list[Candidate] = [
        Candidate(text=digits, score=score, variant=label)
        for label, picture in selected.items()
        for digits, score in predict_one(ocr, picture)
    ]

    best_text, best_confidence, vote_count, best_variant, rank = choose_best(readings, digit_count)
    rejected = (
        not best_text
        or len(best_text) != digit_count
        or vote_count < vote_floor
        or best_confidence < confidence_floor
    )
    accepted = not rejected

    # Only the 18 strongest readings are echoed back for diagnostics.
    top_readings = sorted(readings, key=lambda item: item.score, reverse=True)[:18]
    summary = " | ".join(f"{item.text}:{item.score:.3f}:{item.variant}" for item in top_readings)
    listing = [
        {"text": item.text, "score": item.score, "variant": item.variant}
        for item in top_readings
    ]
    return {
        "ok": accepted,
        "text": best_text if accepted else "",
        "best_text": best_text,
        "raw_text": summary,
        "confidence": best_confidence,
        "votes": vote_count,
        "variant": best_variant,
        "rank": rank,
        "reason": "ok" if accepted else "low_consensus_or_invalid_length",
        "candidates": listing,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Serve OCR requests read line by line from standard input.

    After the engine is built, ``{"ready": true}`` is written to stdout. Each
    non-blank input line is parsed as a JSON request and answered with one
    JSON line; the literal line ``__quit__`` ends the loop. Any exception
    raised while parsing or handling a request is reported as a
    ``worker_error:<message>`` response instead of stopping the worker.

    Returns:
        0 once the input is exhausted or ``__quit__`` is received.
    """
    engine = make_ocr()
    print(json.dumps({"ready": True}), flush=True)
    for raw_line in sys.stdin:
        command = raw_line.strip()
        if command == "__quit__":
            break
        if command:
            try:
                reply = handle_request(engine, json.loads(command))
            except Exception as exc:
                # Top-level boundary: keep serving, report the failure to the caller.
                reply = {
                    "ok": False,
                    "text": "",
                    "raw_text": "",
                    "confidence": 0.0,
                    "votes": 0,
                    "variant": "",
                    "reason": f"worker_error:{exc}",
                    "candidates": [],
                }
            # One flushed line per request keeps the reader in step.
            print(json.dumps(reply, ensure_ascii=True), flush=True)
    return 0
|
||||
|
||||
|
||||
# Run the worker loop when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user