pipeline in linea single thread

2026-05-19 08:52:44 +02:00
parent 98b43ce903
commit f728524ee6
43 changed files with 5245 additions and 154 deletions
--- a/flywms_paddleocr_worker.py
+++ b/flywms_paddleocr_worker.py
@@ -0,0 +1,340 @@
+from __future__ import annotations
+
+import json
+import os
+import re
+import site
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+
+# Default size threshold for the cuda_* helpers below: inputs smaller than
+# this are processed on the CPU instead of being uploaded to the GPU.
+CUDA_MIN_PIXELS = 640 * 360
+
+
+def opencv_cuda_available() -> bool:
+    """Report whether this OpenCV build can see at least one CUDA device.
+
+    Returns:
+        False when ``cv2`` has no ``cuda`` attribute, when the device query
+        raises ``cv2.error``, or when it reports zero devices; True otherwise.
+    """
+    if not hasattr(cv2, "cuda"):
+        return False
+    try:
+        device_count = cv2.cuda.getCudaEnabledDeviceCount()
+    except cv2.error:
+        return False
+    return device_count > 0
+
+
+# Probed once at import time; the cuda_* helpers read this flag on every call.
+OPENCV_CUDA_AVAILABLE = opencv_cuda_available()
+
+
+def cuda_resize(
+    image: np.ndarray,
+    size: tuple[int, int],
+    interpolation: int = cv2.INTER_LINEAR,
+    min_pixels: int = CUDA_MIN_PIXELS,
+) -> np.ndarray:
+    """Resize ``image`` to ``size``, using the GPU only for large inputs.
+
+    Args:
+        image: Source image as a NumPy array (rows first, then columns).
+        size: Target size handed unchanged to OpenCV's resize.
+        interpolation: OpenCV interpolation flag.
+        min_pixels: Minimum number of pixels (rows * columns) required
+            before the CUDA path is attempted.
+
+    Returns:
+        The resized image as a host-side NumPy array.
+    """
+    # Compare the pixel count, not ``image.size``: ``size`` also multiplies in
+    # the channel count, so a 3-channel image used to cross the threshold
+    # three times sooner than a single-channel image of the same dimensions.
+    if not OPENCV_CUDA_AVAILABLE or int(np.prod(image.shape[:2])) < min_pixels:
+        return cv2.resize(image, size, interpolation=interpolation)
+    try:
+        gpu = cv2.cuda_GpuMat()
+        gpu.upload(image)
+        return cv2.cuda.resize(gpu, size, interpolation=interpolation).download()
+    except cv2.error:
+        # GPU acceleration is best-effort: any CUDA failure falls back to CPU.
+        return cv2.resize(image, size, interpolation=interpolation)
+
+
+def cuda_cvt_color(
+    image: np.ndarray,
+    code: int,
+    min_pixels: int = CUDA_MIN_PIXELS,
+) -> np.ndarray:
+    """Convert the colour space of ``image``, using the GPU only for large inputs.
+
+    Args:
+        image: Source image as a NumPy array (rows first, then columns).
+        code: OpenCV colour-conversion code (a ``cv2.COLOR_*`` constant).
+        min_pixels: Minimum number of pixels (rows * columns) required
+            before the CUDA path is attempted.
+
+    Returns:
+        The converted image as a host-side NumPy array.
+    """
+    # Compare the pixel count, not ``image.size``: ``size`` also multiplies in
+    # the channel count, so a 3-channel image used to cross the threshold
+    # three times sooner than a single-channel image of the same dimensions.
+    if not OPENCV_CUDA_AVAILABLE or int(np.prod(image.shape[:2])) < min_pixels:
+        return cv2.cvtColor(image, code)
+    try:
+        gpu = cv2.cuda_GpuMat()
+        gpu.upload(image)
+        return cv2.cuda.cvtColor(gpu, code).download()
+    except cv2.error:
+        # GPU acceleration is best-effort: any CUDA failure falls back to CPU.
+        return cv2.cvtColor(image, code)
+
+
+@dataclass(frozen=True)
+class Candidate:
+    """One OCR reading obtained from a single preprocessed image variant."""
+
+    # Recognized text for this reading.
+    text: str
+    # Recognition score attached to the reading.
+    score: float
+    # Name of the image variant that produced the reading.
+    variant: str
+
+
+def add_nvidia_dll_dirs() -> None:
+    """Register ``nvidia/*/bin`` folders from site-packages as DLL search paths.
+
+    Does nothing unless ``os.name`` is ``"nt"``. For every site-packages
+    directory that contains an ``nvidia`` sub-folder, each existing
+    ``nvidia/<package>/bin`` path is passed to ``os.add_dll_directory``.
+    """
+    if os.name != "nt":
+        return
+    nvidia_roots = (Path(entry) / "nvidia" for entry in site.getsitepackages())
+    for root in nvidia_roots:
+        if not root.exists():
+            continue
+        for dll_dir in root.glob("*/bin"):
+            if dll_dir.exists():
+                os.add_dll_directory(str(dll_dir))
+
+
+def resize_to_height(image: np.ndarray, target_height: int) -> np.ndarray:
+    """Scale ``image`` to ``target_height`` rows, keeping the aspect ratio.
+
+    The input is returned unchanged (same object) when it already has the
+    requested height. Otherwise the width is scaled by the same factor,
+    truncated to an integer, clamped to at least 1, and the image is resized
+    with cubic interpolation.
+    """
+    current_height, current_width = image.shape[:2]
+    if current_height != target_height:
+        factor = target_height / max(1, current_height)
+        new_width = max(1, int(current_width * factor))
+        return cuda_resize(image, (new_width, target_height), interpolation=cv2.INTER_CUBIC)
+    return image
+
+
+def add_border(image: np.ndarray, ratio: float = 0.08) -> np.ndarray:
+    """Pad ``image`` on all four sides with a constant (255, 255, 255) border.
+
+    The padding is ``ratio`` times the width (left/right) and ``ratio`` times
+    the height (top/bottom), each truncated to an integer and never less than
+    2 pixels.
+    """
+    height, width = image.shape[:2]
+    horizontal = max(2, int(width * ratio))
+    vertical = max(2, int(height * ratio))
+    white = (255, 255, 255)
+    return cv2.copyMakeBorder(
+        image,
+        vertical,
+        vertical,
+        horizontal,
+        horizontal,
+        cv2.BORDER_CONSTANT,
+        value=white,
+    )
+
+
+def clahe_bgr(image: np.ndarray) -> np.ndarray:
+    """Apply CLAHE to the lightness channel of a BGR image.
+
+    The image is converted to LAB, the first channel is equalized with CLAHE
+    (clip limit 2.0, 4x4 tile grid), the two remaining channels are kept
+    as-is, and the result is converted back to BGR.
+    """
+    lightness, chroma_a, chroma_b = cv2.split(cuda_cvt_color(image, cv2.COLOR_BGR2LAB))
+    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
+    equalized = equalizer.apply(lightness)
+    recombined = cv2.merge([equalized, chroma_a, chroma_b])
+    return cuda_cvt_color(recombined, cv2.COLOR_LAB2BGR)
+
+
+def sharpen(image: np.ndarray, strength: float = 0.7) -> np.ndarray:
+    """Sharpen ``image`` by subtracting a Gaussian-blurred copy of itself.
+
+    The result is ``image * (1 + strength) - blurred * strength``, where
+    ``blurred`` is a Gaussian blur with sigma 1.0 (kernel size derived by
+    OpenCV from sigma).
+    """
+    softened = cv2.GaussianBlur(image, (0, 0), sigmaX=1.0)
+    image_weight = 1.0 + strength
+    blur_weight = -strength
+    return cv2.addWeighted(image, image_weight, softened, blur_weight, 0)
+
+
+def variants_for_height(image: np.ndarray, target_height: int) -> dict[str, np.ndarray]:
+    """Build every preprocessing variant of ``image`` at one target height.
+
+    All nine variants are computed eagerly, whether or not the caller later
+    keeps them. Single-channel results are converted back to 3-channel BGR so
+    that every value in the returned mapping has the same channel layout.
+
+    Returns:
+        A mapping from variant name (``orig``, ``orig_border``, ``gray``,
+        ``clahe``, ``clahe_sharp``, ``adaptive``, ``otsu``, ``otsu_close``,
+        ``denoise``) to the processed image, in that insertion order.
+    """
+
+    def as_bgr(single_channel: np.ndarray) -> np.ndarray:
+        # Expand a grayscale/binary image back to three channels.
+        return cuda_cvt_color(single_channel, cv2.COLOR_GRAY2BGR)
+
+    resized = resize_to_height(image, target_height)
+    gray = cuda_cvt_color(resized, cv2.COLOR_BGR2GRAY)
+    equalized = clahe_bgr(resized)
+
+    # Binarizations derived from the grayscale image.
+    adaptive = cv2.adaptiveThreshold(
+        gray,
+        255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        17,
+        5,
+    )
+    _, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+    otsu_closed = cv2.morphologyEx(otsu, cv2.MORPH_CLOSE, closing_kernel, iterations=1)
+
+    denoised = cv2.fastNlMeansDenoising(gray, h=7, templateWindowSize=7, searchWindowSize=21)
+
+    return {
+        "orig": resized,
+        "orig_border": add_border(resized),
+        "gray": as_bgr(gray),
+        "clahe": equalized,
+        "clahe_sharp": sharpen(equalized),
+        "adaptive": as_bgr(adaptive),
+        "otsu": as_bgr(otsu),
+        "otsu_close": as_bgr(otsu_closed),
+        "denoise": as_bgr(denoised),
+    }
+
+
+def build_variants(image: np.ndarray, target_heights: list[int]) -> dict[str, np.ndarray]:
+    """Build the preprocessing variants of ``image`` for every target height.
+
+    Returns:
+        A mapping whose keys are ``h<height>_<variant name>``; heights are
+        processed in the order given, and a repeated height overwrites the
+        entries produced by its earlier occurrence.
+    """
+    return {
+        f"h{height}_{name}": variant
+        for height in target_heights
+        for name, variant in variants_for_height(image, height).items()
+    }
+
+
+def filter_variants(all_variants: dict[str, np.ndarray], variant_set: str) -> dict[str, np.ndarray]:
+    """Keep only the variants belonging to the requested preset.
+
+    Args:
+        all_variants: Mapping of ``h<height>_<name>`` keys to images.
+        variant_set: ``"full"`` returns ``all_variants`` itself;
+            ``"balanced"`` keeps five variant names; any other value is
+            treated as the three-name "fast" preset.
+
+    Returns:
+        ``all_variants`` unchanged for ``"full"``, otherwise a new dict with
+        the entries whose key ends in ``_<allowed name>``, in original order.
+    """
+    if variant_set == "full":
+        return all_variants
+    if variant_set == "balanced":
+        wanted = ("orig", "orig_border", "clahe", "clahe_sharp", "adaptive")
+    else:
+        wanted = ("orig", "clahe", "clahe_sharp")
+    # str.endswith accepts a tuple, so one call covers every allowed suffix.
+    suffixes = tuple(f"_{name}" for name in wanted)
+    kept: dict[str, np.ndarray] = {}
+    for key, picture in all_variants.items():
+        if key.endswith(suffixes):
+            kept[key] = picture
+    return kept
+
+
+def extract_candidates(result: Any) -> list[tuple[str, float]]:
+    """Collect digit-only readings and their best score from a raw OCR result.
+
+    ``result`` is searched recursively through dicts, lists and tuples. The
+    following shapes contribute readings:
+
+    * a dict with parallel ``rec_texts`` / ``rec_scores`` sequences;
+    * a dict with ``rec_text`` or ``text`` plus ``rec_score`` or ``score``;
+    * a sequence whose second item is a ``(text, score, ...)`` tuple, such as
+      ``[box, (text, score)]``;
+    * a bare ``(text, score)`` pair, which is how recognition-only output
+      nested as ``[[(text, score)]]`` is represented.
+
+    Every text is reduced to its digits; texts without digits are dropped and
+    readings with identical digits are merged, keeping the highest score.
+    Scores that are missing, not convertible to ``float``, or NaN count as 0.0.
+
+    Returns:
+        ``(digits, score)`` pairs sorted by descending score.
+    """
+    found: list[tuple[str, float]] = []
+
+    def as_score(raw: Any) -> float:
+        # A malformed score must not abort the whole request; treat it as 0.0.
+        try:
+            score = float(raw)
+        except (TypeError, ValueError):
+            return 0.0
+        # NaN is the only float that differs from itself; it would poison
+        # both max() and the final sort.
+        return score if score == score else 0.0
+
+    def is_number(raw: Any) -> bool:
+        # bool is an int subclass but never a meaningful confidence value.
+        return isinstance(raw, (int, float, np.integer, np.floating)) and not isinstance(raw, bool)
+
+    def walk(value: Any) -> None:
+        if isinstance(value, dict):
+            texts = value.get("rec_texts")
+            scores = value.get("rec_scores")
+            if isinstance(texts, (list, tuple)):
+                # Scores may arrive as a list, a tuple or a NumPy array.
+                indexable = isinstance(scores, (list, tuple, np.ndarray))
+                for idx, text in enumerate(texts):
+                    if not isinstance(text, str):
+                        continue
+                    raw = scores[idx] if indexable and idx < len(scores) else 0.0
+                    found.append((text, as_score(raw)))
+            text = value.get("rec_text") or value.get("text")
+            if isinstance(text, str):
+                found.append((text, as_score(value.get("rec_score") or value.get("score"))))
+            for child in value.values():
+                walk(child)
+            return
+        if isinstance(value, (list, tuple)):
+            if len(value) >= 2:
+                head, second = value[0], value[1]
+                if isinstance(head, str) and is_number(second):
+                    # Bare (text, score) pair from recognition-only output.
+                    found.append((head, as_score(second)))
+                elif isinstance(second, tuple) and len(second) >= 2 and isinstance(second[0], str):
+                    # [box, (text, score)] entry from detection + recognition.
+                    found.append((second[0], as_score(second[1])))
+            for child in value:
+                walk(child)
+
+    walk(result)
+
+    # A reading reached through two shapes is collected twice; merging by
+    # digit string with max() makes that harmless.
+    best_by_digits: dict[str, float] = {}
+    for text, score in found:
+        digits = re.sub(r"\D+", "", text)
+        if not digits:
+            continue
+        best_by_digits[digits] = max(score, best_by_digits.get(digits, 0.0))
+    return sorted(best_by_digits.items(), key=lambda item: item[1], reverse=True)
+
+
+def choose_best(candidates: list[Candidate], expected_digits: int) -> tuple[str, float, int, str, float]:
+    """Pick the most convincing reading among all candidates.
+
+    Candidates are grouped by text. Each group is ranked as
+    ``max score + exact-length bonus + consensus bonus - length penalty``:
+
+    * exact-length bonus: 0.35 when the text has ``expected_digits`` characters;
+    * consensus bonus: 0.035 per vote (capped at 0.30) plus 0.025 per
+      distinct variant (capped at 0.20);
+    * length penalty: 0.35 per character of difference from ``expected_digits``.
+
+    On equal rank the group seen first wins.
+
+    Returns:
+        ``(text, best score, votes, variant of the best-scoring candidate,
+        rank)`` for the winning group, or ``("", 0.0, 0, "", 0.0)`` when
+        ``candidates`` is empty.
+    """
+    if not candidates:
+        return "", 0.0, 0, "", 0.0
+
+    grouped: dict[str, list[Candidate]] = {}
+    for candidate in candidates:
+        grouped.setdefault(candidate.text, []).append(candidate)
+
+    best_text = ""
+    # None instead of a numeric sentinel: a fixed floor such as -999.0 left
+    # nothing selected whenever every group's rank fell below that floor
+    # (for example with very long strings and a large length penalty).
+    best_rank: float | None = None
+    best_score = 0.0
+    best_votes = 0
+    best_variant = ""
+    for text, group in grouped.items():
+        top = max(group, key=lambda item: item.score)
+        votes = len(group)
+        unique_variants = len({item.variant for item in group})
+        length_penalty = abs(len(text) - expected_digits) * 0.35
+        exact_bonus = 0.35 if len(text) == expected_digits else 0.0
+        consensus_bonus = min(0.30, votes * 0.035) + min(0.20, unique_variants * 0.025)
+        rank = top.score + exact_bonus + consensus_bonus - length_penalty
+        if best_rank is None or rank > best_rank:
+            best_text = text
+            best_rank = rank
+            best_score = top.score
+            best_votes = votes
+            best_variant = top.variant
+    return best_text, best_score, best_votes, best_variant, best_rank
+
+
+def parse_target_heights(raw: str) -> list[int]:
+    """Parse a comma-separated list of integer heights.
+
+    Blank items are ignored. When no item remains, ``[96]`` is returned.
+
+    Raises:
+        ValueError: If a non-blank item is not a valid integer literal.
+    """
+    trimmed = (chunk.strip() for chunk in raw.split(","))
+    heights = [int(chunk) for chunk in trimmed if chunk]
+    if not heights:
+        return [96]
+    return heights
+
+
+def make_ocr():
+    """Create the PaddleOCR engine used by the worker.
+
+    NVIDIA DLL directories are registered first, and ``paddleocr`` is imported
+    only afterwards, inside this function. The engine is configured for
+    English with document orientation classification, document unwarping and
+    text-line orientation disabled, and a recognition score threshold of 0.0.
+    """
+    add_nvidia_dll_dirs()
+    from paddleocr import PaddleOCR
+
+    options = {
+        "lang": "en",
+        "use_doc_orientation_classify": False,
+        "use_doc_unwarping": False,
+        "use_textline_orientation": False,
+        "text_rec_score_thresh": 0.0,
+    }
+    return PaddleOCR(**options)
+
+
+def predict_one(ocr: Any, image: np.ndarray) -> list[tuple[str, float]]:
+    """Run OCR on one image and return its digit candidates.
+
+    ``ocr.predict(image)`` is used when the engine has a ``predict``
+    attribute; otherwise ``ocr.ocr(image, det=False, cls=False)`` is called.
+    The raw result is handed to ``extract_candidates``.
+    """
+    raw = ocr.predict(image) if hasattr(ocr, "predict") else ocr.ocr(image, det=False, cls=False)
+    return extract_candidates(raw)
+
+
+def _read_image(image_path: Path) -> np.ndarray | None:
+    """Load ``image_path`` as a BGR image, or return None when that fails."""
+    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
+    if image is not None:
+        return image
+    # cv2.imread returns None for any path it cannot open; this is known to
+    # include paths with non-ASCII characters on Windows. Reading the bytes
+    # through NumPy and decoding them in memory avoids OpenCV's file access.
+    try:
+        encoded = np.fromfile(str(image_path), dtype=np.uint8)
+    except OSError:
+        return None
+    if encoded.size == 0:
+        return None
+    try:
+        return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
+    except cv2.error:
+        return None
+
+
+def handle_request(ocr: Any, request: dict[str, Any]) -> dict[str, Any]:
+    """Run the multi-variant OCR vote for one request and build the reply.
+
+    Request keys (defaults in parentheses): ``image_path`` (required),
+    ``target_heights`` ("96"), ``variant_set`` ("fast"), ``expected_digits``
+    (6), ``min_votes`` (2), ``min_confidence`` (0.70).
+
+    A reading is accepted only when it is non-empty, has exactly
+    ``expected_digits`` characters, and meets both ``min_votes`` and
+    ``min_confidence``.
+
+    Returns:
+        A dict with the keys ``ok``, ``text``, ``best_text``, ``raw_text``,
+        ``confidence``, ``votes``, ``variant``, ``rank``, ``reason`` and
+        ``candidates``. The same keys are present whether or not the image
+        could be read.
+
+    Raises:
+        KeyError: If ``image_path`` is missing from ``request``.
+        ValueError: If a numeric option cannot be parsed.
+    """
+    image_path = Path(str(request["image_path"]))
+    target_heights = parse_target_heights(str(request.get("target_heights", "96")))
+    variant_set = str(request.get("variant_set", "fast")).strip().lower()
+    expected_digits = int(request.get("expected_digits", 6))
+    min_votes = int(request.get("min_votes", 2))
+    min_confidence = float(request.get("min_confidence", 0.70))
+
+    image = _read_image(image_path)
+    if image is None:
+        return {
+            "ok": False,
+            "text": "",
+            # best_text and rank are included so that failure replies carry
+            # the same keys as successful ones.
+            "best_text": "",
+            "raw_text": "",
+            "confidence": 0.0,
+            "votes": 0,
+            "variant": "",
+            "rank": 0.0,
+            "reason": f"unreadable_image:{image_path}",
+            "candidates": [],
+        }
+
+    candidates: list[Candidate] = []
+    ocr_variants = filter_variants(build_variants(image, target_heights), variant_set)
+    for variant_name, variant_image in ocr_variants.items():
+        for text, score in predict_one(ocr, variant_image):
+            candidates.append(Candidate(text=text, score=score, variant=variant_name))
+
+    text, confidence, votes, variant, rank = choose_best(candidates, expected_digits)
+    accepted = bool(text) and len(text) == expected_digits and votes >= min_votes and confidence >= min_confidence
+    # Only the 18 best-scoring readings are reported back.
+    top_candidates = sorted(candidates, key=lambda item: item.score, reverse=True)[:18]
+    raw_text = " | ".join(f"{item.text}:{item.score:.3f}:{item.variant}" for item in top_candidates)
+    return {
+        "ok": accepted,
+        "text": text if accepted else "",
+        "best_text": text,
+        "raw_text": raw_text,
+        "confidence": confidence,
+        "votes": votes,
+        "variant": variant,
+        "rank": rank,
+        "reason": "ok" if accepted else "low_consensus_or_invalid_length",
+        "candidates": [
+            {"text": item.text, "score": item.score, "variant": item.variant}
+            for item in top_candidates
+        ],
+    }
+
+
+def main() -> int:
+    """Serve OCR requests read line by line from stdin until EOF or ``__quit__``.
+
+    After the engine is created a ``{"ready": true}`` line is printed. Each
+    subsequent non-blank input line is parsed as a JSON request and answered
+    with exactly one JSON line on stdout, flushed immediately. Any exception
+    raised while parsing or handling a request is reported in the reply's
+    ``reason`` field instead of terminating the loop.
+
+    Returns:
+        Always 0.
+    """
+    ocr = make_ocr()
+    print(json.dumps({"ready": True}), flush=True)
+    for raw_line in sys.stdin:
+        command = raw_line.strip()
+        if command == "__quit__":
+            break
+        if not command:
+            continue
+        try:
+            response = handle_request(ocr, json.loads(command))
+        except Exception as exc:
+            # Top-level boundary: report the failure and keep serving.
+            response = {
+                "ok": False,
+                "text": "",
+                "raw_text": "",
+                "confidence": 0.0,
+                "votes": 0,
+                "variant": "",
+                "reason": f"worker_error:{exc}",
+                "candidates": [],
+            }
+        print(json.dumps(response, ensure_ascii=True), flush=True)
+    return 0
+
+
+# Script entry point: run the worker loop and exit with main()'s return code.
+if __name__ == "__main__":
+    raise SystemExit(main())