# flywms/flywms_paddleocr_worker.py

from __future__ import annotations

import json
import os
import re
import site
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import cv2
import numpy as np

# Default size threshold for the cuda_* helpers below: images smaller than this
# are processed on the CPU instead of being uploaded to the GPU.
CUDA_MIN_PIXELS = 640 * 360


def opencv_cuda_available() -> bool:
    """Report whether this OpenCV build can see at least one CUDA device.

    Returns:
        ``False`` when ``cv2`` has no ``cuda`` attribute, when the device count
        is zero, or when querying the device count raises ``cv2.error``;
        ``True`` otherwise.
    """
    if not hasattr(cv2, "cuda"):
        return False
    try:
        device_count = cv2.cuda.getCudaEnabledDeviceCount()
    except cv2.error:
        return False
    return device_count > 0


# Probed once at import time; the cuda_* helpers read this flag on every call.
OPENCV_CUDA_AVAILABLE = opencv_cuda_available()


def cuda_resize(
    image: np.ndarray,
    size: tuple[int, int],
    interpolation: int = cv2.INTER_LINEAR,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Resize ``image`` to ``size``, using the CUDA path only for large inputs.

    Args:
        image: Source image array (rows, columns, optional channel axis).
        size: Target size passed straight to OpenCV, i.e. ``(width, height)``.
        interpolation: OpenCV interpolation flag.
        min_pixels: Minimum ``rows * columns`` of ``image`` required before the
            GPU path is attempted.

    Returns:
        The resized image as a host array. If CUDA is unavailable, the image is
        below the threshold, or the CUDA call raises ``cv2.error``, the result
        comes from ``cv2.resize`` instead.
    """
    # Count pixels rather than array elements: ``image.size`` also multiplies
    # in the channel count, so a 3-channel image used to cross the threshold at
    # a third of the resolution a single-channel image needed.
    if image.ndim >= 2:
        pixel_count = image.shape[0] * image.shape[1]
    else:
        pixel_count = image.size

    if not OPENCV_CUDA_AVAILABLE or pixel_count < min_pixels:
        return cv2.resize(image, size, interpolation=interpolation)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.resize(gpu, size, interpolation=interpolation).download()
    except cv2.error:
        # Best effort: any CUDA failure falls back to the CPU implementation.
        return cv2.resize(image, size, interpolation=interpolation)


def cuda_cvt_color(
    image: np.ndarray,
    code: int,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Convert ``image`` between colour spaces, using CUDA only for large inputs.

    Args:
        image: Source image array (rows, columns, optional channel axis).
        code: OpenCV colour conversion code (a ``cv2.COLOR_*`` constant).
        min_pixels: Minimum ``rows * columns`` of ``image`` required before the
            GPU path is attempted.

    Returns:
        The converted image as a host array. If CUDA is unavailable, the image
        is below the threshold, or the CUDA call raises ``cv2.error``, the
        result comes from ``cv2.cvtColor`` instead.
    """
    # Count pixels rather than array elements so that grayscale and colour
    # inputs of the same resolution are treated alike; ``image.size`` includes
    # the channel count and made colour images qualify three times sooner.
    if image.ndim >= 2:
        pixel_count = image.shape[0] * image.shape[1]
    else:
        pixel_count = image.size

    if not OPENCV_CUDA_AVAILABLE or pixel_count < min_pixels:
        return cv2.cvtColor(image, code)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.cvtColor(gpu, code).download()
    except cv2.error:
        # Best effort: any CUDA failure falls back to the CPU implementation.
        return cv2.cvtColor(image, code)


@dataclass(frozen=True)
class Candidate:
    """One recognised text read, tagged with the image variant it came from."""

    # Recognised text; handle_request fills this with the digits-only strings
    # returned by extract_candidates.
    text: str
    # Score reported alongside ``text``.
    score: float
    # Name of the preprocessed image variant that produced this read.
    variant: str


def add_nvidia_dll_dirs() -> None:
    """Register each ``nvidia/*/bin`` folder under site-packages as a DLL directory.

    Does nothing unless ``os.name`` is ``"nt"``. Site-packages roots without an
    ``nvidia`` folder are skipped.
    """
    if os.name != "nt":
        return
    nvidia_roots = (Path(entry) / "nvidia" for entry in site.getsitepackages())
    for root in nvidia_roots:
        if not root.exists():
            continue
        for dll_dir in root.glob("*/bin"):
            if dll_dir.exists():
                os.add_dll_directory(str(dll_dir))


def resize_to_height(image: np.ndarray, target_height: int) -> np.ndarray:
    """Scale ``image`` to ``target_height`` rows, keeping the aspect ratio.

    Args:
        image: Source image; only its first two dimensions are inspected.
        target_height: Desired number of rows.

    Returns:
        ``image`` itself when it already has ``target_height`` rows, otherwise
        a cubic-interpolated resize whose width is scaled by the same factor
        (truncated, never below 1).
    """
    current_height, current_width = image.shape[:2]
    if current_height != target_height:
        factor = target_height / max(1, current_height)
        new_width = max(1, int(current_width * factor))
        return cuda_resize(image, (new_width, target_height), interpolation=cv2.INTER_CUBIC)
    return image


def add_border(image: np.ndarray, ratio: float = 0.08) -> np.ndarray:
    """Pad ``image`` on all four sides with a constant white border.

    Args:
        image: Source image; only its first two dimensions are inspected.
        ratio: Fraction of the width/height used as padding on each side.

    Returns:
        The padded image. Each side gets at least 2 pixels of padding.
    """
    height, width = image.shape[:2]
    vertical = max(2, int(height * ratio))
    horizontal = max(2, int(width * ratio))
    white = (255, 255, 255)
    # Argument order is top, bottom, left, right.
    return cv2.copyMakeBorder(
        image, vertical, vertical, horizontal, horizontal, cv2.BORDER_CONSTANT, value=white
    )


def clahe_bgr(image: np.ndarray) -> np.ndarray:
    """Apply CLAHE to the first LAB channel of a BGR image.

    The image is converted BGR -> LAB, the first channel is equalised with
    ``clipLimit=2.0`` and a 4x4 tile grid, and the result is converted back to
    BGR. The other two channels pass through unchanged.
    """
    lab_image = cuda_cvt_color(image, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab_image)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    lightness = equalizer.apply(lightness)
    recombined = cv2.merge([lightness, chroma_a, chroma_b])
    return cuda_cvt_color(recombined, cv2.COLOR_LAB2BGR)


def sharpen(image: np.ndarray, strength: float = 0.7) -> np.ndarray:
    """Sharpen ``image`` by subtracting a weighted Gaussian-blurred copy.

    Computes ``image * (1 + strength) - blurred * strength`` where ``blurred``
    is a Gaussian blur with ``sigmaX=1.0`` (kernel size derived by OpenCV).
    """
    softened = cv2.GaussianBlur(image, (0, 0), sigmaX=1.0)
    image_weight = 1.0 + strength
    blur_weight = -strength
    return cv2.addWeighted(image, image_weight, softened, blur_weight, 0)


def variants_for_height(image: np.ndarray, target_height: int) -> dict[str, np.ndarray]:
    """Build every preprocessing variant of ``image`` at one target height.

    Args:
        image: Source image; it is converted with BGR colour codes, so a
            3-channel BGR array is expected.
        target_height: Height every variant is derived from.

    Returns:
        A dict with the keys ``orig``, ``orig_border``, ``gray``, ``clahe``,
        ``clahe_sharp``, ``adaptive``, ``otsu``, ``otsu_close`` and ``denoise``
        (in that order). Single-channel results are converted back to BGR.
    """

    def as_bgr(single_channel: np.ndarray) -> np.ndarray:
        # Every returned variant is 3-channel, including the thresholded ones.
        return cuda_cvt_color(single_channel, cv2.COLOR_GRAY2BGR)

    resized = resize_to_height(image, target_height)
    grayscale = cuda_cvt_color(resized, cv2.COLOR_BGR2GRAY)
    contrast = clahe_bgr(resized)

    adaptive_mask = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        17,
        5,
    )
    _, otsu_mask = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    closed_mask = cv2.morphologyEx(otsu_mask, cv2.MORPH_CLOSE, closing_kernel, iterations=1)
    denoised = cv2.fastNlMeansDenoising(grayscale, h=7, templateWindowSize=7, searchWindowSize=21)

    # Insertion order matters to callers that iterate the dict, so keep it fixed.
    variants: dict[str, np.ndarray] = {}
    variants["orig"] = resized
    variants["orig_border"] = add_border(resized)
    variants["gray"] = as_bgr(grayscale)
    variants["clahe"] = contrast
    variants["clahe_sharp"] = sharpen(contrast)
    variants["adaptive"] = as_bgr(adaptive_mask)
    variants["otsu"] = as_bgr(otsu_mask)
    variants["otsu_close"] = as_bgr(closed_mask)
    variants["denoise"] = as_bgr(denoised)
    return variants


def build_variants(image: np.ndarray, target_heights: list[int]) -> dict[str, np.ndarray]:
    """Collect the variants of ``image`` for every requested height.

    Returns:
        A dict keyed ``h{height}_{variant_name}``, ordered by height and then
        by the order ``variants_for_height`` yields. A repeated height simply
        overwrites its earlier entries.
    """
    return {
        f"h{height}_{name}": variant
        for height in target_heights
        for name, variant in variants_for_height(image, height).items()
    }


def filter_variants(all_variants: dict[str, np.ndarray], variant_set: str) -> dict[str, np.ndarray]:
    """Select the subset of variants named by ``variant_set``.

    Args:
        all_variants: Mapping of variant name to image.
        variant_set: ``"full"`` returns ``all_variants`` unchanged (same
            object); ``"balanced"`` keeps five variant kinds; any other value
            keeps the three "fast" kinds.

    Returns:
        The entries whose name ends with ``_<kind>`` for one of the kept kinds.
    """
    if variant_set == "full":
        return all_variants
    if variant_set == "balanced":
        wanted = ("orig", "orig_border", "clahe", "clahe_sharp", "adaptive")
    else:
        wanted = ("orig", "clahe", "clahe_sharp")
    # str.endswith accepts a tuple, so one call tests every allowed suffix.
    suffixes = tuple(f"_{kind}" for kind in wanted)
    return {key: picture for key, picture in all_variants.items() if key.endswith(suffixes)}


def extract_candidates(result: Any) -> list[tuple[str, float]]:
    """Collect digit strings and their best scores from an OCR result.

    The result is walked recursively and text/score pairs are picked up from
    these shapes:

    * dicts with parallel ``rec_texts`` / ``rec_scores`` sequences;
    * dicts with a ``rec_text`` or ``text`` entry (score from ``rec_score`` or
      ``score``);
    * sequences shaped ``[anything, (text, score, ...)]``;
    * bare ``(text, score)`` pairs, which is how recognition-only output is
      nested (previously these were skipped, so a single recognised line
      produced no candidate at all).

    Scores that cannot be converted to ``float`` no longer abort extraction:
    dict entries fall back to ``0.0`` and sequence pairs are skipped.

    Args:
        result: Arbitrary nested structure of dicts, lists and tuples.

    Returns:
        ``(digits, score)`` tuples, one per distinct digit string, sorted by
        score descending. Non-digit characters are stripped from each text and
        texts without digits are dropped.
    """
    found: list[tuple[str, float]] = []

    def lenient_score(raw: Any) -> float:
        """Convert a dict-supplied score, defaulting to 0.0 when unusable."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    def add_pair(text: Any, raw_score: Any) -> None:
        """Record ``(text, score)`` if it really is a string plus a number."""
        # Strings are rejected as scores so that a two-element list of texts
        # is not mistaken for a (text, score) pair.
        if not isinstance(text, str) or raw_score is None:
            return
        if isinstance(raw_score, (str, bytes, bool)):
            return
        try:
            found.append((text, float(raw_score)))
        except (TypeError, ValueError):
            return

    def score_list(raw: Any) -> list[Any]:
        """Materialise ``rec_scores`` from any iterable (list, tuple, array)."""
        if raw is None or isinstance(raw, (str, bytes, dict)):
            return []
        try:
            return list(raw)
        except TypeError:
            return []

    def walk(value: Any) -> None:
        if isinstance(value, dict):
            texts = value.get("rec_texts")
            if isinstance(texts, (list, tuple)):
                scores = score_list(value.get("rec_scores"))
                for idx, text in enumerate(texts):
                    if isinstance(text, str):
                        raw = scores[idx] if idx < len(scores) else 0.0
                        found.append((text, lenient_score(raw)))
            text = value.get("rec_text") or value.get("text")
            if isinstance(text, str):
                raw = value.get("rec_score") or value.get("score")
                found.append((text, lenient_score(raw) if raw is not None else 0.0))
            for child in value.values():
                walk(child)
            return
        if isinstance(value, (list, tuple)):
            if len(value) == 2:
                # Bare recognition pair: (text, score).
                add_pair(value[0], value[1])
            if len(value) >= 2 and isinstance(value[1], (list, tuple)) and len(value[1]) >= 2:
                # Detection-style line: [box, (text, score)].
                add_pair(value[1][0], value[1][1])
            for child in value:
                walk(child)

    walk(result)

    # The same text can be reached through more than one rule above; keeping
    # the maximum score per digit string makes those duplicates harmless.
    best_by_digits: dict[str, float] = {}
    for text, score in found:
        digits = re.sub(r"\D+", "", text)
        if not digits:
            continue
        best_by_digits[digits] = max(score, best_by_digits.get(digits, 0.0))
    return sorted(best_by_digits.items(), key=lambda item: item[1], reverse=True)


def choose_best(candidates: list[Candidate], expected_digits: int) -> tuple[str, float, int, str, float]:
    """Pick the best text by confidence, length match and consensus.

    Candidates are grouped by text. Each group is ranked as its highest score,
    plus 0.35 when the text has exactly ``expected_digits`` characters, plus a
    consensus bonus (0.035 per vote capped at 0.30, and 0.025 per distinct
    variant capped at 0.20), minus 0.35 per character of length mismatch.

    Args:
        candidates: Reads to choose from.
        expected_digits: Text length that earns the exact-length bonus.

    Returns:
        ``(text, score, votes, variant, rank)`` for the top-ranked group, where
        ``score`` and ``variant`` come from that group's highest-scoring read.
        ``("", 0.0, 0, "", 0.0)`` when ``candidates`` is empty. Ties keep the
        group seen first.
    """
    if not candidates:
        return "", 0.0, 0, "", 0.0

    by_text: dict[str, list[Candidate]] = {}
    for read in candidates:
        by_text.setdefault(read.text, []).append(read)

    # Starts at the sentinel rank; a group must beat it strictly to be chosen.
    winner: tuple[str, float, int, str, float] = ("", 0.0, 0, "", -999.0)
    for digits, members in by_text.items():
        strongest = max(members, key=lambda member: member.score)
        vote_count = len(members)
        variant_count = len({member.variant for member in members})

        mismatch = abs(len(digits) - expected_digits)
        length_penalty = mismatch * 0.35
        exact_bonus = 0.35 if mismatch == 0 else 0.0
        consensus_bonus = min(0.30, vote_count * 0.035) + min(0.20, variant_count * 0.025)

        rank = strongest.score + exact_bonus + consensus_bonus - length_penalty
        if rank > winner[4]:
            winner = (digits, strongest.score, vote_count, strongest.variant, rank)
    return winner


def parse_target_heights(raw: str) -> list[int]:
    """Parse a comma-separated list of integer heights.

    Args:
        raw: Text such as ``"96,128"``; surrounding whitespace and empty
            fields are ignored.

    Returns:
        The parsed heights in order, or ``[96]`` when none were given.

    Raises:
        ValueError: If a non-empty field is not an integer.
    """
    fields = (piece.strip() for piece in raw.split(","))
    heights = [int(field) for field in fields if field]
    return heights if heights else [96]


def make_ocr():
    """Create the PaddleOCR engine used by the worker.

    NVIDIA DLL directories are registered first, and ``paddleocr`` is imported
    only afterwards, inside this function.
    """
    add_nvidia_dll_dirs()
    from paddleocr import PaddleOCR

    engine_options = {
        "lang": "en",
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
        "text_rec_score_thresh": 0.0,
    }
    return PaddleOCR(**engine_options)


def predict_one(ocr: Any, image: np.ndarray) -> list[tuple[str, float]]:
    """Run OCR on one image and return its digit candidates.

    Uses ``ocr.predict(image)`` when the engine has a ``predict`` attribute,
    otherwise ``ocr.ocr(image, det=False, cls=False)``. The raw result is
    reduced by ``extract_candidates``.
    """
    raw_result = (
        ocr.predict(image)
        if hasattr(ocr, "predict")
        else ocr.ocr(image, det=False, cls=False)
    )
    return extract_candidates(raw_result)


def handle_request(ocr: Any, request: dict[str, Any]) -> dict[str, Any]:
    """Run OCR over every selected variant of one image and pick the best read.

    Request keys:
        image_path: Path of the image to read (required).
        target_heights: Comma-separated heights, default ``"96"``.
        variant_set: ``"full"``, ``"balanced"`` or anything else for the fast
            set; default ``"fast"``.
        expected_digits: Required text length, default 6.
        min_votes: Minimum number of reads agreeing on the text, default 2.
        min_confidence: Minimum best score for the text, default 0.70.

    Returns:
        A response dict. ``ok`` is true only when the best text has exactly
        ``expected_digits`` characters and meets both thresholds; ``text`` is
        empty otherwise while ``best_text`` still carries the top-ranked text.
        Up to 18 highest-scoring reads are echoed in ``raw_text`` and
        ``candidates``. An unreadable image yields ``ok: False`` with reason
        ``unreadable_image:<path>`` and the same set of keys.

    Raises:
        KeyError: If ``image_path`` is missing.
        ValueError: If a numeric option cannot be parsed.
    """
    image_path = Path(str(request["image_path"]))
    target_heights = parse_target_heights(str(request.get("target_heights", "96")))
    variant_set = str(request.get("variant_set", "fast")).strip().lower()
    expected_digits = int(request.get("expected_digits", 6))
    min_votes = int(request.get("min_votes", 2))
    min_confidence = float(request.get("min_confidence", 0.70))

    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread cannot open some non-ASCII paths on Windows. Reading the
        # bytes through Python and decoding them in memory sidesteps that.
        try:
            encoded = np.frombuffer(image_path.read_bytes(), dtype=np.uint8)
        except OSError:
            encoded = np.empty(0, dtype=np.uint8)
        if encoded.size:
            try:
                image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
            except cv2.error:
                image = None
    if image is None:
        return {
            "ok": False,
            "text": "",
            # best_text and rank are included so that this reply has the same
            # keys as the normal one.
            "best_text": "",
            "raw_text": "",
            "confidence": 0.0,
            "votes": 0,
            "variant": "",
            "rank": 0.0,
            "reason": f"unreadable_image:{image_path}",
            "candidates": [],
        }

    candidates: list[Candidate] = []
    ocr_variants = filter_variants(build_variants(image, target_heights), variant_set)
    for variant_name, variant_image in ocr_variants.items():
        for text, score in predict_one(ocr, variant_image):
            candidates.append(Candidate(text=text, score=score, variant=variant_name))

    text, confidence, votes, variant, rank = choose_best(candidates, expected_digits)
    accepted = bool(text) and len(text) == expected_digits and votes >= min_votes and confidence >= min_confidence

    # Only the highest-scoring reads are echoed back, to keep replies small.
    top_reads = sorted(candidates, key=lambda item: item.score, reverse=True)[:18]
    raw_text = " | ".join(f"{item.text}:{item.score:.3f}:{item.variant}" for item in top_reads)
    return {
        "ok": accepted,
        "text": text if accepted else "",
        "best_text": text,
        "raw_text": raw_text,
        "confidence": confidence,
        "votes": votes,
        "variant": variant,
        "rank": rank,
        "reason": "ok" if accepted else "low_consensus_or_invalid_length",
        "candidates": [
            {"text": item.text, "score": item.score, "variant": item.variant}
            for item in top_reads
        ],
    }


def main() -> int:
    """Serve OCR requests over stdin/stdout, one JSON document per line.

    Prints ``{"ready": true}`` once the engine is built, then answers each
    non-empty input line with one JSON reply. The line ``__quit__`` ends the
    loop. Any exception while parsing or handling a request is reported as a
    ``worker_error:<message>`` reply instead of stopping the worker.

    Returns:
        Always 0.
    """
    engine = make_ocr()
    print(json.dumps({"ready": True}), flush=True)
    for raw_line in sys.stdin:
        command = raw_line.strip()
        if command == "__quit__":
            break
        if not command:
            continue
        try:
            reply = handle_request(engine, json.loads(command))
        except Exception as exc:
            # Top-level boundary: one bad request must not kill the worker.
            reply = {
                "ok": False,
                "text": "",
                "raw_text": "",
                "confidence": 0.0,
                "votes": 0,
                "variant": "",
                "reason": f"worker_error:{exc}",
                "candidates": [],
            }
        print(json.dumps(reply, ensure_ascii=True), flush=True)
    return 0


# Run the worker loop when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())