pipeline in linea single thread

This commit is contained in:
administrator
2026-05-19 08:52:44 +02:00
parent 98b43ce903
commit f728524ee6
43 changed files with 5245 additions and 154 deletions

340
flywms_paddleocr_worker.py Normal file
View File

@@ -0,0 +1,340 @@
from __future__ import annotations
import json
import os
import re
import site
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import cv2
import numpy as np
# Default size threshold for the CUDA helpers (their ``min_pixels`` argument):
# smaller images stay on the CPU path. Equals the pixel count of a 640x360 frame.
CUDA_MIN_PIXELS = 640 * 360
def opencv_cuda_available() -> bool:
    """Report whether this OpenCV build can see at least one CUDA device.

    Returns:
        True when ``cv2`` exposes a ``cuda`` submodule that reports one or
        more CUDA-enabled devices; False otherwise, including when the device
        query raises ``cv2.error``.
    """
    if not hasattr(cv2, "cuda"):
        return False
    try:
        device_count = cv2.cuda.getCudaEnabledDeviceCount()
    except cv2.error:
        return False
    return device_count > 0
# Probed once at import time; the CUDA helpers consult this flag on every call.
OPENCV_CUDA_AVAILABLE = opencv_cuda_available()
def cuda_resize(
    image: np.ndarray,
    size: tuple[int, int],
    interpolation: int = cv2.INTER_LINEAR,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Resize ``image``, using OpenCV's CUDA module for large inputs.

    Args:
        image: Source image array.
        size: Target size, passed unchanged to ``cv2.resize`` /
            ``cv2.cuda.resize``.
        interpolation: OpenCV interpolation flag.
        min_pixels: Images with fewer pixels (height * width) than this are
            resized on the CPU.

    Returns:
        The resized image as a host (NumPy) array.
    """
    # ``ndarray.size`` counts every element, so a 3-channel frame would look
    # three times larger than it is; compare the real pixel count instead.
    if image.ndim >= 2:
        pixel_count = image.shape[0] * image.shape[1]
    else:
        pixel_count = image.size
    if not OPENCV_CUDA_AVAILABLE or pixel_count < min_pixels:
        return cv2.resize(image, size, interpolation=interpolation)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.resize(gpu, size, interpolation=interpolation).download()
    except cv2.error:
        # CUDA is a best-effort accelerator: any GPU failure falls back to CPU.
        return cv2.resize(image, size, interpolation=interpolation)
def cuda_cvt_color(
    image: np.ndarray,
    code: int,
    min_pixels: int = CUDA_MIN_PIXELS,
) -> np.ndarray:
    """Convert ``image`` between colour spaces, on the GPU for large inputs.

    Args:
        image: Source image array.
        code: OpenCV colour-conversion code (a ``cv2.COLOR_*`` constant).
        min_pixels: Images with fewer pixels (height * width) than this are
            converted on the CPU.

    Returns:
        The converted image as a host (NumPy) array.
    """
    # ``ndarray.size`` counts every element, so a 3-channel frame would look
    # three times larger than it is; compare the real pixel count instead.
    if image.ndim >= 2:
        pixel_count = image.shape[0] * image.shape[1]
    else:
        pixel_count = image.size
    if not OPENCV_CUDA_AVAILABLE or pixel_count < min_pixels:
        return cv2.cvtColor(image, code)
    try:
        gpu = cv2.cuda_GpuMat()
        gpu.upload(image)
        return cv2.cuda.cvtColor(gpu, code).download()
    except cv2.error:
        # CUDA is a best-effort accelerator: any GPU failure falls back to CPU.
        return cv2.cvtColor(image, code)
@dataclass(frozen=True)
class Candidate:
    """One OCR reading obtained from a single preprocessed image variant."""

    # Recognised text; handle_request fills this with the digits-only string
    # returned by extract_candidates.
    text: str
    # Recognition score reported for ``text``.
    score: float
    # Name of the image variant (key of the variants dict) that produced it.
    variant: str
def add_nvidia_dll_dirs() -> None:
    """Register NVIDIA ``bin`` folders found in site-packages as DLL paths.

    Does nothing unless ``os.name`` is ``"nt"``. For every site-packages
    directory that contains an ``nvidia`` folder, each existing
    ``nvidia/*/bin`` path is handed to ``os.add_dll_directory``.
    """
    if os.name == "nt":
        nvidia_roots = (Path(entry) / "nvidia" for entry in site.getsitepackages())
        for root in nvidia_roots:
            if root.exists():
                for dll_dir in root.glob("*/bin"):
                    if dll_dir.exists():
                        os.add_dll_directory(str(dll_dir))
def resize_to_height(image: np.ndarray, target_height: int) -> np.ndarray:
    """Scale ``image`` to ``target_height`` rows, keeping its aspect ratio.

    Args:
        image: Source image; only its first two shape entries are read.
        target_height: Desired height in pixels.

    Returns:
        ``image`` itself when it already has the requested height, otherwise
        a cubic-interpolated resize whose width is at least one pixel.
    """
    src_height, src_width = image.shape[:2]
    if src_height != target_height:
        factor = target_height / max(1, src_height)
        new_width = max(1, int(src_width * factor))
        return cuda_resize(image, (new_width, target_height), interpolation=cv2.INTER_CUBIC)
    return image
def add_border(image: np.ndarray, ratio: float = 0.08) -> np.ndarray:
    """Surround ``image`` with a constant white margin.

    Args:
        image: Source image.
        ratio: Margin size as a fraction of the matching image dimension;
            each margin is at least two pixels wide.

    Returns:
        A new image padded equally on opposite sides.
    """
    height, width = image.shape[:2]
    pad_vertical = max(2, int(height * ratio))
    pad_horizontal = max(2, int(width * ratio))
    white = (255, 255, 255)
    return cv2.copyMakeBorder(
        image,
        pad_vertical,
        pad_vertical,
        pad_horizontal,
        pad_horizontal,
        cv2.BORDER_CONSTANT,
        value=white,
    )
def clahe_bgr(image: np.ndarray) -> np.ndarray:
    """Apply CLAHE to the lightness channel of a BGR image.

    The image is converted to LAB, CLAHE (clip limit 2.0, 4x4 tiles) is
    applied to the first channel only, and the result is converted back to
    BGR.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    lab_image = cuda_cvt_color(image, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab_image)
    enhanced_lab = cv2.merge([equalizer.apply(lightness), chroma_a, chroma_b])
    return cuda_cvt_color(enhanced_lab, cv2.COLOR_LAB2BGR)
def sharpen(image: np.ndarray, strength: float = 0.7) -> np.ndarray:
    """Sharpen ``image`` by subtracting a weighted Gaussian blur of itself.

    Args:
        image: Source image.
        strength: Weight of the blurred copy that is subtracted; the source
            is weighted by ``1.0 + strength``.

    Returns:
        The weighted combination computed by ``cv2.addWeighted``.
    """
    softened = cv2.GaussianBlur(image, (0, 0), sigmaX=1.0)
    image_weight = 1.0 + strength
    blur_weight = -strength
    return cv2.addWeighted(image, image_weight, softened, blur_weight, 0)
def variants_for_height(image: np.ndarray, target_height: int) -> dict[str, np.ndarray]:
    """Build every preprocessing variant of ``image`` at one target height.

    Args:
        image: Source image; it is converted with BGR colour codes.
        target_height: Height every variant is derived from.

    Returns:
        A dict mapping variant name to image, in this order: ``orig``,
        ``orig_border``, ``gray``, ``clahe``, ``clahe_sharp``, ``adaptive``,
        ``otsu``, ``otsu_close``, ``denoise``. Single-channel results are
        converted back to three channels before being returned.
    """

    def as_bgr(single_channel: np.ndarray) -> np.ndarray:
        return cuda_cvt_color(single_channel, cv2.COLOR_GRAY2BGR)

    resized = resize_to_height(image, target_height)
    grayscale = cuda_cvt_color(resized, cv2.COLOR_BGR2GRAY)
    equalized = clahe_bgr(resized)

    adaptive_binary = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        17,
        5,
    )
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, otsu_binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    closed_binary = cv2.morphologyEx(otsu_binary, cv2.MORPH_CLOSE, closing_kernel, iterations=1)
    denoised = cv2.fastNlMeansDenoising(grayscale, h=7, templateWindowSize=7, searchWindowSize=21)

    # Insertion order matters to callers that iterate the dict, so keep it fixed.
    variants: dict[str, np.ndarray] = {}
    variants["orig"] = resized
    variants["orig_border"] = add_border(resized)
    variants["gray"] = as_bgr(grayscale)
    variants["clahe"] = equalized
    variants["clahe_sharp"] = sharpen(equalized)
    variants["adaptive"] = as_bgr(adaptive_binary)
    variants["otsu"] = as_bgr(otsu_binary)
    variants["otsu_close"] = as_bgr(closed_binary)
    variants["denoise"] = as_bgr(denoised)
    return variants
def build_variants(image: np.ndarray, target_heights: list[int]) -> dict[str, np.ndarray]:
    """Build the preprocessing variants of ``image`` for every target height.

    Args:
        image: Source image.
        target_heights: Heights to generate variants for.

    Returns:
        A dict keyed ``h<height>_<variant name>``, ordered by height and then
        by the order ``variants_for_height`` returns its variants.
    """
    return {
        f"h{height}_{name}": variant
        for height in target_heights
        for name, variant in variants_for_height(image, height).items()
    }
def filter_variants(all_variants: dict[str, np.ndarray], variant_set: str) -> dict[str, np.ndarray]:
    """Keep only the variants that belong to the requested preset.

    Args:
        all_variants: Variants keyed by names that end in ``_<variant name>``.
        variant_set: ``"full"`` keeps everything, ``"balanced"`` keeps five
            variant kinds, and any other value selects the three-kind fast
            preset.

    Returns:
        ``all_variants`` itself for ``"full"``; otherwise a new dict with the
        matching entries in their original order.
    """
    if variant_set == "full":
        return all_variants
    if variant_set == "balanced":
        keep = ("orig", "orig_border", "clahe", "clahe_sharp", "adaptive")
    else:
        keep = ("orig", "clahe", "clahe_sharp")
    # str.endswith accepts a tuple, so one call tests every allowed suffix.
    endings = tuple(f"_{label}" for label in keep)
    selected: dict[str, np.ndarray] = {}
    for key, picture in all_variants.items():
        if key.endswith(endings):
            selected[key] = picture
    return selected
def extract_candidates(result: Any) -> list[tuple[str, float]]:
    """Collect digit-only readings from an OCR result of any supported shape.

    The result is walked recursively and text/score pairs are picked up from:

    * dicts holding parallel ``rec_texts`` / ``rec_scores`` sequences,
    * dicts holding a single ``rec_text``/``text`` with ``rec_score``/``score``,
    * ``[anything, (text, score)]`` entries,
    * bare ``(text, score)`` pairs, as found in recognition-only output.

    Args:
        result: Whatever the OCR call returned (nested dicts, lists, tuples).

    Returns:
        ``(digits, score)`` pairs, one per distinct digit string, keeping the
        highest score seen for each and sorted by score in descending order.
        Texts that contain no digits are dropped. Missing or malformed scores
        count as 0.0.
    """
    candidates: list[tuple[str, float]] = []
    numeric_types = (int, float, np.integer, np.floating)

    def as_score(raw: Any) -> float:
        # One missing or malformed score must not abort the whole request.
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    def walk(value: Any) -> None:
        if value is None:
            return
        if isinstance(value, dict):
            rec_texts = value.get("rec_texts")
            rec_scores = value.get("rec_scores")
            if isinstance(rec_texts, (list, tuple)):
                for idx, text in enumerate(rec_texts):
                    if not isinstance(text, str):
                        continue
                    # Scores may arrive as a list or as an array, so index
                    # defensively instead of insisting on ``list``.
                    try:
                        raw_score = rec_scores[idx]
                    except (TypeError, IndexError, KeyError):
                        raw_score = 0.0
                    candidates.append((text, as_score(raw_score)))
            text = value.get("rec_text") or value.get("text")
            score = value.get("rec_score") or value.get("score")
            if isinstance(text, str):
                candidates.append((text, as_score(score)))
            for child in value.values():
                walk(child)
            return
        if isinstance(value, (list, tuple)):
            if len(value) >= 2 and isinstance(value[1], tuple) and len(value[1]) >= 2:
                text, score = value[1][0], value[1][1]
                if isinstance(text, str):
                    candidates.append((text, as_score(score)))
            # Recognition-only output is a bare (text, score) pair with no
            # enclosing [box, ...] wrapper. A pair seen through both shapes is
            # harmless because the de-duplication below keeps one entry.
            if (
                len(value) == 2
                and isinstance(value[0], str)
                and isinstance(value[1], numeric_types)
                and not isinstance(value[1], bool)
            ):
                candidates.append((value[0], float(value[1])))
            for child in value:
                walk(child)

    walk(result)

    # Reduce each reading to its digits and keep the best score per string.
    best_by_digits: dict[str, float] = {}
    for text, score in candidates:
        digits = re.sub(r"\D+", "", text)
        if digits:
            best_by_digits[digits] = max(score, best_by_digits.get(digits, 0.0))
    return sorted(best_by_digits.items(), key=lambda item: item[1], reverse=True)
def choose_best(candidates: list[Candidate], expected_digits: int) -> tuple[str, float, int, str, float]:
    """Pick the reading with the best rank among all candidates.

    Candidates are grouped by text. Each group's rank is its highest score,
    plus 0.35 when the text has exactly ``expected_digits`` characters, plus a
    consensus bonus (0.035 per vote capped at 0.30, and 0.025 per distinct
    variant capped at 0.20), minus 0.35 per character of length mismatch.

    Args:
        candidates: Readings to choose from.
        expected_digits: Length a correct reading should have.

    Returns:
        ``(text, best score, votes, variant of the best-scoring reading,
        rank)`` for the winning group; ``("", 0.0, 0, "", 0.0)`` when there
        are no candidates. On equal rank the group seen first wins.
    """
    if not candidates:
        return "", 0.0, 0, "", 0.0

    by_text: dict[str, list[Candidate]] = {}
    for entry in candidates:
        if entry.text not in by_text:
            by_text[entry.text] = []
        by_text[entry.text].append(entry)

    winner = ("", 0.0, 0, "", -999.0)
    for reading, members in by_text.items():
        strongest = max(members, key=lambda item: item.score)
        vote_count = len(members)
        distinct_variants = len({item.variant for item in members})
        length_gap = abs(len(reading) - expected_digits)
        length_penalty = length_gap * 0.35
        exact_bonus = 0.0 if length_gap else 0.35
        consensus_bonus = min(0.30, vote_count * 0.035) + min(0.20, distinct_variants * 0.025)
        rank = strongest.score + exact_bonus + consensus_bonus - length_penalty
        # Strict comparison keeps the earliest group when ranks tie.
        if rank > winner[4]:
            winner = (reading, strongest.score, vote_count, strongest.variant, rank)
    return winner
def parse_target_heights(raw: str) -> list[int]:
    """Parse a comma-separated list of heights.

    Args:
        raw: Text such as ``"96, 128"``; blank items are ignored.

    Returns:
        The parsed integers in input order, or ``[96]`` when none were given.

    Raises:
        ValueError: If a non-blank item is not an integer.
    """
    pieces = (chunk.strip() for chunk in raw.split(","))
    heights = [int(chunk) for chunk in pieces if chunk]
    return heights if heights else [96]
def make_ocr():
    """Create the PaddleOCR engine used by the worker.

    The NVIDIA DLL directories are registered first and ``paddleocr`` is
    imported only afterwards, inside this function. The engine is built for
    ``lang="en"`` with document orientation classification, document
    unwarping and text-line orientation all switched off, and with
    ``text_rec_score_thresh`` set to 0.0.
    """
    add_nvidia_dll_dirs()
    from paddleocr import PaddleOCR

    settings = {
        "lang": "en",
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
        "text_rec_score_thresh": 0.0,
    }
    return PaddleOCR(**settings)
def predict_one(ocr: Any, image: np.ndarray) -> list[tuple[str, float]]:
    """Run the OCR engine on one image and return its digit candidates.

    Uses ``ocr.predict`` when the engine has that attribute and otherwise
    falls back to ``ocr.ocr(image, det=False, cls=False)``. The raw result is
    reduced by ``extract_candidates``.
    """
    raw_result = (
        ocr.predict(image)
        if hasattr(ocr, "predict")
        else ocr.ocr(image, det=False, cls=False)
    )
    return extract_candidates(raw_result)
def _read_image(image_path: Path) -> np.ndarray | None:
    """Load a BGR image, tolerating paths that ``cv2.imread`` cannot open."""
    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if image is not None:
        return image
    # cv2.imread returns None instead of raising when it cannot open a path,
    # which notably happens for non-ASCII paths on Windows. Reading the bytes
    # through NumPy and decoding in memory sidesteps that limitation.
    try:
        raw = np.fromfile(str(image_path), dtype=np.uint8)
    except OSError:
        return None
    if raw.size == 0:
        return None
    try:
        return cv2.imdecode(raw, cv2.IMREAD_COLOR)
    except cv2.error:
        return None


def handle_request(ocr: Any, request: dict[str, Any]) -> dict[str, Any]:
    """Run multi-variant OCR on one image and return the consensus reading.

    Args:
        ocr: OCR engine passed through to ``predict_one``.
        request: Request fields:
            ``image_path`` (required), ``target_heights`` (comma-separated
            string or a list of heights, default ``"96"``), ``variant_set``
            (default ``"fast"``), ``expected_digits`` (default 6),
            ``min_votes`` (default 2), ``min_confidence`` (default 0.70).

    Returns:
        A response dict. ``ok`` is True only when the best reading is
        non-empty, has exactly ``expected_digits`` characters, and reaches
        both ``min_votes`` and ``min_confidence``; ``text`` is filled only in
        that case, while ``best_text`` always carries the best reading.
        ``candidates`` and ``raw_text`` list at most the 18 highest-scoring
        readings. An unreadable image yields ``ok=False`` with reason
        ``unreadable_image:<path>``.

    Raises:
        KeyError: If ``image_path`` is missing.
        ValueError: If a numeric field cannot be parsed.
    """
    image_path = Path(str(request["image_path"]))
    raw_heights = request.get("target_heights", "96")
    if isinstance(raw_heights, (list, tuple)):
        # A JSON array would stringify to "[96, 128]", which cannot be parsed;
        # normalise it to the comma-separated form the parser expects.
        raw_heights = ",".join(str(item) for item in raw_heights)
    target_heights = parse_target_heights(str(raw_heights))
    variant_set = str(request.get("variant_set", "fast")).strip().lower()
    expected_digits = int(request.get("expected_digits", 6))
    min_votes = int(request.get("min_votes", 2))
    min_confidence = float(request.get("min_confidence", 0.70))

    image = _read_image(image_path)
    if image is None:
        return {
            "ok": False,
            "text": "",
            "raw_text": "",
            "confidence": 0.0,
            "votes": 0,
            "variant": "",
            "reason": f"unreadable_image:{image_path}",
            "candidates": [],
        }

    candidates: list[Candidate] = []
    ocr_variants = filter_variants(build_variants(image, target_heights), variant_set)
    for variant_name, variant_image in ocr_variants.items():
        for text, score in predict_one(ocr, variant_image):
            candidates.append(Candidate(text=text, score=score, variant=variant_name))

    text, confidence, votes, variant, rank = choose_best(candidates, expected_digits)
    accepted = (
        bool(text)
        and len(text) == expected_digits
        and votes >= min_votes
        and confidence >= min_confidence
    )
    # Only the strongest readings are reported back, to keep responses small.
    sorted_candidates = sorted(candidates, key=lambda item: item.score, reverse=True)
    top_candidates = sorted_candidates[:18]
    raw_text = " | ".join(f"{item.text}:{item.score:.3f}:{item.variant}" for item in top_candidates)
    return {
        "ok": accepted,
        "text": text if accepted else "",
        "best_text": text,
        "raw_text": raw_text,
        "confidence": confidence,
        "votes": votes,
        "variant": variant,
        "rank": rank,
        "reason": "ok" if accepted else "low_consensus_or_invalid_length",
        "candidates": [
            {"text": item.text, "score": item.score, "variant": item.variant}
            for item in top_candidates
        ],
    }
def main() -> int:
    """Serve OCR requests over stdin/stdout, one JSON document per line.

    After the OCR engine is built a ``{"ready": true}`` line is printed. Each
    following non-blank stdin line is parsed as a JSON request and answered
    with exactly one JSON line; the line ``__quit__`` ends the loop. Any
    exception raised while parsing or handling a request is reported as a
    failed response whose reason is ``worker_error:<message>``.

    Returns:
        Always 0.
    """
    ocr = make_ocr()
    print(json.dumps({"ready": True}), flush=True)
    for raw_line in sys.stdin:
        message = raw_line.strip()
        if message == "__quit__":
            break
        if not message:
            continue
        try:
            reply = handle_request(ocr, json.loads(message))
        except Exception as exc:
            # Top-level boundary: one bad request must not kill the worker.
            reply = dict(
                ok=False,
                text="",
                raw_text="",
                confidence=0.0,
                votes=0,
                variant="",
                reason=f"worker_error:{exc}",
                candidates=[],
            )
        print(json.dumps(reply, ensure_ascii=True), flush=True)
    return 0
# Script entry point: exit the process with main()'s return value as status.
if __name__ == "__main__":
    raise SystemExit(main())