1645 lines
58 KiB
Python
1645 lines
58 KiB
Python
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import threading
|
|
import time
|
|
from collections import deque
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
@dataclass(frozen=True)
class FramePacket:
    """Immutable bundle of one frame image and its metadata.

    The pixel buffer is excluded from the generated ``__eq__``/``__hash__``:
    comparing or hashing a NumPy array through the dataclass machinery raises
    (ambiguous truth value / unhashable type), so packets are compared on
    their scalar fields only.
    """

    frame_id: int
    timestamp: float
    # Image data; left out of equality and hashing (see class docstring).
    frame: np.ndarray = field(compare=False)
    width: int
    height: int
    # Free-form description of where the frame came from.
    source: str
|
|
|
|
|
|
@dataclass(frozen=True)
class Detection:
    """Immutable description of a single detected object."""

    # Numeric class index and its human-readable name.
    class_id: int
    class_name: str
    # Score associated with the detection.
    confidence: float
    # Box corners stored as a 4-tuple of ints.
    bbox: tuple[int, int, int, int]
|
|
|
|
|
|
@dataclass(frozen=True)
class DetectionResult:
    """Immutable outcome of one detection pass over a frame."""

    # Identity of the frame the detections belong to.
    frame_id: int
    timestamp: float
    # Detections found in that frame.
    detections: list[Detection]
    # Timing figures (the ``_ms`` suffix suggests milliseconds).
    inference_ms: float
    blob_ms: float
    forward_ms: float
    parse_ms: float
    # Dimensions of the frame that was analysed.
    source_width: int
    source_height: int
|
|
|
|
|
|
@dataclass(frozen=True)
class RoiPacket:
    """Immutable bundle of one cropped region of interest and its metadata.

    ``roi_image`` is excluded from the generated ``__eq__``/``__hash__``
    because comparing or hashing a NumPy array through the dataclass
    machinery raises; packets are compared on their scalar fields only.
    """

    roi_id: int
    # Id of the frame this region was taken from.
    source_frame_id: int
    timestamp: float
    class_name: str
    confidence: float
    # Box corners stored as a 4-tuple of ints.
    bbox: tuple[int, int, int, int]
    # Cropped pixels; left out of equality and hashing (see class docstring).
    roi_image: np.ndarray = field(compare=False)
    width: int
    height: int
|
|
|
|
|
|
@dataclass(frozen=True)
class OcrResult:
    """Immutable record of one OCR reading tied to a region and its frame."""

    roi_id: int
    source_frame_id: int
    timestamp: float
    # Reading kept for consumers, alongside the unfiltered engine output.
    text: str
    raw_text: str
    # Box corners stored as a 4-tuple of ints.
    bbox: tuple[int, int, int, int]
    # Time spent on OCR (the ``_ms`` suffix suggests milliseconds).
    ocr_ms: float
|
|
|
|
|
|
@dataclass(frozen=True)
class OcrEngineResult:
    """Immutable output of an OCR engine call.

    The two image fields are excluded from the generated
    ``__eq__``/``__hash__`` because comparing or hashing NumPy arrays through
    the dataclass machinery raises; results are compared on their text only.
    """

    # Digits-only text extracted from ``raw_text``.
    digits: str
    # Preprocessed image produced for the reading.
    processed: np.ndarray = field(compare=False)
    # Text exactly as returned by the engine.
    raw_text: str
    # Crop the reading was performed on.
    code_roi: np.ndarray = field(compare=False)
|
|
|
|
|
|
@dataclass
class BufferStats:
    """Mutable counters describing how a buffer has been used."""

    # Items accepted and items handed out.
    pushed: int = 0
    popped: int = 0
    # Items lost on insertion and items bypassed on retrieval.
    dropped_on_put: int = 0
    skipped_on_pop: int = 0
    # Number of times a consumer had to wait.
    waits: int = 0
    # Largest number of queued items observed.
    max_depth_seen: int = 0
|
|
|
|
|
|
@dataclass
class RuntimeStats:
    """Mutable counters and running totals for the whole pipeline."""

    # Event counters.
    capture_frames: int = 0
    display_frames: int = 0
    yolo_submitted_frames: int = 0
    quality_rejected_frames: int = 0
    quality_cycles: int = 0
    yolo_cycles: int = 0
    ocr_cycles: int = 0

    # Accumulated totals (the ``_ms`` suffix suggests milliseconds).
    quality_total_ms: float = 0.0
    quality_score_total: float = 0.0
    yolo_total_ms: float = 0.0
    yolo_blob_ms: float = 0.0
    yolo_forward_ms: float = 0.0
    yolo_parse_ms: float = 0.0
    ocr_total_ms: float = 0.0
    display_total_ms: float = 0.0
    capture_read_total_ms: float = 0.0

    # Most recent values seen by each stage.
    last_capture_frame_id: int = 0
    last_quality_score: float = 0.0
    last_quality_passed: bool = True
    last_yolo_frame_id: int = 0
    last_ocr_frame_id: int = 0
    last_detection_count: int = 0
    last_detection_summary: str = ""
|
|
|
|
|
|
class LatestBuffer:
    """Bounded buffer whose consumer always receives the newest queued item.

    All access goes through one ``threading.Condition``; usage counters are
    kept in a ``BufferStats`` instance.
    """

    def __init__(self, max_size: int, name: str):
        """Create an empty buffer holding at most ``max_size`` items.

        Raises:
            ValueError: If ``max_size`` is smaller than 1.
        """
        if max_size < 1:
            raise ValueError("max_size must be >= 1")
        self.max_size = max_size
        self.name = name
        self._items: deque[Any] = deque(maxlen=max_size)
        self._condition = threading.Condition()
        self._stats = BufferStats()

    def put(self, item: Any) -> None:
        """Append ``item`` and wake every waiting consumer.

        When the buffer is already full the deque's ``maxlen`` evicts the
        oldest entry; that eviction is counted in ``dropped_on_put``.
        """
        with self._condition:
            counters = self._stats
            if len(self._items) == self.max_size:
                counters.dropped_on_put += 1
            self._items.append(item)
            counters.pushed += 1
            counters.max_depth_seen = max(counters.max_depth_seen, len(self._items))
            self._condition.notify_all()

    def get_latest_blocking(self, stop_event: threading.Event) -> Any | None:
        """Wait for data and return the newest item, discarding older ones.

        The wait polls in 0.1 s slices so ``stop_event`` is noticed. Returns
        ``None`` when ``stop_event`` is set, even if items are queued.
        """
        with self._condition:
            while not self._items and not stop_event.is_set():
                self._stats.waits += 1
                self._condition.wait(timeout=0.1)

            if stop_event.is_set():
                return None

            newest = self._items[-1]
            # Everything older than the newest entry is skipped, not delivered.
            self._stats.skipped_on_pop += len(self._items) - 1
            self._items.clear()
            self._stats.popped += 1
            return newest

    def wake_all(self) -> None:
        """Notify every thread currently waiting on the buffer."""
        with self._condition:
            self._condition.notify_all()

    def stats(self) -> BufferStats:
        """Return a copy of the current counters."""
        with self._condition:
            return BufferStats(**vars(self._stats))

    def depth(self) -> int:
        """Return how many items are currently queued."""
        with self._condition:
            return len(self._items)
|
|
|
|
|
|
class SharedState:
    """Lock-protected holder for the latest detection, recent OCR results,
    debug frames and runtime counters."""

    def __init__(self, ocr_history_size: int):
        """Create empty state keeping at most ``ocr_history_size`` OCR results."""
        self._lock = threading.Lock()
        self.latest_detection: DetectionResult | None = None
        self.ocr_results: deque[OcrResult] = deque(maxlen=ocr_history_size)
        self.debug_yolo_frame: np.ndarray | None = None
        self.debug_ocr_frame: np.ndarray | None = None
        self.debug_ocr_text: str = ""
        self.stats = RuntimeStats()

    def set_latest_detection(self, result: DetectionResult) -> None:
        """Store ``result`` and fold its timings and class counts into the stats."""
        with self._lock:
            stats = self.stats
            self.latest_detection = result
            stats.yolo_cycles += 1
            stats.yolo_total_ms += result.inference_ms
            stats.yolo_blob_ms += result.blob_ms
            stats.yolo_forward_ms += result.forward_ms
            stats.yolo_parse_ms += result.parse_ms
            stats.last_yolo_frame_id = result.frame_id
            stats.last_detection_count = len(result.detections)

            # Summary looks like "name:count,name:count", sorted by class name.
            per_class: dict[str, int] = {}
            for det in result.detections:
                per_class[det.class_name] = per_class.get(det.class_name, 0) + 1
            parts = [f"{name}:{count}" for name, count in sorted(per_class.items())]
            stats.last_detection_summary = ",".join(parts)

    def get_latest_detection(self) -> DetectionResult | None:
        """Return the most recently stored detection result, if any."""
        with self._lock:
            return self.latest_detection

    def add_ocr_result(self, result: OcrResult) -> None:
        """Append ``result`` to the history and update the OCR counters."""
        with self._lock:
            self.ocr_results.append(result)
            stats = self.stats
            stats.ocr_cycles += 1
            stats.ocr_total_ms += result.ocr_ms
            stats.last_ocr_frame_id = result.source_frame_id

    def get_recent_ocr_results(self) -> list[OcrResult]:
        """Return the OCR history as a new list."""
        with self._lock:
            return list(self.ocr_results)

    def set_debug_yolo_frame(self, frame: np.ndarray | None) -> None:
        """Store a private copy of ``frame`` (or clear it with ``None``)."""
        with self._lock:
            self.debug_yolo_frame = frame.copy() if frame is not None else None

    def set_debug_ocr_frame(self, frame: np.ndarray | None, text: str = "") -> None:
        """Store a private copy of ``frame`` (or ``None``) together with ``text``."""
        with self._lock:
            self.debug_ocr_frame = frame.copy() if frame is not None else None
            self.debug_ocr_text = text

    def get_debug_frames(self) -> tuple[np.ndarray | None, np.ndarray | None, str]:
        """Return copies of both debug frames plus the debug OCR text."""
        with self._lock:
            yolo_frame = self.debug_yolo_frame
            ocr_frame = self.debug_ocr_frame
            yolo_copy = yolo_frame.copy() if yolo_frame is not None else None
            ocr_copy = ocr_frame.copy() if ocr_frame is not None else None
            return yolo_copy, ocr_copy, self.debug_ocr_text

    def add_capture_read(self, frame_id: int, read_ms: float) -> None:
        """Record one frame read and the time it took."""
        with self._lock:
            stats = self.stats
            stats.capture_frames += 1
            stats.capture_read_total_ms += read_ms
            stats.last_capture_frame_id = frame_id

    def add_quality_result(
        self,
        score: float,
        passed: bool,
        elapsed_ms: float,
        submitted_to_yolo: bool,
    ) -> None:
        """Record one quality evaluation.

        ``submitted_to_yolo`` decides which of the submitted/rejected counters
        is incremented; ``passed`` is only stored as the last outcome.
        """
        with self._lock:
            stats = self.stats
            stats.quality_cycles += 1
            stats.quality_score_total += score
            stats.quality_total_ms += elapsed_ms
            stats.last_quality_score = score
            stats.last_quality_passed = passed
            if not submitted_to_yolo:
                stats.quality_rejected_frames += 1
            else:
                stats.yolo_submitted_frames += 1

    def add_display(self, display_ms: float) -> None:
        """Record one displayed frame and the time spent on it."""
        with self._lock:
            self.stats.display_frames += 1
            self.stats.display_total_ms += display_ms

    def snapshot_stats(self) -> RuntimeStats:
        """Return a copy of the current runtime counters."""
        with self._lock:
            return RuntimeStats(**vars(self.stats))
|
|
|
|
|
|
class IdGenerator:
    """Hands out consecutive integers under a lock."""

    def __init__(self, start: int = 1):
        """Prepare the generator so the first id returned is ``start``."""
        self._lock = threading.Lock()
        self._value = start

    def next(self) -> int:
        """Return the current id and advance the counter by one."""
        with self._lock:
            issued = self._value
            self._value = issued + 1
        return issued
|
|
|
|
|
|
def parse_args():
|
|
ap = argparse.ArgumentParser()
|
|
|
|
ap.add_argument("-v", "--video", default=None,
|
|
help="Percorso video. Se omesso usa webcam 0")
|
|
ap.add_argument("--weights", default="yolov2.weights",
|
|
help="File pesi YOLOv2")
|
|
ap.add_argument("--config", default="yolov2.cfg",
|
|
help="File config YOLOv2")
|
|
ap.add_argument("--labels", default="labels.txt",
|
|
help="File labels classi")
|
|
ap.add_argument("--tesseract-cmd", default=None,
|
|
help="Percorso esplicito a tesseract.exe")
|
|
|
|
ap.add_argument("--backend", choices=["cpu", "cuda", "cuda-fp16"],
|
|
default="cpu", help="Backend OpenCV DNN")
|
|
ap.add_argument("--input-size", type=int, default=416,
|
|
help="Dimensione input YOLO")
|
|
ap.add_argument("--swap-rb", action="store_true",
|
|
help="Scambia canali R/B nella blob YOLO")
|
|
ap.add_argument("--frame-buffer-size", type=int, default=10,
|
|
help="Dimensione latest buffer frame")
|
|
ap.add_argument("--roi-buffer-size", type=int, default=20,
|
|
help="Dimensione latest buffer ROI")
|
|
ap.add_argument("--ocr-history-size", type=int, default=100,
|
|
help="Numero risultati OCR recenti mantenuti in memoria")
|
|
ap.add_argument("--preview-width", type=int, default=1280,
|
|
help="Larghezza massima preview")
|
|
ap.add_argument("--stats-interval", type=float, default=2.0,
|
|
help="Secondi tra riepiloghi prestazioni")
|
|
ap.add_argument("--max-frames", type=int, default=0,
|
|
help="Numero massimo frame da leggere; 0 = fino a fine stream")
|
|
ap.add_argument("--drain-seconds", type=float, default=0.0,
|
|
help="Secondi di attesa dopo max/fine stream per benchmark headless")
|
|
ap.add_argument("--realtime-playback", action="store_true",
|
|
help="Per file video, limita il loop al framerate del video")
|
|
ap.add_argument("--opencv-threads", type=int, default=1,
|
|
help="Numero thread OpenCV")
|
|
ap.add_argument("--quality-filter", action="store_true",
|
|
help="Filtra i frame troppo sfocati/mossi prima del buffer YOLO")
|
|
ap.add_argument("--blur-metric", choices=["laplacian", "tenengrad"],
|
|
default="laplacian", help="Metrica nitidezza usata dal filtro qualita'")
|
|
ap.add_argument("--min-sharpness", type=float, default=80.0,
|
|
help="Soglia minima nitidezza per inviare il frame a YOLO")
|
|
ap.add_argument("--blur-resize-width", type=int, default=320,
|
|
help="Larghezza usata per ridurre il frame prima della metrica blur")
|
|
ap.add_argument("--debug-quality-log", action="store_true",
|
|
help="Logga lo score qualita' di ogni frame")
|
|
ap.add_argument("--debug-rejected-window", action="store_true",
|
|
help="Mostra una finestra con i frame scartati dal filtro qualita'")
|
|
|
|
ap.add_argument("--min-confidence", type=float, default=0.30,
|
|
help="Soglia minima confidenza")
|
|
ap.add_argument("--nms-threshold", type=float, default=0.40,
|
|
help="Soglia NMS")
|
|
ap.add_argument("--use-nms", action="store_true",
|
|
help="Applica NMS alle detection; default off per compatibilita' YOLOv2")
|
|
ap.add_argument("--label-class", default="etichetta",
|
|
help="Nome classe etichetta su cui fare OCR")
|
|
ap.add_argument("--min-label-width", type=int, default=50,
|
|
help="Larghezza minima bbox etichetta")
|
|
ap.add_argument("--min-label-height", type=int, default=20,
|
|
help="Altezza minima bbox etichetta")
|
|
ap.add_argument("--max-roi-per-frame", type=int, default=2,
|
|
help="Numero massimo ROI etichetta inviate a OCR per detection")
|
|
ap.add_argument("--infer-gaylord-from-label", action="store_true",
|
|
help="Disegna un box gaylord stimato partendo dalle etichette se YOLO non trova gaylord")
|
|
ap.add_argument("--inferred-gaylord-width-factor", type=float, default=3.6,
|
|
help="Larghezza box gaylord stimato rispetto alla label")
|
|
ap.add_argument("--inferred-gaylord-height-factor", type=float, default=4.2,
|
|
help="Altezza box gaylord stimato rispetto alla label")
|
|
ap.add_argument("--inferred-gaylord-y-shift", type=float, default=1.35,
|
|
help="Spostamento verticale verso il basso, in multipli dell'altezza label")
|
|
ap.add_argument("--slot-size", type=int, default=120,
|
|
help="Dimensione griglia per cooldown OCR")
|
|
ap.add_argument("--ocr-cooldown-sec", type=float, default=1.0,
|
|
help="Secondi minimi prima di reinviare OCR sulla stessa zona")
|
|
ap.add_argument("--ocr-min-digits", type=int, default=2,
|
|
help="Numero minimo cifre per lettura valida")
|
|
ap.add_argument("--ocr-backend", choices=["tesseract", "paddle", "easyocr"],
|
|
default="paddle", help="Motore OCR da usare")
|
|
ap.add_argument("--ocr-lang", default="en",
|
|
help="Lingua OCR")
|
|
ap.add_argument("--easyocr-gpu", choices=["auto", "on", "off"],
|
|
default="auto", help="Uso GPU EasyOCR")
|
|
ap.add_argument("--easyocr-mode", choices=["subprocess", "inprocess"],
|
|
default="subprocess", help="Modalita' EasyOCR")
|
|
ap.add_argument("--easyocr-worker", action="store_true",
|
|
help=argparse.SUPPRESS)
|
|
ap.add_argument("--ocr-input", choices=["roi", "processed"],
|
|
default="roi", help="Immagine passata al motore OCR")
|
|
ap.add_argument("--ocr-code-mode", choices=["full", "fixed-band", "large-components"],
|
|
default="fixed-band", help="Prefiltro per isolare il codice grande")
|
|
ap.add_argument("--ocr-scale", type=float, default=1.5,
|
|
help="Fattore di ingrandimento preprocess OCR")
|
|
ap.add_argument("--ocr-max-width", type=int, default=900,
|
|
help="Larghezza massima immagine inviata all'OCR")
|
|
ap.add_argument("--ocr-band-x1", type=float, default=0.0,
|
|
help="Crop fisso OCR: x iniziale percentuale 0..1")
|
|
ap.add_argument("--ocr-band-y1", type=float, default=0.0,
|
|
help="Crop fisso OCR: y iniziale percentuale 0..1")
|
|
ap.add_argument("--ocr-band-x2", type=float, default=1.0,
|
|
help="Crop fisso OCR: x finale percentuale 0..1")
|
|
ap.add_argument("--ocr-band-y2", type=float, default=1.0,
|
|
help="Crop fisso OCR: y finale percentuale 0..1")
|
|
ap.add_argument("--ocr-component-min-height-ratio", type=float, default=0.22,
|
|
help="Altezza minima componente grande rispetto alla ROI")
|
|
ap.add_argument("--ocr-component-min-area-ratio", type=float, default=0.002,
|
|
help="Area minima componente grande rispetto alla ROI")
|
|
ap.add_argument("--ocr-component-pad-ratio", type=float, default=0.08,
|
|
help="Padding crop finale componenti grandi")
|
|
ap.add_argument("--ocr-pad-ratio", type=float, default=0.20,
|
|
help="Padding bbox etichetta prima dell'OCR")
|
|
ap.add_argument("--ocr-submit-min-interval", type=float, default=2.0,
|
|
help="Secondi minimi globali tra due ROI inviate all'OCR")
|
|
ap.add_argument("--ocr-max-pending", type=int, default=1,
|
|
help="Numero massimo ROI pendenti prima di saltare nuovi invii OCR")
|
|
ap.add_argument("--paddle-text-det-limit-side-len", type=int, default=320,
|
|
help="Parametro PaddleOCR text_det_limit_side_len")
|
|
ap.add_argument("--paddle-text-rec-score-thresh", type=float, default=0.0,
|
|
help="Soglia riconoscimento PaddleOCR")
|
|
ap.add_argument("--print-all-ocr", action="store_true",
|
|
help="Stampa anche OCR grezzi non validi")
|
|
ap.add_argument("--save-ocr-roi-dir", default=None,
|
|
help="Directory dove salvare ROI OCR raw/code/processed per debug")
|
|
ap.add_argument("--no-ocr", action="store_true",
|
|
help="Disabilita OCR; utile per benchmark YOLO/capture")
|
|
|
|
ap.add_argument("--debug-yolo-window", action="store_true",
|
|
help="Mostra una finestra debug con l'ultimo frame YOLO")
|
|
ap.add_argument("--debug-ocr-window", action="store_true",
|
|
help="Mostra una finestra debug con l'ultima ROI preprocessata")
|
|
ap.add_argument("--debug-yolo-output", action="store_true",
|
|
help="Logga shape e confidenze grezze dell'output YOLO")
|
|
ap.add_argument("--debug-yolo-top", type=int, default=0,
|
|
help="Logga le top N righe raw YOLO per confidenza classe")
|
|
ap.add_argument("--debug-inferred-gaylord", action="store_true",
|
|
help="Logga quanti box gaylord stimati vengono generati")
|
|
ap.add_argument("--no-display", action="store_true",
|
|
help="Disabilita finestre video, utile per benchmark")
|
|
|
|
return ap.parse_args()
|
|
|
|
|
|
def log(msg: str) -> None:
    """Print ``msg`` prefixed with the current ``[HH:MM:SS]`` time, flushing at once."""
    stamp = time.strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)
|
|
|
|
|
|
def require_file(path_str: str, description: str) -> Path:
    """Return ``path_str`` as a ``Path``, or log an error and exit with status 1.

    Only existence is checked; ``description`` is used in the error message.
    """
    candidate = Path(path_str)
    if candidate.exists():
        return candidate
    log(f"ERRORE: {description} non trovato: {candidate}")
    sys.exit(1)
|
|
|
|
|
|
def load_classes(labels_path: str) -> list[str]:
    """Read class names from a UTF-8 text file, one per non-blank line.

    Each line is stripped of surrounding whitespace. If no names remain an
    error is logged and the process exits with status 1.
    """
    with open(labels_path, "rt", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        names = [name for name in stripped if name]
    if names:
        return names
    log("ERRORE: labels.txt vuoto")
    sys.exit(1)
|
|
|
|
|
|
def open_capture(video_arg: str | None):
    """Open a capture for ``video_arg`` and return ``(capture, source_label)``.

    ``None`` selects camera 0 and an all-digit string selects that camera
    index; cameras are tried with ``cv2.CAP_DSHOW`` first and reopened with
    the default backend if that did not open. Anything else is handed to
    ``cv2.VideoCapture`` as given. The returned capture is not checked here.
    """

    def _open_camera(index: int):
        capture = cv2.VideoCapture(index, cv2.CAP_DSHOW)
        if not capture.isOpened():
            capture = cv2.VideoCapture(index)
        return capture, f"camera:{index}"

    if video_arg is None:
        return _open_camera(0)

    source = str(video_arg)
    if source.isdigit():
        return _open_camera(int(video_arg))

    return cv2.VideoCapture(video_arg), source
|
|
|
|
|
|
def configure_net_backend(net, backend: str) -> None:
    """Set the preferred OpenCV DNN backend and target on ``net``.

    ``"cpu"`` selects the OpenCV backend with the CPU target. Any other value
    selects the CUDA backend, with the FP16 target for ``"cuda-fp16"`` and the
    regular CUDA target otherwise.
    """
    if backend == "cpu":
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    else:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        wants_fp16 = backend == "cuda-fp16"
        net.setPreferableTarget(
            cv2.dnn.DNN_TARGET_CUDA_FP16 if wants_fp16 else cv2.dnn.DNN_TARGET_CUDA
        )
|
|
|
|
|
|
def resize_preview(frame: np.ndarray, max_width: int) -> np.ndarray:
    """Shrink ``frame`` to at most ``max_width`` pixels wide, keeping aspect ratio.

    Args:
        frame: Image whose first two dimensions are (height, width).
        max_width: Width limit; ``<= 0`` disables resizing.

    Returns:
        ``frame`` itself when no resize is needed, otherwise a resized image
        produced with linear interpolation.
    """
    h, w = frame.shape[:2]
    if max_width <= 0 or w <= max_width:
        return frame
    scale = max_width / float(w)
    # Truncation can yield 0 for very thin frames and cv2.resize rejects a
    # zero-sized target, so clamp both sides to one pixel (as limit_width does).
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    return cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
|
|
|
|
|
def clip_box(x1: int, y1: int, x2: int, y2: int,
             w: int, h: int) -> tuple[int, int, int, int]:
    """Clamp both corners of a box into ``[0, w - 1]`` x ``[0, h - 1]``."""
    last_col = w - 1
    last_row = h - 1

    def _clamp(value: int, upper: int) -> int:
        return max(0, min(value, upper))

    return (
        _clamp(x1, last_col),
        _clamp(y1, last_row),
        _clamp(x2, last_col),
        _clamp(y2, last_row),
    )
|
|
|
|
|
|
def expand_box(x1: int, y1: int, x2: int, y2: int,
               frame_w: int, frame_h: int,
               pad_ratio: float = 0.08) -> tuple[int, int, int, int]:
    """Grow a box by ``pad_ratio`` of its size on every side, then clip it.

    The horizontal and vertical paddings are the truncated products of the
    box width/height and ``pad_ratio``; clipping is delegated to ``clip_box``.
    """
    dx = int((x2 - x1) * pad_ratio)
    dy = int((y2 - y1) * pad_ratio)
    left, top = x1 - dx, y1 - dy
    right, bottom = x2 + dx, y2 + dy
    return clip_box(left, top, right, bottom, frame_w, frame_h)
|
|
|
|
|
|
def quantized_slot_key(bbox: tuple[int, int, int, int], slot_size: int) -> tuple[int, int]:
    """Map a box to the grid cell, of side ``slot_size``, containing its centre."""
    left, top, right, bottom = bbox
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2
    return center_x // slot_size, center_y // slot_size
|
|
|
|
|
|
def limit_width(image: np.ndarray, max_width: int) -> np.ndarray:
    """Downscale ``image`` so its width does not exceed ``max_width``.

    The image is returned unchanged when ``max_width <= 0`` or it is already
    narrow enough; otherwise it is resized to exactly ``max_width`` columns
    with area interpolation, keeping at least one row.
    """
    if max_width <= 0:
        return image
    rows, cols = image.shape[:2]
    if cols <= max_width:
        return image
    ratio = max_width / float(cols)
    target_rows = max(1, int(rows * ratio))
    return cv2.resize(image, (max_width, target_rows), interpolation=cv2.INTER_AREA)
|
|
|
|
|
|
def preprocess_for_ocr(roi: np.ndarray, scale: float = 1.5, max_width: int = 900) -> np.ndarray:
    """Turn a BGR crop into a binarised, bordered image for OCR.

    Steps, in order: grayscale conversion, optional cubic rescale by
    ``scale``, 3x3 Gaussian blur, Otsu thresholding, an 8-pixel border of
    value 255 on every side, and finally a width cap via ``limit_width``.
    """
    work = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    if scale != 1.0:
        work = cv2.resize(work, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    smoothed = cv2.GaussianBlur(work, (3, 3), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    border = 8
    padded = cv2.copyMakeBorder(
        binary, border, border, border, border,
        borderType=cv2.BORDER_CONSTANT,
        value=255,
    )
    return limit_width(padded, max_width)
|
|
|
|
|
|
def crop_fixed_band(
    roi: np.ndarray,
    x1_ratio: float,
    y1_ratio: float,
    x2_ratio: float,
    y2_ratio: float,
) -> np.ndarray:
    """Crop the rectangle given as fractions of the ROI size.

    Args:
        roi: Image whose first two dimensions are (height, width).
        x1_ratio, y1_ratio: Top-left corner as fractions; clamped to 0..1.
        x2_ratio, y2_ratio: Bottom-right corner as fractions; clamped to 0..1.

    Returns:
        A copy of the selected region, or ``roi`` itself when the band is
        empty or inverted.
    """
    h, w = roi.shape[:2]

    def _to_pixel(ratio: float, extent: int) -> int:
        # Clamping the ratio keeps the result inside 0..extent.
        return int(max(0.0, min(1.0, ratio)) * extent)

    # The end coordinates are exclusive slice bounds, so they may equal the
    # full width/height. Clamping them to w-1/h-1 (as a pixel-index clip
    # would) silently dropped the last row and column of a full band.
    x1 = _to_pixel(x1_ratio, w)
    y1 = _to_pixel(y1_ratio, h)
    x2 = _to_pixel(x2_ratio, w)
    y2 = _to_pixel(y2_ratio, h)
    if x2 <= x1 or y2 <= y1:
        return roi
    return roi[y1:y2, x1:x2].copy()
|
|
|
|
|
|
def crop_large_components(
    roi: np.ndarray,
    min_height_ratio: float,
    min_area_ratio: float,
    pad_ratio: float,
) -> np.ndarray:
    """Crop the ROI to the padded union of its large, character-shaped blobs.

    The ROI is binarised (inverted Otsu after a 3x3 blur) and opened with a
    3x3 rectangle. External contours are then kept only if their bounding box
    is tall and large enough and has a width/height ratio within 0.12..1.25.
    The union of the kept boxes is padded via ``expand_box`` and copied out.
    ``roi`` itself is returned when it is empty, nothing qualifies, or the
    final box is degenerate.
    """
    h, w = roi.shape[:2]
    if h <= 0 or w <= 0:
        return roi

    gray = cv2.GaussianBlur(cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY), (3, 3), 0)
    _, mask = cv2.threshold(
        gray,
        0,
        255,
        cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
    )

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    min_h = max(3, int(h * min_height_ratio))
    min_area = max(4, int(w * h * min_area_ratio))

    kept: list[tuple[int, int, int, int]] = []
    for contour in contours:
        x, y, bw, bh = cv2.boundingRect(contour)
        if bh < min_h or bw * bh < min_area:
            continue
        aspect = bw / float(max(1, bh))
        if not (0.12 <= aspect <= 1.25):
            continue
        kept.append((x, y, x + bw, y + bh))

    if not kept:
        return roi

    lefts, tops, rights, bottoms = zip(*kept)
    x1, y1, x2, y2 = expand_box(
        min(lefts), min(tops), max(rights), max(bottoms), w, h, pad_ratio=pad_ratio
    )
    if x2 <= x1 or y2 <= y1:
        return roi
    return roi[y1:y2, x1:x2].copy()
|
|
|
|
|
|
def extract_code_roi(roi: np.ndarray, args) -> np.ndarray:
    """Apply the crop selected by ``args.ocr_code_mode`` to ``roi``.

    ``"full"`` returns ``roi`` untouched, ``"fixed-band"`` crops with the
    ``args.ocr_band_*`` ratios, and any other value falls through to the
    large-component crop driven by the ``args.ocr_component_*`` settings.
    """
    mode = args.ocr_code_mode
    if mode == "full":
        return roi
    if mode != "fixed-band":
        return crop_large_components(
            roi,
            args.ocr_component_min_height_ratio,
            args.ocr_component_min_area_ratio,
            args.ocr_component_pad_ratio,
        )
    return crop_fixed_band(
        roi,
        args.ocr_band_x1,
        args.ocr_band_y1,
        args.ocr_band_x2,
        args.ocr_band_y2,
    )
|
|
|
|
|
|
def ocr_digits_only(roi: np.ndarray, pytesseract_module: Any) -> OcrEngineResult:
    """Run Tesseract on ``roi`` restricted to digits and collect the result.

    The crop is preprocessed with the default ``preprocess_for_ocr`` settings
    and read with a digits-only whitelist; every non-digit character is then
    removed from the returned text.
    """
    prepared = preprocess_for_ocr(roi)
    tesseract_config = r"--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789"
    raw_text = pytesseract_module.image_to_string(prepared, config=tesseract_config)
    only_digits = re.sub(r"\D+", "", raw_text)
    return OcrEngineResult(only_digits, prepared, raw_text, roi)
|
|
|
|
|
|
class TesseractOcrEngine:
    """OCR engine backed by ``pytesseract``."""

    def __init__(self, args):
        """Import pytesseract lazily and apply ``args.tesseract_cmd`` if given."""
        import pytesseract

        self._args = args
        self._pytesseract = pytesseract
        if args.tesseract_cmd:
            pytesseract.pytesseract.tesseract_cmd = args.tesseract_cmd

    def read_digits(self, roi: np.ndarray) -> OcrEngineResult:
        """Crop ``roi`` per the configured code mode and read its digits."""
        cropped = extract_code_roi(roi, self._args)
        return ocr_digits_only(cropped, self._pytesseract)
|
|
|
|
|
|
class PaddleOcrEngine:
    """OCR engine backed by PaddleOCR."""

    def __init__(self, args):
        """Import PaddleOCR lazily and build a recogniser from ``args``."""
        from paddleocr import PaddleOCR

        self._args = args
        self._input_mode = args.ocr_input
        self._ocr = PaddleOCR(
            lang=args.ocr_lang,
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
            use_textline_orientation=False,
            text_det_limit_side_len=args.paddle_text_det_limit_side_len,
            text_rec_score_thresh=args.paddle_text_rec_score_thresh,
        )

    def read_digits(self, roi: np.ndarray) -> OcrEngineResult:
        """Crop ``roi``, run PaddleOCR on it and keep only the digits.

        The engine receives the preprocessed image when the input mode is
        ``"processed"`` and the width-limited crop otherwise; the preprocessed
        image is computed either way because it is part of the result.
        """
        opts = self._args
        code_roi = limit_width(extract_code_roi(roi, opts), opts.ocr_max_width)
        processed = preprocess_for_ocr(
            code_roi,
            scale=opts.ocr_scale,
            max_width=opts.ocr_max_width,
        )
        engine_input = code_roi
        if self._input_mode == "processed":
            engine_input = processed

        fragments: list[str] = []
        for entry in self._ocr.predict(engine_input):
            if isinstance(entry, dict):
                # Text is looked up under "rec_texts" first, then "texts".
                found = entry.get("rec_texts") or entry.get("texts") or []
                if isinstance(found, str):
                    fragments.append(found)
                else:
                    fragments.extend(str(piece) for piece in found)
            elif isinstance(entry, (list, tuple)):
                fragments.append(str(entry))

        raw_text = " ".join(fragments)
        return OcrEngineResult(re.sub(r"\D+", "", raw_text), processed, raw_text, code_roi)
|
|
|
|
|
|
class EasyOcrInProcessEngine:
    """OCR engine running an EasyOCR reader inside the current process."""

    def __init__(self, args):
        """Import EasyOCR lazily and create a reader for ``args.ocr_lang``.

        ``args.easyocr_gpu`` picks the device: ``"on"`` requires the GPU (any
        failure propagates), ``"off"`` uses the CPU, and any other value tries
        the GPU first and falls back to the CPU when creation raises.
        """
        import easyocr

        self._args = args
        self._input_mode = args.ocr_input
        langs = [code.strip() for code in args.ocr_lang.split(",") if code.strip()]
        if not langs:
            langs = ["en"]

        gpu_mode = args.easyocr_gpu
        if gpu_mode == "on":
            self._reader = easyocr.Reader(langs, gpu=True, verbose=False)
            self._using_gpu = True
        elif gpu_mode == "off":
            self._reader = easyocr.Reader(langs, gpu=False, verbose=False)
            self._using_gpu = False
        else:
            try:
                self._reader = easyocr.Reader(langs, gpu=True, verbose=False)
                self._using_gpu = True
            except Exception as exc:
                log(f"EasyOCR GPU non disponibile, fallback CPU: {exc}")
                self._reader = easyocr.Reader(langs, gpu=False, verbose=False)
                self._using_gpu = False

        log(f"EasyOCR device: {'gpu' if self._using_gpu else 'cpu'}")

    def read_digits(self, roi: np.ndarray) -> OcrEngineResult:
        """Crop ``roi``, run EasyOCR with a digits allowlist, keep only digits.

        The reader receives the preprocessed image when the input mode is
        ``"processed"`` and the width-limited crop otherwise.
        """
        opts = self._args
        code_roi = limit_width(extract_code_roi(roi, opts), opts.ocr_max_width)
        processed = preprocess_for_ocr(
            code_roi,
            scale=opts.ocr_scale,
            max_width=opts.ocr_max_width,
        )
        engine_input = code_roi
        if self._input_mode == "processed":
            engine_input = processed

        entries = self._reader.readtext(
            engine_input,
            allowlist="0123456789",
            detail=1,
            paragraph=False,
        )
        # For list/tuple entries the text is taken from index 1; anything
        # else is stringified as a whole.
        fragments = [
            str(entry[1]) if isinstance(entry, (list, tuple)) and len(entry) >= 2 else str(entry)
            for entry in entries
        ]

        raw_text = " ".join(fragments)
        return OcrEngineResult(re.sub(r"\D+", "", raw_text), processed, raw_text, code_roi)
|
|
|
|
|
|
class EasyOcrProcessEngine:
    """OCR engine that delegates to an EasyOCR worker in a child process.

    The worker is this same script started with ``--easyocr-worker``. Requests
    and responses are JSON objects, one per line, over the child's
    stdin/stdout; images are exchanged as PNG files in a private temporary
    directory.
    """

    # How many of the worker's most recent stderr lines are kept for error
    # reports.
    _STDERR_TAIL_LINES = 200

    def __init__(self, args):
        """Start the worker and wait for its ``ready`` event.

        Raises:
            RuntimeError: If the worker exits early or its first JSON message
                is not a ``ready`` event.
        """
        self._args = args
        self._input_mode = args.ocr_input
        self._tmpdir = tempfile.TemporaryDirectory(prefix="flywms_easyocr_")
        cmd = [
            sys.executable,
            str(Path(__file__).resolve()),
            "--easyocr-worker",
            "--ocr-lang", args.ocr_lang,
            "--easyocr-gpu", args.easyocr_gpu,
        ]
        env = os.environ.copy()
        env["PYTHONUTF8"] = "1"
        self._proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=env,
        )
        # stderr is a pipe: if nobody reads it the worker blocks as soon as
        # the OS pipe buffer fills (warnings, progress output), which would
        # hang every later request. Drain it continuously in the background
        # and keep only the tail for diagnostics.
        self._stderr_tail: deque[str] = deque(maxlen=self._STDERR_TAIL_LINES)
        self._stderr_thread = threading.Thread(
            target=self._drain_stderr,
            name="easyocr-stderr",
            daemon=True,
        )
        self._stderr_thread.start()

        ready = self._read_json_line()
        if ready.get("event") != "ready":
            raise RuntimeError(f"EasyOCR worker non pronto: {ready}")
        log(f"EasyOCR worker process device: {ready.get('device', 'unknown')}")

    def _drain_stderr(self) -> None:
        """Consume the worker's stderr until EOF, remembering the last lines."""
        stream = self._proc.stderr
        if stream is None:
            return
        try:
            for line in stream:
                self._stderr_tail.append(line.rstrip())
        except (OSError, ValueError):
            # The pipe was closed underneath us; nothing more to collect.
            pass

    def _read_json_line(self) -> dict[str, Any]:
        """Return the next JSON object printed by the worker on stdout.

        Blank lines, lines that are not valid JSON and JSON values that are
        not objects are skipped.

        Raises:
            RuntimeError: If stdout reaches EOF; the message carries the
                collected stderr tail.
        """
        assert self._proc.stdout is not None
        while True:
            line = self._proc.stdout.readline()
            if line == "":
                # Give the drain thread a moment to collect the final output.
                self._stderr_thread.join(timeout=1.0)
                err = "\n".join(self._stderr_tail)
                raise RuntimeError(f"EasyOCR worker terminato: {err.strip()}")
            line = line.strip()
            if not line:
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A stray scalar or list on stdout is valid JSON but not a
            # protocol message; callers rely on ``.get``.
            if isinstance(payload, dict):
                return payload

    def read_digits(self, roi: np.ndarray) -> OcrEngineResult:
        """Crop ``roi``, send it to the worker and keep only the digits.

        The worker receives the preprocessed image when the input mode is
        ``"processed"`` and the width-limited crop otherwise.

        Raises:
            RuntimeError: If the worker is not running, terminates while
                answering, or reports an error.
        """
        if self._proc.poll() is not None:
            raise RuntimeError("EasyOCR worker non attivo")

        code_roi = extract_code_roi(roi, self._args)
        code_roi = limit_width(code_roi, self._args.ocr_max_width)
        processed = preprocess_for_ocr(
            code_roi,
            scale=self._args.ocr_scale,
            max_width=self._args.ocr_max_width,
        )
        ocr_input = processed if self._input_mode == "processed" else code_roi
        image_path = Path(self._tmpdir.name) / f"ocr_{time.perf_counter_ns()}.png"
        cv2.imwrite(str(image_path), ocr_input)

        assert self._proc.stdin is not None
        try:
            self._proc.stdin.write(json.dumps({"path": str(image_path)}) + "\n")
            self._proc.stdin.flush()
            response = self._read_json_line()
        finally:
            # Remove the temporary image even when the exchange fails.
            try:
                image_path.unlink(missing_ok=True)
            except OSError:
                pass

        if response.get("error"):
            raise RuntimeError(str(response["error"]))
        raw_text = str(response.get("raw_text", ""))
        digits = re.sub(r"\D+", "", raw_text)
        return OcrEngineResult(digits, processed, raw_text, code_roi)

    def close(self) -> None:
        """Ask the worker to stop, force-kill it after 3 s, remove temp files.

        Safe to call more than once and on a partially constructed instance.
        """
        if getattr(self, "_proc", None) is None:
            return
        if self._proc.poll() is None:
            if self._proc.stdin is not None:
                try:
                    self._proc.stdin.write(json.dumps({"cmd": "stop"}) + "\n")
                    self._proc.stdin.flush()
                except (OSError, ValueError):
                    # Broken or already closed pipe: fall through to wait/kill.
                    pass
            try:
                self._proc.wait(timeout=3)
            except subprocess.TimeoutExpired:
                self._proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                self._proc.wait()
        self._tmpdir.cleanup()

    def __del__(self):
        # Best-effort cleanup; errors during garbage collection are ignored.
        try:
            self.close()
        except Exception:
            pass
|
|
|
|
|
|
def create_ocr_engine(args):
    """Instantiate the OCR engine selected by ``args.ocr_backend``.

    ``"paddle"`` and ``"easyocr"`` map to their engines (EasyOCR runs in
    process only when ``args.easyocr_mode`` is ``"inprocess"``); every other
    value yields the Tesseract engine.
    """
    backend = args.ocr_backend
    if backend == "paddle":
        return PaddleOcrEngine(args)
    if backend != "easyocr":
        return TesseractOcrEngine(args)
    if args.easyocr_mode == "inprocess":
        return EasyOcrInProcessEngine(args)
    return EasyOcrProcessEngine(args)
|
|
|
|
|
|
def run_easyocr_worker(args) -> int:
    """Serve OCR requests read from stdin as JSON lines; return exit code 0.

    After creating the reader a ``ready`` event naming the device is printed.
    Each later input line is a JSON object: ``{"cmd": "stop"}`` ends the loop,
    anything else must carry a ``"path"`` to read. Every request is answered
    with one JSON line holding either ``raw_text`` or ``error``.
    """
    import easyocr

    langs = [code.strip() for code in args.ocr_lang.split(",") if code.strip()] or ["en"]
    gpu_mode = args.easyocr_gpu
    using_gpu = False
    if gpu_mode == "on":
        # GPU explicitly required: let any failure propagate.
        reader = easyocr.Reader(langs, gpu=True, verbose=False)
        using_gpu = True
    elif gpu_mode == "off":
        reader = easyocr.Reader(langs, gpu=False, verbose=False)
    else:
        try:
            reader = easyocr.Reader(langs, gpu=True, verbose=False)
            using_gpu = True
        except Exception:
            reader = easyocr.Reader(langs, gpu=False, verbose=False)

    device = "gpu" if using_gpu else "cpu"
    print(json.dumps({"event": "ready", "device": device}), flush=True)

    for line in sys.stdin:
        try:
            request = json.loads(line)
            if request.get("cmd") == "stop":
                break
            entries = reader.readtext(
                request["path"],
                allowlist="0123456789",
                detail=1,
                paragraph=False,
            )
            # For list/tuple entries the text is taken from index 1.
            fragments = [
                str(entry[1]) if isinstance(entry, (list, tuple)) and len(entry) >= 2 else str(entry)
                for entry in entries
            ]
            raw_text = " ".join(fragments)
            print(json.dumps({"raw_text": raw_text}), flush=True)
        except Exception as exc:
            # Report the failure to the parent instead of dying mid-protocol.
            print(json.dumps({"error": str(exc)}), flush=True)
    return 0
|
|
|
|
|
|
def save_ocr_debug_images(
    output_dir: Path,
    roi_packet: RoiPacket,
    engine_result: OcrEngineResult,
) -> None:
    """Write the raw ROI, the code crop and the processed image as PNG files.

    ``output_dir`` is created if needed. File names share a
    ``frame_<frame id>_roi_<roi id>`` stem (ids zero-padded to six digits)
    followed by ``_raw``, ``_code`` or ``_processed``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stem = str(
        output_dir / f"frame_{roi_packet.source_frame_id:06d}_roi_{roi_packet.roi_id:06d}"
    )
    outputs = (
        ("_raw.png", roi_packet.roi_image),
        ("_code.png", engine_result.code_roi),
        ("_processed.png", engine_result.processed),
    )
    for suffix, image in outputs:
        cv2.imwrite(stem + suffix, image)
|
|
|
|
|
|
def detect_yolov2(
    net,
    frame: np.ndarray,
    classes: list[str],
    min_confidence: float,
    nms_threshold: float,
    input_size: int,
    use_nms: bool,
    swap_rb: bool,
) -> tuple[list[Detection], dict[str, Any]]:
    """Run the network on ``frame`` and decode its output into detections.

    Each output row is read as ``[x_center, y_center, width, height, <unused>,
    class scores...]``: columns 0-3 are multiplied by the frame width/height,
    column 4 is not read, and columns 5+ are per-class scores.

    Args:
        net: Object exposing ``setInput``/``forward`` (an OpenCV DNN net).
        frame: Image whose first two dimensions are (height, width).
        classes: Class names indexed by class id.
        min_confidence: A row is kept only if its best class score is
            strictly greater than this value.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
        input_size: Side of the square network input.
        use_nms: Whether to filter boxes with non-maximum suppression.
        swap_rb: Forwarded as ``swapRB`` when building the blob.

    Returns:
        ``(detections, info)`` where ``info`` holds the output shape, the
        stage timings in milliseconds, ``raw_max``/``class_max`` and the
        decoded prediction array under ``raw_predictions``.
    """
    t0 = time.perf_counter()
    h, w = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(
        frame,
        scalefactor=1.0 / 255.0,
        size=(input_size, input_size),
        mean=(0, 0, 0),
        swapRB=swap_rb,
        crop=False,
    )
    t_blob = time.perf_counter()

    net.setInput(blob)
    predictions = net.forward()
    t_forward = time.perf_counter()

    # Bring the output down to a 2-D (rows, values) array.
    predictions = np.array(predictions)
    if predictions.ndim == 4:
        predictions = predictions.reshape(predictions.shape[1], predictions.shape[-1])
    elif predictions.ndim == 3:
        predictions = predictions[0]

    has_class_scores = predictions.ndim == 2 and predictions.shape[1] > 5

    boxes: list[list[int]] = []
    confidences: list[float] = []
    class_ids: list[int] = []

    if has_class_scores:
        for row in predictions:
            # Non-empty by construction: the row has more than 5 columns.
            prob_arr = row[5:]

            class_index = int(prob_arr.argmax(axis=0))
            if class_index >= len(classes):
                continue

            confidence = float(prob_arr[class_index])
            if confidence <= min_confidence:
                continue

            x_center = float(row[0]) * w
            y_center = float(row[1]) * h
            width_box = float(row[2]) * w
            height_box = float(row[3]) * h

            x1 = int(x_center - width_box * 0.5)
            y1 = int(y_center - height_box * 0.5)
            x2 = int(x_center + width_box * 0.5)
            y2 = int(y_center + height_box * 0.5)
            x1, y1, x2, y2 = clip_box(x1, y1, x2, y2, w, h)

            bw = max(0, x2 - x1)
            bh = max(0, y2 - y1)
            if bw == 0 or bh == 0:
                continue

            # NMSBoxes expects [x, y, width, height].
            boxes.append([x1, y1, bw, bh])
            confidences.append(confidence)
            class_ids.append(class_index)

    detections: list[Detection] = []
    if boxes:
        if use_nms:
            indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, nms_threshold)
            selected_indices = np.array(indices).flatten() if len(indices) else []
        else:
            selected_indices = range(len(boxes))

        for idx in selected_indices:
            pos = int(idx)
            x, y, bw, bh = boxes[pos]
            class_id = class_ids[pos]
            detections.append(Detection(
                class_id=class_id,
                class_name=classes[class_id],
                confidence=confidences[pos],
                bbox=(x, y, x + bw, y + bh),
            ))

    t_parse = time.perf_counter()
    # np.max raises on an empty array, so both maxima are guarded on size.
    class_max = 0.0
    if has_class_scores and predictions.shape[0] > 0:
        class_max = float(np.max(predictions[:, 5:]))
    info = {
        "shape": tuple(predictions.shape),
        "blob_ms": (t_blob - t0) * 1000.0,
        "forward_ms": (t_forward - t_blob) * 1000.0,
        "parse_ms": (t_parse - t_forward) * 1000.0,
        "total_ms": (t_parse - t0) * 1000.0,
        "raw_max": float(np.max(predictions)) if predictions.size else 0.0,
        "class_max": class_max,
        "raw_predictions": predictions,
    }
    return detections, info
|
|
|
|
|
|
def best_label_detections(
    detections: list[Detection],
    label_name: str,
    max_boxes: int,
) -> list[Detection]:
    """Return the strongest detections whose class matches ``label_name``.

    Class names are compared case-insensitively and ignoring surrounding
    whitespace. Matches are ordered by descending confidence, with the
    bounding-box area (also descending) as tie-breaker.

    Args:
        detections: Detections to filter; the list itself is not modified.
        label_name: Class name to keep.
        max_boxes: Maximum number of detections to return. Zero or a negative
            value yields an empty list.

    Returns:
        A new list with at most ``max_boxes`` matching detections.
    """
    # A negative limit would otherwise be interpreted by the slice below as
    # "everything except the last N", returning boxes instead of none.
    if max_boxes <= 0:
        return []

    # Normalise the requested label once instead of once per detection.
    wanted = label_name.strip().lower()
    labels = [d for d in detections if d.class_name.strip().lower() == wanted]
    labels.sort(
        key=lambda d: (
            d.confidence,
            (d.bbox[2] - d.bbox[0]) * (d.bbox[3] - d.bbox[1]),
        ),
        reverse=True,
    )
    return labels[:max_boxes]
|
|
|
|
|
|
def draw_detection(frame: np.ndarray, det: Detection, label_class: str) -> None:
    """Draw one detection's rectangle and caption onto ``frame`` in place.

    The BGR colour is chosen from the lower-cased class name, tested in this
    order: ``gaylord_stimato`` -> magenta, ``label_class`` -> yellow,
    ``gaylord`` -> green, anything else -> white. Gaylord boxes (detected or
    estimated) get a thicker outline than every other class.

    Args:
        frame: Image to draw on.
        det: Detection providing ``bbox``, ``class_name`` and ``confidence``.
        label_class: Name of the label class (compared case-insensitively).
    """
    left, top, right, bottom = det.bbox
    kind = det.class_name.lower()

    if kind == "gaylord_stimato":
        bgr = (255, 0, 255)
    elif kind == label_class.lower():
        bgr = (0, 255, 255)
    elif kind == "gaylord":
        bgr = (0, 255, 0)
    else:
        bgr = (255, 255, 255)

    outline = 2
    if kind in ("gaylord", "gaylord_stimato"):
        outline = 4
    cv2.rectangle(frame, (left, top), (right, bottom), bgr, outline)

    # Keep the caption baseline at y >= 20 so it stays inside the image when
    # the box touches the top edge.
    caption_origin = (left, max(20, top - 8))
    cv2.putText(
        frame,
        f"{det.class_name} {det.confidence:.2f}",
        caption_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        bgr,
        2,
        cv2.LINE_AA,
    )
|
|
|
|
|
|
def infer_gaylords_from_labels(
    detections: list[Detection],
    frame_w: int,
    frame_h: int,
    label_class: str,
    width_factor: float,
    height_factor: float,
    y_shift: float,
) -> list[Detection]:
    """Estimate gaylord boxes from label detections when none was detected.

    If any detection already has class ``gaylord`` nothing is inferred.
    Otherwise, for every detection of ``label_class`` a box is built whose
    size is the label size scaled by ``width_factor`` / ``height_factor`` and
    whose centre is the label centre moved vertically by ``y_shift`` label
    heights. The box is passed through ``clip_box`` with the frame size.

    Args:
        detections: Detections of the current frame.
        frame_w: Frame width handed to ``clip_box``.
        frame_h: Frame height handed to ``clip_box``.
        label_class: Class name of the label detections; compared
            case-insensitively and ignoring surrounding whitespace, the same
            way ``best_label_detections`` matches labels.
        width_factor: Inferred width as a multiple of the label width.
        height_factor: Inferred height as a multiple of the label height.
        y_shift: Vertical centre offset as a multiple of the label height.

    Returns:
        ``Detection`` objects with ``class_id=-1`` and class name
        ``gaylord_stimato`` carrying the source label's confidence. Boxes
        that end up with no area are omitted.
    """
    if any(d.class_name.lower() == "gaylord" for d in detections):
        return []

    # Normalise once; stripping keeps this matcher in agreement with
    # best_label_detections when the configured class has stray whitespace.
    wanted = label_class.strip().lower()

    inferred: list[Detection] = []
    for det in detections:
        if det.class_name.strip().lower() != wanted:
            continue

        x1, y1, x2, y2 = det.bbox
        bw = x2 - x1
        bh = y2 - y1
        if bw <= 0 or bh <= 0:
            continue

        cx = (x1 + x2) / 2.0
        cy = (y1 + y2) / 2.0 + bh * y_shift
        gw = bw * width_factor
        gh = bh * height_factor

        gx1 = int(cx - gw / 2.0)
        gy1 = int(cy - gh / 2.0)
        gx2 = int(cx + gw / 2.0)
        gy2 = int(cy + gh / 2.0)
        gx1, gy1, gx2, gy2 = clip_box(gx1, gy1, gx2, gy2, frame_w, frame_h)

        # The shifted/scaled box can collapse (zero factors, or a box that
        # no longer overlaps the frame after clipping). The detector drops
        # empty boxes after clip_box as well, so do the same here.
        if gx2 <= gx1 or gy2 <= gy1:
            continue

        inferred.append(Detection(
            class_id=-1,
            class_name="gaylord_stimato",
            confidence=det.confidence,
            bbox=(gx1, gy1, gx2, gy2),
        ))

    return inferred
|
|
|
|
|
|
def draw_ocr_results(
    frame: np.ndarray,
    ocr_results: list[OcrResult],
    max_age_sec: float = 5.0,
) -> None:
    """Write recent OCR readings onto ``frame`` in place.

    A result is drawn as ``NUM: <text>`` just below its bounding box. Results
    older than ``max_age_sec`` (measured against ``time.perf_counter()``) and
    results with empty text are skipped.

    Args:
        frame: Image to annotate.
        ocr_results: OCR results to consider.
        max_age_sec: Maximum age, in seconds, of a result that is still drawn.
    """
    reference_time = time.perf_counter()
    for entry in ocr_results:
        expired = reference_time - entry.timestamp > max_age_sec
        if expired or not entry.text:
            continue

        left, _top, _right, bottom = entry.bbox
        # Clamp the baseline so the text never falls below the image.
        baseline_y = min(frame.shape[0] - 5, bottom + 24)
        cv2.putText(
            frame,
            f"NUM: {entry.text}",
            (left, baseline_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 255),
            2,
            cv2.LINE_AA,
        )
|
|
|
|
|
|
def draw_status(frame: np.ndarray, stats_text: list[str]) -> None:
    """Render status lines in red at the top-left corner of ``frame``.

    The first line's baseline is at y=25 and every following line is placed
    24 pixels lower. The frame is modified in place.
    """
    for row, message in enumerate(stats_text):
        cv2.putText(
            frame,
            message,
            (10, 25 + 24 * row),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 255),
            2,
            cv2.LINE_AA,
        )
|
|
|
|
|
|
def draw_ocr_debug(ocr_frame: np.ndarray, text: str, preview_width: int) -> np.ndarray:
    """Build the OCR debug image: the OCR input on top, the reading below.

    A 2-D (single-channel) input is converted to BGR, any other input is
    copied. The image is resized with ``resize_preview`` and pasted at the
    top-left of a white canvas that is 70 pixels taller than the preview and
    at least 500 pixels wide. The caption ``OCR: <text>`` is written in the
    strip below the image, using a placeholder when ``text`` is empty.

    Returns:
        The newly allocated canvas; ``ocr_frame`` is left untouched.
    """
    if len(ocr_frame.shape) != 2:
        preview = ocr_frame.copy()
    else:
        preview = cv2.cvtColor(ocr_frame, cv2.COLOR_GRAY2BGR)
    preview = resize_preview(preview, preview_width)

    img_h = preview.shape[0]
    img_w = preview.shape[1]
    canvas = np.full((img_h + 70, max(img_w, 500), 3), 255, dtype=np.uint8)
    canvas[:img_h, :img_w] = preview

    caption = text or "(nessun codice)"
    cv2.putText(
        canvas,
        f"OCR: {caption}",
        (10, img_h + 45),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        (0, 0, 255),
        2,
        cv2.LINE_AA,
    )
    return canvas
|
|
|
|
|
|
def resize_for_quality(frame: np.ndarray, target_width: int) -> np.ndarray:
    """Downscale ``frame`` to ``target_width`` keeping the aspect ratio.

    The input object itself is returned when ``target_width`` is not positive
    or the frame is already no wider than ``target_width``; frames are never
    enlarged. The resized height is at least one pixel.
    """
    if target_width <= 0:
        return frame
    source_width = frame.shape[1]
    if source_width <= target_width:
        return frame

    ratio = target_width / source_width
    new_height = max(1, int(frame.shape[0] * ratio))
    return cv2.resize(frame, (target_width, new_height), interpolation=cv2.INTER_AREA)
|
|
|
|
|
|
def estimate_sharpness(frame: np.ndarray, metric: str, resize_width: int) -> float:
    """Return a focus score for a BGR frame; larger means sharper.

    The frame is first reduced with ``resize_for_quality`` and converted to
    grayscale. With ``metric == "tenengrad"`` the score is the mean squared
    Sobel gradient magnitude; any other value selects the variance of the
    Laplacian.
    """
    reduced = resize_for_quality(frame, resize_width)
    gray = cv2.cvtColor(reduced, cv2.COLOR_BGR2GRAY)

    if metric != "tenengrad":
        laplacian = cv2.Laplacian(gray, cv2.CV_64F)
        return float(laplacian.var())

    grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    energy = grad_x * grad_x + grad_y * grad_y
    return float(np.mean(energy))
|
|
|
|
|
|
def format_stats(
    shared: SharedState,
    frame_buffer: LatestBuffer,
    roi_buffer: LatestBuffer,
    start_time: float,
) -> list[str]:
    """Format the pipeline counters as human-readable status lines.

    Rates are counters divided by the time elapsed since ``start_time``
    (``time.perf_counter()`` clock, floored at 1 ms); averages are totals
    divided by their cycle count, with the divisor floored at 1.

    Returns:
        Ten lines: capture/display rate, quality filter, YOLO, OCR, display
        time, last frame ids, last detection summary, and the counters of
        the frame and ROI buffers.
    """
    stats = shared.snapshot_stats()
    elapsed = max(0.001, time.perf_counter() - start_time)
    frame_io = frame_buffer.stats()
    roi_io = roi_buffer.stats()

    def per_second(count):
        return count / elapsed

    def mean(total, samples):
        return total / max(1, samples)

    def buffer_line(tag, counters):
        return (
            f"{tag} push/pop/drop/skip={counters.pushed}/{counters.popped}/"
            f"{counters.dropped_on_put}/{counters.skipped_on_pop}"
        )

    quality_line = (
        f"quality yolo_in/reject={stats.yolo_submitted_frames}/{stats.quality_rejected_frames} "
        f"last={stats.last_quality_score:.1f} "
        f"avg={mean(stats.quality_score_total, stats.quality_cycles):.1f} "
        f"ms={mean(stats.quality_total_ms, stats.quality_cycles):.2f}"
    )
    yolo_line = (
        f"yolo_fps={per_second(stats.yolo_cycles):.1f} "
        f"avg={mean(stats.yolo_total_ms, stats.yolo_cycles):.1f}ms "
        f"fwd={mean(stats.yolo_forward_ms, stats.yolo_cycles):.1f}ms"
    )
    ocr_line = (
        f"ocr_fps={per_second(stats.ocr_cycles):.1f} "
        f"avg={mean(stats.ocr_total_ms, stats.ocr_cycles):.1f}ms"
    )

    lines = [
        f"cap_fps={per_second(stats.capture_frames):.1f}",
        f"disp_fps={per_second(stats.display_frames):.1f}",
        quality_line,
        yolo_line,
        ocr_line,
        f"display_avg={mean(stats.display_total_ms, stats.display_frames):.1f}ms",
        f"frames id cap/yolo/ocr={stats.last_capture_frame_id}/{stats.last_yolo_frame_id}/{stats.last_ocr_frame_id}",
        f"last_det={stats.last_detection_count} {stats.last_detection_summary}",
        buffer_line("frame_buf", frame_io),
        buffer_line("roi_buf", roi_io),
    ]
    return lines
|
|
|
|
|
|
def yolo_worker(
    stop_event: threading.Event,
    frame_buffer: LatestBuffer,
    roi_buffer: LatestBuffer,
    shared: SharedState,
    net,
    classes: list[str],
    args,
    roi_id_gen: IdGenerator,
) -> None:
    """Detection thread body: run YOLOv2 on incoming frames and feed OCR.

    Until ``stop_event`` is set, each iteration takes a frame packet from
    ``frame_buffer``, runs ``detect_yolov2`` on a private copy, publishes the
    ``DetectionResult`` and an annotated debug image through ``shared``, and
    pushes crops of the best label detections into ``roi_buffer`` subject to
    the throttling options in ``args``.

    Args:
        stop_event: Signals the loop to terminate.
        frame_buffer: Source of frame packets.
        roi_buffer: Destination for ``RoiPacket`` objects.
        shared: Receives the latest detection result and the debug frame.
        net: Network object forwarded unchanged to ``detect_yolov2``.
        classes: Class names, indexed by class id.
        args: Parsed command-line options.
        roi_id_gen: Supplies the ``roi_id`` of each queued ROI.
    """
    # Last submission time (perf_counter clock) per quantized bbox slot, used
    # for the per-slot OCR cooldown.
    slot_last_ocr: dict[tuple[int, int], float] = {}
    # Very old sentinel so the first submission always passes the
    # minimum-interval check.
    last_ocr_submit = -999999.0
    label_class_lower = args.label_class.strip().lower()

    log("YOLO worker avviato")
    while not stop_event.is_set():
        packet = frame_buffer.get_latest_blocking(stop_event)
        # NOTE(review): presumably None means "woken without a frame"
        # (e.g. on shutdown) - confirm against LatestBuffer.
        if packet is None:
            continue

        # Work on a private copy of the frame data for inference, drawing
        # and ROI cropping.
        local_frame = packet.frame.copy()
        detections, info = detect_yolov2(
            net=net,
            frame=local_frame,
            classes=classes,
            min_confidence=args.min_confidence,
            nms_threshold=args.nms_threshold,
            input_size=args.input_size,
            use_nms=args.use_nms,
            swap_rb=args.swap_rb,
        )

        # The result is stamped with the completion time, not the capture
        # time stored in the packet.
        result = DetectionResult(
            frame_id=packet.frame_id,
            timestamp=time.perf_counter(),
            detections=detections,
            inference_ms=float(info["total_ms"]),
            blob_ms=float(info["blob_ms"]),
            forward_ms=float(info["forward_ms"]),
            parse_ms=float(info["parse_ms"]),
            source_width=packet.width,
            source_height=packet.height,
        )
        shared.set_latest_detection(result)

        if args.debug_yolo_output:
            log(
                f"YOLO frame={packet.frame_id} shape={info['shape']} "
                f"raw_max={info['raw_max']:.4f} class_max={info['class_max']:.4f} "
                f"det={len(detections)}"
            )
            if args.debug_yolo_top > 0:
                raw_predictions = info["raw_predictions"]
                if (
                    isinstance(raw_predictions, np.ndarray)
                    and raw_predictions.ndim == 2
                    and raw_predictions.shape[1] > 5
                ):
                    # Columns from index 5 on are treated as per-class
                    # scores; log the rows with the highest best-class
                    # score, strongest first.
                    scores = raw_predictions[:, 5:]
                    row_best = scores.max(axis=1)
                    top_indices = np.argsort(row_best)[-args.debug_yolo_top:][::-1]
                    for idx in top_indices:
                        cls_scores = scores[idx]
                        cls_parts = " ".join(
                            f"{classes[i]}={float(cls_scores[i]):.4f}"
                            for i in range(min(len(classes), cls_scores.shape[0]))
                        )
                        log(
                            f" raw[{int(idx)}] obj={float(raw_predictions[idx, 4]):.4f} "
                            f"{cls_parts} box={tuple(float(v) for v in raw_predictions[idx, :4])}"
                        )

        # The annotated debug frame is built and published on every cycle;
        # no debug/display flag is checked here.
        debug = local_frame.copy()
        debug_detections = list(detections)
        if args.infer_gaylord_from_label:
            inferred_gaylords = infer_gaylords_from_labels(
                detections,
                packet.width,
                packet.height,
                args.label_class,
                args.inferred_gaylord_width_factor,
                args.inferred_gaylord_height_factor,
                args.inferred_gaylord_y_shift,
            )
            debug_detections.extend(inferred_gaylords)
            if args.debug_inferred_gaylord and inferred_gaylords:
                log(
                    f"gaylord stimati frame={packet.frame_id}: "
                    f"{[det.bbox for det in inferred_gaylords]}"
                )
        for det in debug_detections:
            draw_detection(debug, det, args.label_class)
        shared.set_debug_yolo_frame(resize_preview(debug, args.preview_width))

        label_dets = best_label_detections(
            detections,
            args.label_class,
            args.max_roi_per_frame,
        )
        # `now` is sampled once per frame. After one ROI is submitted below,
        # last_ocr_submit == now, so with a positive ocr_submit_min_interval
        # the remaining candidates of this frame fail the interval check.
        now = time.perf_counter()
        for det in label_dets:
            # Global rate limit across all labels.
            if now - last_ocr_submit < args.ocr_submit_min_interval:
                continue
            # Do not queue more work than the OCR side is allowed to have
            # pending.
            if roi_buffer.depth() >= args.ocr_max_pending:
                continue

            x1, y1, x2, y2 = det.bbox
            bw = x2 - x1
            bh = y2 - y1
            if bw < args.min_label_width or bh < args.min_label_height:
                continue

            # Per-slot cooldown keyed by the quantized bbox.
            slot_key = quantized_slot_key(det.bbox, args.slot_size)
            if now - slot_last_ocr.get(slot_key, -999999.0) < args.ocr_cooldown_sec:
                continue

            # Crop a padded region around the label; the packet below still
            # carries the original, unpadded bbox.
            rx1, ry1, rx2, ry2 = expand_box(
                x1, y1, x2, y2,
                packet.width,
                packet.height,
                pad_ratio=args.ocr_pad_ratio,
            )
            roi = local_frame[ry1:ry2, rx1:rx2]
            if roi.size == 0:
                continue

            # Copy so the packet owns its pixels instead of a view into
            # local_frame.
            roi_copy = roi.copy()
            roi_buffer.put(RoiPacket(
                roi_id=roi_id_gen.next(),
                source_frame_id=packet.frame_id,
                timestamp=now,
                # Stores the normalized configured label class, not
                # det.class_name.
                class_name=label_class_lower,
                confidence=det.confidence,
                bbox=det.bbox,
                roi_image=roi_copy,
                width=roi_copy.shape[1],
                height=roi_copy.shape[0],
            ))
            slot_last_ocr[slot_key] = now
            last_ocr_submit = now

    log("YOLO worker terminato")
|
|
|
|
|
|
def ocr_worker(
    stop_event: threading.Event,
    roi_buffer: LatestBuffer,
    shared: SharedState,
    args,
) -> None:
    """OCR thread body: read digits from queued ROIs and publish the results.

    The engine is created with ``create_ocr_engine``; if that fails the
    problem is logged and the worker returns without processing anything.
    Otherwise, until ``stop_event`` is set, each ROI taken from ``roi_buffer``
    is passed to ``engine.read_digits``, the outcome is stored in ``shared``
    (result history and debug frame) and accepted readings are logged. A
    reading is accepted when it has at least ``args.ocr_min_digits`` digits;
    otherwise the published text is empty. The engine's ``close`` method, if
    it has one, is called when the loop ends for any reason.
    """
    try:
        engine = create_ocr_engine(args)
    except Exception as exc:
        log(f"OCR worker disabilitato: impossibile inizializzare {args.ocr_backend}: {exc}")
        return

    # Optional directory for dumping the ROI images of every OCR attempt.
    debug_dir = None
    if args.save_ocr_roi_dir:
        debug_dir = Path(args.save_ocr_roi_dir)

    log(f"OCR worker avviato con backend {args.ocr_backend}")
    try:
        while not stop_event.is_set():
            job = roi_buffer.get_latest_blocking(stop_event)
            if job is None:
                continue

            started = time.perf_counter()
            reading = engine.read_digits(job.roi_image)
            ocr_ms = (time.perf_counter() - started) * 1000.0
            digits, raw_text, processed = reading.digits, reading.raw_text, reading.processed

            if debug_dir is not None:
                save_ocr_debug_images(debug_dir, job, reading)

            if args.print_all_ocr:
                log(
                    f"OCR frame={job.source_frame_id} "
                    f"raw='{raw_text.strip()}' digits='{digits}' ms={ocr_ms:.1f}"
                )

            # Readings that are too short are published with empty text.
            if len(digits) >= args.ocr_min_digits:
                accepted_text = digits
            else:
                accepted_text = ""

            result = OcrResult(
                roi_id=job.roi_id,
                source_frame_id=job.source_frame_id,
                timestamp=time.perf_counter(),
                text=accepted_text,
                raw_text=raw_text,
                bbox=job.bbox,
                ocr_ms=ocr_ms,
            )
            shared.add_ocr_result(result)
            # The debug view falls back to the raw text when nothing was
            # accepted.
            shared.set_debug_ocr_frame(processed, result.text or raw_text.strip())

            if not result.text:
                continue
            log(
                f"Etichetta letta frame={result.source_frame_id} "
                f"roi={result.roi_id}: {result.text} ({ocr_ms:.1f} ms)"
            )
    finally:
        # Not every engine exposes close(); call it only when present.
        closer = getattr(engine, "close", None)
        if closer is not None:
            closer()

    log("OCR worker terminato")
|
|
|
|
|
|
def main() -> int:
    """Run the capture -> quality check -> YOLO -> OCR pipeline.

    Parses the command line, loads the network, opens the video source,
    starts the YOLO worker thread (and the OCR worker unless ``--no-ocr``),
    then reads frames in a loop, optionally showing preview windows, until
    the stream ends, ``--max-frames`` is reached, ``q`` is pressed or the
    user interrupts. On the way out the workers are stopped and a final
    statistics summary is logged.

    Returns:
        Process exit status: 1 when the video source cannot be opened, the
        value of ``run_easyocr_worker`` in EasyOCR worker mode, 0 otherwise.
    """
    args = parse_args()
    # EasyOCR worker mode is handled entirely by run_easyocr_worker.
    if args.easyocr_worker:
        return run_easyocr_worker(args)

    # NOTE(review): require_file presumably aborts when the file is
    # missing - confirm its behaviour.
    require_file(args.weights, "File pesi YOLOv2")
    require_file(args.config, "File config YOLOv2")
    require_file(args.labels, "File labels")
    classes = load_classes(args.labels)

    cv2.setNumThreads(args.opencv_threads)
    log(f"OpenCV version: {cv2.__version__}")
    log(f"Classi: {classes}")
    log(f"Backend richiesto: {args.backend}")

    net = cv2.dnn.readNetFromDarknet(args.config, args.weights)
    configure_net_backend(net, args.backend)

    cap, source_name = open_capture(args.video)
    if not cap.isOpened():
        log("ERRORE: impossibile aprire la sorgente video")
        return 1

    # State shared between this loop and the worker threads.
    frame_buffer = LatestBuffer(args.frame_buffer_size, "frames")
    roi_buffer = LatestBuffer(args.roi_buffer_size, "roi")
    shared = SharedState(args.ocr_history_size)
    stop_event = threading.Event()
    roi_id_gen = IdGenerator()
    start_time = time.perf_counter()

    # Workers are daemon threads, so they cannot keep the process alive if
    # the joins at shutdown time out.
    yolo_thread = threading.Thread(
        target=yolo_worker,
        name="yolo-worker",
        args=(stop_event, frame_buffer, roi_buffer, shared, net, classes, args, roi_id_gen),
        daemon=True,
    )
    yolo_thread.start()
    ocr_thread = None
    if not args.no_ocr:
        ocr_thread = threading.Thread(
            target=ocr_worker,
            name="ocr-worker",
            args=(stop_event, roi_buffer, shared, args),
            daemon=True,
        )
        ocr_thread.start()
    else:
        log("OCR disabilitato da --no-ocr")

    if not args.no_display:
        cv2.namedWindow("flywms capture", cv2.WINDOW_NORMAL)
        if args.debug_yolo_window:
            cv2.namedWindow("flywms yolo", cv2.WINDOW_NORMAL)
        # The OCR window is created whenever OCR is running, even without
        # --debug-ocr-window.
        if args.debug_ocr_window or not args.no_ocr:
            cv2.namedWindow("flywms ocr", cv2.WINDOW_NORMAL)
        if args.quality_filter and args.debug_rejected_window:
            cv2.namedWindow("flywms scartati", cv2.WINDOW_NORMAL)

    frame_id = 0
    last_stats_log = time.perf_counter()
    # A falsy FPS value from the capture is treated as 0.0 (unknown).
    video_fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
    # Pace the loop only when real-time playback is requested and the FPS is
    # known; 0.0 disables pacing.
    frame_period = 1.0 / video_fps if args.realtime_playback and video_fps > 0 else 0.0

    try:
        while not stop_event.is_set():
            loop_start = time.perf_counter()
            t_read0 = time.perf_counter()
            grabbed, frame = cap.read()
            read_ms = (time.perf_counter() - t_read0) * 1000.0

            if not grabbed or frame is None:
                log("Fine stream o impossibile leggere il frame")
                break

            frame_id += 1
            # The limit is checked after the read, so the frame that exceeds
            # it has been read but is discarded.
            if args.max_frames > 0 and frame_id > args.max_frames:
                log(f"Raggiunto --max-frames={args.max_frames}")
                break

            height, width = frame.shape[:2]
            sharpness = 0.0
            quality_ms = 0.0
            quality_passed = True
            # NOTE(review): with --debug-quality-log alone (no
            # --quality-filter) quality_passed is still derived from the
            # sharpness threshold, so blurry frames are withheld from YOLO
            # although the filter is nominally off - confirm this is
            # intended.
            if args.quality_filter or args.debug_quality_log:
                t_quality0 = time.perf_counter()
                sharpness = estimate_sharpness(
                    frame,
                    args.blur_metric,
                    args.blur_resize_width,
                )
                quality_ms = (time.perf_counter() - t_quality0) * 1000.0
                quality_passed = sharpness >= args.min_sharpness
                if args.debug_quality_log:
                    log(
                        f"quality frame={frame_id} sharpness={sharpness:.1f} "
                        f"passed={quality_passed} ms={quality_ms:.2f}"
                    )

            packet = FramePacket(
                frame_id=frame_id,
                timestamp=time.perf_counter(),
                frame=frame,
                width=width,
                height=height,
                source=source_name,
            )
            shared.add_capture_read(frame_id, read_ms)
            # Only frames that passed the quality check reach the YOLO
            # worker.
            if quality_passed:
                frame_buffer.put(packet)
            # Recorded for every frame, including when no sharpness was
            # computed (score 0.0, passed True).
            shared.add_quality_result(
                score=sharpness,
                passed=quality_passed,
                elapsed_ms=quality_ms,
                submitted_to_yolo=quality_passed,
            )

            t_display0 = time.perf_counter()
            if not args.no_display:
                # Draw on a copy: the original array was handed to
                # frame_buffer above.
                display = frame.copy()
                if args.quality_filter:
                    quality_color = (0, 180, 0) if quality_passed else (0, 0, 255)
                    cv2.putText(
                        display,
                        f"sharp={sharpness:.1f} {'OK' if quality_passed else 'BLUR'}",
                        (20, 38),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        quality_color,
                        2,
                        cv2.LINE_AA,
                    )
                # The overlay uses the most recently published detection
                # result; its frame id is not compared with the current
                # frame.
                latest_detection = shared.get_latest_detection()
                if latest_detection is not None:
                    display_detections = list(latest_detection.detections)
                    if args.infer_gaylord_from_label:
                        inferred_gaylords = infer_gaylords_from_labels(
                            latest_detection.detections,
                            latest_detection.source_width,
                            latest_detection.source_height,
                            args.label_class,
                            args.inferred_gaylord_width_factor,
                            args.inferred_gaylord_height_factor,
                            args.inferred_gaylord_y_shift,
                        )
                        display_detections.extend(inferred_gaylords)
                    for det in display_detections:
                        draw_detection(display, det, args.label_class)

                draw_ocr_results(display, shared.get_recent_ocr_results())
                # Only the first five status lines are drawn on the preview.
                draw_status(display, format_stats(shared, frame_buffer, roi_buffer, start_time)[:5])
                display = resize_preview(display, args.preview_width)
                cv2.imshow("flywms capture", display)

                debug_yolo, debug_ocr, debug_ocr_text = shared.get_debug_frames()
                if args.debug_yolo_window and debug_yolo is not None:
                    cv2.imshow("flywms yolo", debug_yolo)
                if (args.debug_ocr_window or not args.no_ocr) and debug_ocr is not None:
                    cv2.imshow(
                        "flywms ocr",
                        draw_ocr_debug(debug_ocr, debug_ocr_text, args.preview_width),
                    )
                if (
                    args.quality_filter
                    and args.debug_rejected_window
                    and not quality_passed
                ):
                    rejected = frame.copy()
                    cv2.putText(
                        rejected,
                        f"SCARTATO sharp={sharpness:.1f} < {args.min_sharpness:.1f}",
                        (20, 38),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        (0, 0, 255),
                        2,
                        cv2.LINE_AA,
                    )
                    rejected = resize_preview(rejected, args.preview_width)
                    cv2.imshow("flywms scartati", rejected)

                # waitKey also services the HighGUI event loop; "q" quits.
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    log("Premuto q, uscita")
                    break

            display_ms = (time.perf_counter() - t_display0) * 1000.0
            shared.add_display(display_ms)

            # Periodic statistics dump to the log.
            now = time.perf_counter()
            if now - last_stats_log >= args.stats_interval:
                for line in format_stats(shared, frame_buffer, roi_buffer, start_time):
                    log(line)
                last_stats_log = now

            # Sleep away the rest of the frame period when pacing is on.
            if frame_period > 0:
                elapsed = time.perf_counter() - loop_start
                if elapsed < frame_period:
                    time.sleep(frame_period - elapsed)

    except KeyboardInterrupt:
        log("Interrotto da tastiera")
    finally:
        # The drain pause happens before the stop signal, on every exit path
        # (including Ctrl+C); presumably it lets the workers finish queued
        # items.
        if args.drain_seconds > 0:
            log(f"Drain worker per {args.drain_seconds:.1f}s")
            time.sleep(args.drain_seconds)
        stop_event.set()
        # Wake any worker blocked on a buffer so it can observe stop_event.
        frame_buffer.wake_all()
        roi_buffer.wake_all()
        # Bounded joins: shutdown proceeds even if a worker does not finish
        # within 3 seconds.
        yolo_thread.join(timeout=3.0)
        if ocr_thread is not None:
            ocr_thread.join(timeout=3.0)
        cap.release()
        if not args.no_display:
            cv2.destroyAllWindows()

    log("=== riepilogo finale ===")
    for line in format_stats(shared, frame_buffer, roi_buffer, start_time):
        log(line)
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())
|