Initial import

This commit is contained in:
administrator
2026-05-14 09:36:44 +02:00
commit 747298ac7a
1212 changed files with 56349 additions and 0 deletions

539
ware_detect_optimized.py Normal file
View File

@@ -0,0 +1,539 @@
import argparse
import re
import sys
from pathlib import Path
import cv2
import numpy as np
import pytesseract
def _positive_int(value):
    """Argparse ``type=`` callable: parse *value* as an integer >= 1.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is < 1.
            argparse turns this into a usage error (exit status 2).
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"valore intero non valido: {value!r}"
        ) from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"deve essere >= 1 (ricevuto {number})"
        )
    return number


def parse_args(argv=None):
    """Parse the command-line options of the YOLO + OCR label reader.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The populated ``argparse.Namespace``.

    ``--ocr-every`` and ``--slot-size`` are used as a modulus / divisor by
    the main loop, so they are validated to be >= 1 here instead of
    crashing later with ``ZeroDivisionError``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-v", "--video",
        default=None,
        help="Percorso video. Se omesso usa la webcam 0",
    )
    ap.add_argument(
        "--weights",
        default="yolov2.weights",
        help="File pesi YOLO",
    )
    ap.add_argument(
        "--config",
        default="yolov2.cfg",
        help="File cfg YOLO",
    )
    ap.add_argument(
        "--labels",
        default="labels.txt",
        help="File classi YOLO",
    )
    ap.add_argument(
        "--min-confidence",
        type=float,
        default=0.35,
        help="Soglia minima confidenza YOLO",
    )
    ap.add_argument(
        "--nms-threshold",
        type=float,
        default=0.40,
        help="Soglia NMS",
    )
    ap.add_argument(
        "--ocr-class",
        default="etichetta",
        help="Classe YOLO su cui fare OCR",
    )
    ap.add_argument(
        "--ocr-every",
        type=_positive_int,  # used as a modulus: 0 would divide by zero
        default=5,
        help="Fai OCR ogni N frame",
    )
    ap.add_argument(
        "--slot-cooldown",
        type=int,
        default=15,
        help="Numero minimo di frame prima di rifare OCR sulla stessa zona",
    )
    ap.add_argument(
        "--slot-size",
        type=_positive_int,  # used as a divisor: 0 would divide by zero
        default=120,
        help="Dimensione griglia per deduplicare le etichette",
    )
    ap.add_argument(
        "--max-ocr-boxes",
        type=int,
        default=2,
        help="Numero massimo di ROI etichetta su cui fare OCR per frame",
    )
    ap.add_argument(
        "--min-label-width",
        type=int,
        default=60,
        help="Larghezza minima bbox etichetta",
    )
    ap.add_argument(
        "--min-label-height",
        type=int,
        default=25,
        help="Altezza minima bbox etichetta",
    )
    ap.add_argument(
        "--detect-width",
        type=int,
        default=960,
        help="Larghezza massima del frame usato per detection YOLO",
    )
    ap.add_argument(
        "--show-roi",
        action="store_true",
        help="Mostra anche la ROI preprocessata per OCR",
    )
    ap.add_argument(
        "--tesseract-cmd",
        default=None,
        help="Percorso esplicito a tesseract.exe, se necessario",
    )
    ap.add_argument(
        "--print-all",
        action="store_true",
        help="Stampa anche OCR vuoti o corti",
    )
    return ap.parse_args(argv)
def require_file(path_str, description):
    """Return ``Path(path_str)`` if it names an existing regular file.

    Args:
        path_str: Path to check.
        description: Human-readable name of the file, used in the error
            message.

    Returns:
        The path as a ``pathlib.Path``.

    If the path is missing, or exists but is not a regular file (for
    example a directory), an error is written to stderr and the process
    exits with status 1.
    """
    path = Path(path_str)
    # is_file() rather than exists(): a directory with the right name would
    # otherwise pass this check and only fail later inside the model loader.
    if not path.is_file():
        print(f"Errore: {description} non trovato: {path}", file=sys.stderr)
        sys.exit(1)
    return path
def load_classes(labels_path):
    """Read the class names, one per line, from *labels_path*.

    Surrounding whitespace is stripped and blank lines are skipped. The
    file is decoded as UTF-8; a leading byte-order mark (as written by some
    Windows editors) is discarded so it cannot end up inside the first
    class name.

    Returns:
        A non-empty list of class-name strings, in file order.

    If the file contains no class names, an error is written to stderr and
    the process exits with status 1.
    """
    with open(labels_path, "rt", encoding="utf-8-sig") as f:
        stripped = (line.strip() for line in f)
        classes = [name for name in stripped if name]
    if not classes:
        # Report the file actually read, not a hard-coded default name.
        print(f"Errore: file classi vuoto: {labels_path}", file=sys.stderr)
        sys.exit(1)
    return classes
def open_capture(video_arg):
    """Open the video source selected by ``--video``.

    Args:
        video_arg: ``None`` for camera 0, a string of decimal digits for a
            camera index, or anything else to be passed to
            ``cv2.VideoCapture`` unchanged (e.g. a file path).

    Returns:
        A ``cv2.VideoCapture``. It may be unopened; the caller is expected
        to check ``isOpened()``.

    Cameras are tried with the ``cv2.CAP_DSHOW`` backend first and, if that
    does not open, again with OpenCV's default backend.
    """
    if video_arg is None:
        camera_index = 0
    elif str(video_arg).isdecimal():
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as "²" that int() rejects with ValueError.
        camera_index = int(video_arg)
    else:
        return cv2.VideoCapture(video_arg)

    cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)
    if not cap.isOpened():
        # Drop the failed handle before retrying with the default backend.
        cap.release()
        cap = cv2.VideoCapture(camera_index)
    return cap
def resize_for_detection(frame, max_width):
    """Downscale *frame* so that it is at most *max_width* pixels wide.

    Args:
        frame: Image exposing ``shape`` as ``(height, width, ...)``.
        max_width: Maximum width in pixels. A value <= 0 disables resizing.

    Returns:
        ``(image, scale_x, scale_y)``. The scale factors map coordinates in
        the returned image back to *frame* (``x_orig = x * scale_x``). When
        no resize is needed the original frame object is returned with both
        factors equal to ``1.0``.
    """
    h, w = frame.shape[:2]
    if max_width <= 0 or w <= max_width:
        return frame, 1.0, 1.0

    scale = max_width / float(w)
    # Clamp to 1 pixel: for very wide/flat frames or a tiny max_width the
    # truncated size could reach 0, which cv2.resize rejects and which would
    # also divide by zero in the scale factors below.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    # Recompute the factors from the actual integer size so that the mapping
    # back to the original frame accounts for the truncation above.
    scale_x = w / float(new_w)
    scale_y = h / float(new_h)
    return resized, scale_x, scale_y
def clip_box(x1, y1, x2, y2, w, h):
    """Clamp both corners of a box to the pixel grid of a ``w`` x ``h`` image.

    Each x coordinate is limited to ``[0, w - 1]`` and each y coordinate to
    ``[0, h - 1]``.

    Returns:
        The clamped ``(x1, y1, x2, y2)`` tuple.
    """
    def clamp(value, size):
        # Upper bound first, then lower bound, so a size of 0 yields 0.
        return max(0, min(value, size - 1))

    return clamp(x1, w), clamp(y1, h), clamp(x2, w), clamp(y2, h)
def expand_box(x1, y1, x2, y2, frame_w, frame_h, pad_ratio=0.10):
    """Grow a box by a fraction of its size on every side, then clip it.

    Args:
        x1, y1, x2, y2: Box corners.
        frame_w, frame_h: Frame size used for clipping.
        pad_ratio: Fraction of the box width/height added on each side; the
            resulting padding is truncated to an integer.

    Returns:
        The padded ``(x1, y1, x2, y2)`` box, clipped by ``clip_box``.
    """
    margin_x = int((x2 - x1) * pad_ratio)
    margin_y = int((y2 - y1) * pad_ratio)
    return clip_box(
        x1 - margin_x,
        y1 - margin_y,
        x2 + margin_x,
        y2 + margin_y,
        frame_w,
        frame_h,
    )
def preprocess_for_ocr(roi):
    """Turn a colour crop into a binarised image for OCR.

    Steps, in order: BGR-to-grayscale conversion, 2x upscaling with cubic
    interpolation, 3x3 Gaussian blur, Otsu thresholding to a 0/255 image,
    and a 10-pixel constant border of value 255 on every side.

    Args:
        roi: Image crop; it is converted with ``cv2.COLOR_BGR2GRAY``, so a
            3-channel BGR image is assumed.

    Returns:
        The processed single-channel image.
    """
    border = 10

    mono = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    enlarged = cv2.resize(
        mono, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC
    )
    smoothed = cv2.GaussianBlur(enlarged, (3, 3), 0)
    # With THRESH_OTSU the threshold argument (0) is ignored by OpenCV;
    # only the binarised image from the returned pair is needed.
    _, binary = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return cv2.copyMakeBorder(
        binary,
        border, border, border, border,
        borderType=cv2.BORDER_CONSTANT,
        value=255,
    )
def ocr_digits_only(roi):
    """Run Tesseract on *roi* and keep only the digits it reports.

    The crop is first passed through ``preprocess_for_ocr``; Tesseract is
    then called with a whitelist restricted to ``0``-``9``.

    Returns:
        ``(digits, processed, raw_text)``: the recognised text with every
        non-digit character removed, the preprocessed image that was fed to
        Tesseract, and Tesseract's unmodified output.
    """
    ocr_input = preprocess_for_ocr(roi)
    tesseract_options = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789'
    recognised = pytesseract.image_to_string(ocr_input, config=tesseract_options)
    # The whitelist does not stop whitespace/newlines from appearing in the
    # output, so strip everything that is not a digit.
    only_digits = re.sub(r"\D+", "", recognised)
    return only_digits, ocr_input, recognised
def detect_yolo(net, frame, classes, min_confidence, nms_threshold):
    """Run the YOLO network on *frame* and return the detections kept by NMS.

    Args:
        net: OpenCV DNN network (must provide ``setInput``, ``forward`` and
            ``getUnconnectedOutLayersNames``).
        frame: Image to analyse; boxes are expressed in its pixel
            coordinates.
        classes: Sequence of class names indexed by class id.
        min_confidence: Minimum best-class score for a candidate to be
            kept; also passed to ``cv2.dnn.NMSBoxes`` as score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        A list of dicts with keys ``"class_id"``, ``"label"``,
        ``"confidence"`` and ``"box"`` (``(x1, y1, x2, y2)`` clipped to the
        frame). The list is empty when nothing passes the thresholds.

    Each output row is read as ``[cx, cy, w, h, <unused>, score_0, ...]``
    with the first four values being fractions of the frame size. If the
    model reports a class id that has no entry in *classes* (labels file
    shorter than the model's class list) the label falls back to
    ``"class_<id>"`` instead of raising ``IndexError``.
    """
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(
        frame,
        scalefactor=1.0 / 255.0,
        size=(416, 416),
        swapRB=True,
        crop=False,
    )
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []
    confidences = []
    class_ids = []
    for output in layer_outputs:
        output = np.asarray(output)
        # Need a (rows, 5 + n_classes) matrix with at least one class column.
        if output.ndim != 2 or output.shape[0] == 0 or output.shape[1] <= 5:
            continue

        # Pick the best class per row in one vectorised pass, so the Python
        # loop below only visits rows that pass the confidence threshold.
        scores = output[:, 5:]
        best_class = scores.argmax(axis=1)
        # Compare in float64, like the scalar float() comparison would.
        best_score = scores[np.arange(scores.shape[0]), best_class].astype(
            np.float64
        )

        for row in np.flatnonzero(best_score >= min_confidence):
            cx_rel, cy_rel, w_rel, h_rel = output[row, :4]
            center_x = int(cx_rel * w)
            center_y = int(cy_rel * h)
            box_w = int(w_rel * w)
            box_h = int(h_rel * h)
            # NMSBoxes expects [left, top, width, height].
            left = int(center_x - box_w / 2)
            top = int(center_y - box_h / 2)
            boxes.append([left, top, box_w, box_h])
            confidences.append(float(best_score[row]))
            class_ids.append(int(best_class[row]))

    if not boxes:
        return []

    indices = cv2.dnn.NMSBoxes(
        boxes,
        confidences,
        min_confidence,
        nms_threshold,
    )
    if len(indices) == 0:
        return []

    final_detections = []
    # flatten() copes with both a flat index array and an (N, 1) array.
    for i in np.array(indices).flatten():
        i = int(i)
        left, top, box_w, box_h = boxes[i]
        x1, y1, x2, y2 = clip_box(left, top, left + box_w, top + box_h, w, h)
        class_id = class_ids[i]
        if 0 <= class_id < len(classes):
            label = classes[class_id]
        else:
            label = f"class_{class_id}"
        final_detections.append({
            "class_id": class_id,
            "label": label,
            "confidence": confidences[i],
            "box": (x1, y1, x2, y2),
        })
    return final_detections
def quantized_slot_key(x1, y1, x2, y2, slot_size):
    """Map a box to the grid cell that contains its centre.

    Args:
        x1, y1, x2, y2: Integer box corners.
        slot_size: Side of a grid cell in pixels. Values below 1 are
            treated as 1 (every pixel is its own cell) so that a size of 0
            cannot raise ``ZeroDivisionError``.

    Returns:
        ``(column, row)`` of the cell, usable as a dictionary key.
    """
    cell = max(1, slot_size)
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2
    return (cx // cell, cy // cell)
def draw_detection(frame, det, color=(255, 255, 255), text_extra=""):
    """Draw one detection (rectangle plus caption) onto *frame* in place.

    Args:
        frame: Image to draw on; it is modified.
        det: Detection dict with ``"box"`` (``(x1, y1, x2, y2)``),
            ``"label"`` and ``"confidence"``.
        color: Colour used for both the rectangle and the caption.
        text_extra: Optional text appended to the caption after ``" | "``.
    """
    left, top, right, bottom = det["box"]
    label = det["label"]
    conf = det["confidence"]

    text = f"{label} {conf:.2f}" + (f" | {text_extra}" if text_extra else "")

    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

    # Put the caption just above the box, but never above y = 20 so it
    # stays visible for boxes touching the top of the frame.
    baseline_y = max(20, top - 8)
    cv2.putText(
        frame,
        text,
        (left, baseline_y),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        color,
        2,
        cv2.LINE_AA,
    )
# Minimum number of frames between two console prints of the same number.
_PRINT_REPEAT_FRAMES = 30


def _rescale_detections(detections, scale_x, scale_y, frame_w, frame_h):
    """Map detection boxes from the detection frame to the original frame."""
    rescaled = []
    for det in detections:
        x1, y1, x2, y2 = det["box"]
        box = clip_box(
            int(x1 * scale_x),
            int(y1 * scale_y),
            int(x2 * scale_x),
            int(y2 * scale_y),
            frame_w,
            frame_h,
        )
        rescaled.append({**det, "box": box})
    return rescaled


def _ocr_label_boxes(frame, frame_idx, label_detections, args,
                     slot_memory, printed_texts):
    """Run OCR on eligible label boxes and update both caches in place.

    *frame* must not have any overlay drawn on it yet: the crop handed to
    Tesseract is taken directly from it.
    """
    frame_h, frame_w = frame.shape[:2]
    for det in label_detections:
        x1, y1, x2, y2 = det["box"]
        too_small = (
            (x2 - x1) < args.min_label_width
            or (y2 - y1) < args.min_label_height
        )
        if too_small:
            continue

        slot_key = quantized_slot_key(x1, y1, x2, y2, args.slot_size)
        slot_info = slot_memory.get(slot_key)
        # Per-slot cooldown: skip zones that were OCR'd recently.
        if (
            slot_info is not None
            and frame_idx - slot_info["last_ocr_frame"] < args.slot_cooldown
        ):
            continue

        rx1, ry1, rx2, ry2 = expand_box(
            x1, y1, x2, y2,
            frame_w, frame_h,
            pad_ratio=0.10,
        )
        roi = frame[ry1:ry2, rx1:rx2]
        if roi.size == 0:
            continue

        digits, processed, raw_text = ocr_digits_only(roi)
        if args.print_all:
            print(
                f"[frame {frame_idx}] OCR grezzo='{raw_text.strip()}' -> digits='{digits}'"
            )

        if len(digits) >= 2:
            remembered = digits
            last_print_frame = printed_texts.get(digits, -999999)
            if frame_idx - last_print_frame > _PRINT_REPEAT_FRAMES:
                print(f"[frame {frame_idx}] Etichetta letta: {digits}")
                printed_texts[digits] = frame_idx
            if args.show_roi:
                cv2.imshow("ROI OCR", processed)
        else:
            # Unusable read: keep the previous text (if any) but still
            # record the attempt so the cooldown applies.
            remembered = slot_info["text"] if slot_info else ""

        slot_memory[slot_key] = {
            "text": remembered,
            "last_ocr_frame": frame_idx,
            "box": (x1, y1, x2, y2),
        }


def _draw_remembered_numbers(frame, label_detections, slot_size, slot_memory):
    """Draw the cached OCR text below each label box that has one."""
    frame_h = frame.shape[0]
    for det in label_detections:
        x1, y1, x2, y2 = det["box"]
        slot_info = slot_memory.get(
            quantized_slot_key(x1, y1, x2, y2, slot_size)
        )
        if slot_info and slot_info.get("text"):
            cv2.putText(
                frame,
                f"NUM: {slot_info['text']}",
                (x1, min(frame_h - 5, y2 + 22)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 255, 255),
                2,
                cv2.LINE_AA,
            )


def main():
    """Detect objects with YOLO, OCR the label boxes and show the result.

    Reads the options from the command line, loads the Darknet model, then
    processes the video source frame by frame until the stream ends or the
    user presses ``q`` in the preview window. Recognised numbers are
    printed to stdout and drawn next to their label box.

    Exits with status 1 if a required file is missing or the video source
    cannot be opened. The capture and all windows are released even when
    the loop is left through an exception.
    """
    args = parse_args()
    if args.tesseract_cmd:
        pytesseract.pytesseract.tesseract_cmd = args.tesseract_cmd

    require_file(args.weights, "File pesi YOLO")
    require_file(args.config, "File cfg YOLO")
    require_file(args.labels, "File labels YOLO")
    classes = load_classes(args.labels)
    print(classes)

    ocr_class_lower = args.ocr_class.strip().lower()
    # Used as a modulus below; never let it be 0.
    ocr_every = max(1, args.ocr_every)

    net = cv2.dnn.readNetFromDarknet(args.config, args.weights)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    cap = open_capture(args.video)
    if not cap.isOpened():
        cap.release()
        print("Errore: impossibile aprire la sorgente video")
        sys.exit(1)

    win_name = "YOLO + OCR etichette"
    frame_idx = 0
    # OCR cache keyed by quantised label position.
    slot_memory = {}
    # Frame index at which each number was last printed, to avoid printing
    # the same number continuously.
    printed_texts = {}

    try:
        cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
        while True:
            grabbed, frame = cap.read()
            if not grabbed or frame is None:
                print("Fine stream o impossibile leggere il frame")
                break
            frame_idx += 1
            orig_h, orig_w = frame.shape[:2]

            det_frame, scale_x, scale_y = resize_for_detection(
                frame,
                args.detect_width,
            )
            detections = detect_yolo(
                net,
                det_frame,
                classes,
                args.min_confidence,
                args.nms_threshold,
            )
            # Scale the boxes back onto the original frame.
            scaled_detections = _rescale_detections(
                detections, scale_x, scale_y, orig_w, orig_h
            )

            # Only the most confident boxes of the OCR class are read.
            label_detections = sorted(
                (
                    d for d in scaled_detections
                    if d["label"].strip().lower() == ocr_class_lower
                ),
                key=lambda d: d["confidence"],
                reverse=True,
            )[:args.max_ocr_boxes]

            # OCR runs BEFORE anything is drawn: the ROI is padded by 10 %,
            # so cropping after drawing would include the box outline and
            # the "label 0.87" caption, whose digits Tesseract could read
            # as part of the label number.
            if frame_idx % ocr_every == 0:
                _ocr_label_boxes(
                    frame,
                    frame_idx,
                    label_detections,
                    args,
                    slot_memory,
                    printed_texts,
                )

            # Draw every detection, colour-coded by class.
            for det in scaled_detections:
                label_lower = det["label"].strip().lower()
                if label_lower == ocr_class_lower:
                    color = (0, 255, 255)
                elif label_lower == "gaylord":
                    color = (0, 255, 0)
                else:
                    color = (255, 255, 255)
                draw_detection(frame, det, color=color)

            # Redraw the remembered OCR text next to the label boxes.
            _draw_remembered_numbers(
                frame, label_detections, args.slot_size, slot_memory
            )

            cv2.imshow(win_name, frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
# Run the detector only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()