diff --git a/.gitignore b/.gitignore index df46ba8..657d054 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .venv_cuda/ __pycache__/ +debug_ocr/ *.py[cod] .pytest_cache/ .mypy_cache/ diff --git a/data/img_orig/air1.jpg b/data/img_orig/air1.jpg deleted file mode 100644 index 8cc83f4..0000000 Binary files a/data/img_orig/air1.jpg and /dev/null differ diff --git a/data/img_orig/air1.txt b/data/img_orig/air1.txt deleted file mode 100644 index 143de00..0000000 --- a/data/img_orig/air1.txt +++ /dev/null @@ -1,4 +0,0 @@ -0 0.498437 0.479861 0.117188 0.168056 -0 0.000000 -0.138889 0.000000 0.000000 -0 0.000000 -0.138889 0.000000 0.000000 -0 0.000000 -0.138889 0.000000 0.000000 diff --git a/data/img_orig/air2.jpg b/data/img_orig/air2.jpg deleted file mode 100644 index 231a6f8..0000000 Binary files a/data/img_orig/air2.jpg and /dev/null differ diff --git a/data/img_orig/air2.txt b/data/img_orig/air2.txt deleted file mode 100644 index 1ea565f..0000000 --- a/data/img_orig/air2.txt +++ /dev/null @@ -1 +0,0 @@ -0 0.602734 0.609028 0.110156 0.181944 diff --git a/data/img_orig/air3.jpg b/data/img_orig/air3.jpg deleted file mode 100644 index eb5e5b4..0000000 Binary files a/data/img_orig/air3.jpg and /dev/null differ diff --git a/data/img_orig/air3.txt b/data/img_orig/air3.txt deleted file mode 100644 index fb1779b..0000000 --- a/data/img_orig/air3.txt +++ /dev/null @@ -1,2 +0,0 @@ -0 0.498828 0.459722 0.152344 0.191667 -0 0.530859 0.457639 0.214844 0.315278 diff --git a/data/img_orig/air4.jpg b/data/img_orig/air4.jpg deleted file mode 100644 index 3abf362..0000000 Binary files a/data/img_orig/air4.jpg and /dev/null differ diff --git a/data/img_orig/air4.txt b/data/img_orig/air4.txt deleted file mode 100644 index 8f1fc44..0000000 --- a/data/img_orig/air4.txt +++ /dev/null @@ -1,9 +0,0 @@ -0 0.377734 0.401389 0.132031 0.155556 -0 0.529297 0.211111 0.144531 0.205556 -0 0.641406 0.393750 0.126563 0.143056 -0 0.744531 0.609028 0.148438 0.193056 -0 0.275000 0.642361 0.167187 
0.206944 -0 0.526953 0.622917 0.160156 0.184722 -0 0.635156 0.807639 0.126563 0.134722 -0 0.375781 0.857639 0.126563 0.145833 -0 0.531641 0.949306 0.172656 0.101389 diff --git a/data/img_orig/air5.jpg b/data/img_orig/air5.jpg deleted file mode 100644 index 1f21075..0000000 Binary files a/data/img_orig/air5.jpg and /dev/null differ diff --git a/data/img_orig/air5.txt b/data/img_orig/air5.txt deleted file mode 100644 index d4d8504..0000000 --- a/data/img_orig/air5.txt +++ /dev/null @@ -1 +0,0 @@ -0 0.462109 0.513889 0.186719 0.163889 diff --git a/data/img_orig/air6.jpg b/data/img_orig/air6.jpg deleted file mode 100644 index a365ffb..0000000 Binary files a/data/img_orig/air6.jpg and /dev/null differ diff --git a/data/img_orig/air6.txt b/data/img_orig/air6.txt deleted file mode 100644 index 6645ee4..0000000 --- a/data/img_orig/air6.txt +++ /dev/null @@ -1,15 +0,0 @@ -0 0.140234 0.091667 0.244531 0.127778 -0 0.458594 0.102083 0.295312 0.115278 -0 0.792578 0.118750 0.272656 0.193056 -0 0.186328 0.302083 0.307031 0.154167 -0 0.498047 0.272222 0.261719 0.119444 -0 0.812500 0.316667 0.323438 0.158333 -0 0.203125 0.502778 0.367188 0.172222 -0 0.521094 0.487500 0.207813 0.255556 -0 0.798047 0.497222 0.303906 0.155556 -0 0.157031 0.714583 0.264062 0.140278 -0 0.480469 0.693056 0.321875 0.155556 -0 0.816406 0.688889 0.318750 0.133333 -0 0.182812 0.879167 0.314063 0.125000 -0 0.520312 0.882639 0.340625 0.181944 -0 0.832812 0.861111 0.260938 0.141667 diff --git a/data/img_orig/bird1.jpg b/data/img_orig/bird1.jpg deleted file mode 100644 index 38d86b4..0000000 Binary files a/data/img_orig/bird1.jpg and /dev/null differ diff --git a/data/img_orig/bird1.txt b/data/img_orig/bird1.txt deleted file mode 100644 index c56538d..0000000 --- a/data/img_orig/bird1.txt +++ /dev/null @@ -1 +0,0 @@ -1 0.657812 0.411806 0.189063 0.223611 diff --git a/data/img_orig/bird2.jpg b/data/img_orig/bird2.jpg deleted file mode 100644 index 0685783..0000000 Binary files a/data/img_orig/bird2.jpg and 
/dev/null differ diff --git a/data/img_orig/bird2.txt b/data/img_orig/bird2.txt deleted file mode 100644 index e08d6ca..0000000 --- a/data/img_orig/bird2.txt +++ /dev/null @@ -1,2 +0,0 @@ -1 0.552344 0.384028 0.159375 0.195833 -1 0.780078 0.595833 0.236719 0.133333 diff --git a/data/img_orig/bird3.jpg b/data/img_orig/bird3.jpg deleted file mode 100644 index d011579..0000000 Binary files a/data/img_orig/bird3.jpg and /dev/null differ diff --git a/data/img_orig/bird3.txt b/data/img_orig/bird3.txt deleted file mode 100644 index 05eb5c5..0000000 --- a/data/img_orig/bird3.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 0.508594 0.282639 0.107813 0.151389 -1 0.521875 0.470833 0.081250 0.100000 -1 0.527344 0.709722 0.162500 0.133333 diff --git a/data/img_orig/bird4.jpg b/data/img_orig/bird4.jpg deleted file mode 100644 index f543ab5..0000000 Binary files a/data/img_orig/bird4.jpg and /dev/null differ diff --git a/data/img_orig/bird4.txt b/data/img_orig/bird4.txt deleted file mode 100644 index 3db7190..0000000 --- a/data/img_orig/bird4.txt +++ /dev/null @@ -1 +0,0 @@ -1 0.526953 0.502083 0.303906 0.226389 diff --git a/data/train.txt b/data/train.txt index cfd322f..8ce6a24 100644 --- a/data/train.txt +++ b/data/train.txt @@ -1,201 +1,201 @@ -/home/abonvicini/darknetAB/darknet/data/img/thumb0100.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0101.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0102.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0103.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0104.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0105.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0106.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0107.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0108.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0109.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0110.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0111.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0112.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0113.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0114.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0115.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0116.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0117.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0118.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0119.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0120.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0121.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0122.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0123.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0124.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0125.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0126.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0127.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0128.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0129.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0130.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0131.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0132.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0133.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0134.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0135.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0136.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0137.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0138.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0139.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0140.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0141.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0142.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0143.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0144.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0145.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0146.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0147.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0148.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0149.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0150.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0151.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0152.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0153.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0154.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0155.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0156.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0157.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0158.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0159.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0160.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0161.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0162.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0163.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0164.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0165.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0166.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0167.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0168.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0169.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0170.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0171.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0172.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0173.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0174.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0175.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0176.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0177.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0178.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0179.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0180.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0181.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0182.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0183.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0184.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0185.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0186.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0187.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0188.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0189.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0190.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0191.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0192.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0193.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0194.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0195.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0196.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0197.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0198.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0199.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0200.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0201.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0202.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0203.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0204.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0205.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0206.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0207.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0208.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0209.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0210.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0211.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0212.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0213.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0214.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0215.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0216.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0217.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0218.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0219.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0220.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0221.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0222.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0223.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0224.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0225.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0226.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0227.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0228.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0229.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0230.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0231.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0232.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0233.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0234.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0235.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0236.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0237.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0238.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0239.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0240.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0241.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0242.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0243.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0244.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0245.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0246.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0247.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0248.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0249.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0250.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0251.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0252.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0253.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0254.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0255.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0256.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0257.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0258.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0259.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0260.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0261.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0262.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0263.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0264.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0265.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0266.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0267.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0268.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0269.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0270.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0271.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0272.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0273.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0274.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0275.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0276.jpg 
-/home/abonvicini/darknetAB/darknet/data/img/thumb0277.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0278.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0279.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0280.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0281.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0282.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0283.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0284.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0285.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0286.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0287.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0288.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0289.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0290.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0291.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0292.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0293.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0294.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0295.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0296.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0297.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0298.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0299.jpg -/home/abonvicini/darknetAB/darknet/data/img/thumb0300.jpg +data/img/thumb0100.jpg +data/img/thumb0101.jpg +data/img/thumb0102.jpg +data/img/thumb0103.jpg +data/img/thumb0104.jpg +data/img/thumb0105.jpg +data/img/thumb0106.jpg +data/img/thumb0107.jpg +data/img/thumb0108.jpg +data/img/thumb0109.jpg +data/img/thumb0110.jpg +data/img/thumb0111.jpg +data/img/thumb0112.jpg +data/img/thumb0113.jpg +data/img/thumb0114.jpg +data/img/thumb0115.jpg +data/img/thumb0116.jpg +data/img/thumb0117.jpg +data/img/thumb0118.jpg +data/img/thumb0119.jpg +data/img/thumb0120.jpg +data/img/thumb0121.jpg +data/img/thumb0122.jpg +data/img/thumb0123.jpg 
+data/img/thumb0124.jpg +data/img/thumb0125.jpg +data/img/thumb0126.jpg +data/img/thumb0127.jpg +data/img/thumb0128.jpg +data/img/thumb0129.jpg +data/img/thumb0130.jpg +data/img/thumb0131.jpg +data/img/thumb0132.jpg +data/img/thumb0133.jpg +data/img/thumb0134.jpg +data/img/thumb0135.jpg +data/img/thumb0136.jpg +data/img/thumb0137.jpg +data/img/thumb0138.jpg +data/img/thumb0139.jpg +data/img/thumb0140.jpg +data/img/thumb0141.jpg +data/img/thumb0142.jpg +data/img/thumb0143.jpg +data/img/thumb0144.jpg +data/img/thumb0145.jpg +data/img/thumb0146.jpg +data/img/thumb0147.jpg +data/img/thumb0148.jpg +data/img/thumb0149.jpg +data/img/thumb0150.jpg +data/img/thumb0151.jpg +data/img/thumb0152.jpg +data/img/thumb0153.jpg +data/img/thumb0154.jpg +data/img/thumb0155.jpg +data/img/thumb0156.jpg +data/img/thumb0157.jpg +data/img/thumb0158.jpg +data/img/thumb0159.jpg +data/img/thumb0160.jpg +data/img/thumb0161.jpg +data/img/thumb0162.jpg +data/img/thumb0163.jpg +data/img/thumb0164.jpg +data/img/thumb0165.jpg +data/img/thumb0166.jpg +data/img/thumb0167.jpg +data/img/thumb0168.jpg +data/img/thumb0169.jpg +data/img/thumb0170.jpg +data/img/thumb0171.jpg +data/img/thumb0172.jpg +data/img/thumb0173.jpg +data/img/thumb0174.jpg +data/img/thumb0175.jpg +data/img/thumb0176.jpg +data/img/thumb0177.jpg +data/img/thumb0178.jpg +data/img/thumb0179.jpg +data/img/thumb0180.jpg +data/img/thumb0181.jpg +data/img/thumb0182.jpg +data/img/thumb0183.jpg +data/img/thumb0184.jpg +data/img/thumb0185.jpg +data/img/thumb0186.jpg +data/img/thumb0187.jpg +data/img/thumb0188.jpg +data/img/thumb0189.jpg +data/img/thumb0190.jpg +data/img/thumb0191.jpg +data/img/thumb0192.jpg +data/img/thumb0193.jpg +data/img/thumb0194.jpg +data/img/thumb0195.jpg +data/img/thumb0196.jpg +data/img/thumb0197.jpg +data/img/thumb0198.jpg +data/img/thumb0199.jpg +data/img/thumb0200.jpg +data/img/thumb0201.jpg +data/img/thumb0202.jpg +data/img/thumb0203.jpg +data/img/thumb0204.jpg +data/img/thumb0205.jpg +data/img/thumb0206.jpg 
+data/img/thumb0207.jpg +data/img/thumb0208.jpg +data/img/thumb0209.jpg +data/img/thumb0210.jpg +data/img/thumb0211.jpg +data/img/thumb0212.jpg +data/img/thumb0213.jpg +data/img/thumb0214.jpg +data/img/thumb0215.jpg +data/img/thumb0216.jpg +data/img/thumb0217.jpg +data/img/thumb0218.jpg +data/img/thumb0219.jpg +data/img/thumb0220.jpg +data/img/thumb0221.jpg +data/img/thumb0222.jpg +data/img/thumb0223.jpg +data/img/thumb0224.jpg +data/img/thumb0225.jpg +data/img/thumb0226.jpg +data/img/thumb0227.jpg +data/img/thumb0228.jpg +data/img/thumb0229.jpg +data/img/thumb0230.jpg +data/img/thumb0231.jpg +data/img/thumb0232.jpg +data/img/thumb0233.jpg +data/img/thumb0234.jpg +data/img/thumb0235.jpg +data/img/thumb0236.jpg +data/img/thumb0237.jpg +data/img/thumb0238.jpg +data/img/thumb0239.jpg +data/img/thumb0240.jpg +data/img/thumb0241.jpg +data/img/thumb0242.jpg +data/img/thumb0243.jpg +data/img/thumb0244.jpg +data/img/thumb0245.jpg +data/img/thumb0246.jpg +data/img/thumb0247.jpg +data/img/thumb0248.jpg +data/img/thumb0249.jpg +data/img/thumb0250.jpg +data/img/thumb0251.jpg +data/img/thumb0252.jpg +data/img/thumb0253.jpg +data/img/thumb0254.jpg +data/img/thumb0255.jpg +data/img/thumb0256.jpg +data/img/thumb0257.jpg +data/img/thumb0258.jpg +data/img/thumb0259.jpg +data/img/thumb0260.jpg +data/img/thumb0261.jpg +data/img/thumb0262.jpg +data/img/thumb0263.jpg +data/img/thumb0264.jpg +data/img/thumb0265.jpg +data/img/thumb0266.jpg +data/img/thumb0267.jpg +data/img/thumb0268.jpg +data/img/thumb0269.jpg +data/img/thumb0270.jpg +data/img/thumb0271.jpg +data/img/thumb0272.jpg +data/img/thumb0273.jpg +data/img/thumb0274.jpg +data/img/thumb0275.jpg +data/img/thumb0276.jpg +data/img/thumb0277.jpg +data/img/thumb0278.jpg +data/img/thumb0279.jpg +data/img/thumb0280.jpg +data/img/thumb0281.jpg +data/img/thumb0282.jpg +data/img/thumb0283.jpg +data/img/thumb0284.jpg +data/img/thumb0285.jpg +data/img/thumb0286.jpg +data/img/thumb0287.jpg +data/img/thumb0288.jpg +data/img/thumb0289.jpg 
+data/img/thumb0290.jpg +data/img/thumb0291.jpg +data/img/thumb0292.jpg +data/img/thumb0293.jpg +data/img/thumb0294.jpg +data/img/thumb0295.jpg +data/img/thumb0296.jpg +data/img/thumb0297.jpg +data/img/thumb0298.jpg +data/img/thumb0299.jpg +data/img/thumb0300.jpg diff --git a/flywms.py b/flywms.py new file mode 100644 index 0000000..b36f362 --- /dev/null +++ b/flywms.py @@ -0,0 +1,1644 @@ +import argparse +import json +import os +import re +import subprocess +import sys +import tempfile +import threading +import time +from collections import deque +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import cv2 +import numpy as np + + +@dataclass(frozen=True) +class FramePacket: + frame_id: int + timestamp: float + frame: np.ndarray + width: int + height: int + source: str + + +@dataclass(frozen=True) +class Detection: + class_id: int + class_name: str + confidence: float + bbox: tuple[int, int, int, int] + + +@dataclass(frozen=True) +class DetectionResult: + frame_id: int + timestamp: float + detections: list[Detection] + inference_ms: float + blob_ms: float + forward_ms: float + parse_ms: float + source_width: int + source_height: int + + +@dataclass(frozen=True) +class RoiPacket: + roi_id: int + source_frame_id: int + timestamp: float + class_name: str + confidence: float + bbox: tuple[int, int, int, int] + roi_image: np.ndarray + width: int + height: int + + +@dataclass(frozen=True) +class OcrResult: + roi_id: int + source_frame_id: int + timestamp: float + text: str + raw_text: str + bbox: tuple[int, int, int, int] + ocr_ms: float + + +@dataclass(frozen=True) +class OcrEngineResult: + digits: str + processed: np.ndarray + raw_text: str + code_roi: np.ndarray + + +@dataclass +class BufferStats: + pushed: int = 0 + popped: int = 0 + dropped_on_put: int = 0 + skipped_on_pop: int = 0 + waits: int = 0 + max_depth_seen: int = 0 + + +@dataclass +class RuntimeStats: + capture_frames: int = 0 + display_frames: int = 0 + 
yolo_submitted_frames: int = 0 + quality_rejected_frames: int = 0 + quality_cycles: int = 0 + yolo_cycles: int = 0 + ocr_cycles: int = 0 + quality_total_ms: float = 0.0 + quality_score_total: float = 0.0 + yolo_total_ms: float = 0.0 + yolo_blob_ms: float = 0.0 + yolo_forward_ms: float = 0.0 + yolo_parse_ms: float = 0.0 + ocr_total_ms: float = 0.0 + display_total_ms: float = 0.0 + capture_read_total_ms: float = 0.0 + last_capture_frame_id: int = 0 + last_quality_score: float = 0.0 + last_quality_passed: bool = True + last_yolo_frame_id: int = 0 + last_ocr_frame_id: int = 0 + last_detection_count: int = 0 + last_detection_summary: str = "" + + +class LatestBuffer: + def __init__(self, max_size: int, name: str): + if max_size < 1: + raise ValueError("max_size must be >= 1") + self.max_size = max_size + self.name = name + self._items: deque[Any] = deque(maxlen=max_size) + self._condition = threading.Condition() + self._stats = BufferStats() + + def put(self, item: Any) -> None: + with self._condition: + if len(self._items) == self.max_size: + self._stats.dropped_on_put += 1 + self._items.append(item) + self._stats.pushed += 1 + self._stats.max_depth_seen = max( + self._stats.max_depth_seen, + len(self._items), + ) + self._condition.notify_all() + + def get_latest_blocking(self, stop_event: threading.Event) -> Any | None: + with self._condition: + while not self._items and not stop_event.is_set(): + self._stats.waits += 1 + self._condition.wait(timeout=0.1) + + if stop_event.is_set(): + return None + + latest = self._items[-1] + skipped = len(self._items) - 1 + self._stats.skipped_on_pop += skipped + self._items.clear() + self._stats.popped += 1 + return latest + + def wake_all(self) -> None: + with self._condition: + self._condition.notify_all() + + def stats(self) -> BufferStats: + with self._condition: + return BufferStats(**self._stats.__dict__) + + def depth(self) -> int: + with self._condition: + return len(self._items) + + +class SharedState: + def __init__(self, 
ocr_history_size: int): + self._lock = threading.Lock() + self.latest_detection: DetectionResult | None = None + self.ocr_results: deque[OcrResult] = deque(maxlen=ocr_history_size) + self.debug_yolo_frame: np.ndarray | None = None + self.debug_ocr_frame: np.ndarray | None = None + self.debug_ocr_text: str = "" + self.stats = RuntimeStats() + + def set_latest_detection(self, result: DetectionResult) -> None: + with self._lock: + self.latest_detection = result + self.stats.yolo_cycles += 1 + self.stats.yolo_total_ms += result.inference_ms + self.stats.yolo_blob_ms += result.blob_ms + self.stats.yolo_forward_ms += result.forward_ms + self.stats.yolo_parse_ms += result.parse_ms + self.stats.last_yolo_frame_id = result.frame_id + self.stats.last_detection_count = len(result.detections) + counts: dict[str, int] = {} + for det in result.detections: + counts[det.class_name] = counts.get(det.class_name, 0) + 1 + self.stats.last_detection_summary = ",".join( + f"{name}:{count}" for name, count in sorted(counts.items()) + ) + + def get_latest_detection(self) -> DetectionResult | None: + with self._lock: + return self.latest_detection + + def add_ocr_result(self, result: OcrResult) -> None: + with self._lock: + self.ocr_results.append(result) + self.stats.ocr_cycles += 1 + self.stats.ocr_total_ms += result.ocr_ms + self.stats.last_ocr_frame_id = result.source_frame_id + + def get_recent_ocr_results(self) -> list[OcrResult]: + with self._lock: + return list(self.ocr_results) + + def set_debug_yolo_frame(self, frame: np.ndarray | None) -> None: + with self._lock: + self.debug_yolo_frame = None if frame is None else frame.copy() + + def set_debug_ocr_frame(self, frame: np.ndarray | None, text: str = "") -> None: + with self._lock: + self.debug_ocr_frame = None if frame is None else frame.copy() + self.debug_ocr_text = text + + def get_debug_frames(self) -> tuple[np.ndarray | None, np.ndarray | None, str]: + with self._lock: + yolo = None if self.debug_yolo_frame is None else 
self.debug_yolo_frame.copy() + ocr = None if self.debug_ocr_frame is None else self.debug_ocr_frame.copy() + return yolo, ocr, self.debug_ocr_text + + def add_capture_read(self, frame_id: int, read_ms: float) -> None: + with self._lock: + self.stats.capture_frames += 1 + self.stats.capture_read_total_ms += read_ms + self.stats.last_capture_frame_id = frame_id + + def add_quality_result( + self, + score: float, + passed: bool, + elapsed_ms: float, + submitted_to_yolo: bool, + ) -> None: + with self._lock: + self.stats.quality_cycles += 1 + self.stats.quality_score_total += score + self.stats.quality_total_ms += elapsed_ms + self.stats.last_quality_score = score + self.stats.last_quality_passed = passed + if submitted_to_yolo: + self.stats.yolo_submitted_frames += 1 + else: + self.stats.quality_rejected_frames += 1 + + def add_display(self, display_ms: float) -> None: + with self._lock: + self.stats.display_frames += 1 + self.stats.display_total_ms += display_ms + + def snapshot_stats(self) -> RuntimeStats: + with self._lock: + return RuntimeStats(**self.stats.__dict__) + + +class IdGenerator: + def __init__(self, start: int = 1): + self._value = start + self._lock = threading.Lock() + + def next(self) -> int: + with self._lock: + value = self._value + self._value += 1 + return value + + +def parse_args(): + ap = argparse.ArgumentParser() + + ap.add_argument("-v", "--video", default=None, + help="Percorso video. 
Se omesso usa webcam 0") + ap.add_argument("--weights", default="yolov2.weights", + help="File pesi YOLOv2") + ap.add_argument("--config", default="yolov2.cfg", + help="File config YOLOv2") + ap.add_argument("--labels", default="labels.txt", + help="File labels classi") + ap.add_argument("--tesseract-cmd", default=None, + help="Percorso esplicito a tesseract.exe") + + ap.add_argument("--backend", choices=["cpu", "cuda", "cuda-fp16"], + default="cpu", help="Backend OpenCV DNN") + ap.add_argument("--input-size", type=int, default=416, + help="Dimensione input YOLO") + ap.add_argument("--swap-rb", action="store_true", + help="Scambia canali R/B nella blob YOLO") + ap.add_argument("--frame-buffer-size", type=int, default=10, + help="Dimensione latest buffer frame") + ap.add_argument("--roi-buffer-size", type=int, default=20, + help="Dimensione latest buffer ROI") + ap.add_argument("--ocr-history-size", type=int, default=100, + help="Numero risultati OCR recenti mantenuti in memoria") + ap.add_argument("--preview-width", type=int, default=1280, + help="Larghezza massima preview") + ap.add_argument("--stats-interval", type=float, default=2.0, + help="Secondi tra riepiloghi prestazioni") + ap.add_argument("--max-frames", type=int, default=0, + help="Numero massimo frame da leggere; 0 = fino a fine stream") + ap.add_argument("--drain-seconds", type=float, default=0.0, + help="Secondi di attesa dopo max/fine stream per benchmark headless") + ap.add_argument("--realtime-playback", action="store_true", + help="Per file video, limita il loop al framerate del video") + ap.add_argument("--opencv-threads", type=int, default=1, + help="Numero thread OpenCV") + ap.add_argument("--quality-filter", action="store_true", + help="Filtra i frame troppo sfocati/mossi prima del buffer YOLO") + ap.add_argument("--blur-metric", choices=["laplacian", "tenengrad"], + default="laplacian", help="Metrica nitidezza usata dal filtro qualita'") + ap.add_argument("--min-sharpness", type=float, 
default=80.0, + help="Soglia minima nitidezza per inviare il frame a YOLO") + ap.add_argument("--blur-resize-width", type=int, default=320, + help="Larghezza usata per ridurre il frame prima della metrica blur") + ap.add_argument("--debug-quality-log", action="store_true", + help="Logga lo score qualita' di ogni frame") + ap.add_argument("--debug-rejected-window", action="store_true", + help="Mostra una finestra con i frame scartati dal filtro qualita'") + + ap.add_argument("--min-confidence", type=float, default=0.30, + help="Soglia minima confidenza") + ap.add_argument("--nms-threshold", type=float, default=0.40, + help="Soglia NMS") + ap.add_argument("--use-nms", action="store_true", + help="Applica NMS alle detection; default off per compatibilita' YOLOv2") + ap.add_argument("--label-class", default="etichetta", + help="Nome classe etichetta su cui fare OCR") + ap.add_argument("--min-label-width", type=int, default=50, + help="Larghezza minima bbox etichetta") + ap.add_argument("--min-label-height", type=int, default=20, + help="Altezza minima bbox etichetta") + ap.add_argument("--max-roi-per-frame", type=int, default=2, + help="Numero massimo ROI etichetta inviate a OCR per detection") + ap.add_argument("--infer-gaylord-from-label", action="store_true", + help="Disegna un box gaylord stimato partendo dalle etichette se YOLO non trova gaylord") + ap.add_argument("--inferred-gaylord-width-factor", type=float, default=3.6, + help="Larghezza box gaylord stimato rispetto alla label") + ap.add_argument("--inferred-gaylord-height-factor", type=float, default=4.2, + help="Altezza box gaylord stimato rispetto alla label") + ap.add_argument("--inferred-gaylord-y-shift", type=float, default=1.35, + help="Spostamento verticale verso il basso, in multipli dell'altezza label") + ap.add_argument("--slot-size", type=int, default=120, + help="Dimensione griglia per cooldown OCR") + ap.add_argument("--ocr-cooldown-sec", type=float, default=1.0, + help="Secondi minimi prima di 
reinviare OCR sulla stessa zona") + ap.add_argument("--ocr-min-digits", type=int, default=2, + help="Numero minimo cifre per lettura valida") + ap.add_argument("--ocr-backend", choices=["tesseract", "paddle", "easyocr"], + default="paddle", help="Motore OCR da usare") + ap.add_argument("--ocr-lang", default="en", + help="Lingua OCR") + ap.add_argument("--easyocr-gpu", choices=["auto", "on", "off"], + default="auto", help="Uso GPU EasyOCR") + ap.add_argument("--easyocr-mode", choices=["subprocess", "inprocess"], + default="subprocess", help="Modalita' EasyOCR") + ap.add_argument("--easyocr-worker", action="store_true", + help=argparse.SUPPRESS) + ap.add_argument("--ocr-input", choices=["roi", "processed"], + default="roi", help="Immagine passata al motore OCR") + ap.add_argument("--ocr-code-mode", choices=["full", "fixed-band", "large-components"], + default="fixed-band", help="Prefiltro per isolare il codice grande") + ap.add_argument("--ocr-scale", type=float, default=1.5, + help="Fattore di ingrandimento preprocess OCR") + ap.add_argument("--ocr-max-width", type=int, default=900, + help="Larghezza massima immagine inviata all'OCR") + ap.add_argument("--ocr-band-x1", type=float, default=0.0, + help="Crop fisso OCR: x iniziale percentuale 0..1") + ap.add_argument("--ocr-band-y1", type=float, default=0.0, + help="Crop fisso OCR: y iniziale percentuale 0..1") + ap.add_argument("--ocr-band-x2", type=float, default=1.0, + help="Crop fisso OCR: x finale percentuale 0..1") + ap.add_argument("--ocr-band-y2", type=float, default=1.0, + help="Crop fisso OCR: y finale percentuale 0..1") + ap.add_argument("--ocr-component-min-height-ratio", type=float, default=0.22, + help="Altezza minima componente grande rispetto alla ROI") + ap.add_argument("--ocr-component-min-area-ratio", type=float, default=0.002, + help="Area minima componente grande rispetto alla ROI") + ap.add_argument("--ocr-component-pad-ratio", type=float, default=0.08, + help="Padding crop finale componenti 
grandi") + ap.add_argument("--ocr-pad-ratio", type=float, default=0.20, + help="Padding bbox etichetta prima dell'OCR") + ap.add_argument("--ocr-submit-min-interval", type=float, default=2.0, + help="Secondi minimi globali tra due ROI inviate all'OCR") + ap.add_argument("--ocr-max-pending", type=int, default=1, + help="Numero massimo ROI pendenti prima di saltare nuovi invii OCR") + ap.add_argument("--paddle-text-det-limit-side-len", type=int, default=320, + help="Parametro PaddleOCR text_det_limit_side_len") + ap.add_argument("--paddle-text-rec-score-thresh", type=float, default=0.0, + help="Soglia riconoscimento PaddleOCR") + ap.add_argument("--print-all-ocr", action="store_true", + help="Stampa anche OCR grezzi non validi") + ap.add_argument("--save-ocr-roi-dir", default=None, + help="Directory dove salvare ROI OCR raw/code/processed per debug") + ap.add_argument("--no-ocr", action="store_true", + help="Disabilita OCR; utile per benchmark YOLO/capture") + + ap.add_argument("--debug-yolo-window", action="store_true", + help="Mostra una finestra debug con l'ultimo frame YOLO") + ap.add_argument("--debug-ocr-window", action="store_true", + help="Mostra una finestra debug con l'ultima ROI preprocessata") + ap.add_argument("--debug-yolo-output", action="store_true", + help="Logga shape e confidenze grezze dell'output YOLO") + ap.add_argument("--debug-yolo-top", type=int, default=0, + help="Logga le top N righe raw YOLO per confidenza classe") + ap.add_argument("--debug-inferred-gaylord", action="store_true", + help="Logga quanti box gaylord stimati vengono generati") + ap.add_argument("--no-display", action="store_true", + help="Disabilita finestre video, utile per benchmark") + + return ap.parse_args() + + +def log(msg: str) -> None: + print(f"[{time.strftime('%H:%M:%S')}] {msg}", flush=True) + + +def require_file(path_str: str, description: str) -> Path: + path = Path(path_str) + if not path.exists(): + log(f"ERRORE: {description} non trovato: {path}") + 
sys.exit(1) + return path + + +def load_classes(labels_path: str) -> list[str]: + with open(labels_path, "rt", encoding="utf-8") as f: + classes = [line.strip() for line in f if line.strip()] + if not classes: + log("ERRORE: labels.txt vuoto") + sys.exit(1) + return classes + + +def open_capture(video_arg: str | None): + if video_arg is None: + cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) + if not cap.isOpened(): + cap = cv2.VideoCapture(0) + return cap, "camera:0" + + if str(video_arg).isdigit(): + idx = int(video_arg) + cap = cv2.VideoCapture(idx, cv2.CAP_DSHOW) + if not cap.isOpened(): + cap = cv2.VideoCapture(idx) + return cap, f"camera:{idx}" + + return cv2.VideoCapture(video_arg), str(video_arg) + + +def configure_net_backend(net, backend: str) -> None: + if backend == "cpu": + net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) + net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) + return + + net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) + if backend == "cuda-fp16": + net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16) + else: + net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) + + +def resize_preview(frame: np.ndarray, max_width: int) -> np.ndarray: + h, w = frame.shape[:2] + if max_width <= 0 or w <= max_width: + return frame + scale = max_width / float(w) + new_w = int(w * scale) + new_h = int(h * scale) + return cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + + +def clip_box(x1: int, y1: int, x2: int, y2: int, + w: int, h: int) -> tuple[int, int, int, int]: + x1 = max(0, min(x1, w - 1)) + y1 = max(0, min(y1, h - 1)) + x2 = max(0, min(x2, w - 1)) + y2 = max(0, min(y2, h - 1)) + return x1, y1, x2, y2 + + +def expand_box(x1: int, y1: int, x2: int, y2: int, + frame_w: int, frame_h: int, + pad_ratio: float = 0.08) -> tuple[int, int, int, int]: + bw = x2 - x1 + bh = y2 - y1 + pad_x = int(bw * pad_ratio) + pad_y = int(bh * pad_ratio) + return clip_box( + x1 - pad_x, + y1 - pad_y, + x2 + pad_x, + y2 + pad_y, + frame_w, + frame_h, + ) + + +def 
quantized_slot_key(bbox: tuple[int, int, int, int], slot_size: int) -> tuple[int, int]: + x1, y1, x2, y2 = bbox + cx = (x1 + x2) // 2 + cy = (y1 + y2) // 2 + return cx // slot_size, cy // slot_size + + +def limit_width(image: np.ndarray, max_width: int) -> np.ndarray: + if max_width <= 0: + return image + h, w = image.shape[:2] + if w <= max_width: + return image + scale = max_width / float(w) + new_h = max(1, int(h * scale)) + return cv2.resize(image, (max_width, new_h), interpolation=cv2.INTER_AREA) + + +def preprocess_for_ocr(roi: np.ndarray, scale: float = 1.5, max_width: int = 900) -> np.ndarray: + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + if scale != 1.0: + gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) + gray = cv2.GaussianBlur(gray, (3, 3), 0) + gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] + gray = cv2.copyMakeBorder( + gray, 8, 8, 8, 8, + borderType=cv2.BORDER_CONSTANT, + value=255, + ) + gray = limit_width(gray, max_width) + return gray + + +def crop_fixed_band( + roi: np.ndarray, + x1_ratio: float, + y1_ratio: float, + x2_ratio: float, + y2_ratio: float, +) -> np.ndarray: + h, w = roi.shape[:2] + x1 = int(max(0.0, min(1.0, x1_ratio)) * w) + y1 = int(max(0.0, min(1.0, y1_ratio)) * h) + x2 = int(max(0.0, min(1.0, x2_ratio)) * w) + y2 = int(max(0.0, min(1.0, y2_ratio)) * h) + x1, y1, x2, y2 = clip_box(x1, y1, x2, y2, w, h) + if x2 <= x1 or y2 <= y1: + return roi + return roi[y1:y2, x1:x2].copy() + + +def crop_large_components( + roi: np.ndarray, + min_height_ratio: float, + min_area_ratio: float, + pad_ratio: float, +) -> np.ndarray: + h, w = roi.shape[:2] + if h <= 0 or w <= 0: + return roi + + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + gray = cv2.GaussianBlur(gray, (3, 3), 0) + binary = cv2.threshold( + gray, + 0, + 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU, + )[1] + + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) + binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, 
kernel, iterations=1) + + contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + min_h = max(3, int(h * min_height_ratio)) + min_area = max(4, int(w * h * min_area_ratio)) + + boxes: list[tuple[int, int, int, int]] = [] + for contour in contours: + x, y, bw, bh = cv2.boundingRect(contour) + area = bw * bh + if bh < min_h or area < min_area: + continue + aspect = bw / float(max(1, bh)) + if aspect < 0.12 or aspect > 1.25: + continue + boxes.append((x, y, x + bw, y + bh)) + + if not boxes: + return roi + + x1 = min(box[0] for box in boxes) + y1 = min(box[1] for box in boxes) + x2 = max(box[2] for box in boxes) + y2 = max(box[3] for box in boxes) + x1, y1, x2, y2 = expand_box(x1, y1, x2, y2, w, h, pad_ratio=pad_ratio) + if x2 <= x1 or y2 <= y1: + return roi + return roi[y1:y2, x1:x2].copy() + + +def extract_code_roi(roi: np.ndarray, args) -> np.ndarray: + if args.ocr_code_mode == "full": + return roi + if args.ocr_code_mode == "fixed-band": + return crop_fixed_band( + roi, + args.ocr_band_x1, + args.ocr_band_y1, + args.ocr_band_x2, + args.ocr_band_y2, + ) + return crop_large_components( + roi, + args.ocr_component_min_height_ratio, + args.ocr_component_min_area_ratio, + args.ocr_component_pad_ratio, + ) + + +def ocr_digits_only(roi: np.ndarray, pytesseract_module: Any) -> OcrEngineResult: + processed = preprocess_for_ocr(roi) + config = r"--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789" + raw_text = pytesseract_module.image_to_string(processed, config=config) + digits = re.sub(r"\D+", "", raw_text) + return OcrEngineResult(digits, processed, raw_text, roi) + + +class TesseractOcrEngine: + def __init__(self, args): + import pytesseract + if args.tesseract_cmd: + pytesseract.pytesseract.tesseract_cmd = args.tesseract_cmd + self._pytesseract = pytesseract + self._args = args + + def read_digits(self, roi: np.ndarray) -> OcrEngineResult: + code_roi = extract_code_roi(roi, self._args) + return ocr_digits_only(code_roi, self._pytesseract) 
+ + +class PaddleOcrEngine: + def __init__(self, args): + from paddleocr import PaddleOCR + self._ocr = PaddleOCR( + lang=args.ocr_lang, + use_doc_orientation_classify=False, + use_doc_unwarping=False, + use_textline_orientation=False, + text_det_limit_side_len=args.paddle_text_det_limit_side_len, + text_rec_score_thresh=args.paddle_text_rec_score_thresh, + ) + self._input_mode = args.ocr_input + self._args = args + + def read_digits(self, roi: np.ndarray) -> OcrEngineResult: + code_roi = extract_code_roi(roi, self._args) + code_roi = limit_width(code_roi, self._args.ocr_max_width) + processed = preprocess_for_ocr( + code_roi, + scale=self._args.ocr_scale, + max_width=self._args.ocr_max_width, + ) + ocr_input = processed if self._input_mode == "processed" else code_roi + result = self._ocr.predict(ocr_input) + texts: list[str] = [] + for item in result: + if isinstance(item, dict): + rec_texts = item.get("rec_texts") or item.get("texts") or [] + if isinstance(rec_texts, str): + texts.append(rec_texts) + else: + texts.extend(str(text) for text in rec_texts) + elif isinstance(item, (list, tuple)): + texts.append(str(item)) + + raw_text = " ".join(texts) + digits = re.sub(r"\D+", "", raw_text) + return OcrEngineResult(digits, processed, raw_text, code_roi) + + +class EasyOcrInProcessEngine: + def __init__(self, args): + import easyocr + self._args = args + self._input_mode = args.ocr_input + langs = [part.strip() for part in args.ocr_lang.split(",") if part.strip()] + if not langs: + langs = ["en"] + + requested_gpu = args.easyocr_gpu != "off" + if args.easyocr_gpu == "on": + self._reader = easyocr.Reader(langs, gpu=True, verbose=False) + self._using_gpu = True + elif requested_gpu: + try: + self._reader = easyocr.Reader(langs, gpu=True, verbose=False) + self._using_gpu = True + except Exception as exc: + log(f"EasyOCR GPU non disponibile, fallback CPU: {exc}") + self._reader = easyocr.Reader(langs, gpu=False, verbose=False) + self._using_gpu = False + else: + 
self._reader = easyocr.Reader(langs, gpu=False, verbose=False) + self._using_gpu = False + + log(f"EasyOCR device: {'gpu' if self._using_gpu else 'cpu'}") + + def read_digits(self, roi: np.ndarray) -> OcrEngineResult: + code_roi = extract_code_roi(roi, self._args) + code_roi = limit_width(code_roi, self._args.ocr_max_width) + processed = preprocess_for_ocr( + code_roi, + scale=self._args.ocr_scale, + max_width=self._args.ocr_max_width, + ) + ocr_input = processed if self._input_mode == "processed" else code_roi + result = self._reader.readtext( + ocr_input, + allowlist="0123456789", + detail=1, + paragraph=False, + ) + texts: list[str] = [] + for item in result: + if isinstance(item, (list, tuple)) and len(item) >= 2: + texts.append(str(item[1])) + else: + texts.append(str(item)) + + raw_text = " ".join(texts) + digits = re.sub(r"\D+", "", raw_text) + return OcrEngineResult(digits, processed, raw_text, code_roi) + + +class EasyOcrProcessEngine: + def __init__(self, args): + self._args = args + self._input_mode = args.ocr_input + self._tmpdir = tempfile.TemporaryDirectory(prefix="flywms_easyocr_") + cmd = [ + sys.executable, + str(Path(__file__).resolve()), + "--easyocr-worker", + "--ocr-lang", args.ocr_lang, + "--easyocr-gpu", args.easyocr_gpu, + ] + env = os.environ.copy() + env["PYTHONUTF8"] = "1" + self._proc = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + encoding="utf-8", + errors="replace", + env=env, + ) + ready = self._read_json_line() + if ready.get("event") != "ready": + raise RuntimeError(f"EasyOCR worker non pronto: {ready}") + log(f"EasyOCR worker process device: {ready.get('device', 'unknown')}") + + def _read_json_line(self) -> dict[str, Any]: + assert self._proc.stdout is not None + while True: + line = self._proc.stdout.readline() + if line == "": + err = "" + if self._proc.stderr is not None: + err = self._proc.stderr.read() + raise RuntimeError(f"EasyOCR worker terminato: 
{err.strip()}") + line = line.strip() + if not line: + continue + try: + return json.loads(line) + except json.JSONDecodeError: + continue + + def read_digits(self, roi: np.ndarray) -> OcrEngineResult: + if self._proc.poll() is not None: + raise RuntimeError("EasyOCR worker non attivo") + + code_roi = extract_code_roi(roi, self._args) + code_roi = limit_width(code_roi, self._args.ocr_max_width) + processed = preprocess_for_ocr( + code_roi, + scale=self._args.ocr_scale, + max_width=self._args.ocr_max_width, + ) + ocr_input = processed if self._input_mode == "processed" else code_roi + image_path = Path(self._tmpdir.name) / f"ocr_{time.perf_counter_ns()}.png" + cv2.imwrite(str(image_path), ocr_input) + + assert self._proc.stdin is not None + self._proc.stdin.write(json.dumps({"path": str(image_path)}) + "\n") + self._proc.stdin.flush() + response = self._read_json_line() + try: + image_path.unlink(missing_ok=True) + except OSError: + pass + + if response.get("error"): + raise RuntimeError(str(response["error"])) + raw_text = str(response.get("raw_text", "")) + digits = re.sub(r"\D+", "", raw_text) + return OcrEngineResult(digits, processed, raw_text, code_roi) + + def close(self) -> None: + if getattr(self, "_proc", None) is None: + return + if self._proc.poll() is None: + try: + assert self._proc.stdin is not None + self._proc.stdin.write(json.dumps({"cmd": "stop"}) + "\n") + self._proc.stdin.flush() + except Exception: + pass + try: + self._proc.wait(timeout=3) + except subprocess.TimeoutExpired: + self._proc.kill() + self._tmpdir.cleanup() + + def __del__(self): + try: + self.close() + except Exception: + pass + + +def create_ocr_engine(args): + if args.ocr_backend == "paddle": + return PaddleOcrEngine(args) + if args.ocr_backend == "easyocr": + if args.easyocr_mode == "inprocess": + return EasyOcrInProcessEngine(args) + return EasyOcrProcessEngine(args) + return TesseractOcrEngine(args) + + +def run_easyocr_worker(args) -> int: + import easyocr + + langs = 
[part.strip() for part in args.ocr_lang.split(",") if part.strip()] or ["en"] + requested_gpu = args.easyocr_gpu != "off" + using_gpu = False + if args.easyocr_gpu == "on": + reader = easyocr.Reader(langs, gpu=True, verbose=False) + using_gpu = True + elif requested_gpu: + try: + reader = easyocr.Reader(langs, gpu=True, verbose=False) + using_gpu = True + except Exception: + reader = easyocr.Reader(langs, gpu=False, verbose=False) + else: + reader = easyocr.Reader(langs, gpu=False, verbose=False) + + print(json.dumps({"event": "ready", "device": "gpu" if using_gpu else "cpu"}), flush=True) + for line in sys.stdin: + try: + request = json.loads(line) + if request.get("cmd") == "stop": + break + result = reader.readtext( + request["path"], + allowlist="0123456789", + detail=1, + paragraph=False, + ) + texts: list[str] = [] + for item in result: + if isinstance(item, (list, tuple)) and len(item) >= 2: + texts.append(str(item[1])) + else: + texts.append(str(item)) + raw_text = " ".join(texts) + print(json.dumps({"raw_text": raw_text}), flush=True) + except Exception as exc: + print(json.dumps({"error": str(exc)}), flush=True) + return 0 + + +def save_ocr_debug_images( + output_dir: Path, + roi_packet: RoiPacket, + engine_result: OcrEngineResult, +) -> None: + output_dir.mkdir(parents=True, exist_ok=True) + prefix = output_dir / f"frame_{roi_packet.source_frame_id:06d}_roi_{roi_packet.roi_id:06d}" + cv2.imwrite(str(prefix) + "_raw.png", roi_packet.roi_image) + cv2.imwrite(str(prefix) + "_code.png", engine_result.code_roi) + cv2.imwrite(str(prefix) + "_processed.png", engine_result.processed) + + +def detect_yolov2( + net, + frame: np.ndarray, + classes: list[str], + min_confidence: float, + nms_threshold: float, + input_size: int, + use_nms: bool, + swap_rb: bool, +) -> tuple[list[Detection], dict[str, float | tuple[int, ...]]]: + t0 = time.perf_counter() + h, w = frame.shape[:2] + + blob = cv2.dnn.blobFromImage( + frame, + scalefactor=1.0 / 255.0, + size=(input_size, 
input_size), + mean=(0, 0, 0), + swapRB=swap_rb, + crop=False, + ) + t_blob = time.perf_counter() + + net.setInput(blob) + predictions = net.forward() + t_forward = time.perf_counter() + + predictions = np.array(predictions) + if predictions.ndim == 4: + predictions = predictions.reshape(predictions.shape[1], predictions.shape[-1]) + elif predictions.ndim == 3: + predictions = predictions[0] + + boxes: list[list[int]] = [] + confidences: list[float] = [] + class_ids: list[int] = [] + + if predictions.ndim == 2 and predictions.shape[1] > 5: + for i in range(predictions.shape[0]): + prob_arr = predictions[i][5:] + if prob_arr.size == 0: + continue + + class_index = int(prob_arr.argmax(axis=0)) + if class_index >= len(classes): + continue + + confidence = float(prob_arr[class_index]) + if confidence <= min_confidence: + continue + + x_center = float(predictions[i][0]) * w + y_center = float(predictions[i][1]) * h + width_box = float(predictions[i][2]) * w + height_box = float(predictions[i][3]) * h + + x1 = int(x_center - width_box * 0.5) + y1 = int(y_center - height_box * 0.5) + x2 = int(x_center + width_box * 0.5) + y2 = int(y_center + height_box * 0.5) + x1, y1, x2, y2 = clip_box(x1, y1, x2, y2, w, h) + + bw = max(0, x2 - x1) + bh = max(0, y2 - y1) + if bw == 0 or bh == 0: + continue + + boxes.append([x1, y1, bw, bh]) + confidences.append(confidence) + class_ids.append(class_index) + + detections: list[Detection] = [] + if boxes: + if use_nms: + indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, nms_threshold) + selected_indices = np.array(indices).flatten() if len(indices) else [] + else: + selected_indices = range(len(boxes)) + + for idx in selected_indices: + x, y, bw, bh = boxes[int(idx)] + class_id = class_ids[int(idx)] + detections.append(Detection( + class_id=class_id, + class_name=classes[class_id], + confidence=confidences[int(idx)], + bbox=(x, y, x + bw, y + bh), + )) + + t_parse = time.perf_counter() + info = { + "shape": 
tuple(predictions.shape), + "blob_ms": (t_blob - t0) * 1000.0, + "forward_ms": (t_forward - t_blob) * 1000.0, + "parse_ms": (t_parse - t_forward) * 1000.0, + "total_ms": (t_parse - t0) * 1000.0, + "raw_max": float(np.max(predictions)) if predictions.size else 0.0, + "class_max": float(np.max(predictions[:, 5:])) if predictions.ndim == 2 and predictions.shape[1] > 5 else 0.0, + "raw_predictions": predictions, + } + return detections, info + + +def best_label_detections( + detections: list[Detection], + label_name: str, + max_boxes: int, +) -> list[Detection]: + labels = [ + d for d in detections + if d.class_name.strip().lower() == label_name.strip().lower() + ] + labels.sort( + key=lambda d: ( + d.confidence, + (d.bbox[2] - d.bbox[0]) * (d.bbox[3] - d.bbox[1]), + ), + reverse=True, + ) + return labels[:max_boxes] + + +def draw_detection(frame: np.ndarray, det: Detection, label_class: str) -> None: + x1, y1, x2, y2 = det.bbox + class_lower = det.class_name.lower() + color = (255, 255, 255) + if class_lower == "gaylord_stimato": + color = (255, 0, 255) + elif class_lower == label_class.lower(): + color = (0, 255, 255) + elif class_lower == "gaylord": + color = (0, 255, 0) + + thickness = 4 if class_lower in ("gaylord", "gaylord_stimato") else 2 + cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness) + text = f"{det.class_name} {det.confidence:.2f}" + cv2.putText( + frame, + text, + (x1, max(20, y1 - 8)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + color, + 2, + cv2.LINE_AA, + ) + + +def infer_gaylords_from_labels( + detections: list[Detection], + frame_w: int, + frame_h: int, + label_class: str, + width_factor: float, + height_factor: float, + y_shift: float, +) -> list[Detection]: + has_gaylord = any(d.class_name.lower() == "gaylord" for d in detections) + if has_gaylord: + return [] + + inferred: list[Detection] = [] + for det in detections: + if det.class_name.lower() != label_class.lower(): + continue + + x1, y1, x2, y2 = det.bbox + bw = x2 - x1 + bh = y2 - y1 + if 
bw <= 0 or bh <= 0: + continue + + cx = (x1 + x2) / 2.0 + cy = (y1 + y2) / 2.0 + bh * y_shift + gw = bw * width_factor + gh = bh * height_factor + + gx1 = int(cx - gw / 2.0) + gy1 = int(cy - gh / 2.0) + gx2 = int(cx + gw / 2.0) + gy2 = int(cy + gh / 2.0) + gx1, gy1, gx2, gy2 = clip_box(gx1, gy1, gx2, gy2, frame_w, frame_h) + + inferred.append(Detection( + class_id=-1, + class_name="gaylord_stimato", + confidence=det.confidence, + bbox=(gx1, gy1, gx2, gy2), + )) + + return inferred + + +def draw_ocr_results( + frame: np.ndarray, + ocr_results: list[OcrResult], + max_age_sec: float = 5.0, +) -> None: + now = time.perf_counter() + for result in ocr_results: + if now - result.timestamp > max_age_sec: + continue + if not result.text: + continue + + x1, y1, x2, y2 = result.bbox + cv2.putText( + frame, + f"NUM: {result.text}", + (x1, min(frame.shape[0] - 5, y2 + 24)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.7, + (0, 255, 255), + 2, + cv2.LINE_AA, + ) + + +def draw_status(frame: np.ndarray, stats_text: list[str]) -> None: + y = 25 + for line in stats_text: + cv2.putText( + frame, + line, + (10, y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + (0, 0, 255), + 2, + cv2.LINE_AA, + ) + y += 24 + + +def draw_ocr_debug(ocr_frame: np.ndarray, text: str, preview_width: int) -> np.ndarray: + if len(ocr_frame.shape) == 2: + display = cv2.cvtColor(ocr_frame, cv2.COLOR_GRAY2BGR) + else: + display = ocr_frame.copy() + + display = resize_preview(display, preview_width) + canvas_h = display.shape[0] + 70 + canvas_w = max(display.shape[1], 500) + canvas = np.full((canvas_h, canvas_w, 3), 255, dtype=np.uint8) + canvas[:display.shape[0], :display.shape[1]] = display + + shown_text = text if text else "(nessun codice)" + cv2.putText( + canvas, + f"OCR: {shown_text}", + (10, display.shape[0] + 45), + cv2.FONT_HERSHEY_SIMPLEX, + 1.0, + (0, 0, 255), + 2, + cv2.LINE_AA, + ) + return canvas + + +def resize_for_quality(frame: np.ndarray, target_width: int) -> np.ndarray: + if target_width <= 0 or frame.shape[1] <= 
target_width: + return frame + scale = target_width / frame.shape[1] + height = max(1, int(frame.shape[0] * scale)) + return cv2.resize(frame, (target_width, height), interpolation=cv2.INTER_AREA) + + +def estimate_sharpness(frame: np.ndarray, metric: str, resize_width: int) -> float: + small = resize_for_quality(frame, resize_width) + gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY) + if metric == "tenengrad": + gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3) + gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3) + return float(np.mean(gx * gx + gy * gy)) + + lap = cv2.Laplacian(gray, cv2.CV_64F) + return float(lap.var()) + + +def format_stats( + shared: SharedState, + frame_buffer: LatestBuffer, + roi_buffer: LatestBuffer, + start_time: float, +) -> list[str]: + stats = shared.snapshot_stats() + elapsed = max(0.001, time.perf_counter() - start_time) + fb = frame_buffer.stats() + rb = roi_buffer.stats() + + avg_yolo = stats.yolo_total_ms / max(1, stats.yolo_cycles) + avg_forward = stats.yolo_forward_ms / max(1, stats.yolo_cycles) + avg_ocr = stats.ocr_total_ms / max(1, stats.ocr_cycles) + avg_display = stats.display_total_ms / max(1, stats.display_frames) + avg_quality = stats.quality_total_ms / max(1, stats.quality_cycles) + avg_sharpness = stats.quality_score_total / max(1, stats.quality_cycles) + + return [ + f"cap_fps={stats.capture_frames / elapsed:.1f}", + f"disp_fps={stats.display_frames / elapsed:.1f}", + f"quality yolo_in/reject={stats.yolo_submitted_frames}/{stats.quality_rejected_frames} " + f"last={stats.last_quality_score:.1f} avg={avg_sharpness:.1f} ms={avg_quality:.2f}", + f"yolo_fps={stats.yolo_cycles / elapsed:.1f} avg={avg_yolo:.1f}ms fwd={avg_forward:.1f}ms", + f"ocr_fps={stats.ocr_cycles / elapsed:.1f} avg={avg_ocr:.1f}ms", + f"display_avg={avg_display:.1f}ms", + f"frames id cap/yolo/ocr={stats.last_capture_frame_id}/{stats.last_yolo_frame_id}/{stats.last_ocr_frame_id}", + f"last_det={stats.last_detection_count} {stats.last_detection_summary}", + 
f"frame_buf push/pop/drop/skip={fb.pushed}/{fb.popped}/{fb.dropped_on_put}/{fb.skipped_on_pop}", + f"roi_buf push/pop/drop/skip={rb.pushed}/{rb.popped}/{rb.dropped_on_put}/{rb.skipped_on_pop}", + ] + + +def yolo_worker( + stop_event: threading.Event, + frame_buffer: LatestBuffer, + roi_buffer: LatestBuffer, + shared: SharedState, + net, + classes: list[str], + args, + roi_id_gen: IdGenerator, +) -> None: + slot_last_ocr: dict[tuple[int, int], float] = {} + last_ocr_submit = -999999.0 + label_class_lower = args.label_class.strip().lower() + + log("YOLO worker avviato") + while not stop_event.is_set(): + packet = frame_buffer.get_latest_blocking(stop_event) + if packet is None: + continue + + local_frame = packet.frame.copy() + detections, info = detect_yolov2( + net=net, + frame=local_frame, + classes=classes, + min_confidence=args.min_confidence, + nms_threshold=args.nms_threshold, + input_size=args.input_size, + use_nms=args.use_nms, + swap_rb=args.swap_rb, + ) + + result = DetectionResult( + frame_id=packet.frame_id, + timestamp=time.perf_counter(), + detections=detections, + inference_ms=float(info["total_ms"]), + blob_ms=float(info["blob_ms"]), + forward_ms=float(info["forward_ms"]), + parse_ms=float(info["parse_ms"]), + source_width=packet.width, + source_height=packet.height, + ) + shared.set_latest_detection(result) + + if args.debug_yolo_output: + log( + f"YOLO frame={packet.frame_id} shape={info['shape']} " + f"raw_max={info['raw_max']:.4f} class_max={info['class_max']:.4f} " + f"det={len(detections)}" + ) + if args.debug_yolo_top > 0: + raw_predictions = info["raw_predictions"] + if ( + isinstance(raw_predictions, np.ndarray) + and raw_predictions.ndim == 2 + and raw_predictions.shape[1] > 5 + ): + scores = raw_predictions[:, 5:] + row_best = scores.max(axis=1) + top_indices = np.argsort(row_best)[-args.debug_yolo_top:][::-1] + for idx in top_indices: + cls_scores = scores[idx] + cls_parts = " ".join( + f"{classes[i]}={float(cls_scores[i]):.4f}" + for i 
in range(min(len(classes), cls_scores.shape[0])) + ) + log( + f" raw[{int(idx)}] obj={float(raw_predictions[idx, 4]):.4f} " + f"{cls_parts} box={tuple(float(v) for v in raw_predictions[idx, :4])}" + ) + + debug = local_frame.copy() + debug_detections = list(detections) + if args.infer_gaylord_from_label: + inferred_gaylords = infer_gaylords_from_labels( + detections, + packet.width, + packet.height, + args.label_class, + args.inferred_gaylord_width_factor, + args.inferred_gaylord_height_factor, + args.inferred_gaylord_y_shift, + ) + debug_detections.extend(inferred_gaylords) + if args.debug_inferred_gaylord and inferred_gaylords: + log( + f"gaylord stimati frame={packet.frame_id}: " + f"{[det.bbox for det in inferred_gaylords]}" + ) + for det in debug_detections: + draw_detection(debug, det, args.label_class) + shared.set_debug_yolo_frame(resize_preview(debug, args.preview_width)) + + label_dets = best_label_detections( + detections, + args.label_class, + args.max_roi_per_frame, + ) + now = time.perf_counter() + for det in label_dets: + if now - last_ocr_submit < args.ocr_submit_min_interval: + continue + if roi_buffer.depth() >= args.ocr_max_pending: + continue + + x1, y1, x2, y2 = det.bbox + bw = x2 - x1 + bh = y2 - y1 + if bw < args.min_label_width or bh < args.min_label_height: + continue + + slot_key = quantized_slot_key(det.bbox, args.slot_size) + if now - slot_last_ocr.get(slot_key, -999999.0) < args.ocr_cooldown_sec: + continue + + rx1, ry1, rx2, ry2 = expand_box( + x1, y1, x2, y2, + packet.width, + packet.height, + pad_ratio=args.ocr_pad_ratio, + ) + roi = local_frame[ry1:ry2, rx1:rx2] + if roi.size == 0: + continue + + roi_copy = roi.copy() + roi_buffer.put(RoiPacket( + roi_id=roi_id_gen.next(), + source_frame_id=packet.frame_id, + timestamp=now, + class_name=label_class_lower, + confidence=det.confidence, + bbox=det.bbox, + roi_image=roi_copy, + width=roi_copy.shape[1], + height=roi_copy.shape[0], + )) + slot_last_ocr[slot_key] = now + last_ocr_submit = 
now + + log("YOLO worker terminato") + + +def ocr_worker( + stop_event: threading.Event, + roi_buffer: LatestBuffer, + shared: SharedState, + args, +) -> None: + try: + engine = create_ocr_engine(args) + except Exception as exc: + log(f"OCR worker disabilitato: impossibile inizializzare {args.ocr_backend}: {exc}") + return + + debug_dir = Path(args.save_ocr_roi_dir) if args.save_ocr_roi_dir else None + + log(f"OCR worker avviato con backend {args.ocr_backend}") + try: + while not stop_event.is_set(): + roi_packet = roi_buffer.get_latest_blocking(stop_event) + if roi_packet is None: + continue + + t0 = time.perf_counter() + engine_result = engine.read_digits(roi_packet.roi_image) + ocr_ms = (time.perf_counter() - t0) * 1000.0 + digits = engine_result.digits + raw_text = engine_result.raw_text + processed = engine_result.processed + + if debug_dir is not None: + save_ocr_debug_images(debug_dir, roi_packet, engine_result) + + if args.print_all_ocr: + log( + f"OCR frame={roi_packet.source_frame_id} " + f"raw='{raw_text.strip()}' digits='{digits}' ms={ocr_ms:.1f}" + ) + + result = OcrResult( + roi_id=roi_packet.roi_id, + source_frame_id=roi_packet.source_frame_id, + timestamp=time.perf_counter(), + text=digits if len(digits) >= args.ocr_min_digits else "", + raw_text=raw_text, + bbox=roi_packet.bbox, + ocr_ms=ocr_ms, + ) + shared.add_ocr_result(result) + shared.set_debug_ocr_frame(processed, result.text or raw_text.strip()) + + if result.text: + log( + f"Etichetta letta frame={result.source_frame_id} " + f"roi={result.roi_id}: {result.text} ({ocr_ms:.1f} ms)" + ) + finally: + close = getattr(engine, "close", None) + if close is not None: + close() + + log("OCR worker terminato") + + +def main() -> int: + args = parse_args() + if args.easyocr_worker: + return run_easyocr_worker(args) + + require_file(args.weights, "File pesi YOLOv2") + require_file(args.config, "File config YOLOv2") + require_file(args.labels, "File labels") + classes = load_classes(args.labels) + + 
cv2.setNumThreads(args.opencv_threads) + log(f"OpenCV version: {cv2.__version__}") + log(f"Classi: {classes}") + log(f"Backend richiesto: {args.backend}") + + net = cv2.dnn.readNetFromDarknet(args.config, args.weights) + configure_net_backend(net, args.backend) + + cap, source_name = open_capture(args.video) + if not cap.isOpened(): + log("ERRORE: impossibile aprire la sorgente video") + return 1 + + frame_buffer = LatestBuffer(args.frame_buffer_size, "frames") + roi_buffer = LatestBuffer(args.roi_buffer_size, "roi") + shared = SharedState(args.ocr_history_size) + stop_event = threading.Event() + roi_id_gen = IdGenerator() + start_time = time.perf_counter() + + yolo_thread = threading.Thread( + target=yolo_worker, + name="yolo-worker", + args=(stop_event, frame_buffer, roi_buffer, shared, net, classes, args, roi_id_gen), + daemon=True, + ) + yolo_thread.start() + ocr_thread = None + if not args.no_ocr: + ocr_thread = threading.Thread( + target=ocr_worker, + name="ocr-worker", + args=(stop_event, roi_buffer, shared, args), + daemon=True, + ) + ocr_thread.start() + else: + log("OCR disabilitato da --no-ocr") + + if not args.no_display: + cv2.namedWindow("flywms capture", cv2.WINDOW_NORMAL) + if args.debug_yolo_window: + cv2.namedWindow("flywms yolo", cv2.WINDOW_NORMAL) + if args.debug_ocr_window or not args.no_ocr: + cv2.namedWindow("flywms ocr", cv2.WINDOW_NORMAL) + if args.quality_filter and args.debug_rejected_window: + cv2.namedWindow("flywms scartati", cv2.WINDOW_NORMAL) + + frame_id = 0 + last_stats_log = time.perf_counter() + video_fps = cap.get(cv2.CAP_PROP_FPS) or 0.0 + frame_period = 1.0 / video_fps if args.realtime_playback and video_fps > 0 else 0.0 + + try: + while not stop_event.is_set(): + loop_start = time.perf_counter() + t_read0 = time.perf_counter() + grabbed, frame = cap.read() + read_ms = (time.perf_counter() - t_read0) * 1000.0 + + if not grabbed or frame is None: + log("Fine stream o impossibile leggere il frame") + break + + frame_id += 1 + if 
args.max_frames > 0 and frame_id > args.max_frames: + log(f"Raggiunto --max-frames={args.max_frames}") + break + + height, width = frame.shape[:2] + sharpness = 0.0 + quality_ms = 0.0 + quality_passed = True + if args.quality_filter or args.debug_quality_log: + t_quality0 = time.perf_counter() + sharpness = estimate_sharpness( + frame, + args.blur_metric, + args.blur_resize_width, + ) + quality_ms = (time.perf_counter() - t_quality0) * 1000.0 + quality_passed = sharpness >= args.min_sharpness + if args.debug_quality_log: + log( + f"quality frame={frame_id} sharpness={sharpness:.1f} " + f"passed={quality_passed} ms={quality_ms:.2f}" + ) + + packet = FramePacket( + frame_id=frame_id, + timestamp=time.perf_counter(), + frame=frame, + width=width, + height=height, + source=source_name, + ) + shared.add_capture_read(frame_id, read_ms) + if quality_passed: + frame_buffer.put(packet) + shared.add_quality_result( + score=sharpness, + passed=quality_passed, + elapsed_ms=quality_ms, + submitted_to_yolo=quality_passed, + ) + + t_display0 = time.perf_counter() + if not args.no_display: + display = frame.copy() + if args.quality_filter: + quality_color = (0, 180, 0) if quality_passed else (0, 0, 255) + cv2.putText( + display, + f"sharp={sharpness:.1f} {'OK' if quality_passed else 'BLUR'}", + (20, 38), + cv2.FONT_HERSHEY_SIMPLEX, + 1.0, + quality_color, + 2, + cv2.LINE_AA, + ) + latest_detection = shared.get_latest_detection() + if latest_detection is not None: + display_detections = list(latest_detection.detections) + if args.infer_gaylord_from_label: + inferred_gaylords = infer_gaylords_from_labels( + latest_detection.detections, + latest_detection.source_width, + latest_detection.source_height, + args.label_class, + args.inferred_gaylord_width_factor, + args.inferred_gaylord_height_factor, + args.inferred_gaylord_y_shift, + ) + display_detections.extend(inferred_gaylords) + for det in display_detections: + draw_detection(display, det, args.label_class) + + 
draw_ocr_results(display, shared.get_recent_ocr_results()) + draw_status(display, format_stats(shared, frame_buffer, roi_buffer, start_time)[:5]) + display = resize_preview(display, args.preview_width) + cv2.imshow("flywms capture", display) + + debug_yolo, debug_ocr, debug_ocr_text = shared.get_debug_frames() + if args.debug_yolo_window and debug_yolo is not None: + cv2.imshow("flywms yolo", debug_yolo) + if (args.debug_ocr_window or not args.no_ocr) and debug_ocr is not None: + cv2.imshow( + "flywms ocr", + draw_ocr_debug(debug_ocr, debug_ocr_text, args.preview_width), + ) + if ( + args.quality_filter + and args.debug_rejected_window + and not quality_passed + ): + rejected = frame.copy() + cv2.putText( + rejected, + f"SCARTATO sharp={sharpness:.1f} < {args.min_sharpness:.1f}", + (20, 38), + cv2.FONT_HERSHEY_SIMPLEX, + 1.0, + (0, 0, 255), + 2, + cv2.LINE_AA, + ) + rejected = resize_preview(rejected, args.preview_width) + cv2.imshow("flywms scartati", rejected) + + key = cv2.waitKey(1) & 0xFF + if key == ord("q"): + log("Premuto q, uscita") + break + + display_ms = (time.perf_counter() - t_display0) * 1000.0 + shared.add_display(display_ms) + + now = time.perf_counter() + if now - last_stats_log >= args.stats_interval: + for line in format_stats(shared, frame_buffer, roi_buffer, start_time): + log(line) + last_stats_log = now + + if frame_period > 0: + elapsed = time.perf_counter() - loop_start + if elapsed < frame_period: + time.sleep(frame_period - elapsed) + + except KeyboardInterrupt: + log("Interrotto da tastiera") + finally: + if args.drain_seconds > 0: + log(f"Drain worker per {args.drain_seconds:.1f}s") + time.sleep(args.drain_seconds) + stop_event.set() + frame_buffer.wake_all() + roi_buffer.wake_all() + yolo_thread.join(timeout=3.0) + if ocr_thread is not None: + ocr_thread.join(timeout=3.0) + cap.release() + if not args.no_display: + cv2.destroyAllWindows() + + log("=== riepilogo finale ===") + for line in format_stats(shared, frame_buffer, roi_buffer, 
start_time): + log(line) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/flywms_pipeline_spec.rtf b/flywms_pipeline_spec.rtf new file mode 100644 index 0000000..fcb1b08 --- /dev/null +++ b/flywms_pipeline_spec.rtf @@ -0,0 +1,421 @@ +{\rtf1\ansi\deff0 +{\fonttbl{\f0 Calibri;}{\f1 Consolas;}} +\fs24 +\b Specifica pipeline realtime flywms\par\b0 +\par +\b Obiettivo\par\b0 +Costruire una pipeline realtime modulare per video/camera, con tre blocchi principali:\par +\par +{\f1 Capture/Preview\par +YOLO Detection\par +OCR\par} +\par +I blocchi lavorano in parallelo, ognuno al proprio ritmo. Se un blocco e' lento, non deve bloccare gli altri. I frame/ROI vecchi possono essere scartati per mantenere bassa la latenza.\par +\par +\b Principio chiave\par\b0 +Non elaboriamo necessariamente tutti i frame. Elaboriamo il piu' recente disponibile quando un worker e' libero.\par +\par +{\f1 meglio saltare frame vecchi\par +che accumulare ritardo\par} +\par +\b Strutture dati\par\b0 +{\f1 FramePacket\par + frame_id: intero crescente\par + timestamp: tempo acquisizione\par + frame: immagine OpenCV BGR\par + width: larghezza\par + height: altezza\par + source: video/camera\par +\par +Detection\par + class_id\par + class_name\par + confidence\par + bbox: x1, y1, x2, y2\par +\par +DetectionResult\par + frame_id\par + timestamp\par + detections: lista Detection\par + inference_ms\par + source_width\par + source_height\par +\par +RoiPacket\par + roi_id\par + source_frame_id\par + timestamp\par + class_name\par + confidence\par + bbox\par + roi_image: crop BGR copiato\par + width\par + height\par +\par +OcrResult\par + roi_id\par + source_frame_id\par + timestamp\par + text\par + raw_text\par + confidence opzionale\par + bbox\par + ocr_ms\par} +\par +\b Buffer\par\b0 +Usiamo buffer thread-safe "latest", non FIFO classici.\par +\par +{\f1 LatestBuffer\par + max_size\par + put(item)\par + get_latest_blocking(stop_event)\par + get_latest_nowait()\par + 
stats()\par} +\par +Comportamento:\par +\par +{\f1 put(item):\par + se pieno, elimina elementi vecchi\par + inserisce item nuovo\par + notifica i worker in attesa\par +\par +get_latest_blocking():\par + se vuoto, aspetta\par + quando c'e' almeno un item:\par + prende il piu' recente\par + scarta gli elementi piu' vecchi\par + restituisce item\par} +\par +Buffer previsti:\par +\par +{\f1 frame_buffer: FramePacket\par +roi_buffer: RoiPacket\par} +\par +Risultati condivisi:\par +\par +{\f1 SharedState\par + latest_detection_result\par + latest_ocr_results\par + counters/statistiche\par} +\par +SharedState va protetto con lock breve.\par +\par +\b Thread\par\b0 +{\f1 Main/Capture Thread\par + legge frame\par + genera FramePacket\par + mette FramePacket in frame_buffer\par + visualizza preview\par + opzionalmente disegna ultimi risultati disponibili\par +\par +YOLO Worker Thread\par + aspetta ultimo FramePacket\par + copia il frame fuori lock\par + esegue detection\par + salva DetectionResult in SharedState\par + crea RoiPacket per le etichette utili\par + mette RoiPacket in roi_buffer\par + aggiorna statistiche\par +\par +OCR Worker Thread\par + aspetta ultimo RoiPacket\par + analizza ROI\par + salva OcrResult in SharedState/log\par + aggiorna statistiche\par} +\par +\b Thread safety\par\b0 +Il lock deve proteggere solo operazioni piccole:\par +\par +{\f1 inserire riferimento nel buffer\par +leggere riferimento dal buffer\par +aggiornare contatori\par +aggiornare latest result\par +notify/wait\par} +\par +Il lock non deve coprire:\par +\par +{\f1 cap.read()\par +frame.copy()\par +YOLO forward\par +OCR\par +cv2.imshow\par +conversioni immagine\par} +\par +\b Politica copie\par\b0 +Prima versione, robusta:\par +\par +{\f1 Capture:\par + non copia ogni frame\par + mette il frame letto nel FramePacket\par +\par +YOLO:\par + prende FramePacket\par + fa frame.copy() solo del frame che analizzera'\par +\par +OCR:\par + riceve ROI gia' copiata\par} +\par +Questo evita 
corruzione dati e mantiene copie limitate.\par +\par +\b Politica realtime\par\b0 +Se YOLO e' lento:\par +\par +{\f1 non analizza frame arretrati\par +prende il piu' recente disponibile\par} +\par +Se OCR e' lento:\par +\par +{\f1 non legge ROI vecchie\par +prende la ROI piu' recente disponibile\par} +\par +Questo riduce la latenza.\par +\par +\b Algoritmo pseudocodice - main/capture\par\b0 +{\f1 inizializza config\par +carica labels\par +carica modello YOLO\par +apri video/camera\par +\par +crea stop_event\par +crea frame_buffer max_size=N\par +crea roi_buffer max_size=M\par +crea shared_state\par +\par +avvia thread yolo_worker\par +avvia thread ocr_worker\par +\par +frame_id = 0\par +\par +loop main:\par + grabbed, frame = cap.read()\par +\par + se non grabbed:\par + stop_event.set()\par + break\par +\par + frame_id += 1\par +\par + packet = FramePacket(\par + frame_id=frame_id,\par + timestamp=now(),\par + frame=frame,\par + width=frame.width,\par + height=frame.height,\par + source=source_name\par + )\par +\par + frame_buffer.put(packet)\par +\par + detection_result = shared_state.get_latest_detection()\par + ocr_results = shared_state.get_recent_ocr_results()\par +\par + display_frame = frame.copy()\par + draw detections compatibili su display_frame\par + draw OCR texts compatibili su display_frame\par +\par + cv2.imshow("Capture", display_frame)\par +\par + se premuto q:\par + stop_event.set()\par + break\par +\par +attendi chiusura worker\par +rilascia video\par +chiudi finestre\par} +\par +\b Algoritmo pseudocodice - YOLO worker\par\b0 +{\f1 function yolo_worker():\par + while not stop_event:\par + packet = frame_buffer.get_latest_blocking(stop_event)\par +\par + se packet e' None:\par + continue\par +\par + local_frame = packet.frame.copy()\par +\par + t0 = now()\par + detections = run_yolo(local_frame)\par + inference_ms = now() - t0\par +\par + result = DetectionResult(\par + frame_id=packet.frame_id,\par + timestamp=now(),\par + 
detections=detections,\par + inference_ms=inference_ms,\par + source_width=packet.width,\par + source_height=packet.height\par + )\par +\par + shared_state.set_latest_detection(result)\par +\par + per ogni detection in detections:\par + se detection.class_name non e' "etichetta":\par + continue\par +\par + se bbox troppo piccola:\par + continue\par +\par + se OCR cooldown per questa zona non e' scaduto:\par + continue\par +\par + roi = crop local_frame usando bbox espansa\par + roi_copy = roi.copy()\par +\par + roi_packet = RoiPacket(\par + roi_id=next_roi_id(),\par + source_frame_id=packet.frame_id,\par + timestamp=now(),\par + class_name=detection.class_name,\par + confidence=detection.confidence,\par + bbox=detection.bbox,\par + roi_image=roi_copy,\par + width=roi_copy.width,\par + height=roi_copy.height\par + )\par +\par + roi_buffer.put(roi_packet)\par +\par + aggiorna statistiche yolo\par} +\par +\b Algoritmo pseudocodice - OCR worker\par\b0 +{\f1 function ocr_worker():\par + while not stop_event:\par + roi_packet = roi_buffer.get_latest_blocking(stop_event)\par +\par + se roi_packet e' None:\par + continue\par +\par + t0 = now()\par + processed = preprocess_for_ocr(roi_packet.roi_image)\par + text, raw_text = run_ocr(processed)\par + ocr_ms = now() - t0\par +\par + result = OcrResult(\par + roi_id=roi_packet.roi_id,\par + source_frame_id=roi_packet.source_frame_id,\par + timestamp=now(),\par + text=text,\par + raw_text=raw_text,\par + bbox=roi_packet.bbox,\par + ocr_ms=ocr_ms\par + )\par +\par + shared_state.add_ocr_result(result)\par +\par + se text valido:\par + logga risultato\par +\par + aggiorna statistiche ocr\par} +\par +\b LatestBuffer pseudocodice\par\b0 +{\f1 class LatestBuffer:\par + items = deque(maxlen=max_size)\par + condition = Condition()\par + pushed = 0\par + popped = 0\par + dropped = 0\par +\par + put(item):\par + with condition:\par + before = len(items)\par +\par + se before == max_size:\par + dropped += 1\par +\par + 
items.append(item)\par + pushed += 1\par + condition.notify_all()\par +\par + get_latest_blocking(stop_event):\par + with condition:\par + while items e' vuoto and not stop_event.is_set():\par + condition.wait(timeout=0.1)\par +\par + se stop_event.is_set():\par + return None\par +\par + latest = items[-1]\par + skipped = len(items) - 1\par + dropped += skipped\par + items.clear()\par + popped += 1\par +\par + return latest\par} +\par +\b SharedState pseudocodice\par\b0 +{\f1 class SharedState:\par + lock\par + latest_detection_result\par + ocr_results_deque maxlen=100\par + stats\par +\par + set_latest_detection(result):\par + with lock:\par + latest_detection_result = result\par +\par + get_latest_detection():\par + with lock:\par + return latest_detection_result\par +\par + add_ocr_result(result):\par + with lock:\par + ocr_results_deque.append(result)\par +\par + get_recent_ocr_results():\par + with lock:\par + return copia lista risultati recenti\par} +\par +\b Statistiche minime\par\b0 +Da stampare ogni N secondi:\par +\par +{\f1 capture_fps\par +yolo_fps\par +ocr_fps\par +frame_buffer pushed/popped/dropped\par +roi_buffer pushed/popped/dropped\par +avg_yolo_ms\par +avg_ocr_ms\par +last_frame_id\par +last_yolo_frame_id\par +last_ocr_frame_id\par +latency_yolo_ms\par +latency_ocr_ms\par} +\par +\b Configurazione iniziale\par\b0 +{\f1 --video testhd.mp4\par +--frame-buffer-size 10\par +--roi-buffer-size 20\par +--min-confidence 0.30\par +--label-class etichetta\par +--max-roi-per-frame 2\par +--ocr-min-digits 2\par +--preview-width 1280\par +--backend cpu|cuda|cuda-fp16\par +--debug-yolo-window\par +--debug-ocr-window\par} +\par +\b Prima versione: cosa evitare\par\b0 +Per la prima versione eviterei:\par +\par +{\f1 Dear PyGui\par +PaddleOCR\par +modello YOLO moderno\par +tracking complesso\par} +\par +Prima validiamo la pipeline con OpenCV window + YOLO attuale + OCR attuale. 
Poi sostituiamo i motori uno alla volta.\par +\par +\b Decisione architetturale\par\b0 +Questa pipeline privilegia:\par +\par +{\f1 bassa latenza\par +fluidita' realtime\par +uso controllato della memoria\par +misurabilita'\par} +\par +e sacrifica:\par +\par +{\f1 analisi completa di ogni frame\par +ordine perfetto di output per ogni frame\par} +\par +Per un sistema realtime in uso operativo e' il compromesso giusto.\par +} diff --git a/pip-before-opencv-cuda.txt b/pip-before-opencv-cuda.txt new file mode 100644 index 0000000..9fce375 --- /dev/null +++ b/pip-before-opencv-cuda.txt @@ -0,0 +1 @@ +imutils==0.5.4