alpha01 backoffice: crossword engine, lexicon curation and JSON contract

2026-04-29 13:24:04 +02:00
parent a1f8cb8577
commit f0adfc2f68
20 changed files with 5985 additions and 16 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,14 @@
+__pycache__/
+*.pyc
+.babelnet_cache.json
+.wiktionary_cache.json
+.wiktextract_it_index.json
+.babelnet_api_key.local
+logs/
+raw-wiktextract-data.jsonl
+lexicon_it*.json
+llm_rescue_patch.json
+treccani_rescue_patch.json
+to_be_review*.json
+_*.json
+idee.txt
--- a/apply_llm_rescue_patch.py
+++ b/apply_llm_rescue_patch.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+
+
+# Default file locations; each one is resolved next to this script file.
+# Curated lexicon read as the starting point (default of --lexicon).
+DEFAULT_LEXICON_PATH = Path(__file__).with_name("lexicon_it_curated.json")
+# LLM rescue patch to apply (default of --patch).
+DEFAULT_PATCH_PATH = Path(__file__).with_name("llm_rescue_patch.json")
+# Updated lexicon written by the script (default of --output).
+DEFAULT_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_curated_llm.json")
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line of the rescue-patch tool.
+
+    Returns:
+        Namespace with ``lexicon``, ``patch`` and ``output`` paths, the
+        ``min_confidence`` threshold and the ``include_needs_review`` flag.
+    """
+    cli = argparse.ArgumentParser(
+        description="Applica una patch LLM rescue al lessico curato per produrre un lessico operativo aggiornato."
+    )
+    # The three path options share the same shape, so they are declared as a table.
+    path_options = (
+        ("--lexicon", DEFAULT_LEXICON_PATH, "Lessico curato di partenza."),
+        ("--patch", DEFAULT_PATCH_PATH, "Patch LLM rescue da applicare."),
+        ("--output", DEFAULT_OUTPUT_PATH, "Lessico aggiornato in uscita."),
+    )
+    for flag, default_path, help_text in path_options:
+        cli.add_argument(flag, type=Path, default=default_path, help=help_text)
+    cli.add_argument(
+        "--min-confidence",
+        type=float,
+        default=0.6,
+        help="Confidenza minima per applicare automaticamente una definizione rescue.",
+    )
+    cli.add_argument(
+        "--include-needs-review",
+        action="store_true",
+        help="Applica anche voci marcate needs_human_review=true se superano la soglia di confidenza.",
+    )
+    return cli.parse_args()
+
+
+def load_json(path: Path, default: object) -> object:
+    """Return the JSON document stored at ``path``, or ``default`` if the file is absent.
+
+    The file is read as UTF-8. Malformed JSON is not handled here and
+    propagates as the exception raised by ``json.loads``.
+    """
+    if path.exists():
+        raw_text = path.read_text(encoding="utf-8")
+        return json.loads(raw_text)
+    return default
+
+
+def write_json(path: Path, payload: object) -> None:
+    """Write ``payload`` to ``path`` as UTF-8 JSON, indented by two spaces, keeping non-ASCII characters."""
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    path.write_text(serialized, encoding="utf-8")
+
+
+def normalize_key(form: str, pos: str) -> Tuple[str, str]:
+    """Build the lookup key for an entry: trimmed lower-case form and trimmed upper-case POS.
+
+    Falsy inputs (``None``, empty string) are treated as an empty string.
+    """
+    clean_form = str(form or "").strip().lower()
+    clean_pos = str(pos or "").strip().upper()
+    return clean_form, clean_pos
+
+
+def merge_topics(existing: List[str], incoming: List[str]) -> List[str]:
+    """Merge two topic/tag lists, dropping blanks and case-insensitive duplicates.
+
+    Items from ``existing`` come first, followed by new items from ``incoming``;
+    the first spelling seen for a given lower-cased value is the one kept.
+
+    Args:
+        existing: Topics already on the entry. ``None``/empty is accepted.
+        incoming: Topics coming from the patch. ``None``/empty is accepted.
+
+    Returns:
+        A new list of stripped, de-duplicated topic strings.
+    """
+
+    def as_items(value: object) -> List[object]:
+        # A bare string is one topic; list("animals") would split it into letters.
+        if not value:
+            return []
+        if isinstance(value, str):
+            return [value]
+        return list(value)
+
+    merged: List[str] = []
+    seen = set()
+    for item in as_items(existing) + as_items(incoming):
+        # JSON null inside the list must not become the literal topic "None".
+        if item is None:
+            continue
+        value = str(item).strip()
+        if not value:
+            continue
+        key = value.lower()
+        if key in seen:
+            continue
+        seen.add(key)
+        merged.append(value)
+    return merged
+
+
+def apply_patch(args: argparse.Namespace) -> Dict[str, Any]:
+    """Apply the LLM rescue patch to the curated lexicon and write the updated lexicon.
+
+    Patch entries are matched to lexicon entries by ``normalize_key(form, pos)``.
+    A matching patch is applied only if it has a non-empty ``rescue_definition``,
+    its ``confidence`` reaches ``args.min_confidence`` and, unless
+    ``args.include_needs_review`` is set, it is not flagged ``needs_human_review``.
+    Matching patches that fail one of these checks are counted as skipped.
+
+    Args:
+        args: Parsed options providing ``lexicon``, ``patch``, ``output``,
+            ``min_confidence`` and ``include_needs_review``.
+
+    Returns:
+        Dict with the ``applied`` and ``skipped`` counters and the ``output`` path as a string.
+
+    Raises:
+        ValueError: If the lexicon is not a dict with an ``entries`` list, or the
+            patch payload is not a dict.
+    """
+    lexicon_payload = load_json(args.lexicon, {"entries": []})
+    patch_payload = load_json(args.patch, {"entries": []})
+    if not isinstance(lexicon_payload, dict):
+        raise ValueError(f"Lessico non valido: {args.lexicon}")
+    lexicon = lexicon_payload.get("entries")
+    if not isinstance(lexicon, list):
+        raise ValueError(f"Lessico non valido: {args.lexicon}")
+    if not isinstance(patch_payload, dict):
+        raise ValueError(f"Patch non valida: {args.patch}")
+
+    # Index patch entries by (form, pos); a later duplicate overrides an earlier one.
+    patch_by_key = {
+        normalize_key(entry.get("form", ""), entry.get("pos", "")): entry
+        for entry in patch_payload.get("entries") or []
+        if isinstance(entry, dict)
+    }
+
+    applied = 0
+    skipped = 0
+    for entry in lexicon:
+        if not isinstance(entry, dict):
+            continue
+        patch = patch_by_key.get(normalize_key(entry.get("form", ""), entry.get("pos", "")))
+        if not patch:
+            continue
+        confidence = float(patch.get("confidence", 0.0) or 0.0)
+        # A missing flag is treated as "needs review" (conservative default).
+        needs_review = bool(patch.get("needs_human_review", True))
+        # `or ""` matters: a JSON null would otherwise become the definition "None".
+        definition = str(patch.get("rescue_definition") or "").strip()
+        if not definition:
+            skipped += 1
+            continue
+        if confidence < float(args.min_confidence):
+            skipped += 1
+            continue
+        if needs_review and not args.include_needs_review:
+            skipped += 1
+            continue
+
+        source = patch.get("rescue_source", "llm_rescue")
+        entry["preferred_definition"] = definition
+        entry["preferred_source"] = source
+        clue_defs = entry.get("clue_definitions") or {}
+        if not isinstance(clue_defs, dict):
+            clue_defs = {}
+        # The same rescue definition is used for every difficulty level.
+        for level in ("easy", "medium", "hard", "expert"):
+            clue_defs[level] = definition
+        entry["clue_definitions"] = clue_defs
+
+        entry["topics"] = merge_topics(entry.get("topics", []), patch.get("rescue_topics", []))
+        entry["semantic_tags"] = merge_topics(entry.get("semantic_tags", []), patch.get("rescue_semantic_tags", []))
+        entry["alpha_ready"] = True
+        # The entry now has a definition, so that review reason no longer applies.
+        review_reasons = [reason for reason in (entry.get("review_reasons") or []) if reason != "no_viable_definition"]
+        if not args.include_needs_review:
+            review_reasons = [reason for reason in review_reasons if reason != "flagged_by_refined_stage"]
+        entry["review_reasons"] = review_reasons
+        # Keep the raw patch data on the entry for traceability.
+        entry["llm_rescue"] = {
+            "definition": definition,
+            "source": source,
+            "topics": patch.get("rescue_topics", []),
+            "semantic_tags": patch.get("rescue_semantic_tags", []),
+            "notes": patch.get("rescue_notes", ""),
+            "confidence": confidence,
+            "needs_human_review": needs_review,
+            "status": patch.get("status", ""),
+        }
+        applied += 1
+
+    meta = dict(lexicon_payload.get("meta") or {})
+    meta["base_lexicon"] = args.lexicon.name
+    meta["generated_from_patch"] = args.patch.name
+    meta["generated_by"] = "apply_llm_rescue_patch.py"
+    meta["entry_count"] = len(lexicon)
+    meta["llm_rescue_applied"] = applied
+    meta["llm_rescue_skipped"] = skipped
+    meta["alpha_ready_count"] = sum(1 for item in lexicon if isinstance(item, dict) and item.get("alpha_ready"))
+    meta["review_count"] = sum(
+        1
+        for item in lexicon
+        if isinstance(item, dict) and (item.get("review_reasons") or item.get("needs_review"))
+    )
+    output_payload = {"meta": meta, "entries": lexicon}
+    write_json(args.output, output_payload)
+    return {
+        "applied": applied,
+        "skipped": skipped,
+        "output": str(args.output),
+    }
+
+
+def main() -> None:
+    """Script entry point: apply the patch and print the output path and counters."""
+    outcome = apply_patch(parse_args())
+    print(f"Lessico aggiornato generato: {outcome['output']}")
+    print(f"Patch applicate: {outcome['applied']}")
+    print(f"Voci saltate: {outcome['skipped']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/babelnet_daily_batch.py
+++ b/babelnet_daily_batch.py
@@ -0,0 +1,490 @@
+from __future__ import annotations
+
+import argparse
+import json
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Dict, Iterable, List, Optional, Tuple
+
+from babelnet_incremental_enricher import (
+    DEFAULT_TOPIC,
+    merge_babelnet_entries,
+    rebuild_enriched,
+)
+from build_babelnet_enrichment import (
+    BABELNET_CACHE_PATH,
+    BABELNET_ENV_KEY,
+    BABELNET_OUTPUT_PATH,
+    BabelNetApiCallLimitReached,
+    BabelNetKeyUnavailable,
+    POS_TO_BABELNET,
+    enrich_entry,
+    load_babelnet_api_keys,
+    load_json,
+    write_json,
+)
+from build_enriched_lexicon import ENRICHED_LEXICON_OUTPUT_PATH
+from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
+
+
+# Directory for per-run batch logs, located next to this script.
+LOG_DIR = Path(__file__).with_name("logs")
+# Default of --api-call-limit (overall real API calls allowed in one run).
+DEFAULT_API_CALL_LIMIT = 950
+# Default of --per-key-api-call-limit (real API calls allowed per loaded key).
+DEFAULT_PER_KEY_API_CALL_LIMIT = 950
+# Default of --word-limit (candidate words attempted in one run).
+DEFAULT_WORD_LIMIT = 10_000
+# Inclusive word-length bounds checked by eligible_entry().
+MIN_WORD_LENGTH = 3
+MAX_WORD_LENGTH = 16
+# POS weights used as one component of the candidate sort key;
+# a POS not listed here scores 0 in candidate_priority().
+USEFUL_POS_PRIORITY = {
+    "NOUN": 6,
+    "VERB": 5,
+    "ADJ": 4,
+    "ADV": 3,
+}
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line of the daily BabelNet batch.
+
+    Returns:
+        Namespace with the API budgets, key selection, candidate filters,
+        diagnostic switches and the three lexicon paths.
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Batch giornaliero per fondere progressivamente ItalWordNet e BabelNet: "
+            "arricchisce parole mancanti, aggiorna lexicon_it_babelnet.json e rigenera lexicon_it_enriched.json."
+        )
+    )
+    add = parser.add_argument
+
+    # API budgets.
+    add(
+        "--api-call-limit",
+        type=int,
+        default=DEFAULT_API_CALL_LIMIT,
+        help="Numero massimo complessivo di chiamate API BabelNet reali consentite in questa esecuzione.",
+    )
+    add(
+        "--per-key-api-call-limit",
+        type=int,
+        default=DEFAULT_PER_KEY_API_CALL_LIMIT,
+        help="Numero massimo di chiamate API reali consentite per ciascuna chiave caricata.",
+    )
+
+    # Key selection (two spellings of the same option, both default to None).
+    key_selectors = (
+        (
+            "--token-index",
+            "Usa una o piu chiavi locali, contando da 1. Esempi: --token-index 2 oppure --token-index 1,2,3.",
+        ),
+        (
+            "--token-indexes",
+            "Alias esplicito per una lista di chiavi locali. Esempio: --token-indexes 1,2,3.",
+        ),
+    )
+    for flag, help_text in key_selectors:
+        add(flag, default=None, help=help_text)
+
+    # Candidate volume, pacing and topic filter.
+    add(
+        "--word-limit",
+        type=int,
+        default=DEFAULT_WORD_LIMIT,
+        help="Numero massimo di parole candidate da tentare in questa esecuzione.",
+    )
+    add("--sleep", type=float, default=0.2, help="Pausa tra richieste API.")
+    add(
+        "--topic",
+        default=None,
+        help="Topic opzionale per concentrare il batch su una parte del lessico.",
+    )
+
+    # Boolean switches.
+    switches = (
+        ("--include-not-crossword", "Include anche voci non marcate allowed_in_crossword."),
+        ("--retry-no-match", "Riprova anche parole gia marcate come no_match."),
+        ("--dry-run", "Mostra le prossime parole candidate senza chiamare BabelNet e senza scrivere file."),
+        (
+            "--ignore-cache",
+            "Ignora la cache in questa esecuzione diagnostica, utile per testare un token specifico.",
+        ),
+    )
+    for flag, help_text in switches:
+        add(flag, action="store_true", help=help_text)
+
+    # Input/output lexicon files.
+    lexicon_paths = (
+        ("--semantic", SEMANTIC_LEXICON_OUTPUT_PATH, "Lessico semantico completo di partenza."),
+        ("--babelnet", BABELNET_OUTPUT_PATH, "Archivio incrementale degli arricchimenti BabelNet."),
+        ("--enriched", ENRICHED_LEXICON_OUTPUT_PATH, "Lessico fuso da rigenerare dopo il batch."),
+    )
+    for flag, default_path, help_text in lexicon_paths:
+        add(flag, type=Path, default=default_path, help=help_text)
+
+    return parser.parse_args()
+
+
+def entry_key(entry: Dict[str, object]) -> Tuple[str, str]:
+    """Return the ``(form, pos)`` identity of an entry.
+
+    The form prefers ``normalized_form`` over ``form`` and is trimmed and
+    lower-cased; the POS is trimmed and upper-cased. Missing values become "".
+    """
+    raw_form = entry.get("normalized_form") or entry.get("form") or ""
+    raw_pos = entry.get("pos") or ""
+    return str(raw_form).strip().lower(), str(raw_pos).strip().upper()
+
+
+def load_source_payload(enriched_path: Path, semantic_path: Path) -> Dict[str, object]:
+    """Load the lexicon to work on, preferring the enriched file over the semantic one.
+
+    A payload is accepted when it is a dict containing an ``entries`` key.
+
+    Raises:
+        ValueError: If neither file yields an acceptable payload.
+    """
+
+    def usable(candidate: object) -> bool:
+        return isinstance(candidate, dict) and "entries" in candidate
+
+    if enriched_path.exists():
+        enriched_payload = load_json(enriched_path, {})
+        if usable(enriched_payload):
+            return enriched_payload
+    semantic_payload = load_json(semantic_path, {})
+    if not usable(semantic_payload):
+        raise ValueError(f"Nessun lessico valido trovato: {enriched_path} / {semantic_path}")
+    return semantic_payload
+
+
+def babelnet_status(entry: Dict[str, object]) -> str:
+    """Return the entry's BabelNet status, or ``"not_requested"`` when none is recorded."""
+    info = entry.get("babelnet", {})
+    if not isinstance(info, dict):
+        return "not_requested"
+    return str(info.get("status", "not_requested"))
+
+
+def entry_topics(entry: Dict[str, object]) -> set[str]:
+    """Collect the lower-cased topics of an entry.
+
+    Combines the entry's own ``topics`` with ``semantic.semantic_topics``
+    (when ``semantic`` is a dict); falsy items are ignored.
+    """
+    collected = set()
+    for item in entry.get("topics", []) or []:
+        if item:
+            collected.add(str(item).lower())
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        for item in semantic.get("semantic_topics", []) or []:
+            if item:
+                collected.add(str(item).lower())
+    return collected
+
+
+def eligible_entry(entry: Dict[str, object], args: argparse.Namespace) -> bool:
+    """Tell whether an entry should be attempted in this batch.
+
+    An entry qualifies when its BabelNet status is still open (``not_requested``
+    or ``api_error``, plus ``no_match`` with ``--retry-no-match``), its POS is
+    known to ``POS_TO_BABELNET``, its form is alphabetic and within the length
+    bounds, it is allowed in crosswords (unless ``--include-not-crossword``)
+    and it carries the requested ``--topic`` when one is given.
+    """
+    form = str(entry.get("form", "")).strip().lower()
+    part_of_speech = str(entry.get("pos", "")).strip().upper()
+    current_status = babelnet_status(entry)
+    if args.retry_no_match:
+        open_statuses = {"not_requested", "api_error", "no_match"}
+    else:
+        open_statuses = {"not_requested", "api_error"}
+
+    if current_status not in open_statuses:
+        return False
+    if part_of_speech not in POS_TO_BABELNET:
+        return False
+    if not form.isalpha():
+        return False
+    if len(form) < MIN_WORD_LENGTH or len(form) > MAX_WORD_LENGTH:
+        return False
+    if not (args.include_not_crossword or entry.get("allowed_in_crossword", False)):
+        return False
+    if args.topic and args.topic.strip().lower() not in entry_topics(entry):
+        return False
+    return True
+
+
+def candidate_priority(entry: Dict[str, object]) -> Tuple[int, int, int, int, int, str]:
+    """Build the sort key used to rank candidate entries (higher sorts first with ``reverse=True``).
+
+    Components, in order: bonus for having a topic other than the default /
+    "abstract" / "actions", bonus for having semantic topics, ``quality_score``,
+    POS weight from ``USEFUL_POS_PRIORITY``, bonus for a length of 4-11
+    characters, and finally the word itself as tie-breaker.
+
+    A missing, null or non-numeric ``quality_score`` counts as 0 instead of
+    raising, so a single malformed entry cannot abort the whole sort.
+    """
+    word = str(entry.get("form", ""))
+    pos = str(entry.get("pos", "")).upper()
+    topics = {str(item).lower() for item in entry.get("topics", []) or []}
+    semantic = entry.get("semantic", {})
+    semantic_topics = set()
+    if isinstance(semantic, dict):
+        semantic_topics = {str(item).lower() for item in semantic.get("semantic_topics", []) or []}
+
+    try:
+        quality = int(entry.get("quality_score") or 0)
+    except (TypeError, ValueError):
+        quality = 0
+
+    useful_topic_bonus = 2 if topics - {DEFAULT_TOPIC, "abstract", "actions"} else 0
+    semantic_topic_bonus = 1 if semantic_topics else 0
+    length_bonus = 3 if 4 <= len(word) <= 11 else 1
+    return (
+        useful_topic_bonus,
+        semantic_topic_bonus,
+        quality,
+        USEFUL_POS_PRIORITY.get(pos, 0),
+        length_bonus,
+        word,
+    )
+
+
+def select_candidates(payload: Dict[str, object], args: argparse.Namespace) -> List[Dict[str, object]]:
+    """Return the eligible entries of ``payload``, best first, capped at ``args.word_limit``.
+
+    A negative word limit is treated as zero.
+    """
+    pool = []
+    for entry in payload.get("entries", []) or []:
+        if isinstance(entry, dict) and eligible_entry(entry, args):
+            pool.append(entry)
+    ranked = sorted(pool, key=candidate_priority, reverse=True)
+    cap = max(0, args.word_limit)
+    return ranked[:cap]
+
+
+def progress_counts(payload: Dict[str, object]) -> Dict[str, int]:
+    """Count the dict entries of ``payload`` per BabelNet status."""
+    tally: Dict[str, int] = {}
+    for entry in payload.get("entries", []) or []:
+        if isinstance(entry, dict):
+            status = babelnet_status(entry)
+            tally.setdefault(status, 0)
+            tally[status] += 1
+    return tally
+
+
+def parse_token_indexes(value: Optional[str], key_count: int, option_name: str) -> Optional[List[int]]:
+    """Parse a list of 1-based key indexes such as ``"1,2,3"`` (``;`` is accepted too).
+
+    Args:
+        value: Raw option value, or ``None`` when the option was not given.
+        key_count: Number of loaded keys; indexes must lie in ``1..key_count``.
+        option_name: Option name used in error messages.
+
+    Returns:
+        ``None`` when ``value`` is ``None``; otherwise the distinct indexes in
+        the order they were first given.
+
+    Raises:
+        SystemExit: On a non-numeric part, an out-of-range index, or when no
+            index at all is present.
+    """
+    if value is None:
+        return None
+
+    chosen: List[int] = []
+    for chunk in str(value).replace(";", ",").split(","):
+        token = chunk.strip()
+        if token == "":
+            continue
+        try:
+            number = int(token)
+        except ValueError as exc:
+            raise SystemExit(f"{option_name} deve contenere solo numeri separati da virgola.") from exc
+        if number < 1 or number > key_count:
+            raise SystemExit(
+                f"{option_name} contiene {number}, ma deve essere tra 1 e {key_count}. Chiavi caricate: {key_count}."
+            )
+        # Repeated indexes are silently collapsed.
+        if number not in chosen:
+            chosen.append(number)
+
+    if not chosen:
+        raise SystemExit(f"{option_name} non contiene nessun indice valido.")
+    return chosen
+
+
+def write_batch_log(payload: Dict[str, object]) -> Path:
+    """Write ``payload`` as a timestamped JSON file in ``LOG_DIR`` and return its path.
+
+    The directory is created if missing. The file name has one-second
+    resolution (``babelnet_batch_YYYYMMDD_HHMMSS.json``, local time).
+    """
+    LOG_DIR.mkdir(exist_ok=True)
+    stamp = datetime.now().astimezone().strftime("%Y%m%d_%H%M%S")
+    log_file = LOG_DIR / f"babelnet_batch_{stamp}.json"
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    log_file.write_text(serialized, encoding="utf-8")
+    return log_file
+
+
+def run_batch(args: argparse.Namespace) -> Dict[str, object]:
+    """Run one BabelNet enrichment batch (or a dry-run preview) and return its summary.
+
+    Steps: load the source lexicon, rank the eligible candidates, then enrich
+    them one by one with the least-used API key that still has budget, until
+    the candidates, the overall call budget or every key is exhausted. The
+    results are merged into the BabelNet archive, the enriched lexicon is
+    rebuilt and a JSON log is written.
+
+    With ``--dry-run`` nothing is called or written: only the candidate count,
+    the first 50 candidate words and the current status counts are returned.
+
+    With ``--ignore-cache`` the run starts from an empty in-memory cache and
+    the persisted cache file is left untouched (writing it would replace the
+    full cache with only this run's responses).
+
+    Args:
+        args: Options parsed by ``parse_args()``.
+
+    Returns:
+        Summary dict (``mode`` is ``"dry-run"`` or ``"batch"``); in batch mode
+        it includes counters, per-key stats, per-word logs and ``log_path``.
+
+    Raises:
+        SystemExit: If no API key is available, both token-index options are
+            given, or a token index is invalid.
+        ValueError: If no valid source lexicon can be loaded.
+    """
+    source_payload = load_source_payload(args.enriched, args.semantic)
+    candidates = select_candidates(source_payload, args)
+    before_counts = progress_counts(source_payload)
+
+    if args.dry_run:
+        return {
+            "mode": "dry-run",
+            "candidate_count": len(candidates),
+            "selected_words": [entry.get("form") for entry in candidates[:50]],
+            "before_counts": before_counts,
+        }
+
+    api_keys = load_babelnet_api_keys()
+    if not api_keys:
+        raise SystemExit(
+            f"Chiave BabelNet mancante. Imposta {BABELNET_ENV_KEY} oppure crea .babelnet_api_key.local."
+        )
+    token_indexes = parse_token_indexes(args.token_index, len(api_keys), "--token-index")
+    token_indexes_alias = parse_token_indexes(args.token_indexes, len(api_keys), "--token-indexes")
+    if token_indexes and token_indexes_alias:
+        raise SystemExit("Usa solo uno tra --token-index e --token-indexes.")
+    selected_token_indexes = token_indexes or token_indexes_alias
+    if selected_token_indexes:
+        # Indexes are 1-based on the command line.
+        api_keys = [api_keys[index - 1] for index in selected_token_indexes]
+
+    cache = {} if args.ignore_cache else load_json(BABELNET_CACHE_PATH, {})
+    if not isinstance(cache, dict):
+        cache = {}
+    # A diagnostic --ignore-cache run must not overwrite the persisted cache
+    # with its (initially empty) in-memory cache.
+    persist_cache = not args.ignore_cache
+    babelnet_payload = load_json(args.babelnet, {"entries": []})
+    if not isinstance(babelnet_payload, dict):
+        babelnet_payload = {"entries": []}
+
+    global_stats = {
+        "api_calls": 0,
+        "cache_hits": 0,
+        "responses": 0,
+        "api_call_limit": max(0, args.api_call_limit),
+    }
+    per_key_limit = max(0, args.per_key_api_call_limit)
+    key_stats = [
+        {
+            # key_index is the user-facing number; local_key_index the position in api_keys.
+            "key_index": selected_token_indexes[index] if selected_token_indexes else index + 1,
+            "local_key_index": index + 1,
+            "api_calls": 0,
+            "cache_hits": 0,
+            "responses": 0,
+            "api_call_limit": per_key_limit,
+        }
+        for index, _ in enumerate(api_keys)
+    ]
+    enriched_entries: List[Dict[str, object]] = []
+    word_logs = []
+    stopped_reason = "completed"
+
+    def select_key_index() -> Optional[int]:
+        # Pick the key with the fewest calls so far among those with budget left.
+        available = [
+            (stats["api_calls"], index)
+            for index, stats in enumerate(key_stats)
+            if stats["api_calls"] < stats["api_call_limit"]
+        ]
+        if not available:
+            return None
+        available.sort()
+        return available[0][1]
+
+    def absorb_key_deltas(key_index: int, calls_before: int, hits_before: int, responses_before: int) -> None:
+        # enrich_entry() receives the per-key stats dict; fold what it added
+        # for this word into the global counters.
+        stats = key_stats[key_index]
+        global_stats["api_calls"] += stats["api_calls"] - calls_before
+        global_stats["cache_hits"] += stats["cache_hits"] - hits_before
+        global_stats["responses"] += stats["responses"] - responses_before
+
+    for index, entry in enumerate(candidates, start=1):
+        if global_stats["api_calls"] >= global_stats["api_call_limit"]:
+            stopped_reason = "api_call_limit"
+            break
+        key_index = select_key_index()
+        if key_index is None:
+            stopped_reason = "per_key_api_call_limit"
+            break
+
+        before_api_calls = global_stats["api_calls"]
+        before_cache_hits = global_stats["cache_hits"]
+        before_responses = global_stats["responses"]
+        before_key_api_calls = key_stats[key_index]["api_calls"]
+        before_key_cache_hits = key_stats[key_index]["cache_hits"]
+        before_key_responses = key_stats[key_index]["responses"]
+
+        # Work on a copy so the source payload is never mutated.
+        updated = deepcopy(entry)
+        updated.pop("babelnet", None)
+        try:
+            updated["babelnet"] = enrich_entry(updated, api_keys[key_index], cache, args.sleep, key_stats[key_index])
+        except BabelNetApiCallLimitReached:
+            absorb_key_deltas(key_index, before_key_api_calls, before_key_cache_hits, before_key_responses)
+            stopped_reason = "per_key_api_call_limit"
+            break
+        except BabelNetKeyUnavailable as exc:
+            absorb_key_deltas(key_index, before_key_api_calls, before_key_cache_hits, before_key_responses)
+            # Mark the key as exhausted so select_key_index() stops offering it.
+            key_stats[key_index]["api_calls"] = key_stats[key_index]["api_call_limit"]
+            word_logs.append(
+                {
+                    "index": index,
+                    "word": updated.get("form"),
+                    "pos": updated.get("pos"),
+                    "key_index": key_stats[key_index]["key_index"],
+                    "api_calls": global_stats["api_calls"] - before_api_calls,
+                    "cache_hits": global_stats["cache_hits"] - before_cache_hits,
+                    "responses": global_stats["responses"] - before_responses,
+                    "matched": False,
+                    "synsets": 0,
+                    "reason": "key_unavailable_or_daily_limit",
+                    "error": str(exc),
+                }
+            )
+            print(
+                f"[{index}/{len(candidates)}] {updated.get('form')}: "
+                f"token={key_stats[key_index]['key_index']} non disponibile o limite giornaliero raggiunto"
+            )
+            if select_key_index() is None:
+                stopped_reason = "all_keys_unavailable_or_daily_limit"
+                break
+            continue
+
+        absorb_key_deltas(key_index, before_key_api_calls, before_key_cache_hits, before_key_responses)
+
+        enriched_entries.append(updated)
+        # Persist after every word so an interrupted run keeps its responses.
+        if persist_cache:
+            write_json(BABELNET_CACHE_PATH, cache)
+
+        word_log = {
+            "index": index,
+            "word": updated.get("form"),
+            "pos": updated.get("pos"),
+            "key_index": key_stats[key_index]["key_index"],
+            "api_calls": global_stats["api_calls"] - before_api_calls,
+            "cache_hits": global_stats["cache_hits"] - before_cache_hits,
+            "responses": global_stats["responses"] - before_responses,
+            "matched": bool(updated.get("babelnet", {}).get("matched")),
+            "synsets": len(updated.get("babelnet", {}).get("synsets", []) or []),
+            "reason": updated.get("babelnet", {}).get("reason"),
+        }
+        word_logs.append(word_log)
+        print(
+            f"[{index}/{len(candidates)}] {word_log['word']}: "
+            f"token={word_log['key_index']} api_calls={word_log['api_calls']} cache_hits={word_log['cache_hits']} "
+            f"match={word_log['matched']} tot_api={global_stats['api_calls']}/{global_stats['api_call_limit']}"
+        )
+
+    merged_babelnet = merge_babelnet_entries(
+        babelnet_payload,
+        enriched_entries,
+        args.topic or "all",
+        "all",
+    )
+    write_json(args.babelnet, merged_babelnet)
+    enriched_payload = rebuild_enriched(
+        args.semantic,
+        args.babelnet,
+        args.enriched,
+        args.topic or DEFAULT_TOPIC,
+    )
+    after_counts = progress_counts(enriched_payload)
+
+    # Coverage = entries whose status is anything other than "not_requested".
+    total_entries = int(enriched_payload.get("meta", {}).get("entry_count", 0))
+    covered = total_entries - after_counts.get("not_requested", 0)
+    coverage = covered / total_entries if total_entries else 0.0
+
+    result = {
+        "mode": "batch",
+        "started_topic": args.topic,
+        "stopped_reason": stopped_reason,
+        "candidate_count": len(candidates),
+        "attempted_words": len(enriched_entries),
+        "matched_words": sum(1 for entry in enriched_entries if entry.get("babelnet", {}).get("matched")),
+        "api_calls": global_stats["api_calls"],
+        "cache_hits": global_stats["cache_hits"],
+        "responses": global_stats["responses"],
+        "api_call_limit": global_stats["api_call_limit"],
+        "api_key_count": len(api_keys),
+        "forced_token_indexes": selected_token_indexes,
+        "per_key_api_call_limit": per_key_limit,
+        "per_key_stats": key_stats,
+        "before_counts": before_counts,
+        "after_counts": after_counts,
+        "total_entries": total_entries,
+        "covered_entries": covered,
+        "coverage_ratio": coverage,
+        "word_logs": word_logs,
+    }
+    log_path = write_batch_log(result)
+    result["log_path"] = str(log_path)
+    return result
+
+
+def print_result(result: Dict[str, object]) -> None:
+    """Print a human-readable report for a dry-run preview or a completed batch."""
+    if result["mode"] == "dry-run":
+        print("Dry-run batch BabelNet")
+        print(f"Candidate selezionate: {result['candidate_count']}")
+        print(f"Stati iniziali: {result['before_counts']}")
+        print("Prime parole:")
+        position = 0
+        for word in result["selected_words"]:
+            position += 1
+            print(f"{position:>2}. {word}")
+        return
+
+    print("Batch BabelNet completato")
+    print(f"- motivo stop: {result['stopped_reason']}")
+    print(f"- parole tentate: {result['attempted_words']}/{result['candidate_count']}")
+    print(f"- parole con match: {result['matched_words']}")
+    print(f"- chiamate API reali: {result['api_calls']}/{result['api_call_limit']}")
+    print(f"- chiavi caricate: {result['api_key_count']} (limite per chiave: {result['per_key_api_call_limit']})")
+    # Only shown when the user restricted the run to specific keys.
+    if result.get("forced_token_indexes"):
+        forced_label = ", ".join("#" + str(token) for token in result["forced_token_indexes"])
+        print(f"- token forzati: {forced_label}")
+    for key_report in result["per_key_stats"]:
+        print(f"  chiave #{key_report['key_index']}: {key_report['api_calls']}/{key_report['api_call_limit']} chiamate API")
+    print(f"- cache hit: {result['cache_hits']}")
+    coverage_percent = result["coverage_ratio"] * 100
+    print(f"- copertura lessico: {result['covered_entries']}/{result['total_entries']} ({coverage_percent:.1f}%)")
+    print(f"- stati dopo: {result['after_counts']}")
+    print(f"- log: {result['log_path']}")
+
+
+def main() -> None:
+    """Script entry point: parse options, run the batch and print its report."""
+    print_result(run_batch(parse_args()))
+
+
+if __name__ == "__main__":
+    main()
--- a/babelnet_incremental_enricher.py
+++ b/babelnet_incremental_enricher.py
@@ -0,0 +1,583 @@
+from __future__ import annotations
+
+import argparse
+import os
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Dict, Iterable, List, Optional, Tuple
+
+from build_babelnet_enrichment import (
+    BABELNET_CACHE_PATH,
+    BABELNET_ENV_KEY,
+    BABELNET_OUTPUT_PATH,
+    POS_TO_BABELNET,
+    enrich_entry,
+    load_json,
+    write_json,
+)
+from build_enriched_lexicon import (
+    ENRICHED_LEXICON_OUTPUT_PATH,
+    build_enriched_lexicon,
+    write_json as write_enriched_json,
+)
+from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
+
+
+# Named difficulty levels accepted by parse_difficulty() and their numeric value.
+# NOTE(review): no alias maps to 3; it is reachable only as a number - confirm this is intended.
+DIFFICULTY_ALIASES: Dict[str, int] = {
+    "easy": 1,
+    "medium": 2,
+    "hard": 4,
+    "expert": 5,
+}
+# Default of --topic.
+DEFAULT_TOPIC = "general"
+# NOTE(review): the constants below are not referenced in the part of the file
+# visible here; the comments restate their names only - confirm against their users.
+# Word endings treated as "abstract-ish".
+ABSTRACTISH_SUFFIXES = ("zione", "zioni", "mento", "menti", "ita", "ezza", "anza", "enza", "ismo")
+# POS tags allowed for fill words.
+FILL_ALLOWED_POS = {"NOUN", "VERB", "ADJ", "ADV", "PREP", "CONJ"}
+# Quality / length / count thresholds for fill selection.
+GENERAL_FILL_MIN_QUALITY = 6
+GENERAL_FILL_MAX_LENGTH = 10
+SOFT_RELATED_FILL_LIMIT = 120
+# Topic names considered "concrete".
+CONCRETE_TOPICS = {
+    "animals",
+    "plants",
+    "nature",
+    "ecology",
+    "geography",
+    "weather",
+    "sea",
+    "mountain",
+    "health",
+    "science",
+    "sport",
+    "history",
+    "school",
+    "cinema",
+    "literature",
+    "food",
+    "city",
+    "transport",
+    "work",
+    "home",
+}
+
+# Per-topic word fragments checked by matches_topic_roots(): a word matches a
+# listed topic when it contains at least one fragment anywhere in the word.
+# Topics without an entry here match every word.
+TOPIC_SEED_REQUIRED_SUBSTRINGS: Dict[str, Tuple[str, ...]] = {
+    "transport": (
+        "auto", "mot", "tren", "nav", "barc", "port", "pist", "vol", "aer",
+        "bici", "cicl", "rimorch", "reattor", "vettur", "ambul", "imbarc",
+        "trattor", "carr", "vap", "rota", "ruot",
+    ),
+    "animals": (
+        "can", "gatt", "lup", "ors", "pesc", "aquil", "anatr", "cavall",
+        "serpent", "tig", "leon", "volp", "cerv", "capr", "pecor",
+    ),
+    "nature": (
+        "mar", "lag", "fium", "vent", "bosch", "mont", "collin", "isol",
+        "rocc", "terra", "acqu", "fiore", "fogli", "radic", "affluent",
+        "litoral", "piogg", "nev", "onda", "clim",
+    ),
+    "cinema": (
+        "film", "cin", "teatr", "attor", "scen", "reg", "doppi", "dialog",
+        "comic", "div", "docu", "pellic", "spettacol",
+    ),
+}
+
+# Per-topic word fragments checked by is_blocked_for_topic(): a word containing
+# any listed fragment is blocked for that topic. Topics without an entry block nothing.
+TOPIC_SEED_BLOCKED_SUBSTRINGS: Dict[str, Tuple[str, ...]] = {
+    "transport": (
+        "intervist", "intratten", "speriment", "stermin", "investig",
+        "intervent", "centometr", "sintetizz", "erot", "adoraz", "esalt",
+        "eccit", "traduz", "fluttu", "sollecit",
+    ),
+    "animals": (
+        "assicur", "finanz", "coediz", "camerier", "servitor", "indic",
+        "estens", "diffus", "difensor", "spessor", "maggior",
+    ),
+    "cinema": (
+        "manifest", "riediz", "dissimul", "diffus", "difensor", "estens",
+        "malumor", "eversor",
+    ),
+}
+
+# BabelNet statuses for which an entry may still be enriched.
+# NOTE(review): not referenced in the part of the file visible here - confirm usage.
+ENRICHABLE_STATUSES = {"not_requested", "api_error"}
+
+# Per-topic word prefixes checked by matches_safe_babelnet_roots(): a word
+# matches when it starts with one of the topic's prefixes. Topics without an
+# entry here never match.
+BABELNET_TOPIC_SAFE_PREFIXES: Dict[str, Tuple[str, ...]] = {
+    "transport": (
+        "ambul",
+        "aer",
+        "autobus",
+        "autocar",
+        "automob",
+        "autostrad",
+        "autoveic",
+        "autovett",
+        "bicicl",
+        "ciclo",
+        "imbarc",
+        "locom",
+        "motoc",
+        "motr",
+        "navig",
+        "rimorch",
+        "trattor",
+        "tren",
+        "veicol",
+        "vettur",
+    ),
+}
+
+
+def parse_difficulty(value: str) -> int:
+    """Convert a ``--difficulty`` value into a numeric level.
+
+    Accepts one of the names in ``DIFFICULTY_ALIASES`` (case-insensitive,
+    surrounding whitespace ignored) or an integer between 1 and 5.
+
+    Raises:
+        SystemExit: If the value is neither a known name nor an integer in 1..5.
+    """
+    token = str(value).strip().lower()
+    alias_level = DIFFICULTY_ALIASES.get(token)
+    if alias_level is not None:
+        return alias_level
+    try:
+        numeric_level = int(token)
+    except ValueError as exc:
+        raise SystemExit(
+            "Valore non valido per --difficulty. Usa easy, medium, hard, expert oppure un intero tra 1 e 5."
+        ) from exc
+    if numeric_level < 1 or numeric_level > 5:
+        raise SystemExit("Il valore numerico di --difficulty deve essere compreso tra 1 e 5.")
+    return numeric_level
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line of the incremental BabelNet enricher.
+
+    The ``--api-key`` default is read from the environment variable named by
+    ``BABELNET_ENV_KEY`` at the time this function runs.
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Arricchisce incrementalmente il lessico: seleziona parole mancanti, "
+            "chiama BabelNet entro un limite e rigenera lexicon_it_enriched.json."
+        )
+    )
+    add = parser.add_argument
+
+    # Credentials and candidate selection.
+    add(
+        "--api-key",
+        default=os.environ.get(BABELNET_ENV_KEY),
+        help=f"Chiave API BabelNet. In alternativa imposta la variabile ambiente {BABELNET_ENV_KEY}.",
+    )
+    add("--topic", default=DEFAULT_TOPIC, help="Topic per cui scegliere le prossime parole da arricchire.")
+    add("--difficulty", default="medium", help="Difficolta massima: easy, medium, hard, expert oppure 1-5.")
+    add("--limit", type=int, default=50, help="Numero massimo di parole da arricchire in questa esecuzione.")
+    add("--sleep", type=float, default=0.2, help="Pausa tra richieste API.")
+
+    # Input/output lexicon files.
+    lexicon_paths = (
+        ("--semantic", SEMANTIC_LEXICON_OUTPUT_PATH, "Lessico semantico completo di partenza."),
+        ("--babelnet", BABELNET_OUTPUT_PATH, "Archivio degli arricchimenti BabelNet parziali."),
+        ("--enriched", ENRICHED_LEXICON_OUTPUT_PATH, "Lessico arricchito da aggiornare."),
+    )
+    for flag, default_path, help_text in lexicon_paths:
+        add(flag, type=Path, default=default_path, help=help_text)
+
+    # Boolean switches.
+    switches = (
+        ("--dry-run", "Mostra le parole candidate senza chiamare BabelNet e senza scrivere file."),
+        ("--retry-no-match", "Riprova anche parole gia marcate come no_match."),
+    )
+    for flag, help_text in switches:
+        add(flag, action="store_true", help=help_text)
+
+    add(
+        "--words",
+        nargs="*",
+        default=None,
+        help="Parole specifiche da arricchire, utile per generare definizioni sul cruciverba finale.",
+    )
+    return parser.parse_args()
+
+
+def entry_key(entry: Dict[str, object]) -> Tuple[str, str]:
+    """Return the ``(form, pos)`` pair that identifies a lexicon entry.
+
+    The form is taken from ``normalized_form``, then ``form``, then the
+    empty string; it is stripped and lower-cased. The part of speech is
+    stripped and upper-cased.
+    """
+    raw_form = entry.get("normalized_form") or entry.get("form") or ""
+    raw_pos = entry.get("pos") or ""
+    return str(raw_form).strip().lower(), str(raw_pos).strip().upper()
+
+
+def dedupe(items: Iterable[Dict[str, object]]) -> List[Dict[str, object]]:
+    """Drop entries whose ``entry_key`` was already seen, keeping the first.
+
+    The relative order of the surviving entries is the input order.
+    """
+    first_by_key: Dict[Tuple[str, str], Dict[str, object]] = {}
+    for candidate in items:
+        # setdefault only stores the entry when the key is new.
+        first_by_key.setdefault(entry_key(candidate), candidate)
+    return list(first_by_key.values())
+
+
+def entry_topics(entry: Dict[str, object]) -> Tuple[set[str], set[str]]:
+    """Return the curated and the semantic topic sets of ``entry``.
+
+    Returns:
+        ``(topics, semantic_topics)``, both lower-cased. ``topics`` comes
+        from ``entry["topics"]``; ``semantic_topics`` from
+        ``entry["semantic"]["semantic_topics"]`` when ``semantic`` is a dict.
+        Falsy items are ignored.
+    """
+    # ``or []`` also covers an explicit JSON null, which ``.get(key, [])``
+    # would pass through and fail to iterate.
+    topics = {str(item).lower() for item in entry.get("topics") or [] if item}
+    semantic = entry.get("semantic")
+    semantic_topics: set[str] = set()
+    if isinstance(semantic, dict):
+        semantic_topics = {
+            str(item).lower() for item in semantic.get("semantic_topics") or [] if item
+        }
+    return topics, semantic_topics
+
+
+def current_babelnet_status(entry: Dict[str, object]) -> str:
+    """Return the BabelNet status stored on ``entry``.
+
+    ``"not_requested"`` is returned when the ``babelnet`` block is missing,
+    is not a dict, or has no ``status`` key.
+    """
+    block = entry.get("babelnet", {})
+    if not isinstance(block, dict):
+        return "not_requested"
+    return str(block.get("status", "not_requested"))
+
+
+def matches_topic_roots(word: str, topic: str) -> bool:
+    """Tell whether ``word`` contains one of the topic's required substrings.
+
+    A topic with no entry (or an empty entry) in
+    ``TOPIC_SEED_REQUIRED_SUBSTRINGS`` accepts every word.
+    """
+    required = TOPIC_SEED_REQUIRED_SUBSTRINGS.get(topic, ())
+    if not required:
+        return True
+    for fragment in required:
+        if fragment in word:
+            return True
+    return False
+
+
+def matches_safe_babelnet_roots(word: str, topic: str) -> bool:
+    """Tell whether ``word`` starts with one of the topic's safe prefixes.
+
+    Topics absent from ``BABELNET_TOPIC_SAFE_PREFIXES`` never match.
+    """
+    safe_prefixes = BABELNET_TOPIC_SAFE_PREFIXES.get(topic)
+    return safe_prefixes is not None and any(
+        word.startswith(candidate) for candidate in safe_prefixes
+    )
+
+
+def is_blocked_for_topic(word: str, topic: str) -> bool:
+    """Tell whether ``word`` contains a substring blocked for ``topic``."""
+    for fragment in TOPIC_SEED_BLOCKED_SUBSTRINGS.get(topic, ()):
+        if fragment in word:
+            return True
+    return False
+
+
+def topic_score(entry: Dict[str, object], topic: str) -> int:
+    """Score how well ``entry`` fits ``topic``.
+
+    The default topic always scores 20. For any other topic the score is
+    the sum of the bonuses and penalties listed below that apply.
+    """
+    if topic == DEFAULT_TOPIC:
+        return 20
+
+    word = str(entry.get("form", "")).lower()
+    topics, semantic_topics = entry_topics(entry)
+    adjustments = (
+        (topic in topics, 100),
+        (topic in semantic_topics, 45),
+        (matches_topic_roots(word, topic), 35),
+        (DEFAULT_TOPIC in topics, 5),
+        (is_blocked_for_topic(word, topic), -100),
+        # Abstract-looking words are penalised only for concrete topics.
+        (topic in CONCRETE_TOPICS and word.endswith(ABSTRACTISH_SUFFIXES), -30),
+    )
+    return sum(delta for applies, delta in adjustments if applies)
+
+
+def candidate_score(entry: Dict[str, object], topic: str) -> Tuple[int, int, int, int, int, str]:
+    """Build the sort key used to rank enrichment candidates.
+
+    Returns:
+        ``(topic score, quality score, POS bonus, semantic bonus, length
+        bonus, word)``; callers sort on it in descending order.
+    """
+    word = str(entry.get("form", ""))
+    length = len(word)
+    if 4 <= length <= 10:
+        length_bonus = 4
+    elif length <= 14:
+        # Also covers words shorter than four letters.
+        length_bonus = 1
+    else:
+        length_bonus = -3
+
+    bonus_by_pos = {"NOUN": 12, "VERB": 8, "ADJ": 6, "ADV": 4}
+    pos_bonus = bonus_by_pos.get(str(entry.get("pos", "")), 0)
+
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict) and semantic.get("matched"):
+        semantic_bonus = 3
+    else:
+        semantic_bonus = 0
+
+    quality = int(entry.get("quality_score", 0))
+    return (topic_score(entry, topic), quality, pos_bonus, semantic_bonus, length_bonus, word)
+
+
+def eligible_for_babelnet(entry: Dict[str, object], topic: str, difficulty_level: int, retry_no_match: bool) -> bool:
+    """Decide whether ``entry`` may be sent to BabelNet for ``topic``.
+
+    An entry qualifies when its BabelNet status is enrichable (optionally
+    including ``no_match``), its form is alphabetic with 3-16 characters,
+    its POS is both mapped to BabelNet and allowed as fill, its
+    ``difficulty_word`` does not exceed ``difficulty_level`` and it is
+    allowed in crosswords. For a non-default topic it must also be tied to
+    the topic by its curated topics, a safe prefix, or (non-concrete topics
+    only) its semantic topics.
+    """
+    topics, semantic_topics = entry_topics(entry)
+
+    allowed_statuses = set(ENRICHABLE_STATUSES)
+    if retry_no_match:
+        allowed_statuses.add("no_match")
+    if current_babelnet_status(entry) not in allowed_statuses:
+        return False
+
+    word = str(entry.get("form", "")).lower()
+    if not (word.isalpha() and 3 <= len(word) <= 16):
+        return False
+
+    pos = str(entry.get("pos", ""))
+    if not (pos in POS_TO_BABELNET and pos in FILL_ALLOWED_POS):
+        return False
+
+    # A missing difficulty counts as 5.
+    if int(entry.get("difficulty_word", 5)) > difficulty_level:
+        return False
+    if not entry.get("allowed_in_crossword", False):
+        return False
+    if topic == DEFAULT_TOPIC:
+        return True
+
+    concrete = topic in CONCRETE_TOPICS
+    if concrete and word.endswith(ABSTRACTISH_SUFFIXES):
+        return False
+    return (
+        topic in topics
+        or matches_safe_babelnet_roots(word, topic)
+        or (topic in semantic_topics and not concrete)
+    )
+
+
+def select_candidates(payload: Dict[str, object], topic: str, difficulty_level: int, limit: int, retry_no_match: bool) -> List[Dict[str, object]]:
+    """Pick the best ``limit`` entries to enrich for ``topic``.
+
+    Args:
+        payload: Lexicon payload; its ``entries`` list is scanned.
+        topic: Normalised topic name.
+        difficulty_level: Maximum ``difficulty_word`` accepted.
+        limit: Maximum number of entries returned.
+        retry_no_match: Also accept entries already marked ``no_match``.
+
+    Returns:
+        Eligible entries, de-duplicated by ``entry_key`` and sorted by
+        ``candidate_score`` in descending order, truncated to ``limit``.
+    """
+    entries = [
+        entry
+        for entry in payload.get("entries", []) or []
+        if isinstance(entry, dict) and eligible_for_babelnet(entry, topic, difficulty_level, retry_no_match)
+    ]
+
+    def rank(item: Dict[str, object]) -> Tuple[int, int, int, int, int, str]:
+        return candidate_score(item, topic)
+
+    if topic != DEFAULT_TOPIC:
+        # Each entry is classified once. The buckets are mutually exclusive
+        # and depend only on the entry's own fields, so this replaces the
+        # former ``entry not in strong`` / ``not in soft`` list scans (a
+        # quadratic number of dict comparisons) with the same result.
+        strong: List[Dict[str, object]] = []
+        soft: List[Dict[str, object]] = []
+        support: List[Dict[str, object]] = []
+        for entry in entries:
+            if topic in entry_topics(entry)[0]:
+                strong.append(entry)
+                continue
+            if int(entry.get("quality_score", 0)) < GENERAL_FILL_MIN_QUALITY:
+                # Off-topic, low-quality entries are dropped.
+                continue
+            form = str(entry.get("form", ""))
+            if len(form) <= GENERAL_FILL_MAX_LENGTH:
+                soft.append(entry)
+            elif not form.endswith(ABSTRACTISH_SUFFIXES):
+                support.append(entry)
+
+        soft.sort(key=rank, reverse=True)
+        support.sort(key=rank, reverse=True)
+        # Only the soft bucket is capped; strong and support are unlimited.
+        entries = strong + soft[:SOFT_RELATED_FILL_LIMIT] + support
+
+    entries = dedupe(entries)
+    entries.sort(key=rank, reverse=True)
+    return entries[:limit]
+
+
+def select_word_candidates(
+    payload: Dict[str, object],
+    words: Iterable[str],
+    limit: int,
+    retry_no_match: bool,
+) -> List[Dict[str, object]]:
+    """Pick the lexicon entries matching an explicit list of words.
+
+    Args:
+        payload: Lexicon payload; its ``entries`` list is scanned.
+        words: Requested words; they are stripped, lower-cased and
+            de-duplicated, keeping the first occurrence.
+        limit: Maximum number of entries returned; ``0`` or less returns
+            an empty list.
+        retry_no_match: Also accept entries already marked ``no_match``.
+
+    Returns:
+        At most one entry per requested word, in request order. An entry
+        is accepted when its status is enrichable, its POS is mapped to
+        BabelNet and its form is alphabetic.
+    """
+    # The limit used to be checked only after the first append, so
+    # ``limit=0`` still returned one entry.
+    if limit <= 0:
+        return []
+
+    requested = list(
+        dict.fromkeys(
+            normalized
+            for normalized in (str(word).strip().lower() for word in words)
+            if normalized
+        )
+    )
+
+    # Keep every entry per form: a single-entry index let a later homograph
+    # (other POS, or already enriched) hide an enrichable one.
+    by_word: Dict[str, List[Dict[str, object]]] = {}
+    for entry in payload.get("entries", []) or []:
+        if isinstance(entry, dict):
+            by_word.setdefault(str(entry.get("form", "")).lower(), []).append(entry)
+
+    allowed_statuses = set(ENRICHABLE_STATUSES)
+    if retry_no_match:
+        allowed_statuses.add("no_match")
+
+    def is_enrichable(entry: Dict[str, object]) -> bool:
+        return (
+            current_babelnet_status(entry) in allowed_statuses
+            and str(entry.get("pos", "")) in POS_TO_BABELNET
+            and str(entry.get("form", "")).isalpha()
+        )
+
+    selected: List[Dict[str, object]] = []
+    for word in requested:
+        # Scan from the last entry so the choice is unchanged whenever the
+        # last entry for the form was already acceptable.
+        match = next(
+            (entry for entry in reversed(by_word.get(word, ())) if is_enrichable(entry)),
+            None,
+        )
+        if match is None:
+            continue
+        selected.append(match)
+        if len(selected) >= limit:
+            break
+
+    return selected
+
+
+def load_source_payload(enriched_path: Path, semantic_path: Path) -> Dict[str, object]:
+    """Load the lexicon to select candidates from.
+
+    The enriched lexicon is preferred when the file exists and holds a dict
+    with an ``entries`` key; otherwise the semantic lexicon is used.
+
+    Raises:
+        ValueError: When neither file yields a dict with ``entries``.
+    """
+
+    def has_entries(candidate: object) -> bool:
+        return isinstance(candidate, dict) and "entries" in candidate
+
+    if enriched_path.exists():
+        enriched = load_json(enriched_path, {})
+        if has_entries(enriched):
+            return enriched
+
+    semantic = load_json(semantic_path, {})
+    if not has_entries(semantic):
+        raise ValueError(f"Nessun lessico valido trovato: {enriched_path} / {semantic_path}")
+    return semantic
+
+
+def merge_babelnet_entries(existing_payload: Dict[str, object], new_entries: List[Dict[str, object]], topic: str, difficulty: str) -> Dict[str, object]:
+    """Merge freshly enriched entries into the BabelNet archive payload.
+
+    Entries are keyed by ``entry_key``; a new entry replaces an archived
+    one with the same key and is stamped with ``babelnet_generated_at``.
+    Inputs are deep-copied, not mutated.
+
+    Returns:
+        ``{"meta": ..., "entries": ...}`` with entries sorted by form and
+        POS and the metadata refreshed for this run.
+    """
+    generated_at = datetime.now().astimezone().isoformat(timespec="seconds")
+
+    merged: Dict[Tuple[str, str], Dict[str, object]] = {}
+    for archived in existing_payload.get("entries", []) or []:
+        if isinstance(archived, dict):
+            merged[entry_key(archived)] = deepcopy(archived)
+    for fresh in new_entries:
+        stamped = deepcopy(fresh)
+        stamped["babelnet_generated_at"] = generated_at
+        merged[entry_key(stamped)] = stamped
+
+    entries = list(merged.values())
+    entries.sort(key=lambda item: (str(item.get("form", "")), str(item.get("pos", ""))))
+
+    previous_meta = existing_payload.get("meta", {})
+    meta = dict(previous_meta) if isinstance(previous_meta, dict) else {}
+    # Existing language/version values are kept; the rest is overwritten.
+    meta["language"] = meta.get("language", "it")
+    meta["version"] = max(1, int(meta.get("version", 1)))
+    meta["source"] = "BabelNet API"
+    meta["updated_at"] = generated_at
+    meta["last_topic"] = topic
+    meta["last_difficulty"] = difficulty
+    meta["entry_count"] = len(entries)
+    return {"meta": meta, "entries": entries}
+
+
+def rebuild_enriched(semantic_path: Path, babelnet_path: Path, enriched_path: Path, topic: str) -> Dict[str, object]:
+    """Regenerate the enriched lexicon and write it to ``enriched_path``.
+
+    ``build_enriched_lexicon`` expects an argparse-like object, so the
+    paths and topic are wrapped in a ``SimpleNamespace``.
+
+    Returns:
+        The payload that was written.
+    """
+    build_args = SimpleNamespace(
+        output=enriched_path,
+        topic=topic,
+        semantic=semantic_path,
+        babelnet=babelnet_path,
+    )
+    enriched = build_enriched_lexicon(build_args)
+    write_enriched_json(enriched_path, enriched)
+    return enriched
+
+
+def run_incremental_enrichment(args: argparse.Namespace) -> Dict[str, object]:
+    """Select candidate entries and enrich them through BabelNet.
+
+    Args:
+        args: Namespace providing ``topic``, ``difficulty``, ``enriched``,
+            ``semantic``, ``limit``, ``retry_no_match``, ``dry_run``,
+            ``api_key``, ``sleep`` and ``babelnet``; ``words`` is optional.
+
+    Returns:
+        In dry-run mode a dict with ``mode``, ``topic``, ``difficulty``,
+        ``selected_count`` and ``selected_words``. Otherwise a summary dict
+        with ``mode="enriched"``, per-word logs and aggregate counters.
+
+    Raises:
+        SystemExit: When no API key is available outside dry-run mode.
+    """
+    normalized_topic = args.topic.strip().lower()
+    difficulty_level = parse_difficulty(str(args.difficulty))
+    source_payload = load_source_payload(args.enriched, args.semantic)
+    # getattr tolerates namespaces built without a "words" attribute.
+    target_words = getattr(args, "words", None)
+    if target_words:
+        # An explicit word list bypasses topic and difficulty filtering.
+        candidates = select_word_candidates(
+            source_payload,
+            target_words,
+            max(0, args.limit),
+            args.retry_no_match,
+        )
+    else:
+        candidates = select_candidates(
+            source_payload,
+            normalized_topic,
+            difficulty_level,
+            max(0, args.limit),
+            args.retry_no_match,
+        )
+
+    # Dry-run returns before the API key check and before any file is read
+    # or written below.
+    if args.dry_run:
+        return {
+            "mode": "dry-run",
+            "topic": normalized_topic,
+            "difficulty": args.difficulty,
+            "selected_count": len(candidates),
+            "selected_words": [entry.get("form") for entry in candidates],
+        }
+
+    if not args.api_key:
+        raise SystemExit(
+            f"Chiave BabelNet mancante. Imposta {BABELNET_ENV_KEY} oppure usa --api-key <chiave>."
+        )
+
+    # Unexpected JSON shapes fall back to empty structures.
+    cache = load_json(BABELNET_CACHE_PATH, {})
+    if not isinstance(cache, dict):
+        cache = {}
+    babelnet_payload = load_json(args.babelnet, {"entries": []})
+    if not isinstance(babelnet_payload, dict):
+        babelnet_payload = {"entries": []}
+
+    enriched_candidates = []
+    word_logs = []
+    # NOTE(review): an exception raised by enrich_entry() leaves this loop
+    # before the merge below, so entries enriched so far are not written to
+    # args.babelnet (only the cache file has been saved) - confirm intended.
+    for index, entry in enumerate(candidates, start=1):
+        updated = deepcopy(entry)
+        # Drop any previous BabelNet block before asking for a fresh one.
+        updated.pop("babelnet", None)
+        # Counters are per word; they are aggregated in the returned summary.
+        stats = {"api_calls": 0, "cache_hits": 0, "responses": 0}
+        updated["babelnet"] = enrich_entry(updated, args.api_key, cache, args.sleep, stats)
+        enriched_candidates.append(updated)
+        # The cache file is rewritten after every word (presumably so that
+        # completed lookups survive a later failure).
+        write_json(BABELNET_CACHE_PATH, cache)
+        word_logs.append(
+            {
+                "word": updated["form"],
+                "api_calls": stats["api_calls"],
+                "cache_hits": stats["cache_hits"],
+                "responses": stats["responses"],
+                "matched": bool(updated["babelnet"].get("matched")),
+                "synsets": len(updated["babelnet"].get("synsets", []) or []),
+                "reason": updated["babelnet"].get("reason"),
+            }
+        )
+        print(
+            f"[{index}/{len(candidates)}] {updated['form']}: "
+            f"api_calls={stats['api_calls']} cache_hits={stats['cache_hits']} "
+            f"risposta={stats['responses'] > 0} match={updated['babelnet'].get('matched')}"
+        )
+
+    merged_babelnet = merge_babelnet_entries(
+        babelnet_payload,
+        enriched_candidates,
+        normalized_topic,
+        str(args.difficulty),
+    )
+    write_json(args.babelnet, merged_babelnet)
+    # The enriched lexicon is rebuilt from the semantic lexicon plus the
+    # archive that was just written.
+    enriched_payload = rebuild_enriched(args.semantic, args.babelnet, args.enriched, normalized_topic)
+
+    return {
+        "mode": "enriched",
+        "topic": normalized_topic,
+        "difficulty": args.difficulty,
+        "selected_count": len(candidates),
+        "matched_count": sum(1 for entry in enriched_candidates if entry.get("babelnet", {}).get("matched")),
+        "api_call_count": sum(item["api_calls"] for item in word_logs),
+        "cache_hit_count": sum(item["cache_hits"] for item in word_logs),
+        "word_logs": word_logs,
+        "babelnet_entry_count": merged_babelnet["meta"]["entry_count"],
+        "enriched_status_counts": enriched_payload["meta"]["babelnet_status_counts"],
+    }
+
+
+def main() -> None:
+    """Run the incremental enrichment from the CLI and print a report."""
+    args = parse_args()
+    result = run_incremental_enrichment(args)
+    # Dry-run: list the selected words and stop.
+    if result["mode"] == "dry-run":
+        print("Dry-run BabelNet incrementale")
+        print(f"Topic: {result['topic']}")
+        print(f"Difficolta: {result['difficulty']}")
+        print(f"Parole selezionate: {result['selected_count']}")
+        for index, word in enumerate(result["selected_words"], start=1):
+            print(f"{index:2d}. {word}")
+        return
+
+    # Real run: aggregate counters first, then one line per queried word.
+    print("Arricchimento BabelNet completato")
+    print(f"Topic: {result['topic']}")
+    print(f"Parole interrogate: {result['selected_count']}")
+    print(f"Chiamate API BabelNet reali: {result['api_call_count']}")
+    print(f"Risposte da cache: {result['cache_hit_count']}")
+    print(f"Match BabelNet: {result['matched_count']}")
+    for item in result["word_logs"]:
+        print(
+            f"- {item['word']}: api_calls={item['api_calls']}, "
+            f"cache_hits={item['cache_hits']}, risposta={item['responses'] > 0}, "
+            f"match={item['matched']}, synsets={item['synsets']}"
+        )
+    print(f"Voci BabelNet archiviate: {result['babelnet_entry_count']}")
+    print(f"Stati lessico arricchito: {result['enriched_status_counts']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/build_babelnet_enrichment.py
+++ b/build_babelnet_enrichment.py
@@ -12,11 +12,11 @@ from pathlib import Path
 from typing import Dict, Iterable, List, Optional

 from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
-from main import parse_difficulty


 BABELNET_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_babelnet.json")
 BABELNET_CACHE_PATH = Path(__file__).with_name(".babelnet_cache.json")
+BABELNET_LOCAL_KEY_PATH = Path(__file__).with_name(".babelnet_api_key.local")
 BABELNET_API_BASE = "https://babelnet.io/v9"
 BABELNET_ENV_KEY = "BABELNET_API_KEY"

@@ -28,14 +28,76 @@ POS_TO_BABELNET = {
 }


+class BabelNetApiCallLimitReached(RuntimeError):
+    """Raised by request_json() when ``stats["api_calls"]`` has reached ``stats["api_call_limit"]``."""
+
+    pass
+
+
+class BabelNetKeyUnavailable(RuntimeError):
+    """Raised by request_json() when BabelNet answers with HTTP 403."""
+
+    pass
+
+# Named difficulty levels accepted by parse_difficulty().
+# NOTE(review): level 3 has no alias - confirm this is intentional.
+DIFFICULTY_ALIASES: Dict[str, int] = {
+    "easy": 1,
+    "medium": 2,
+    "hard": 4,
+    "expert": 5,
+}
+
+
+def parse_difficulty(value: str) -> int:
+    """Convert a difficulty alias or a numeric string into a level 1-5.
+
+    Args:
+        value: An alias from ``DIFFICULTY_ALIASES`` or an integer literal;
+            surrounding whitespace and letter case are ignored.
+
+    Raises:
+        SystemExit: When the value is neither an alias nor an integer
+            between 1 and 5.
+    """
+    text = str(value).strip().lower()
+    alias_level = DIFFICULTY_ALIASES.get(text)
+    if alias_level is not None:
+        return alias_level
+
+    try:
+        level = int(text)
+    except ValueError as exc:
+        raise SystemExit(
+            "Valore non valido per --difficulty. Usa easy, medium, hard, expert oppure un intero tra 1 e 5."
+        ) from exc
+
+    if level < 1 or level > 5:
+        raise SystemExit("Il valore numerico di --difficulty deve essere compreso tra 1 e 5.")
+    return level
+
+
+def _split_api_keys(text: str) -> List[str]:
+    """Split a key list separated by newlines, ``;`` or ``,``.
+
+    Each piece is stripped. Empty pieces, pieces starting with ``#`` and
+    repeated keys are skipped; first-seen order is preserved.
+    """
+    unique_keys: Dict[str, None] = {}
+    for chunk in text.replace(";", "\n").replace(",", "\n").splitlines():
+        candidate = chunk.strip()
+        if candidate and not candidate.startswith("#"):
+            unique_keys.setdefault(candidate, None)
+    return list(unique_keys)
+
+
+def load_babelnet_api_keys() -> List[str]:
+    """Return the configured BabelNet API keys.
+
+    Keys are read from the ``BABELNET_ENV_KEY`` environment variable first,
+    then from the ``BABELNET_LOCAL_KEY_PATH`` file. Both sources are parsed
+    with ``_split_api_keys``.
+
+    Returns:
+        The keys from the first source that yields at least one, or an
+        empty list.
+    """
+    env_value = os.environ.get(BABELNET_ENV_KEY)
+    if env_value:
+        env_keys = _split_api_keys(env_value)
+        # A variable holding only whitespace or comments yields no keys and
+        # must not block the local-file fallback.
+        if env_keys:
+            return env_keys
+    if BABELNET_LOCAL_KEY_PATH.exists():
+        return _split_api_keys(BABELNET_LOCAL_KEY_PATH.read_text(encoding="utf-8"))
+    return []
+
+
+def load_babelnet_api_key() -> Optional[str]:
+    """Return the first configured BabelNet API key, or ``None`` if there is none."""
+    configured = load_babelnet_api_keys()
+    return configured[0] if configured else None
+
+
 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Arricchisce lexicon_it_semantic.json usando BabelNet, se disponibile una API key."
    )
    parser.add_argument(
        "--api-key",
-        default=os.environ.get(BABELNET_ENV_KEY),
-        help=f"Chiave API BabelNet. In alternativa imposta la variabile ambiente {BABELNET_ENV_KEY}.",
+        default=load_babelnet_api_key(),
+        help=(
+            f"Chiave API BabelNet. In alternativa imposta {BABELNET_ENV_KEY} "
+            f"o crea {BABELNET_LOCAL_KEY_PATH.name}."
+        ),
    )
    parser.add_argument(
        "--topic",
@@ -78,10 +140,29 @@ def write_json(path: Path, payload: object) -> None:
    path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")


-def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]) -> object:
+def cache_key(endpoint: str, params: Dict[str, str]) -> str:
+    """Build the cache key for a request.
+
+    The ``key`` parameter is left out and the remaining parameters are
+    sorted before being URL-encoded after ``endpoint``.
+    """
+    public_items = sorted((name, value) for name, value in params.items() if name != "key")
+    query = urllib.parse.urlencode(public_items)
+    return f"{endpoint}?{query}"
+
+
+def request_json(
+    endpoint: str,
+    params: Dict[str, str],
+    cache: Dict[str, object],
+    stats: Optional[Dict[str, int]] = None,
+) -> object:
    url = f"{BABELNET_API_BASE}/{endpoint}?{urllib.parse.urlencode(params)}"
-    if url in cache:
-        return cache[url]
+    key = cache_key(endpoint, params)
+    if key in cache:
+        if stats is not None:
+            stats["cache_hits"] = stats.get("cache_hits", 0) + 1
+        return cache[key]
+
+    if stats is not None:
+        limit = stats.get("api_call_limit")
+        current = stats.get("api_calls", 0)
+        if limit is not None and current >= limit:
+            raise BabelNetApiCallLimitReached("Limite chiamate API BabelNet raggiunto")

    request = urllib.request.Request(url, headers={"Accept": "application/json"})
    try:
@@ -89,9 +170,14 @@ def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]
            payload = json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
+        if exc.code == 403:
+            raise BabelNetKeyUnavailable(f"Chiave BabelNet non valida o limite giornaliero raggiunto: {detail}") from exc
        raise RuntimeError(f"Errore BabelNet HTTP {exc.code}: {detail}") from exc

-    cache[url] = payload
+    cache[key] = payload
+    if stats is not None:
+        stats["api_calls"] = stats.get("api_calls", 0) + 1
+        stats["responses"] = stats.get("responses", 0) + 1
    return payload


@@ -180,7 +266,13 @@ def dedupe(items: Iterable[str]) -> List[str]:
    return result


-def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object], sleep_seconds: float) -> Dict[str, object]:
+def enrich_entry(
+    entry: Dict[str, object],
+    api_key: str,
+    cache: Dict[str, object],
+    sleep_seconds: float,
+    stats: Optional[Dict[str, int]] = None,
+) -> Dict[str, object]:
    word = str(entry.get("form", ""))
    pos = POS_TO_BABELNET.get(str(entry.get("pos", "")))
    if not pos:
@@ -195,6 +287,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
            "key": api_key,
        },
        cache,
+        stats,
    )
    if sleep_seconds:
        time.sleep(sleep_seconds)
@@ -215,6 +308,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
                "key": api_key,
            },
            cache,
+            stats,
        )
        if sleep_seconds:
            time.sleep(sleep_seconds)
--- a/build_enriched_lexicon.py
+++ b/build_enriched_lexicon.py
@@ -0,0 +1,324 @@
+from __future__ import annotations
+
+import argparse
+import json
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple
+
+from build_babelnet_enrichment import BABELNET_OUTPUT_PATH
+from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
+
+
+ENRICHED_LEXICON_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_enriched.json")
+
+# Per-topic BabelNet domain labels, read by score_synset_for_topic(): each
+# matching "strong" domain adds 60 points, each "weak" one adds 25 and each
+# "negative" one subtracts 35. A topic missing from this table gets no
+# domain-based score.
+TOPIC_DOMAIN_RULES: Dict[str, Dict[str, Tuple[str, ...]]] = {
+    "transport": {
+        "strong": (
+            "TRANSPORT_AND_TRAVEL",
+            "NAVIGATION_AND_AVIATION",
+        ),
+        "weak": (
+            "CRAFT_ENGINEERING_AND_TECHNOLOGY",
+            "FARMING_FISHING_AND_HUNTING",
+        ),
+        "negative": (
+            "MEDIA_AND_PRESS",
+            "PHILOSOPHY_PSYCHOLOGY_AND_BEHAVIOR",
+            "RELIGION_MYSTICISM_AND_MYTHOLOGY",
+            "CHEMISTRY_AND_MINERALOGY",
+        ),
+    },
+    "health": {
+        "strong": ("HEALTH_AND_MEDICINE",),
+        "weak": ("BIOLOGY",),
+        "negative": ("MEDIA_AND_PRESS",),
+    },
+    "cinema": {
+        "strong": ("MEDIA_AND_PRESS",),
+        "weak": ("ART_ARCHITECTURE_AND_ARCHAEOLOGY",),
+        "negative": ("HEALTH_AND_MEDICINE", "CHEMISTRY_AND_MINERALOGY"),
+    },
+    "nature": {
+        "strong": (
+            "BIOLOGY",
+            "ANIMALS",
+            "PLANTS",
+            "EARTH",
+            "METEOROLOGY",
+        ),
+        "weak": ("GEOGRAPHY_AND_PLACES",),
+        "negative": ("MEDIA_AND_PRESS",),
+    },
+    "ecology": {
+        "strong": ("BIOLOGY", "EARTH", "METEOROLOGY"),
+        "weak": ("GEOGRAPHY_AND_PLACES",),
+        "negative": ("MEDIA_AND_PRESS",),
+    },
+}
+
+# Per-topic lower-case substrings searched in the text built by
+# synset_text(); score_synset_for_topic() adds 12 points for every keyword
+# found. Matching is a plain substring test, so stems such as "trasport"
+# or "medic" also hit longer words.
+TOPIC_TEXT_KEYWORDS: Dict[str, Tuple[str, ...]] = {
+    "transport": (
+        "aereo",
+        "auto",
+        "autobus",
+        "barca",
+        "bicicletta",
+        "imbarcazione",
+        "motore",
+        "nave",
+        "pista",
+        "trasport",
+        "treno",
+        "veicolo",
+        "viaggio",
+    ),
+    "health": ("cura", "malato", "medic", "ospedale", "paziente", "salute", "soccorso"),
+    "cinema": ("attore", "cinema", "film", "pellicola", "regia", "spettacolo"),
+    "nature": ("acqua", "animale", "bosco", "fiore", "mare", "montagna", "pianta", "terra"),
+    "ecology": ("ambiente", "ecologia", "inquinamento", "natura", "sostenibile"),
+}
+
+
+def parse_args() -> argparse.Namespace:
+    """Build the CLI parser for the enriched-lexicon merge.
+
+    Returns:
+        The namespace produced by ``parser.parse_args()`` with the
+        attributes ``semantic``, ``babelnet``, ``output`` and ``topic``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Fonde lexicon_it_semantic.json con gli arricchimenti BabelNet gia disponibili."
+    )
+    parser.add_argument(
+        "--semantic",
+        type=Path,
+        default=SEMANTIC_LEXICON_OUTPUT_PATH,
+        help="Lessico semantico completo di partenza.",
+    )
+    parser.add_argument(
+        "--babelnet",
+        type=Path,
+        default=BABELNET_OUTPUT_PATH,
+        help="File con arricchimenti BabelNet parziali.",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=ENRICHED_LEXICON_OUTPUT_PATH,
+        help="Lessico arricchito da generare.",
+    )
+    # None means "no requested topic": only the entry's own topics are then
+    # used to choose a synset (see topic_candidates()).
+    parser.add_argument(
+        "--topic",
+        default=None,
+        help="Topic opzionale da usare per scegliere il synset BabelNet migliore.",
+    )
+    return parser.parse_args()
+
+
+def load_json(path: Path, default: object) -> object:
+    """Return the JSON content of ``path``, or ``default`` if the file is missing.
+
+    The file is read as UTF-8.
+    """
+    if path.exists():
+        return json.loads(path.read_text(encoding="utf-8"))
+    return default
+
+
+def write_json(path: Path, payload: object) -> None:
+    """Write ``payload`` to ``path`` as UTF-8 JSON, indented by two spaces.
+
+    Non-ASCII characters are written as-is rather than escaped.
+    """
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    path.write_text(serialized, encoding="utf-8")
+
+
+def entry_key(entry: Dict[str, object]) -> Tuple[str, str]:
+    """Return the ``(form, pos)`` key used to join the two lexicon files.
+
+    ``normalized_form`` wins over ``form``; the form is stripped and
+    lower-cased, the part of speech stripped and upper-cased. Missing or
+    falsy values become empty strings.
+    """
+    form_source = entry.get("normalized_form") or entry.get("form") or ""
+    pos_source = entry.get("pos") or ""
+    form = str(form_source).strip().lower()
+    pos = str(pos_source).strip().upper()
+    return form, pos
+
+
+def dedupe(items: Iterable[str]) -> List[str]:
+    """Return the stripped, non-empty string forms of ``items`` without repeats.
+
+    Each item is converted with ``str`` and stripped; first-seen order is
+    preserved.
+    """
+    stripped = (str(item).strip() for item in items)
+    # dict.fromkeys keeps the first occurrence of each text, in order.
+    return list(dict.fromkeys(text for text in stripped if text))
+
+
+def topic_candidates(entry: Dict[str, object], requested_topic: Optional[str]) -> List[str]:
+    """List the topics to try when choosing a synset for ``entry``.
+
+    Args:
+        entry: Lexicon entry; its ``topics`` list is used when present.
+        requested_topic: Optional topic placed first in the result.
+
+    Returns:
+        Lower-cased, de-duplicated topics, with ``"general"`` removed.
+    """
+    # ``or []`` also covers an explicit JSON null for "topics".
+    topics = [str(topic).lower() for topic in entry.get("topics") or [] if topic]
+    if requested_topic:
+        topics.insert(0, requested_topic.lower())
+    return [topic for topic in dedupe(topics) if topic != "general"]
+
+
+def synset_text(synset: Dict[str, object]) -> str:
+    """Join a synset's glosses, categories and senses into one lower-case string.
+
+    Items are joined with single spaces, in that field order; missing or
+    empty fields contribute nothing.
+    """
+    parts = [
+        str(item)
+        for field in ("glosses", "categories", "senses")
+        for item in synset.get(field, []) or []
+    ]
+    return " ".join(parts).lower()
+
+
+def score_synset_for_topic(synset: Dict[str, object], topic: str) -> int:
+    """Score how well ``synset`` fits ``topic``.
+
+    The score adds 60 per strong domain, 25 per weak domain and -35 per
+    negative domain listed for the topic in ``TOPIC_DOMAIN_RULES``, plus 12
+    for every ``TOPIC_TEXT_KEYWORDS`` entry found in the synset text.
+    """
+    rules = TOPIC_DOMAIN_RULES.get(topic, {})
+    domains = {str(domain).upper() for domain in synset.get("domains", []) or []}
+    tier_weights = (("strong", 60), ("weak", 25), ("negative", -35))
+    domain_score = sum(
+        weight * len(domains.intersection(rules.get(tier, ())))
+        for tier, weight in tier_weights
+    )
+
+    text = synset_text(synset)
+    keyword_hits = sum(1 for keyword in TOPIC_TEXT_KEYWORDS.get(topic, ()) if keyword in text)
+    return domain_score + 12 * keyword_hits
+
+
+def choose_best_synset(
+    babelnet: Dict[str, object], entry: Dict[str, object], requested_topic: Optional[str]
+) -> Tuple[Optional[Dict[str, object]], Dict[str, int]]:
+    """Pick the synset that best matches the entry's candidate topics.
+
+    Args:
+        babelnet: Raw BabelNet block; only dict items of ``synsets`` are used.
+        entry: Lexicon entry whose topics are candidates.
+        requested_topic: Optional topic tried before the entry's own.
+
+    Returns:
+        ``(summary, topic_scores)``. ``summary`` is ``None`` when there are
+        no synsets. With no candidate topics the first synset is returned
+        with a topic score of 0 and empty ``topic_scores``. Otherwise
+        ``topic_scores`` maps each topic to its best synset score and
+        ``summary`` describes the highest-scoring (topic, synset) pair;
+        ties keep the earliest topic, then the earliest synset.
+    """
+    synsets = [item for item in babelnet.get("synsets", []) or [] if isinstance(item, dict)]
+    topics = topic_candidates(entry, requested_topic)
+    if not synsets:
+        return None, {}
+
+    if not topics:
+        return _summarize_synset(synsets[0], None, 0), {}
+
+    topic_scores: Dict[str, int] = {}
+    best_synset = None
+    best_topic = None
+    best_score = -10_000
+
+    for topic in topics:
+        # Score each synset once per topic and reuse the values for both
+        # the per-topic maximum and the overall winner.
+        scores = [score_synset_for_topic(synset, topic) for synset in synsets]
+        topic_scores[topic] = max(scores)
+        for synset, score in zip(synsets, scores):
+            if score > best_score:
+                best_score = score
+                best_topic = topic
+                best_synset = synset
+
+    if not best_synset:
+        return None, topic_scores
+
+    return _summarize_synset(best_synset, best_topic, best_score), topic_scores
+
+
+def _summarize_synset(
+    synset: Dict[str, object], topic: Optional[str], topic_score: int
+) -> Dict[str, object]:
+    """Build the ``best_synset`` summary dict for the chosen synset."""
+    return {
+        "id": synset.get("id"),
+        "topic": topic,
+        "topic_score": topic_score,
+        # A score of 40 or more counts as a strong topic match.
+        "strong_topic": topic_score >= 40,
+        "senses": synset.get("senses", []),
+        "glosses": synset.get("glosses", []),
+        "categories": synset.get("categories", []),
+        "domains": synset.get("domains", []),
+    }
+
+
+def normalize_babelnet_status(
+    entry: Dict[str, object], babelnet_entry: Optional[Dict[str, object]], requested_topic: Optional[str]
+) -> Dict[str, object]:
+    """Turn an archived BabelNet record into the ``babelnet`` block of an entry.
+
+    Returns:
+        A dict whose ``status`` is ``not_requested`` (no archived record),
+        ``api_error`` (the archived ``babelnet`` value is not a dict),
+        ``no_match`` (not matched), ``ambiguous`` (matched, but the chosen
+        synset has a topic score of 0 or less) or ``enriched``.
+    """
+    if not babelnet_entry:
+        return {"status": "not_requested"}
+
+    raw_babelnet = babelnet_entry.get("babelnet", {})
+    if not isinstance(raw_babelnet, dict):
+        return {"status": "api_error", "reason": "invalid_babelnet_payload"}
+
+    if not raw_babelnet.get("matched"):
+        return {
+            "status": "no_match",
+            "matched": False,
+            "reason": raw_babelnet.get("reason", "no_synsets"),
+            "synsets": [],
+        }
+
+    best_synset, topic_scores = choose_best_synset(raw_babelnet, entry, requested_topic)
+    if best_synset:
+        topic_score = int(best_synset.get("topic_score", 0))
+        status = "ambiguous" if topic_score <= 0 else "enriched"
+        selected_synset_id = best_synset.get("id")
+        selected_topic = best_synset.get("topic")
+        strong_topic = bool(best_synset.get("strong_topic", False))
+    else:
+        # Matched but no synset chosen: the status stays "enriched".
+        topic_score = 0
+        status = "enriched"
+        selected_synset_id = None
+        selected_topic = None
+        strong_topic = False
+
+    return {
+        "status": status,
+        "matched": True,
+        "selected_synset_id": selected_synset_id,
+        "selected_topic": selected_topic,
+        "topic_score": topic_score,
+        "strong_topic": strong_topic,
+        "synset_refs": raw_babelnet.get("synset_refs", []),
+        "synsets": raw_babelnet.get("synsets", []),
+        "topic_scores": topic_scores,
+        "best_synset": best_synset,
+        "source_generated_at": babelnet_entry.get("babelnet_generated_at"),
+    }
+
+
+def build_babelnet_index(payload: Dict[str, object]) -> Dict[Tuple[str, str], Dict[str, object]]:
+    """Index the archive's dict entries by ``entry_key``.
+
+    Non-dict items are ignored; when two entries share a key the later one
+    wins.
+    """
+    return {
+        entry_key(record): record
+        for record in payload.get("entries", []) or []
+        if isinstance(record, dict)
+    }
+
+
+def build_enriched_lexicon(args: argparse.Namespace) -> Dict[str, object]:
+    """Merge the semantic lexicon with the archived BabelNet enrichments.
+
+    Args:
+        args: Object exposing ``semantic`` and ``babelnet`` (paths) and
+            ``topic`` (optional requested topic).
+
+    Returns:
+        ``{"meta": ..., "entries": ...}`` where every semantic entry is
+        deep-copied and given a normalised ``babelnet`` block;
+        ``meta["babelnet_status_counts"]`` counts entries per status.
+
+    Raises:
+        ValueError: When the semantic lexicon is not a dict with ``entries``.
+    """
+    semantic_payload = load_json(args.semantic, {})
+    if not isinstance(semantic_payload, dict) or "entries" not in semantic_payload:
+        raise ValueError(f"Lessico semantico non valido: {args.semantic}")
+
+    # A missing archive, or one with an unexpected shape, means "nothing
+    # enriched yet".
+    babelnet_payload = load_json(args.babelnet, {"entries": []})
+    if not isinstance(babelnet_payload, dict):
+        babelnet_payload = {"entries": []}
+
+    babelnet_index = build_babelnet_index(babelnet_payload)
+    enriched_entries = []
+    status_counts: Dict[str, int] = {}
+
+    for entry in semantic_payload.get("entries", []) or []:
+        if not isinstance(entry, dict):
+            continue
+        enriched = deepcopy(entry)
+        babelnet_entry = babelnet_index.get(entry_key(enriched))
+        enriched["babelnet"] = normalize_babelnet_status(enriched, babelnet_entry, args.topic)
+        status = str(enriched["babelnet"].get("status", "unknown"))
+        status_counts[status] = status_counts.get(status, 0) + 1
+        enriched_entries.append(enriched)
+
+    # "meta" may be null or malformed in the source file; chaining .get()
+    # on it would raise AttributeError.
+    semantic_meta = semantic_payload.get("meta")
+    language = semantic_meta.get("language", "it") if isinstance(semantic_meta, dict) else "it"
+
+    return {
+        "meta": {
+            "language": language,
+            "version": 1,
+            "base_lexicon": args.semantic.name,
+            "babelnet_source": args.babelnet.name if args.babelnet.exists() else None,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "requested_topic": args.topic,
+            "entry_count": len(enriched_entries),
+            "babelnet_status_counts": status_counts,
+        },
+        "entries": enriched_entries,
+    }
+
+
+def main() -> None:
+    """Build the enriched lexicon, write it to ``--output`` and print a summary."""
+    args = parse_args()
+    payload = build_enriched_lexicon(args)
+    write_json(args.output, payload)
+
+    meta = payload["meta"]
+    report = (
+        f"Lessico arricchito generato: {args.output}",
+        f"Voci totali: {meta['entry_count']}",
+        f"Stati BabelNet: {meta['babelnet_status_counts']}",
+    )
+    for line in report:
+        print(line)
+
+
+if __name__ == "__main__":
+    main()
--- a/build_llm_rescue_patch.py
+++ b/build_llm_rescue_patch.py
@@ -0,0 +1,429 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import time
+import urllib.error
+import urllib.request
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+# Default input: the prioritized review file, looked up next to this script.
+PRIORITY_INPUT_PATH = Path(__file__).with_name("to_be_review_priority.json")
+# Default output: the rescue patch that is generated or updated, next to this script.
+PATCH_OUTPUT_PATH = Path(__file__).with_name("llm_rescue_patch.json")
+
+
+# System message sent with every chat request. Its "Formato JSON obbligatorio"
+# section lists the keys that normalize_llm_payload() reads back from the reply
+# (definition, topics, semantic_tags, confidence, needs_human_review, notes).
+SYSTEM_PROMPT = """Sei un lessicografo italiano che prepara definizioni sintetiche per cruciverba.
+Ricevi un lemma con parte del discorso e contesto semantico parziale.
+Devi proporre una definizione breve in italiano, topic plausibili e tag semantici.
+
+Regole:
+- Rispondi solo con JSON valido.
+- La definizione deve essere concisa, naturale e utile per un cruciverba.
+- Evita di includere il lemma o derivati ovvi del lemma nella definizione.
+- Se il termine sembra raro, ambiguo, refuso o poco affidabile, abbassa la confidenza e segnala needs_human_review=true.
+- I topic devono essere pochi, in inglese semplice minuscolo con underscore se serve.
+- I semantic_tags devono essere pochi, descrittivi e in italiano o inglese semplice.
+- Non inventare dettagli enciclopedici troppo specifici se non supportati dal contesto.
+
+Formato JSON obbligatorio:
+{
+  "definition": "...",
+  "topics": ["topic1", "topic2"],
+  "semantic_tags": ["tag1", "tag2"],
+  "confidence": 0.0,
+  "needs_human_review": true,
+  "notes": "..."
+}
+"""
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the LLM rescue batch builder.
+
+    Returns:
+        A namespace with the input/output paths, the batch selection
+        (``limit``, ``bucket``), the LLM endpoint settings (``provider``,
+        ``api_base``, ``api_key_env``, ``model``, ``temperature``), the pause
+        between requests (``sleep``) and the ``skip_existing``/``dry_run`` flags.
+    """
+    # One (flag, add_argument keyword arguments) pair per option, in help order.
+    option_table = (
+        ("--input", {"type": Path, "default": PRIORITY_INPUT_PATH, "help": "File to_be_review_priority.json di partenza."}),
+        ("--output", {"type": Path, "default": PATCH_OUTPUT_PATH, "help": "Patch JSON da generare o aggiornare."}),
+        (
+            "--limit",
+            {
+                "type": int,
+                "default": 50,
+                "help": "Numero massimo di voci da processare nel lotto. Usa 0 per tutte le voci selezionate.",
+            },
+        ),
+        (
+            "--bucket",
+            {"default": "red", "help": "Bucket di priorita da considerare: red, orange, yellow oppure all."},
+        ),
+        (
+            "--provider",
+            {
+                "choices": ("openai_compatible", "ollama"),
+                "default": "openai_compatible",
+                "help": "Tipo di endpoint LLM da usare.",
+            },
+        ),
+        (
+            "--api-base",
+            {
+                "default": "",
+                "help": "Endpoint API. Per openai_compatible: .../v1/chat/completions. Per ollama: .../api/chat.",
+            },
+        ),
+        (
+            "--api-key-env",
+            {"default": "OPENAI_API_KEY", "help": "Nome della variabile d'ambiente che contiene la API key."},
+        ),
+        ("--model", {"default": "gpt-4.1-mini", "help": "Nome del modello da interrogare."}),
+        ("--temperature", {"type": float, "default": 0.2, "help": "Temperatura della richiesta LLM."}),
+        ("--sleep", {"type": float, "default": 0.5, "help": "Pausa tra una richiesta e la successiva."}),
+        (
+            "--skip-existing",
+            {
+                "action": "store_true",
+                "help": "Salta le voci gia presenti nell'output con status drafted/reviewed/done.",
+            },
+        ),
+        (
+            "--dry-run",
+            {
+                "action": "store_true",
+                "help": "Non chiama alcun LLM: prepara solo il lotto e marca le voci come selected.",
+            },
+        ),
+    )
+
+    cli = argparse.ArgumentParser(
+        description="Costruisce una patch di rescue lessicale usando un LLM su un lotto di voci "
+        "prioritarie tratte da to_be_review_priority.json."
+    )
+    for flag, settings in option_table:
+        cli.add_argument(flag, **settings)
+    return cli.parse_args()
+
+
+def load_json(path: Path, default: object) -> object:
+    """Return the parsed JSON content of ``path``, or ``default`` when the path does not exist."""
+    if path.exists():
+        raw_text = path.read_text(encoding="utf-8")
+        return json.loads(raw_text)
+    return default
+
+
+def write_json(path: Path, payload: object) -> None:
+    """Write ``payload`` to ``path`` as 2-space indented UTF-8 JSON, keeping non-ASCII characters verbatim."""
+    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
+    path.write_text(serialized, encoding="utf-8")
+
+
+def build_record(entry: Dict[str, Any]) -> Dict[str, Any]:
+    """Create a fresh, still-pending rescue record from one priority-review entry.
+
+    The record copies the identifying and priority fields of ``entry``, gathers
+    the available semantic context under ``context`` and initialises every
+    ``rescue_*`` field to an empty value with ``status="pending"``.
+    """
+    wikt = entry.get("wiktextract") or {}
+    if isinstance(wikt, dict):
+        wikt_definitions = wikt.get("definitions") or []
+        wikt_hints = wikt.get("topic_hints", [])
+    else:
+        # A non-dict wiktextract value contributes no context at all.
+        wikt_definitions = []
+        wikt_hints = []
+
+    best_synset = entry.get("babelnet_best_synset") or {}
+    glosses = (best_synset.get("glosses") or []) if isinstance(best_synset, dict) else []
+
+    # Key order is kept stable because the record is serialized as-is.
+    record: Dict[str, Any] = {
+        key: entry.get(key) for key in ("form", "lemma", "pos", "priority_bucket", "priority_score")
+    }
+    record["review_reasons"] = entry.get("review_reasons", [])
+    record["current_topics"] = entry.get("topics", [])
+    record["current_definition"] = entry.get("preferred_definition", "")
+    record["current_source"] = entry.get("preferred_source", "")
+    record["context"] = {
+        "topic_suggestions": entry.get("topic_suggestions", []),
+        "semantic_glosses": entry.get("semantic_glosses", []),
+        "senses": entry.get("senses", []),
+        "wiktextract_definitions": wikt_definitions,
+        "wiktextract_topic_hints": wikt_hints,
+        "babelnet_glosses": glosses,
+    }
+    record.update(
+        rescue_definition="",
+        rescue_source="",
+        rescue_topics=[],
+        rescue_semantic_tags=[],
+        rescue_notes="",
+        confidence=0.0,
+        needs_human_review=True,
+        status="pending",
+    )
+    return record
+
+
+def build_user_prompt(entry: Dict[str, Any]) -> str:
+    """Render the user message for one rescue record.
+
+    The message is a fixed Italian instruction followed by a pretty-printed
+    JSON payload with the record's form, lemma, part of speech, current
+    topics/definition, review reasons and semantic context.
+    """
+    prompt_payload = {
+        "form": entry.get("form"),
+        "lemma": entry.get("lemma"),
+        "pos": entry.get("pos"),
+        "current_topics": entry.get("current_topics", []),
+        "review_reasons": entry.get("review_reasons", []),
+        "current_definition": entry.get("current_definition", ""),
+        "context": entry.get("context") or {},
+    }
+    rendered_payload = json.dumps(prompt_payload, ensure_ascii=False, indent=2)
+    header_lines = [
+        "Genera una proposta di rescue lessicale per questa voce italiana.",
+        "Se il termine sembra un refuso o una variante dubbia, segnalalo nelle notes.",
+        "Payload:",
+    ]
+    return "\n".join(header_lines) + "\n" + rendered_payload
+
+
+def resolve_api_base(args: argparse.Namespace) -> str:
+    """Return the chat endpoint URL: the explicit ``--api-base`` if given, else the provider default."""
+    explicit_base = args.api_base
+    if not explicit_base:
+        is_ollama = args.provider == "ollama"
+        return "http://localhost:11434/api/chat" if is_ollama else "https://api.openai.com/v1/chat/completions"
+    return explicit_base
+
+
+def request_openai_compatible(
+    api_base: str,
+    api_key: str,
+    model: str,
+    temperature: float,
+    user_prompt: str,
+) -> str:
+    """Send one chat-completion request to an OpenAI-compatible endpoint.
+
+    Args:
+        api_base: Full URL of the chat completions endpoint.
+        api_key: Bearer token placed in the ``Authorization`` header.
+        model: Model name to query.
+        temperature: Sampling temperature of the request.
+        user_prompt: Content of the user message; ``SYSTEM_PROMPT`` is the system message.
+
+    Returns:
+        The stripped text of ``choices[0].message.content``.
+
+    Raises:
+        RuntimeError: on an HTTP error status, or when the response body does
+            not contain a usable ``choices[0].message.content``.
+    """
+    payload = {
+        "model": model,
+        "temperature": temperature,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt},
+        ],
+    }
+    request = urllib.request.Request(
+        api_base,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}",
+        },
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=90) as response:
+            body = json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:
+        detail = exc.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"OpenAI-compatible HTTP {exc.code}: {detail}") from exc
+
+    # A 200 reply can still have an unexpected shape (empty choices, error
+    # object, null content). Report it as RuntimeError, which the batch loop
+    # handles, instead of leaking KeyError/IndexError/TypeError.
+    try:
+        content = body["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as exc:
+        raise RuntimeError(
+            f"OpenAI-compatible: risposta priva di choices[0].message.content: {str(body)[:300]}"
+        ) from exc
+    if content is None:
+        raise RuntimeError(f"OpenAI-compatible: contenuto nullo nella risposta: {str(body)[:300]}")
+    return str(content).strip()
+
+
+def request_ollama(
+    api_base: str,
+    model: str,
+    temperature: float,
+    user_prompt: str,
+) -> str:
+    """Send one non-streaming chat request to an Ollama endpoint.
+
+    Args:
+        api_base: Full URL of the Ollama chat endpoint.
+        model: Model name to query.
+        temperature: Sampling temperature, passed through ``options``.
+        user_prompt: Content of the user message; ``SYSTEM_PROMPT`` is the system message.
+
+    Returns:
+        The stripped text of ``message.content`` (empty when the content is
+        missing or null).
+
+    Raises:
+        RuntimeError: on an HTTP error status, or when the response body is
+            not a JSON object carrying a ``message`` object.
+    """
+    payload = {
+        "model": model,
+        "stream": False,
+        "options": {"temperature": temperature},
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt},
+        ],
+    }
+    request = urllib.request.Request(
+        api_base,
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=90) as response:
+            body = json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:
+        detail = exc.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"Ollama HTTP {exc.code}: {detail}") from exc
+
+    # Guard the shape explicitly: a list/string body or a non-object message
+    # would otherwise raise AttributeError, which the batch loop does not expect.
+    message = body.get("message") if isinstance(body, dict) else None
+    if not isinstance(message, dict):
+        raise RuntimeError(f"Ollama: risposta priva di message.content: {str(body)[:300]}")
+    # A null content is treated like a missing one, not as the literal "None".
+    return str(message.get("content") or "").strip()
+
+
+def extract_json_object(text: str) -> Dict[str, Any]:
+    """Extract the JSON object embedded in an LLM reply.
+
+    Decoding starts at the first ``{`` and stops at the end of that object, so
+    surrounding prose or code fences are ignored even when the trailing text
+    itself contains braces.
+
+    Raises:
+        ValueError: if the text has no ``{ ... }`` span at all, or (as
+            ``json.JSONDecodeError``) if the object starting at the first
+            brace is not valid JSON.
+    """
+    start = text.find("{")
+    end = text.rfind("}")
+    if start == -1 or end == -1 or end <= start:
+        raise ValueError("Risposta LLM senza oggetto JSON riconoscibile.")
+    # raw_decode parses exactly one value and ignores whatever follows it.
+    # Slicing up to the last "}" instead would fail on replies such as
+    # '{"a": 1} (nota: {x})'.
+    parsed, _ = json.JSONDecoder().raw_decode(text, start)
+    return parsed
+
+
+def normalize_llm_payload(payload: Dict[str, Any], model: str) -> Dict[str, Any]:
+    """Convert the raw JSON object returned by the LLM into rescue-record fields.
+
+    The model output is untrusted, so every field is coerced defensively:
+
+    - ``definition`` / ``notes``: ``null`` becomes an empty string (not "None").
+    - ``topics`` / ``semantic_tags``: a bare string is treated as a one-item
+      list; anything that is not a string, list or tuple is ignored.
+    - ``confidence``: clamped to ``[0.0, 1.0]``; non-numeric or NaN values
+      count as ``0.0`` and force ``needs_human_review``.
+    - ``needs_human_review``: missing or ``null`` means ``True``.
+
+    Returns:
+        A dict with the ``rescue_*`` fields, ``confidence``,
+        ``needs_human_review`` and ``status="drafted"``.
+    """
+
+    def _as_text(value: Any) -> str:
+        return "" if value is None else str(value).strip()
+
+    def _as_items(value: Any) -> List[str]:
+        if isinstance(value, str):
+            # Iterating a string would yield single characters.
+            value = [value]
+        elif not isinstance(value, (list, tuple)):
+            value = []
+        return [text for text in (_as_text(item) for item in value) if text]
+
+    confidence_is_valid = True
+    try:
+        confidence = float(payload.get("confidence") or 0.0)
+    except (TypeError, ValueError):
+        confidence, confidence_is_valid = 0.0, False
+    if confidence != confidence:  # NaN is the only float that differs from itself
+        confidence, confidence_is_valid = 0.0, False
+    confidence = max(0.0, min(1.0, confidence))
+
+    review_flag = payload.get("needs_human_review")
+    needs_review = True if review_flag is None else bool(review_flag)
+
+    return {
+        "rescue_definition": _as_text(payload.get("definition")),
+        "rescue_source": f"llm_rescue:{model}",
+        "rescue_topics": [topic.lower() for topic in _as_items(payload.get("topics"))],
+        "rescue_semantic_tags": _as_items(payload.get("semantic_tags")),
+        "rescue_notes": _as_text(payload.get("notes")),
+        "confidence": confidence,
+        "needs_human_review": needs_review or not confidence_is_valid,
+        "status": "drafted",
+    }
+
+
+def should_skip_existing(entry: Dict[str, Any]) -> bool:
+    """Tell whether a stored record already has a drafted, reviewed or done status (case-insensitive)."""
+    current_status = str(entry.get("status", "")).lower()
+    for settled_status in ("drafted", "reviewed", "done"):
+        if current_status == settled_status:
+            return True
+    return False
+
+
+def generate_patch(args: argparse.Namespace) -> Dict[str, Any]:
+    """Build (or update) the LLM rescue patch for one batch of priority entries.
+
+    Entries are read from ``args.input`` (``practical_entries`` when present,
+    otherwise ``entries``), filtered by priority bucket and capped at
+    ``args.limit`` (0 or less means no cap). Each selected entry is sent to the
+    configured LLM unless ``args.dry_run`` is set, in which case it is only
+    marked as ``selected``. Records already stored in ``args.output`` are
+    carried over, so the returned payload always describes the whole patch.
+
+    A failure on a single entry never aborts the batch: the record is kept
+    with ``status="error"`` and the failure text in ``rescue_notes``.
+
+    Returns:
+        A dict with ``meta`` (batch settings and counters) and ``entries``
+        (all records, sorted by status, then descending priority score, then form).
+
+    Raises:
+        ValueError: if the priority file does not contain a JSON object.
+        RuntimeError: if a real run uses the openai_compatible provider and
+            the API key environment variable is empty.
+    """
+    source_payload = load_json(args.input, {"entries": []})
+    if not isinstance(source_payload, dict):
+        raise ValueError(f"File priority non valido: {args.input}")
+
+    output_payload = load_json(args.output, {"entries": []})
+    if not isinstance(output_payload, dict):
+        output_payload = {"entries": []}
+
+    # Keys are normalized like the lookups below (strip + lower); otherwise a
+    # stored form with stray whitespace would never match its source entry.
+    existing_by_form = {
+        str(entry.get("form", "")).strip().lower(): entry
+        for entry in output_payload.get("entries", []) or []
+        if isinstance(entry, dict) and entry.get("form")
+    }
+
+    bucket = str(args.bucket or "red").strip().lower()
+    source_entries = source_payload.get("practical_entries") or source_payload.get("entries") or []
+
+    max_items = int(args.limit)
+    unlimited = max_items <= 0
+    selected: List[Dict[str, Any]] = []
+    skipped_preselection = 0
+    for entry in source_entries:
+        if not isinstance(entry, dict):
+            continue
+        if bucket != "all" and str(entry.get("priority_bucket", "")).lower() != bucket:
+            continue
+        form = str(entry.get("form", "")).strip().lower()
+        if not form:
+            continue
+        existing = existing_by_form.get(form)
+        if args.skip_existing and existing and should_skip_existing(existing):
+            skipped_preselection += 1
+            continue
+        selected.append(entry)
+        if not unlimited and len(selected) >= max(1, max_items):
+            break
+
+    api_base = resolve_api_base(args)
+    api_key = os.environ.get(args.api_key_env, "") if args.provider == "openai_compatible" else ""
+    if not args.dry_run and args.provider == "openai_compatible" and not api_key:
+        raise RuntimeError(
+            f"Variabile d'ambiente {args.api_key_env} non valorizzata per provider openai_compatible."
+        )
+
+    merged_records: List[Dict[str, Any]] = []
+    processed = 0
+    skipped_existing = 0
+    for source_entry in selected:
+        form_key = str(source_entry.get("form", "")).strip().lower()
+        existing = existing_by_form.get(form_key)
+        record = dict(existing) if isinstance(existing, dict) else build_record(source_entry)
+
+        # Defensive: the pre-selection above already drops these records.
+        if args.skip_existing and existing and should_skip_existing(existing):
+            skipped_existing += 1
+            merged_records.append(record)
+            continue
+
+        if args.dry_run:
+            record["status"] = "selected"
+            record["rescue_source"] = f"llm_rescue:{args.model}"
+            merged_records.append(record)
+            processed += 1
+            continue
+
+        user_prompt = build_user_prompt(record)
+        try:
+            if args.provider == "ollama":
+                raw_text = request_ollama(api_base, args.model, args.temperature, user_prompt)
+            else:
+                raw_text = request_openai_compatible(
+                    api_base,
+                    api_key,
+                    args.model,
+                    args.temperature,
+                    user_prompt,
+                )
+            llm_payload = extract_json_object(raw_text)
+            record.update(normalize_llm_payload(llm_payload, args.model))
+        except Exception as exc:
+            # Per-entry isolation boundary. The patch is only written after the
+            # loop, so any escaping exception (dropped connection, truncated
+            # body, unexpected response shape, ...) would discard every result
+            # already obtained in this run. The failure is stored on the record
+            # and printed below instead.
+            record["rescue_source"] = f"llm_rescue:{args.model}"
+            record["rescue_notes"] = f"errore_llm: {exc}"
+            record["status"] = "error"
+            record["needs_human_review"] = True
+        merged_records.append(record)
+        processed += 1
+        print(
+            f"[{processed}/{len(selected)}] {record.get('form')}: "
+            f"status={record.get('status')} conf={record.get('confidence', 0.0)}"
+        )
+        if record.get("status") == "error" and record.get("rescue_notes"):
+            print(f"  dettaglio: {record.get('rescue_notes')}")
+        if args.sleep > 0:
+            time.sleep(args.sleep)
+
+    # Carry over every stored record that was not part of this batch.
+    seen_forms = {str(item.get("form", "")).strip().lower() for item in merged_records}
+    for form_key, existing in existing_by_form.items():
+        if form_key not in seen_forms:
+            merged_records.append(existing)
+
+    merged_records.sort(
+        key=lambda item: (
+            {"pending": 0, "selected": 1, "error": 2, "drafted": 3, "reviewed": 4, "done": 5}.get(
+                str(item.get("status", "pending")),
+                9,
+            ),
+            -int(item.get("priority_score", 0) or 0),
+            str(item.get("form", "")),
+        )
+    )
+
+    return {
+        "meta": {
+            "language": "it",
+            "version": 1,
+            "base_priority": args.input.name,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "batch_bucket": bucket,
+            "batch_limit": int(args.limit),
+            "provider": args.provider,
+            "api_base": api_base,
+            "model": args.model,
+            "dry_run": bool(args.dry_run),
+            "entry_count": len(merged_records),
+            "processed_count": processed,
+            "skipped_existing": skipped_existing,
+            "skipped_preselection": skipped_preselection,
+        },
+        "entries": merged_records,
+    }
+
+
+def main() -> None:
+    """Command-line entry point: generate the LLM rescue patch, save it and print the run counters."""
+    cli_args = parse_args()
+    patch_payload = generate_patch(cli_args)
+    write_json(cli_args.output, patch_payload)
+
+    counters = patch_payload["meta"]
+    print(f"Patch LLM rescue generata: {cli_args.output}")
+    print(f"Voci nel file: {counters['entry_count']}")
+    print(f"Voci processate in questo run: {counters['processed_count']}")
+    print(f"Voci saltate per skip-existing: {counters['skipped_existing']}")
+    print(f"Voci escluse gia in pre-selezione: {counters['skipped_preselection']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/build_review_priority.py
+++ b/build_review_priority.py
@@ -0,0 +1,182 @@
+from __future__ import annotations
+
+import argparse
+import json
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+# Default input/output files, looked up next to this script.
+REVIEW_INPUT_PATH = Path(__file__).with_name("to_be_review.json")
+PRIORITY_OUTPUT_PATH = Path(__file__).with_name("to_be_review_priority.json")
+
+# Score contribution of each review reason; priority_score() sums the weights
+# of an entry's reasons and counts any reason not listed here as 5.
+REASON_WEIGHTS = {
+    "no_viable_definition": 100,
+    "proper_noun_collision": 90,
+    "candidate_mentions_answer": 85,
+    "function_word": 80,
+    "very_short_word": 75,
+    "wiktextract_missing": 55,
+    "only_general_topics": 45,
+    "flagged_by_refined_stage": 35,
+    "unresolved_sense_topics": 30,
+    "babelnet_ambiguous": 20,
+}
+
+# Score contribution of the entry's preferred_source (lower-cased); sources not
+# listed here add nothing.
+SOURCE_WEIGHTS = {
+    "fallback": 50,
+    "babelnet": 18,
+    "semantic": 8,
+    "wiktextract": 6,
+}
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options (``--input``, ``--output``, ``--top``) of the priority builder."""
+    cli = argparse.ArgumentParser(
+        description="Costruisce un file di review prioritizzato partendo da to_be_review.json."
+    )
+    cli.add_argument("--input", type=Path, default=REVIEW_INPUT_PATH, help="File to_be_review.json di partenza.")
+    cli.add_argument(
+        "--output", type=Path, default=PRIORITY_OUTPUT_PATH, help="File to_be_review_priority.json da generare."
+    )
+    cli.add_argument(
+        "--top", type=int, default=0, help="Numero massimo di voci da tenere nel file priority. 0 = tutte."
+    )
+    namespace = cli.parse_args()
+    return namespace
+
+
+def load_json(path: Path) -> Dict[str, object]:
+    """Read ``path`` as UTF-8 text and return the parsed JSON value."""
+    file_text = path.read_text(encoding="utf-8")
+    parsed = json.loads(file_text)
+    return parsed
+
+
+def write_json(path: Path, payload: Dict[str, object]) -> None:
+    """Write ``payload`` to ``path`` as 2-space indented UTF-8 JSON, keeping non-ASCII characters verbatim."""
+    json_text = json.dumps(payload, indent=2, ensure_ascii=False)
+    path.write_text(json_text, encoding="utf-8")
+
+
+def priority_score(entry: Dict[str, object]) -> Tuple[int, int, int, int, str]:
+    """Compute the sort key that ranks a review entry (larger tuples are more urgent).
+
+    Returns:
+        ``(score, severe_count, is_fallback, -len(preferred_definition), form)``
+        where ``score`` combines reason weights, source weight, missing
+        definition/clue bonuses and a penalty for forms of three letters or fewer.
+    """
+    reason_list = [str(item) for item in entry.get("review_reasons", []) or []]
+    source_name = str(entry.get("preferred_source", "")).lower()
+    definition = str(entry.get("preferred_definition", ""))
+    clue_map = entry.get("clue_definitions", {}) or {}
+    form_text = str(entry.get("form", ""))
+
+    total = SOURCE_WEIGHTS.get(source_name, 0)
+    for reason in reason_list:
+        total += REASON_WEIGHTS.get(reason, 5)
+
+    if definition == "":
+        total += 40
+
+    # Entries with no usable clue (or just one) need attention sooner.
+    filled_clues = sum(1 for value in clue_map.values() if str(value).strip())
+    if filled_clues == 0:
+        total += 20
+    elif filled_clues == 1:
+        total += 8
+
+    total += 3 * min(len(reason_list), 5)
+
+    # Very short forms are pushed down the ranking.
+    form_length = len(form_text)
+    if form_length <= 2:
+        total -= 120
+    elif form_length == 3:
+        total -= 35
+
+    severe_reasons = ("no_viable_definition", "proper_noun_collision", "candidate_mentions_answer")
+    severe_count = len([reason for reason in reason_list if reason in severe_reasons])
+    is_fallback = 1 if source_name == "fallback" else 0
+    return (total, severe_count, is_fallback, -len(definition), form_text)
+
+
+def priority_bucket(entry: Dict[str, object]) -> str:
+    """Classify an entry as ``red``, ``orange`` or ``yellow`` from its review reasons.
+
+    The first matching tier wins; an entry with none of the listed reasons is ``yellow``.
+    """
+    found = {str(item) for item in entry.get("review_reasons", []) or []}
+    tiers = (
+        ("red", ("no_viable_definition", "proper_noun_collision", "candidate_mentions_answer")),
+        ("orange", ("function_word", "very_short_word", "wiktextract_missing", "only_general_topics")),
+    )
+    for colour, trigger_reasons in tiers:
+        if not found.isdisjoint(trigger_reasons):
+            return colour
+    return "yellow"
+
+
+def compact_entry(entry: Dict[str, object], score_tuple: Tuple[int, int, int, int, str]) -> Dict[str, object]:
+    """Return a shallow copy of ``entry`` annotated with ``priority_score`` and ``priority_bucket``.
+
+    Only the first element of ``score_tuple`` (the numeric score) is stored.
+    """
+    return {
+        **entry,
+        "priority_score": score_tuple[0],
+        "priority_bucket": priority_bucket(entry),
+    }
+
+
+def build_priority_review(args: argparse.Namespace) -> Dict[str, object]:
+    """Rank the entries of the review file and build the priority payload.
+
+    Entries are sorted by ``priority_score`` (most urgent first), optionally
+    truncated to ``args.top``, and annotated with score and bucket.
+    ``practical_entries`` is the subset whose form is longer than two characters.
+
+    Returns:
+        A dict with ``meta`` (counts per bucket and the 12 most frequent
+        reasons), ``entries`` and ``practical_entries``.
+
+    Raises:
+        ValueError: if the input is not a JSON object with an ``entries`` key.
+    """
+    payload = load_json(args.input)
+    if not isinstance(payload, dict) or "entries" not in payload:
+        raise ValueError(f"File review non valido: {args.input}")
+
+    entries = [entry for entry in payload.get("entries", []) or [] if isinstance(entry, dict)]
+
+    # Score each entry once and reuse the tuple both as sort key and for the
+    # stored priority_score. The sort is stable, so ties keep their input order.
+    scored = [(priority_score(entry), entry) for entry in entries]
+    scored.sort(key=lambda pair: pair[0], reverse=True)
+
+    if args.top > 0:
+        scored = scored[: args.top]
+
+    compact_entries = [compact_entry(entry, score_tuple) for score_tuple, entry in scored]
+
+    practical_entries = [
+        item
+        for item in compact_entries
+        if len(str(item.get("form", ""))) > 2
+    ]
+
+    bucket_counter = Counter(item["priority_bucket"] for item in compact_entries)
+    practical_bucket_counter = Counter(item["priority_bucket"] for item in practical_entries)
+    reason_counter: Counter = Counter()
+    for item in compact_entries:
+        # "or []" mirrors priority_score/priority_bucket: an explicit null
+        # review_reasons must not crash the statistics.
+        for reason in item.get("review_reasons") or []:
+            reason_counter[str(reason)] += 1
+
+    return {
+        "meta": {
+            "language": "it",
+            "version": 1,
+            "base_review": args.input.name,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "entry_count": len(compact_entries),
+            "bucket_counts": dict(bucket_counter),
+            "practical_entry_count": len(practical_entries),
+            "practical_bucket_counts": dict(practical_bucket_counter),
+            "top_reason_counts": dict(reason_counter.most_common(12)),
+        },
+        "entries": compact_entries,
+        "practical_entries": practical_entries,
+    }
+
+
+def main() -> None:
+    """Command-line entry point: build the prioritized review file, save it and print a summary."""
+    cli_args = parse_args()
+    priority_payload = build_priority_review(cli_args)
+    write_json(cli_args.output, priority_payload)
+
+    summary = priority_payload["meta"]
+    print(f"Review priority generato: {cli_args.output}")
+    print(f"Voci nel priority file: {summary['entry_count']}")
+    print(f"Bucket: {summary['bucket_counts']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/build_treccani_rescue_patch.py
+++ b/build_treccani_rescue_patch.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import argparse
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List
+
+
+# Default input: the prioritized review file, looked up next to this script.
+PRIORITY_INPUT_PATH = Path(__file__).with_name("to_be_review_priority.json")
+# Default output: the rescue patch that is generated or updated, next to this script.
+PATCH_OUTPUT_PATH = Path(__file__).with_name("treccani_rescue_patch.json")
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options (``--input``, ``--output``, ``--limit``, ``--bucket``) of the batch extractor."""
+    cli = argparse.ArgumentParser(
+        description="Estrae un lotto prioritario dal file to_be_review_priority.json per preparare una patch "
+        "manuale/assistita di rescue lessicale."
+    )
+    cli.add_argument(
+        "--input", type=Path, default=PRIORITY_INPUT_PATH, help="File to_be_review_priority.json di partenza."
+    )
+    cli.add_argument("--output", type=Path, default=PATCH_OUTPUT_PATH, help="Patch JSON da generare o aggiornare.")
+    cli.add_argument("--limit", type=int, default=100, help="Numero massimo di voci da preparare nel lotto.")
+    cli.add_argument(
+        "--bucket", default="red", help="Bucket di priorita da considerare: red, orange, yellow oppure all."
+    )
+    namespace = cli.parse_args()
+    return namespace
+
+
+def load_json(path: Path, default: object) -> object:
+    """Return the parsed JSON content of ``path``; a path that does not exist yields ``default``."""
+    file_is_present = path.exists()
+    return json.loads(path.read_text(encoding="utf-8")) if file_is_present else default
+
+
+def write_json(path: Path, payload: object) -> None:
+    """Write ``payload`` to ``path`` as 2-space indented UTF-8 JSON, keeping non-ASCII characters verbatim."""
+    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
+    path.write_text(rendered, encoding="utf-8")
+
+
+def build_record(entry: Dict[str, object]) -> Dict[str, object]:
+    """Create a fresh, still-pending rescue record from one priority-review entry.
+
+    Identifying and priority fields are copied from ``entry``; every
+    ``rescue_*`` field starts empty, with ``rescue_source`` preset to
+    ``treccani_rescue`` and ``status`` to ``pending``.
+    """
+    # Key order is kept stable because the record is serialized as-is.
+    record: Dict[str, object] = {
+        key: entry.get(key) for key in ("form", "lemma", "pos", "priority_bucket", "priority_score")
+    }
+    record["review_reasons"] = entry.get("review_reasons", [])
+    record["current_topics"] = entry.get("topics", [])
+    record["current_definition"] = entry.get("preferred_definition", "")
+    record["current_source"] = entry.get("preferred_source", "")
+    record.update(
+        rescue_definition="",
+        rescue_source="treccani_rescue",
+        rescue_topics=[],
+        rescue_semantic_tags=[],
+        rescue_notes="",
+        status="pending",
+    )
+    return record
+
+
+def build_patch(args: argparse.Namespace) -> Dict[str, object]:
+    """Prepare (or update) the manual rescue patch for one batch of priority entries.
+
+    Entries are read from ``args.input`` (``practical_entries`` when present,
+    otherwise ``entries``), filtered by priority bucket and capped at
+    ``args.limit`` (at least one entry is always taken when any matches).
+    Records already stored in ``args.output`` are reused unchanged, so manual
+    edits survive; new entries get a blank record from ``build_record``.
+
+    Returns:
+        A dict with ``meta`` (batch settings and entry count) and ``entries``
+        (sorted by status, then descending priority score, then form).
+
+    Raises:
+        ValueError: if the priority file does not contain a JSON object.
+    """
+    payload = load_json(args.input, {"entries": []})
+    if not isinstance(payload, dict):
+        raise ValueError(f"File priority non valido: {args.input}")
+
+    existing_patch = load_json(args.output, {"entries": []})
+    if not isinstance(existing_patch, dict):
+        existing_patch = {"entries": []}
+
+    # Keys are normalized like the lookups below (strip + lower); otherwise a
+    # stored form with stray whitespace would never match its source entry and
+    # a duplicate blank record would be added on every run.
+    existing_by_form = {
+        str(entry.get("form", "")).strip().lower(): entry
+        for entry in existing_patch.get("entries", []) or []
+        if isinstance(entry, dict) and entry.get("form")
+    }
+
+    bucket = str(args.bucket or "red").strip().lower()
+    source_entries = payload.get("practical_entries") or payload.get("entries") or []
+    batch_size = max(1, int(args.limit))
+
+    selected: List[Dict[str, object]] = []
+    for entry in source_entries:
+        if not isinstance(entry, dict):
+            continue
+        if bucket != "all" and str(entry.get("priority_bucket", "")).lower() != bucket:
+            continue
+        form = str(entry.get("form", "")).strip().lower()
+        if not form:
+            continue
+        selected.append(entry)
+        if len(selected) >= batch_size:
+            break
+
+    merged_records = []
+    seen = set()
+    for entry in selected:
+        form = str(entry.get("form", "")).strip().lower()
+        if form in existing_by_form:
+            merged_records.append(existing_by_form[form])
+        else:
+            merged_records.append(build_record(entry))
+        seen.add(form)
+
+    # Carry over every stored record that was not part of this batch.
+    for form, entry in existing_by_form.items():
+        if form not in seen:
+            merged_records.append(entry)
+
+    merged_records.sort(
+        key=lambda item: (
+            {"pending": 0, "drafted": 1, "reviewed": 2, "done": 3}.get(str(item.get("status", "pending")), 9),
+            -int(item.get("priority_score", 0) or 0),
+            str(item.get("form", "")),
+        )
+    )
+
+    return {
+        "meta": {
+            "language": "it",
+            "version": 1,
+            "base_priority": args.input.name,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "batch_bucket": bucket,
+            "batch_limit": int(args.limit),
+            "entry_count": len(merged_records),
+        },
+        "entries": merged_records,
+    }
+
+
+def main() -> None:
+    """Command-line entry point: build the rescue patch batch, save it and print a summary."""
+    cli_args = parse_args()
+    patch_payload = build_patch(cli_args)
+    write_json(cli_args.output, patch_payload)
+
+    entry_total = patch_payload["meta"]["entry_count"]
+    print(f"Patch rescue generata: {cli_args.output}")
+    print(f"Voci nel lotto: {entry_total}")
+
+
+if __name__ == "__main__":
+    main()
--- a/clue_generator.py
+++ b/clue_generator.py
@@ -0,0 +1,423 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+
+from build_enriched_lexicon import ENRICHED_LEXICON_OUTPUT_PATH, TOPIC_DOMAIN_RULES, load_json
+from crossword_generator import HORIZONTAL, Placement
+
+
+@dataclass(frozen=True)
+class Clue:
+    """Immutable record describing one numbered clue of a crossword."""
+
+    number: int  # clue number
+    word: str  # presumably the answer the clue refers to - TODO confirm against the caller
+    direction: str  # presumably one of crossword_generator's direction constants (e.g. HORIZONTAL) - verify
+    x: int  # grid position; presumably the first cell of the word - verify against Placement
+    y: int
+    text: str  # clue text
+    source: str  # label of where the clue text came from
+
+
+@dataclass(frozen=True)
+class ClueCandidate:
+    """Immutable candidate clue text, with the metadata score_candidate() uses to rank it."""
+
+    text: str  # clue text, already cleaned by add_candidate()
+    source: str  # label of where the text came from
+    family: str  # origin family, e.g. "semantic_definition", "babelnet_gloss", "fallback"
+    difficulty_hint: str  # "direct", "balanced" or "oblique", as returned by candidate_hint()
+    topic_score: int  # topic relevance; score_candidate() rewards values > 0 and, more, >= 40
+    strong_topic: bool  # when True, score_candidate() adds a small bonus
+
+
+# Accepted difficulty spellings (numeric levels and names) mapped to the four
+# canonical levels; normalize_difficulty() falls back to "medium" for anything else.
+DIFFICULTY_ALIASES = {
+    "1": "easy",
+    "2": "medium",
+    "3": "hard",
+    "4": "expert",
+    "5": "expert",
+    "easy": "easy",
+    "medium": "medium",
+    "hard": "hard",
+    "expert": "expert",
+}
+
+# Lower-case fragments of placeholder clue texts; score_candidate() heavily
+# penalizes any candidate containing one of them.
+GENERIC_CLUE_PATTERNS = (
+    "termine da ricavare dagli incroci",
+    "termine lessicale collegato",
+    "collegato a:",
+)
+
+
+def load_enriched_entries(path: Path = ENRICHED_LEXICON_OUTPUT_PATH) -> Dict[str, Dict[str, object]]:
+    """Load the enriched lexicon and index its entries by lower-cased ``form``.
+
+    Returns an empty dict when the payload is not a JSON object; entries that
+    are not dicts or have no ``form`` are skipped, and a later entry with the
+    same form replaces an earlier one.
+    """
+    payload = load_json(path, {"entries": []})
+    index: Dict[str, Dict[str, object]] = {}
+    if not isinstance(payload, dict):
+        return index
+    for entry in payload.get("entries", []) or []:
+        if isinstance(entry, dict) and entry.get("form"):
+            index[str(entry.get("form", "")).lower()] = entry
+    return index
+
+
+def normalize_difficulty(value: Optional[str]) -> str:
+    """Map a user-supplied difficulty (name or numeric level) to a canonical level, defaulting to ``medium``."""
+    lookup_key = str(value or "medium").strip().lower()
+    if lookup_key in DIFFICULTY_ALIASES:
+        return DIFFICULTY_ALIASES[lookup_key]
+    return "medium"
+
+
+def clean_definition(text: str, answer: str) -> str:
+    """Turn a raw definition into a presentable clue sentence.
+
+    Bracketed annotations are dropped, whitespace is collapsed, whole-word
+    occurrences of ``answer`` are replaced by "questa parola", the first
+    letter is capitalized and a final period is appended.
+
+    Args:
+        text: Raw definition or gloss; ``None`` or empty yields ``""``.
+        answer: The crossword answer to mask. Only standalone occurrences are
+            masked, so longer words that merely contain the answer stay
+            readable. An empty answer masks nothing.
+
+    Returns:
+        The cleaned clue, or ``""`` when nothing is left after cleaning.
+    """
+    clue = str(text or "")
+    clue = re.sub(r"\[[^\]]*\]", " ", clue)
+    clue = re.sub(r"\s+", " ", clue).strip(" .;:-")
+    if not clue:
+        return ""
+    if answer:
+        # Lookarounds instead of a bare substring match: masking "ala" inside
+        # "malattia" would produce garbage, and an empty pattern would insert
+        # the placeholder between every character.
+        whole_word = rf"(?<!\w){re.escape(answer)}(?!\w)"
+        clue = re.sub(whole_word, "questa parola", clue, flags=re.IGNORECASE)
+    clue = re.sub(r"\(\s*\)", "", clue)
+    clue = re.sub(r"\s+,", ",", clue)
+    clue = re.sub(r"\s+;", ";", clue)
+    if clue and clue[0].islower():
+        clue = clue[0].upper() + clue[1:]
+    return clue + "."
+
+
+def synset_has_strong_topic_domain(synset: Dict[str, object], topic: Optional[str]) -> bool:
+    """Tell whether ``synset`` belongs to one of the topic's strong domains.
+
+    Returns ``True`` without checking when no topic (or ``general``) is
+    requested, or when the topic defines no strong domains. Domain names are
+    compared case-insensitively.
+    """
+    if topic == "general" or not topic:
+        return True
+
+    topic_rules = TOPIC_DOMAIN_RULES.get(topic, {})
+    required = {str(domain).upper() for domain in topic_rules.get("strong", ())}
+    if len(required) == 0:
+        return True
+
+    synset_domains = synset.get("domains", []) or []
+    return any(str(domain).upper() in required for domain in synset_domains)
+
+
+def text_contains_answer(text: str, answer: str) -> bool:
+    """Tell whether ``answer`` occurs anywhere in ``text``, ignoring case (substring match)."""
+    literal_answer = re.compile(re.escape(answer), re.IGNORECASE)
+    return literal_answer.search(text) is not None
+
+
+def directness_score(text: str) -> int:
+    """Estimate how plainly a clue states what the answer is.
+
+    Each category keyword found in the lower-cased text is worth 8 points; an
+    explanatory marker ("cioè", "ossia", "ovvero") adds 4 points once.
+    """
+    category_keywords = (
+        "strumento",
+        "veicolo",
+        "animale",
+        "pianta",
+        "titolo",
+        "edificio",
+        "persona",
+        "luogo",
+        "malattia",
+        "farmaco",
+        "mezzo",
+        "parte di",
+    )
+    explanatory_markers = ("cioè", "ossia", "ovvero")
+
+    haystack = text.lower()
+    keyword_hits = sum(1 for keyword in category_keywords if keyword in haystack)
+    marker_bonus = 0
+    for marker in explanatory_markers:
+        if marker in haystack:
+            marker_bonus = 4
+            break
+    return 8 * keyword_hits + marker_bonus
+
+
+def preferred_length_range(difficulty: str) -> Tuple[int, int]:
+    """Return the ``(min, max)`` clue length preferred for a difficulty; unknown levels use the expert range."""
+    known_ranges = (
+        ("easy", (24, 90)),
+        ("medium", (20, 75)),
+        ("hard", (16, 60)),
+    )
+    for level, bounds in known_ranges:
+        if difficulty == level:
+            return bounds
+    return 14, 50
+
+
+def score_candidate(candidate: ClueCandidate, answer: str, difficulty: str) -> int:
+    """Rank a clue candidate for ``answer`` at the given difficulty (higher is better).
+
+    Candidates with no text or fewer than 12 characters get ``-10_000``.
+    Otherwise the score adds up independent adjustments: placeholder texts and
+    texts containing the answer are penalized, lengths inside the preferred
+    range are rewarded, directness helps on easy/medium and hurts on
+    hard/expert, and the candidate's family, difficulty hint and topic
+    relevance contribute bonuses.
+    """
+    clue_text = candidate.text
+    if not clue_text or len(clue_text) < 12:
+        return -10_000
+
+    clue_lower = clue_text.lower()
+    clue_length = len(clue_text)
+    total = 0
+
+    # Placeholder clues and clues that give the answer away.
+    if any(pattern in clue_lower for pattern in GENERIC_CLUE_PATTERNS):
+        total -= 120
+    total += -140 if text_contains_answer(clue_text, answer) else 40
+
+    # Length: flat bonus inside the preferred range, proportional penalty outside
+    # (one point per excess character, half a point per missing one).
+    shortest, longest = preferred_length_range(difficulty)
+    if shortest <= clue_length <= longest:
+        total += 28
+    elif clue_length > longest:
+        total -= abs(clue_length - longest)
+    else:
+        total -= abs(shortest - clue_length) // 2
+
+    # Directness is good for easy puzzles and bad for difficult ones.
+    directness = directness_score(clue_text)
+    if difficulty == "easy":
+        total += 2 * directness
+    elif difficulty == "medium":
+        total += directness
+    elif difficulty == "hard":
+        total -= max(0, directness - 6)
+    else:
+        total -= directness
+
+    bonus_by_family = {
+        "semantic_definition": 56,
+        "semantic_gloss": 34,
+        "refined_sense": 30,
+        "babelnet_best_gloss": 18,
+        "babelnet_gloss": 10,
+        "fallback": 0,
+    }
+    total += bonus_by_family.get(candidate.family, 0)
+
+    hints_by_difficulty = {
+        "easy": {"direct", "balanced"},
+        "medium": {"balanced", "direct"},
+        "hard": {"balanced", "oblique"},
+        "expert": {"oblique", "balanced"},
+    }
+    if candidate.difficulty_hint in hints_by_difficulty.get(difficulty, {"balanced"}):
+        total += 18
+
+    # Multi-part definitions (with ";") suit easy puzzles only.
+    has_semicolon = ";" in clue_text
+    if has_semicolon and difficulty == "easy":
+        total += 8
+    if has_semicolon and difficulty in {"hard", "expert"}:
+        total -= 8
+
+    # Topic relevance; BabelNet glosses with no topic signal are strongly demoted.
+    if candidate.topic_score >= 40:
+        total += 18
+    elif candidate.topic_score > 0:
+        total += 8
+    elif candidate.family in {"babelnet_best_gloss", "babelnet_gloss"}:
+        total -= 140
+
+    if candidate.strong_topic:
+        total += 10
+
+    # Register abbreviations such as "(fig.)" are unwelcome on easy/medium.
+    if difficulty in {"easy", "medium"} and re.search(r"\((?:mil|fig|lett|fam)\.\)", clue_lower):
+        total -= 28
+
+    # Very long clues are penalized regardless of difficulty (penalties stack).
+    if clue_length > 120:
+        total -= 45
+    if clue_length > 180:
+        total -= 90
+
+    return total
+
+
+def candidate_hint(text: str, family: str) -> str:
+    """Classify a clue text as "direct", "oblique" or "balanced".
+
+    Semantic definitions/glosses of at most 70 characters are "direct".
+    Otherwise a figurative/literary marker makes the text "oblique", a text
+    longer than 85 characters is "direct", and anything else is "balanced".
+    """
+    semantic_families = ("semantic_definition", "semantic_gloss")
+    if len(text) <= 70 and family in semantic_families:
+        return "direct"
+
+    folded = text.lower()
+    for marker in ("fig.", "figurato", "poetico", "letterario"):
+        if marker in folded:
+            return "oblique"
+
+    return "direct" if len(text) > 85 else "balanced"
+
+
+def add_candidate(
+    candidates: List[ClueCandidate],
+    seen: set[Tuple[str, str]],
+    *,
+    text: str,
+    answer: str,
+    source: str,
+    family: str,
+    topic_score: int = 0,
+    strong_topic: bool = False,
+) -> None:
+    """Clean *text* and append it to *candidates* unless it is empty or a duplicate.
+
+    The text goes through clean_definition(); a falsy result is dropped.
+    Duplicates are detected on (lower-cased cleaned text, family) using
+    *seen*, which is updated in place together with *candidates*.
+    """
+    cleaned = clean_definition(text, answer)
+    dedupe_key = (cleaned.lower(), family) if cleaned else None
+    if dedupe_key is None or dedupe_key in seen:
+        return
+
+    seen.add(dedupe_key)
+    clue = ClueCandidate(
+        text=cleaned,
+        source=source,
+        family=family,
+        difficulty_hint=candidate_hint(cleaned, family),
+        topic_score=topic_score,
+        strong_topic=strong_topic,
+    )
+    candidates.append(clue)
+
+
+def semantic_candidates(entry: Dict[str, object], answer: str) -> List[ClueCandidate]:
+    """Build clue candidates from the ``semantic`` section of a lexicon entry.
+
+    Synset definitions become "semantic_definition" candidates and plain
+    glosses become "semantic_gloss" candidates, both with source "semantic".
+
+    Args:
+        entry: lexicon entry; ``entry["semantic"]`` is expected to be a dict
+            with optional ``synsets`` (list of dicts) and ``glosses`` lists.
+        answer: the answer word, forwarded to add_candidate().
+
+    Returns:
+        The de-duplicated candidates, or an empty list when the section is
+        missing or is not a dict.
+    """
+    semantic = entry.get("semantic", {})
+    if not isinstance(semantic, dict):
+        return []
+
+    candidates: List[ClueCandidate] = []
+    seen: set[Tuple[str, str]] = set()
+
+    for synset in semantic.get("synsets", []) or []:
+        if not isinstance(synset, dict):
+            continue
+        definition = synset.get("definition")
+        if not definition:
+            # A missing/null definition must not be stringified into "None".
+            continue
+        add_candidate(
+            candidates,
+            seen,
+            text=str(definition),
+            answer=answer,
+            source="semantic",
+            family="semantic_definition",
+        )
+
+    for gloss in semantic.get("glosses", []) or []:
+        if not gloss:
+            # Same guard for null/empty glosses.
+            continue
+        add_candidate(
+            candidates,
+            seen,
+            text=str(gloss),
+            answer=answer,
+            source="semantic",
+            family="semantic_gloss",
+        )
+
+    return candidates
+
+
+def babelnet_candidates(entry: Dict[str, object], answer: str, topic: Optional[str]) -> List[ClueCandidate]:
+    """Build clue candidates from the ``babelnet`` section of a lexicon entry.
+
+    Only entries whose BabelNet status is "enriched" or "ambiguous" are used.
+    Glosses of ``best_synset`` become "babelnet_best_gloss" candidates carrying
+    that synset's topic score. Glosses of the other synsets become
+    "babelnet_gloss" candidates; when a specific *topic* is requested (anything
+    other than None, "" or "general") only synsets accepted by
+    synset_has_strong_topic_domain() are kept, and they get topic score 40.
+
+    Args:
+        entry: lexicon entry.
+        answer: the answer word, forwarded to add_candidate().
+        topic: requested crossword topic, if any.
+
+    Returns:
+        The de-duplicated candidates (possibly empty).
+    """
+    babelnet = entry.get("babelnet", {})
+    if not isinstance(babelnet, dict) or babelnet.get("status") not in {"enriched", "ambiguous"}:
+        return []
+
+    candidates: List[ClueCandidate] = []
+    seen: set[Tuple[str, str]] = set()
+
+    best_synset = babelnet.get("best_synset", {})
+    if isinstance(best_synset, dict):
+        topic_score = int(best_synset.get("topic_score", 0) or 0)
+        strong_topic = bool(best_synset.get("strong_topic")) or synset_has_strong_topic_domain(best_synset, topic)
+        for gloss in best_synset.get("glosses", []) or []:
+            if not gloss:
+                # A null/empty gloss must not be stringified into "None".
+                continue
+            add_candidate(
+                candidates,
+                seen,
+                text=str(gloss),
+                answer=answer,
+                source="babelnet",
+                family="babelnet_best_gloss",
+                topic_score=topic_score,
+                strong_topic=strong_topic,
+            )
+
+    # Loop-invariant: is a specific (non-general) topic being requested?
+    themed = bool(topic) and topic != "general"
+    for synset in babelnet.get("synsets", []) or []:
+        if not isinstance(synset, dict):
+            continue
+        if themed and not synset_has_strong_topic_domain(synset, topic):
+            continue
+        # Every synset that survives the filter above in themed mode is, by
+        # construction, a strong topic match; no need to ask the helper again.
+        topic_score = 40 if themed else 0
+        for gloss in synset.get("glosses", []) or []:
+            if not gloss:
+                continue
+            add_candidate(
+                candidates,
+                seen,
+                text=str(gloss),
+                answer=answer,
+                source="babelnet",
+                family="babelnet_gloss",
+                topic_score=topic_score,
+                strong_topic=themed,
+            )
+
+    return candidates
+
+
+def refined_sense_candidates(entry: Dict[str, object], answer: str) -> List[ClueCandidate]:
+    """Build "refined_sense" clue candidates from ``entry["senses"]``.
+
+    Each sense dict contributes its ``definition``; the sense ``confidence``
+    (0.0 when missing) is mapped to a topic score of ``int(confidence * 100)``
+    and counts as a strong topic from 0.75 upwards.
+
+    Args:
+        entry: lexicon entry.
+        answer: the answer word, forwarded to add_candidate().
+
+    Returns:
+        The de-duplicated candidates, or an empty list when ``senses`` is not
+        a list.
+    """
+    senses = entry.get("senses", [])
+    if not isinstance(senses, list):
+        return []
+
+    candidates: List[ClueCandidate] = []
+    seen: set[Tuple[str, str]] = set()
+    for sense in senses:
+        if not isinstance(sense, dict):
+            continue
+        definition = sense.get("definition")
+        if not definition:
+            # A missing/null definition must not be stringified into "None".
+            continue
+        confidence = float(sense.get("confidence", 0.0) or 0.0)
+        add_candidate(
+            candidates,
+            seen,
+            text=str(definition),
+            answer=answer,
+            # A null source falls back to "refined" instead of the label "None".
+            source=str(sense.get("source") or "refined"),
+            family="refined_sense",
+            topic_score=int(confidence * 100),
+            strong_topic=confidence >= 0.75,
+        )
+    return candidates
+
+
+def fallback_definition(entry: Dict[str, object], answer: str) -> str:
+    """Return a generic clue for an entry that has no usable definition.
+
+    When the entry lists topics other than "general", the clue names them
+    (together with the lower-cased part of speech, or "lessicale" when the
+    part of speech is missing); otherwise a fixed placeholder is returned.
+
+    Args:
+        entry: lexicon entry; ``pos`` and ``topics`` are read and either may
+            be absent or null.
+        answer: unused; kept so the signature matches the other clue builders.
+    """
+    # `or` guards turn JSON nulls into empty values instead of the text "none"
+    # (pos) or a TypeError while iterating (topics).
+    pos = str(entry.get("pos") or "").lower()
+    topics = ", ".join(
+        str(topic)
+        for topic in entry.get("topics") or []
+        if topic and str(topic).lower() != "general"
+    )
+    if topics:
+        return f"Termine {pos or 'lessicale'} collegato all'ambito: {topics}."
+    return "Termine da ricavare dagli incroci."
+
+
+def all_candidates(entry: Dict[str, object], answer: str, topic: Optional[str]) -> List[ClueCandidate]:
+    """Collect the clue candidates of *entry* from every source.
+
+    The result lists semantic candidates first, then refined senses, then
+    BabelNet glosses (the only source that takes *topic* into account).
+    """
+    return [
+        *semantic_candidates(entry, answer),
+        *refined_sense_candidates(entry, answer),
+        *babelnet_candidates(entry, answer, topic),
+    ]
+
+
+def choose_candidate(candidates: Sequence[ClueCandidate], answer: str, difficulty: str) -> Optional[ClueCandidate]:
+    """Return the best-ranked candidate, or None when *candidates* is empty.
+
+    Candidates are compared on score_candidate() first, then on topic score,
+    then on text length (longer wins). On a full tie the earliest candidate
+    in *candidates* is returned.
+    """
+
+    def rank(candidate: ClueCandidate) -> Tuple[int, int, int]:
+        return (
+            score_candidate(candidate, answer, difficulty),
+            candidate.topic_score,
+            len(candidate.text),
+        )
+
+    return max(candidates, key=rank, default=None)
+
+
+def definition_for_word(
+    word: str,
+    entries: Dict[str, Dict[str, object]],
+    topic: Optional[str] = None,
+    difficulty: Optional[str] = None,
+) -> tuple[str, str]:
+    """Return ``(clue text, clue source)`` for *word*.
+
+    The word is looked up lower-cased in *entries*. A missing or empty entry
+    yields the fixed placeholder clue with source "fallback". Otherwise the
+    best candidate for the normalized difficulty is used, and when there is
+    none the entry's fallback_definition() is returned with source "fallback".
+    """
+    answer = word.lower()
+    entry = entries.get(answer, {})
+    if entry:
+        level = normalize_difficulty(difficulty)
+        best = choose_candidate(all_candidates(entry, answer, topic), answer, level)
+        if best:
+            return best.text, best.source
+        return fallback_definition(entry, answer), "fallback"
+
+    return "Termine da ricavare dagli incroci.", "fallback"
+
+
+def generate_clues(
+    placements: Iterable[Placement],
+    entries: Dict[str, Dict[str, object]],
+    topic: Optional[str] = None,
+    difficulty: Optional[str] = None,
+) -> List[Clue]:
+    """Build one Clue per placement, numbered from 1 in iteration order.
+
+    The clue text and source come from definition_for_word(); the direction
+    is exported as "orizzontale" for HORIZONTAL placements and "verticale"
+    for every other direction.
+    """
+    clues: List[Clue] = []
+    numbered_placements = enumerate(placements, start=1)
+    for number, placement in numbered_placements:
+        clue_text, clue_source = definition_for_word(placement.word, entries, topic, difficulty)
+        is_horizontal = placement.direction == HORIZONTAL
+        clue = Clue(
+            number=number,
+            word=placement.word,
+            direction="orizzontale" if is_horizontal else "verticale",
+            x=placement.x,
+            y=placement.y,
+            text=clue_text,
+            source=clue_source,
+        )
+        clues.append(clue)
+    return clues
--- a/crossword_contract.md
+++ b/crossword_contract.md
@@ -0,0 +1,209 @@
+# Contratto JSON del Cruciverba
+
+Questo documento definisce il formato di scambio tra:
+
+- `brain`: il motore che genera e compila il cruciverba
+- `client`: web app, backend, servizio PDF o altra macchina remota che richiede un cruciverba
+
+L'obiettivo è avere un payload:
+
+- completo
+- stabile
+- espandibile
+- riusabile per stampa PDF, gioco web e archiviazione
+
+## Flusso
+
+1. Il client invia una `request` JSON al motore.
+2. Il motore risponde con una `response` JSON completa del cruciverba.
+3. Lo stesso JSON di risposta può essere:
+   - salvato a database
+   - convertito in PDF
+   - renderizzato in una pagina web interattiva
+   - riaperto in futuro senza rigenerare il cruciverba
+
+## Principi di progettazione
+
+- Ogni cruciverba ha un `crossword_id` univoco.
+- La `request` conserva i parametri di generazione originali.
+- La `response` include sia la griglia giocabile sia la soluzione.
+- Le parole hanno metadati ricchi: posizione, direzione, clue, tema, pos, fonte clue.
+- Le coordinate sono sempre assolute e 0-based nella griglia normalizzata esportata.
+- La griglia esportata è rettangolare e normalizzata: niente coordinate negative.
+- Il formato supporta versioning con `schema_version`.
+
+## Request
+
+Campi principali:
+
+- `schema_version`: versione del contratto
+- `request_id`: id della richiesta lato client
+- `requested_at`: timestamp ISO 8601
+- `generator`: configurazione del motore
+- `output`: preferenze di output
+- `client_context`: metadati opzionali del chiamante
+
+### `generator`
+
+- `topic`: stringa o lista di topic
+- `difficulty`: alias testuale del livello di difficoltà (es. `easy`, `medium`, `hard`, `expert`)
+- `seed`: opzionale, per riproducibilità
+- `initial_word_count`
+- `themed_fill_count`
+- `target_empty_ratio`
+- `diffxy`
+- `time_limit_seconds`
+- `max_candidates_per_word`
+- `lexicon_file`
+- `definitions_enabled`
+- `definition_style`: per future varianti clue
+- `preferred_output_language`
+
+### `output`
+
+- `include_solution_grid`
+- `include_clue_sources`
+- `include_diagnostics`
+- `include_generation_log`
+- `format_hints`
+
+## Response
+
+Campi principali:
+
+- `schema_version`
+- `request_id`
+- `crossword_id`
+- `generated_at`
+- `status`
+- `generator`
+- `summary`
+- `grid`
+- `entries`
+- `clues`
+- `solution`
+- `diagnostics`
+- `artifacts`
+
+## Sezione `grid`
+
+- `rows`
+- `cols`
+- `cell_size_hint`
+- `cells`
+
+Ogni cella ha:
+
+- `row`
+- `col`
+- `kind`: `block` oppure `letter`
+- `solution`
+- `display`
+- `number`: numero clue se la cella apre una parola
+- `across_entry_id`
+- `down_entry_id`
+- `is_prefilled`
+
+Note:
+
+- `solution` contiene sempre la lettera corretta per celle attive.
+- `display` è vuoto per la scheda giocatore.
+- `number` serve per numerazione in stampa e web.
+
+## Sezione `entries`
+
+Ogni entry rappresenta una parola collocata in griglia.
+
+Campi:
+
+- `entry_id`
+- `number`
+- `direction`: `across` o `down`
+- `answer`
+- `answer_length`
+- `row`
+- `col`
+- `cells`: lista coordinate
+- `clue`
+- `clue_source`
+- `topics`
+- `pos`
+- `is_seed`
+- `added_by_filler`
+- `confidence`
+
+## Sezione `clues`
+
+Ridondante ma utile per client semplici.
+
+- `across`: lista clues orizzontali
+- `down`: lista clues verticali
+
+Ogni clue:
+
+- `number`
+- `entry_id`
+- `text`
+- `enumeration`: numero di lettere della risposta (es. `9` per `AMBULANZA`)
+- `topic_match`
+- `source`
+
+## Sezione `solution`
+
+- `grid_rows`: lista di stringhe, una per riga
+- `words`: elenco risposte
+
+`grid_rows` usa:
+
+- lettera maiuscola per cella piena
+- `#` per casella nera
+
+## Sezione `diagnostics`
+
+Serve a tuning, benchmark e debug.
+
+- `total_words`
+- `seed_words_requested`
+- `seed_words_placed`
+- `filler_words_added`
+- `intersections`
+- `filled_cells`
+- `empty_cells`
+- `empty_ratio`
+- `target_empty_ratio`
+- `topic_words`
+- `off_topic_words`
+- `pos_counts`
+- `runtime_lexicon`
+- `seed`
+- `generation_seconds`
+
+## Sezione `artifacts`
+
+URL o path futuri per file derivati.
+
+- `pdf_player`
+- `pdf_solution`
+- `thumbnail`
+- `html_preview`
+
+## Estensioni future previste
+
+- `difficulty_profile`: facile/medio/difficile per definizioni separate
+- `hints`: aiuti progressivi per singola parola
+- `theme_story`: testo introduttivo del cruciverba
+- `player_state`: salvataggio partita in corso
+- `stats`: tempi, errori, percentuali di completamento
+
+## Regola pratica consigliata
+
+La macchina "brain" deve esporre almeno due endpoint logici:
+
+- `POST /crosswords/generate`
+  - input: request JSON
+  - output: response JSON
+
+- `GET /crosswords/{crossword_id}`
+  - output: stessa response JSON salvata
+
+In questo modo il contratto resta identico sia via file sia via webservice.
--- a/crossword_contract_example_request.json
+++ b/crossword_contract_example_request.json
@@ -0,0 +1,37 @@
+{
+  "schema_version": "1.0",
+  "request_id": "req-2026-04-28-0001",
+  "requested_at": "2026-04-28T17:05:00+02:00",
+  "generator": {
+    "topic": [
+      "transport"
+    ],
+    "difficulty": "medium",
+    "seed": 2,
+    "initial_word_count": 19,
+    "themed_fill_count": 10,
+    "target_empty_ratio": 0.1667,
+    "diffxy": 7,
+    "time_limit_seconds": 8.0,
+    "max_candidates_per_word": 12,
+    "lexicon_file": "lexicon_it_curated_llm_aggressive.json",
+    "definitions_enabled": true,
+    "definition_style": "classic",
+    "preferred_output_language": "it"
+  },
+  "output": {
+    "include_solution_grid": true,
+    "include_clue_sources": true,
+    "include_diagnostics": true,
+    "include_generation_log": false,
+    "format_hints": {
+      "pdf_page_size": "A4",
+      "mobile_layout": true
+    }
+  },
+  "client_context": {
+    "channel": "web",
+    "user_locale": "it-IT",
+    "app_version": "alpha-1"
+  }
+}
--- a/crossword_contract_example_response.json
+++ b/crossword_contract_example_response.json
@@ -0,0 +1,138 @@
+{
+  "schema_version": "1.0",
+  "request_id": "req-2026-04-28-0001",
+  "crossword_id": "cw-2026-04-28-transport-0001",
+  "generated_at": "2026-04-28T17:06:42+02:00",
+  "status": "ok",
+  "generator": {
+    "topic": [
+      "transport"
+    ],
+    "difficulty": "medium",
+    "seed": 2,
+    "runtime_lexicon": "lexicon_it_curated_llm_aggressive.json"
+  },
+  "summary": {
+    "title": "Cruciverba a tema trasporti",
+    "subtitle": "Schema generato automaticamente",
+    "rows": 12,
+    "cols": 12,
+    "total_words": 6,
+    "intersections": 7
+  },
+  "grid": {
+    "rows": 12,
+    "cols": 12,
+    "cell_size_hint": 32,
+    "cells": [
+      {
+        "row": 0,
+        "col": 0,
+        "kind": "letter",
+        "solution": "A",
+        "display": "",
+        "number": 1,
+        "across_entry_id": "A1",
+        "down_entry_id": null,
+        "is_prefilled": false
+      },
+      {
+        "row": 0,
+        "col": 1,
+        "kind": "letter",
+        "solution": "M",
+        "display": "",
+        "number": null,
+        "across_entry_id": "A1",
+        "down_entry_id": "D2",
+        "is_prefilled": false
+      },
+      {
+        "row": 0,
+        "col": 2,
+        "kind": "block",
+        "solution": null,
+        "display": null,
+        "number": null,
+        "across_entry_id": null,
+        "down_entry_id": null,
+        "is_prefilled": false
+      }
+    ]
+  },
+  "entries": [
+    {
+      "entry_id": "A1",
+      "number": 1,
+      "direction": "across",
+      "answer": "AMBULANZA",
+      "answer_length": 9,
+      "row": 0,
+      "col": 0,
+      "cells": [
+        [0, 0],
+        [0, 1],
+        [0, 2]
+      ],
+      "clue": "Veicolo di soccorso sanitario.",
+      "clue_source": "semantic_definition",
+      "topics": [
+        "transport",
+        "health"
+      ],
+      "pos": "NOUN",
+      "is_seed": true,
+      "added_by_filler": false,
+      "confidence": 0.95
+    }
+  ],
+  "clues": {
+    "across": [
+      {
+        "number": 1,
+        "entry_id": "A1",
+        "text": "Veicolo di soccorso sanitario.",
+        "enumeration": 9,
+        "topic_match": true,
+        "source": "semantic_definition"
+      }
+    ],
+    "down": []
+  },
+  "solution": {
+    "grid_rows": [
+      "AM#ULA######",
+      "##B#########"
+    ],
+    "words": [
+      "AMBULANZA"
+    ]
+  },
+  "diagnostics": {
+    "seed_words_requested": 19,
+    "seed_words_placed": 19,
+    "filler_words_added": 5,
+    "filled_cells": 84,
+    "empty_cells": 18,
+    "empty_ratio": 0.1765,
+    "target_empty_ratio": 0.1667,
+    "topic_words": 21,
+    "off_topic_words": 3,
+    "pos_counts": {
+      "sostantivi": 20,
+      "aggettivi": 2,
+      "verbi": 1,
+      "avverbi": 0,
+      "preposizioni": 0,
+      "congiunzioni": 0,
+      "altri": 1
+    },
+    "generation_seconds": 124.6
+  },
+  "artifacts": {
+    "pdf_player": null,
+    "pdf_solution": null,
+    "thumbnail": null,
+    "html_preview": null
+  }
+}
--- a/crossword_filler.py
+++ b/crossword_filler.py
@@ -87,7 +87,12 @@ class CrosswordFiller:
        self.words_by_length = self._index_vocabulary(self.vocabulary)
        self.vocabulary_metadata = vocabulary_metadata or {}
        self.semantic_metadata = semantic_metadata or {}
-        self.selected_topic = selected_topic.strip().lower()
+        self.selected_topics = [
+            topic.strip().lower()
+            for topic in selected_topic.split(",")
+            if topic.strip()
+        ] or ["general"]
+        self.selected_topic = self.selected_topics[0]
        self.max_themed_fill_words = max(0, max_themed_fill_words)
        self.seed = seed
        self.rng = random.Random(seed)
@@ -333,7 +338,7 @@ class CrosswordFiller:
        return score

    def _semantic_topic_score(self, word: str) -> int:
-        if not self.selected_topic or self.selected_topic == "general":
+        if not self.selected_topics or self.selected_topics == ["general"]:
            return 0

        entry = self._semantic_entry(word)
@@ -350,9 +355,9 @@ class CrosswordFiller:
        semantic = entry.get("semantic", {})
        semantic_topics = {str(item).lower() for item in semantic.get("semantic_topics", [])}
        score = 0
-        if self.selected_topic in topics:
+        if any(topic in topics for topic in self.selected_topics):
            score += 4
-        if self.selected_topic in semantic_topics:
+        if any(topic in semantic_topics for topic in self.selected_topics):
            score += 6
        if "general" in topics:
            score += 1
--- a/curate_lexicon_alpha.py
+++ b/curate_lexicon_alpha.py
@@ -0,0 +1,611 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+
+from refine_lexicon_topics import REFINED_LEXICON_OUTPUT_PATH
+
+
+# Default output locations, next to this script (defaults of --output and --review-output).
+CURATED_LEXICON_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_curated.json")
+TO_BE_REVIEW_OUTPUT_PATH = Path(__file__).with_name("to_be_review.json")
+
+# Difficulty labels; length_window() and score_candidate() branch on these values.
+DIFFICULTIES = ("easy", "medium", "hard", "expert")
+
+# Known typos / garbled fragments -> corrected text, applied by normalize_text().
+# NOTE(review): "quantit" is a prefix of its own replacement "quantità" and of
+# the two longer keys listed before it, and "quantitaaa" is a prefix of
+# "quantitaaaa": a plain substring replacement would also hit correctly
+# spelled text and would never reach the longest key.
+TEXT_REPLACEMENTS = {
+    " ngrandimento": " ingrandimento",
+    "superificie": "superficie",
+    "quantitaaa": "quantità",
+    "quantitaaaa": "quantità",
+    "quantit": "quantità",
+    "sanit_militare": "sanità_militare",
+    " unaparola ": " una parola ",
+    "questa parola, ": "",
+    "questa parola; ": "",
+}
+
+# Regexes searched by suspicious_proper_noun_definition() in accent-folded,
+# lower-cased text; hence the unaccented spelling "citta".
+SUSPICIOUS_PROPER_PATTERNS = (
+    r"\bepisodio\b",
+    r"\bfilm\b",
+    r"\bserie tv\b",
+    r"\bfamiglia\b",
+    r"\bcomune italiano\b",
+    r"\bfrazione del comune\b",
+    r"\bcitta metropolitana\b",
+    r"\bpersonaggio\b",
+    r"\balbum\b",
+    r"\bcognome\b",
+    r"\bnome proprio\b",
+)
+
+# Topic -> substrings (mostly word stems) looked up by topic_alignment_score()
+# in accent-folded, lower-cased definition text.
+DOMAIN_HINTS = {
+    "religion": ("monastero", "abbazia", "sacerdot", "prete", "vescovo", "clero", "religios"),
+    "transport": ("veicolo", "motore", "aereo", "treno", "nave", "trasport", "rimorch", "reattor"),
+    "health": ("malat", "ferit", "ospedal", "medic", "sanitar", "cura", "paziente"),
+    "nature": ("animale", "pianta", "mare", "bosco", "albero", "fiore", "montagna", "acque", "salate"),
+    "geography": ("comune", "paese", "regione", "provincia", "isola", "citta", "territorio"),
+    "sea": ("acque", "salate", "superficie terrestre", "oceano"),
+}
+
+# Regexes searched by likely_abstract_detour() in accent-folded, lower-cased text.
+ABSTRACT_PATTERNS = (
+    r"\bgrande quantita\b",
+    r"\bfigurato\b",
+    r"\bsenso figurato\b",
+)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the curation script.
+
+    Options: --input, --output and --review-output (paths) and --max-review
+    (int, 0 meaning no limit).
+    """
+    parser = argparse.ArgumentParser(
+        description="Cura il lessico refined per la milestone alpha e separa i casi dubbi in to_be_review.json."
+    )
+
+    # The three path options differ only in flag, default and help text.
+    path_options = (
+        ("--input", REFINED_LEXICON_OUTPUT_PATH, "Lessico refined di partenza."),
+        ("--output", CURATED_LEXICON_OUTPUT_PATH, "Lessico curated da generare."),
+        (
+            "--review-output",
+            TO_BE_REVIEW_OUTPUT_PATH,
+            "File JSON con le voci che richiedono revisione umana.",
+        ),
+    )
+    for flag, default_path, help_text in path_options:
+        parser.add_argument(flag, type=Path, default=default_path, help=help_text)
+
+    parser.add_argument(
+        "--max-review",
+        type=int,
+        default=0,
+        help="Limite opzionale di voci da esportare in to_be_review.json. 0 = tutte.",
+    )
+    return parser.parse_args()
+
+
+def load_json(path: Path) -> Dict[str, object]:
+    """Read *path* as UTF-8 and return the decoded JSON document."""
+    with path.open(encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def write_json(path: Path, payload: Dict[str, object]) -> None:
+    """Serialize *payload* to *path* as UTF-8 JSON, indented by 2, non-ASCII kept as is."""
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    with path.open("w", encoding="utf-8") as handle:
+        handle.write(serialized)
+
+
+def dedupe(items: Iterable[str]) -> List[str]:
+    """Return the stripped, non-empty items without case-insensitive duplicates.
+
+    The first spelling of each item is kept and the original order is preserved.
+    """
+    first_spelling: Dict[str, str] = {}
+    for raw in items:
+        stripped = str(raw).strip()
+        if stripped:
+            # setdefault keeps the earliest spelling; dicts preserve insertion order.
+            first_spelling.setdefault(stripped.lower(), stripped)
+    return list(first_spelling.values())
+
+
+# Grave/acute accented vowels, lower and upper case, mapped to their base
+# letter. Built once at import time instead of on every call.
+_ASCII_FOLD_TABLE = str.maketrans(
+    "àáèéìíòóùúÀÁÈÉÌÍÒÓÙÚ",
+    "aaeeiioouuAAEEIIOOUU",
+)
+
+
+def ascii_fold(text: str) -> str:
+    """Return *text* with grave/acute accented vowels replaced by the plain vowel.
+
+    Case is preserved ("È" becomes "E"); every other character is left
+    untouched. Non-string input is converted with ``str()`` first.
+    """
+    return str(text).translate(_ASCII_FOLD_TABLE)
+
+
+def _apply_text_replacements(value: str) -> str:
+    """Apply TEXT_REPLACEMENTS to *value* in a single left-to-right pass."""
+    if not TEXT_REPLACEMENTS:
+        return value
+    # Longest keys first, so "quantitaaaa" wins over its prefix "quantitaaa".
+    keys = sorted(TEXT_REPLACEMENTS, key=len, reverse=True)
+    # A key ending in a word character may only match at the end of a word:
+    # otherwise "quantit" would also fire inside the correct "quantità".
+    pattern = "|".join(
+        re.escape(key) + (r"(?!\w)" if re.match(r"\w", key[-1]) else "")
+        for key in keys
+    )
+    # One pass only, so replaced text is never fed to another replacement.
+    return re.sub(pattern, lambda match: TEXT_REPLACEMENTS[match.group(0)], value)
+
+
+def normalize_text(text: str) -> str:
+    """Normalize a definition into a single capitalized sentence ending in ".".
+
+    Steps: fix the known typos listed in TEXT_REPLACEMENTS, collapse
+    whitespace, normalize the spacing around ";" and ",", trim surrounding
+    spaces and ``.;:-`` characters, upper-case a lower-case first letter and
+    append the final period.
+
+    Returns:
+        The normalized sentence, or "" when *text* is empty/None or holds
+        nothing but whitespace and punctuation.
+    """
+    value = str(text or "").strip()
+    if not value:
+        return ""
+    value = _apply_text_replacements(value)
+    value = re.sub(r"\s+", " ", value)
+    value = re.sub(r"\s*;\s*", "; ", value)
+    value = re.sub(r"\s*,\s*", ", ", value)
+    value = value.strip(" .;:-")
+    if not value:
+        # Nothing but punctuation was left: do not return a lone ".".
+        return ""
+    if value[0].islower():
+        value = value[0].upper() + value[1:]
+    return value + "."
+
+
+def split_definition_text(text: str) -> List[str]:
+    """Split a raw definition into normalized pieces.
+
+    The text is cut at semicolons followed by whitespace and at periods
+    followed by whitespace and a letter (matched case-insensitively); each
+    piece goes through normalize_text() and empty results are discarded.
+    """
+    raw = str(text or "").strip()
+    if not raw:
+        return []
+    fragments = re.split(r"\s*;\s+|\.\s+(?=[a-zàèéìòù])", raw, flags=re.IGNORECASE)
+    return [cleaned for cleaned in map(normalize_text, fragments) if cleaned]
+
+
+def entry_is_common_word(entry: Dict[str, object]) -> bool:
+    """Return True when the entry's form starts lower-case and it has no name tags."""
+    form = str(entry.get("form", ""))
+    if not form or not form[:1].islower():
+        return False
+    name_tags = entry.get("name_tags") or []
+    return not name_tags
+
+
+def definition_mentions_answer(text: str, answer: str) -> bool:
+    """Return True when *answer* occurs in *text*, ignoring case and accents.
+
+    This is a plain substring test on the accent-folded, lower-cased strings,
+    so the answer is also found inside longer words. An empty *answer* is
+    never considered mentioned.
+    """
+    normalized_answer = ascii_fold(answer).lower()
+    if not normalized_answer:
+        # The empty string is a substring of every text; without this guard
+        # an entry with no form would flag all of its definitions.
+        return False
+    return normalized_answer in ascii_fold(text).lower()
+
+
+def suspicious_proper_noun_definition(text: str, entry: Dict[str, object]) -> bool:
+    """Tell whether a common word's definition matches a SUSPICIOUS_PROPER_PATTERNS regex.
+
+    Entries that are not common words (see entry_is_common_word) are never
+    flagged. Matching is done on the accent-folded, lower-cased text.
+    """
+    if entry_is_common_word(entry):
+        folded = ascii_fold(text).lower()
+        for pattern in SUSPICIOUS_PROPER_PATTERNS:
+            if re.search(pattern, folded):
+                return True
+    return False
+
+
+def likely_abstract_detour(text: str) -> bool:
+    """Return True when the accent-folded, lower-cased text matches any ABSTRACT_PATTERNS regex."""
+    folded = ascii_fold(text).lower()
+    for pattern in ABSTRACT_PATTERNS:
+        if re.search(pattern, folded) is not None:
+            return True
+    return False
+
+
+def semantic_topics(entry: Dict[str, object]) -> List[str]:
+    """Return the lower-cased, de-duplicated topics attached to *entry*.
+
+    Topics come from ``semantic.semantic_topics`` followed by
+    ``wiktextract.topic_hints``; a section that is not a dict is ignored.
+    """
+    collected: List[str] = []
+    for section_name, field in (("semantic", "semantic_topics"), ("wiktextract", "topic_hints")):
+        section = entry.get(section_name, {})
+        if isinstance(section, dict):
+            collected.extend(str(item).lower() for item in section.get(field, []) or [])
+    return dedupe(collected)
+
+
+def lexical_topics(entry: Dict[str, object]) -> List[str]:
+    """Return the entry's own ``topics`` lower-cased, skipping falsy items."""
+    raw_topics = entry.get("topics", []) or []
+    return [str(topic).lower() for topic in filter(None, raw_topics)]
+
+
+def topic_alignment_score(text: str, entry: Dict[str, object]) -> int:
+    """Score how well *text* matches the entry's topics: 16 points per hint hit.
+
+    For every lexical or semantic topic of the entry, each DOMAIN_HINTS
+    substring found in the accent-folded, lower-cased text counts once.
+    """
+    folded = ascii_fold(text).lower()
+    entry_topics = set(lexical_topics(entry)) | set(semantic_topics(entry))
+    hits = sum(
+        1
+        for topic in entry_topics
+        for hint in DOMAIN_HINTS.get(topic, ())
+        if hint in folded
+    )
+    return 16 * hits
+
+
+def candidate_style(text: str) -> str:
+    """Classify a definition as "direct", "balanced" or "oblique".
+
+    Texts with a semicolon or longer than 90 characters are "direct"; texts
+    containing one of the listed markers are "balanced"; the rest "oblique".
+    """
+    if len(text) > 90 or ";" in text:
+        return "direct"
+
+    folded = ascii_fold(text).lower()
+    balanced_markers = ("chi ", "che ", "strumento", "veicolo", "titolo", "parte di")
+    for marker in balanced_markers:
+        if marker in folded:
+            return "balanced"
+    return "oblique"
+
+
+def length_window(difficulty: str) -> Tuple[int, int]:
+    """Return the preferred ``(min, max)`` definition length in characters.
+
+    Any value other than "easy", "medium" or "hard" gets the shortest window.
+    """
+    windows = {
+        "easy": (18, 90),
+        "medium": (18, 78),
+        "hard": (14, 62),
+    }
+    return windows.get(difficulty, (12, 55))
+
+
+def build_candidate(
+    text: str,
+    *,
+    source: str,
+    family: str,
+    confidence: float,
+    priority: int = 0,
+) -> Dict[str, object]:
+    """Wrap a definition in a candidate dict.
+
+    The stored text is the normalize_text() form of *text*, and ``style`` is
+    derived from that normalized text with candidate_style().
+    """
+    normalized = normalize_text(text)
+    candidate: Dict[str, object] = {
+        "text": normalized,
+        "source": source,
+        "family": family,
+        "confidence": confidence,
+    }
+    candidate["style"] = candidate_style(normalized)
+    candidate["priority"] = priority
+    return candidate
+
+
+def collect_candidates(entry: Dict[str, object]) -> List[Dict[str, object]]:
+    """Gather every candidate definition available for *entry*.
+
+    Sources are visited in a fixed order: semantic synset definitions,
+    semantic glosses, refined senses, BabelNet best-synset glosses and
+    wiktextract definitions. Each raw text is split with
+    split_definition_text() and wrapped by build_candidate(); within one
+    source the priority decreases with the item's position in its list.
+    Candidates are de-duplicated on (lower-cased text, family), keeping the
+    first occurrence.
+
+    Returns:
+        The candidate dicts in discovery order (possibly empty).
+    """
+    candidates: List[Dict[str, object]] = []
+    seen = set()
+
+    def add_pieces(raw: object, *, source: str, family: str, confidence: float, priority: int) -> None:
+        """Split *raw* and append each new, non-empty piece as a candidate."""
+        # A JSON null must not be stringified into the definition "None".
+        raw_text = "" if raw is None else str(raw)
+        for piece in split_definition_text(raw_text):
+            candidate = build_candidate(
+                piece,
+                source=source,
+                family=family,
+                confidence=confidence,
+                priority=priority,
+            )
+            key = (candidate["text"].lower(), candidate["family"])
+            if candidate["text"] and key not in seen:
+                seen.add(key)
+                candidates.append(candidate)
+
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        for index, synset in enumerate(semantic.get("synsets", []) or []):
+            if not isinstance(synset, dict):
+                continue
+            add_pieces(
+                synset.get("definition", ""),
+                source="semantic",
+                family="semantic_definition",
+                confidence=0.9,
+                priority=max(0, 100 - index * 12),
+            )
+        for index, gloss in enumerate(semantic.get("glosses", []) or []):
+            add_pieces(
+                gloss,
+                source="semantic_gloss",
+                family="semantic_gloss",
+                confidence=0.8,
+                priority=max(0, 90 - index * 10),
+            )
+
+    for index, sense in enumerate(entry.get("senses", []) or []):
+        if not isinstance(sense, dict):
+            continue
+        source = str(sense.get("source", "refined"))
+        add_pieces(
+            sense.get("definition", ""),
+            # Senses that originate from the semantic stage are relabelled "refined".
+            source="refined" if source == "semantic" else source,
+            family="refined_sense",
+            confidence=float(sense.get("confidence", 0.7) or 0.7),
+            priority=max(0, 80 - index * 8),
+        )
+
+    babelnet = entry.get("babelnet", {})
+    if isinstance(babelnet, dict):
+        best_synset = babelnet.get("best_synset", {})
+        if isinstance(best_synset, dict):
+            confidence = 0.85 if babelnet.get("status") == "enriched" else 0.55
+            for index, gloss in enumerate(best_synset.get("glosses", []) or []):
+                add_pieces(
+                    gloss,
+                    source="babelnet",
+                    family="babelnet_gloss",
+                    confidence=confidence,
+                    priority=max(0, 60 - index * 8),
+                )
+
+    wiktextract = entry.get("wiktextract", {})
+    if isinstance(wiktextract, dict):
+        definitions = wiktextract.get("definitions", []) or []
+        confidence = 0.78 if wiktextract.get("matched") else 0.45
+        for index, definition in enumerate(definitions):
+            add_pieces(
+                definition,
+                source="wiktextract",
+                family="wiktextract_definition",
+                confidence=confidence,
+                priority=max(0, 88 - index * 9),
+            )
+
+    return candidates
+
+
+def score_candidate(candidate: Dict[str, object], entry: Dict[str, object], difficulty: str) -> int:
+    """Return a heuristic score for using *candidate* as the definition of *entry*.
+
+    The score is a plain sum of bonuses and penalties; choose_best_candidate()
+    ranks candidates by it, highest first. Texts shorter than 12 characters
+    short-circuit to the sentinel value -10_000.
+
+    Args:
+        candidate: candidate dict as produced by build_candidate().
+        entry: the lexicon entry the candidate belongs to.
+        difficulty: difficulty label ("easy", "medium", "hard" or "expert").
+    """
+    text = str(candidate["text"])
+    answer = str(entry.get("form", "")).lower()
+    score = 0
+
+    source = str(candidate.get("source"))
+    family = str(candidate.get("family"))
+    confidence = float(candidate.get("confidence", 0.0) or 0.0)
+
+    # Very short texts are rejected outright with a sentinel score.
+    if len(text) < 12:
+        return -10_000
+
+    # Penalise definitions that contain the entry's own form; reward the rest.
+    if definition_mentions_answer(text, answer):
+        score -= 140
+    else:
+        score += 30
+
+    # Common word whose definition matches a SUSPICIOUS_PROPER_PATTERNS regex.
+    if suspicious_proper_noun_definition(text, entry):
+        score -= 220
+
+    # Definition matching one of the ABSTRACT_PATTERNS regexes.
+    if likely_abstract_detour(text):
+        score -= 80
+
+    # Length window: flat bonus inside it; outside, the penalty grows with the
+    # distance (full distance when too long, half the distance when too short).
+    min_len, max_len = length_window(difficulty)
+    if min_len <= len(text) <= max_len:
+        score += 24
+    else:
+        score -= abs(len(text) - max_len) if len(text) > max_len else abs(min_len - len(text)) // 2
+
+    # Bonus by source label; sources not listed here get 10.
+    source_bonus = {
+        "semantic": 55,
+        "semantic_gloss": 40,
+        "babelnet": 24,
+        "refined": 30,
+        "wiktextract": 52,
+    }
+    score += source_bonus.get(source, 10)
+
+    # Bonus by candidate family; families not listed here get nothing.
+    family_bonus = {
+        "semantic_definition": 30,
+        "semantic_gloss": 18,
+        "babelnet_gloss": 8,
+        "refined_sense": 22,
+        "wiktextract_definition": 28,
+    }
+    score += family_bonus.get(family, 0)
+
+    # Position-based priority assigned at collection time, plus up to 35
+    # points for a confidence of 1.0.
+    score += int(candidate.get("priority", 0) or 0)
+    score += int(confidence * 35)
+
+    # Topic alignment; an entry tagged with one of the listed topics whose
+    # definition hits none of the DOMAIN_HINTS is penalised.
+    alignment = topic_alignment_score(text, entry)
+    score += alignment
+    topical = set(lexical_topics(entry)) | set(semantic_topics(entry))
+    concrete_topics = topical.intersection({"religion", "transport", "health", "nature", "geography", "sea"})
+    if concrete_topics and alignment == 0:
+        score -= 45
+
+    # Bonus when the candidate style suits the requested difficulty.
+    style = str(candidate.get("style"))
+    if difficulty == "easy" and style == "direct":
+        score += 16
+    elif difficulty == "medium" and style in {"direct", "balanced"}:
+        score += 14
+    elif difficulty == "hard" and style == "balanced":
+        score += 10
+    elif difficulty == "expert" and style == "oblique":
+        score += 10
+
+    # Penalise the labels "(mil.)", "(fig.)", "(lett.)", "(fam.)" on easy/medium,
+    # and a semicolon in the text on hard/expert.
+    if difficulty in {"easy", "medium"} and re.search(r"\((?:mil|fig|lett|fam)\.\)", text.lower()):
+        score -= 30
+    if difficulty in {"hard", "expert"} and ";" in text:
+        score -= 10
+
+    # Small penalty for entries carrying a truthy needs_review flag.
+    if entry.get("needs_review"):
+        score -= 8
+
+    return score
+
+
+def choose_best_candidate(
+    candidates: Sequence[Dict[str, object]],
+    entry: Dict[str, object],
+    difficulty: str,
+) -> Optional[Dict[str, object]]:
+    ranked = sorted(
+        candidates,
+        key=lambda candidate: (
+            score_candidate(candidate, entry, difficulty),
+            float(candidate.get("confidence", 0.0)),
+            float(candidate.get("priority", 0.0)),
+            -len(str(candidate.get("text", ""))),
+        ),
+        reverse=True,
+    )
+    return ranked[0] if ranked else None
+
+
+def review_reasons(entry: Dict[str, object], candidates: Sequence[Dict[str, object]]) -> List[str]:
+    reasons: List[str] = []
+    form = str(entry.get("form", ""))
+    lowered_topics = set(lexical_topics(entry))
+    semantic_topic_set = set(semantic_topics(entry))
+    babelnet_status = str((entry.get("babelnet") or {}).get("status", ""))
+    wiktextract = entry.get("wiktextract", {})
+    wiktextract_status = str(wiktextract.get("status", "")) if isinstance(wiktextract, dict) else ""
+    preferred_definition = str(entry.get("preferred_definition", ""))
+    preferred_source = str(entry.get("preferred_source", ""))
+
+    if not candidates:
+        reasons.append("no_viable_definition")
+    if not preferred_definition and entry.get("needs_review"):
+        reasons.append("flagged_by_refined_stage")
+    if preferred_definition and suspicious_proper_noun_definition(preferred_definition, entry):
+        reasons.append("proper_noun_collision")
+    if babelnet_status == "ambiguous" and preferred_source == "babelnet":
+        reasons.append("babelnet_ambiguous")
+    if wiktextract_status in {"missing", "no_match"} and not preferred_definition:
+        reasons.append("wiktextract_missing")
+    if lowered_topics == {"general"} and not semantic_topic_set and not preferred_definition:
+        reasons.append("only_general_topics")
+    if len(form) <= 2:
+        reasons.append("very_short_word")
+    if str(entry.get("pos", "")) in {"PREP", "CONJ"}:
+        reasons.append("function_word")
+    if preferred_source == "babelnet" and any("None" in str(sense.get("topics")) for sense in entry.get("senses", []) if isinstance(sense, dict)):
+        reasons.append("unresolved_sense_topics")
+    if preferred_definition and definition_mentions_answer(preferred_definition, form.lower()):
+        reasons.append("candidate_mentions_answer")
+
+    return dedupe(reasons)
+
+
+def curate_entry(entry: Dict[str, object]) -> Tuple[Dict[str, object], Optional[Dict[str, object]]]:
+    curated = deepcopy(entry)
+    candidates = collect_candidates(curated)
+
+    clue_definitions: Dict[str, str] = {}
+    clue_sources: Dict[str, str] = {}
+    clue_scores: Dict[str, int] = {}
+    curation_notes: List[str] = []
+
+    for difficulty in DIFFICULTIES:
+        best = choose_best_candidate(candidates, curated, difficulty)
+        if best:
+            clue_definitions[difficulty] = str(best["text"])
+            clue_sources[difficulty] = str(best["source"])
+            clue_scores[difficulty] = score_candidate(best, curated, difficulty)
+
+    preferred_definition = clue_definitions.get("medium") or clue_definitions.get("easy") or ""
+    preferred_source = clue_sources.get("medium") or clue_sources.get("easy") or "fallback"
+
+    if preferred_definition:
+        curation_notes.append(f"preferred_from={preferred_source}")
+    if clue_scores.get("medium", -9999) < 20:
+        curation_notes.append("weak_medium_definition")
+
+    curated["curated_glosses"] = dedupe(candidate["text"] for candidate in candidates)
+    curated["curated_senses"] = [
+        {
+            "definition": candidate["text"],
+            "source": candidate["source"],
+            "family": candidate["family"],
+            "confidence": candidate["confidence"],
+            "priority": candidate["priority"],
+        }
+        for candidate in candidates
+    ]
+    curated["preferred_definition"] = preferred_definition
+    curated["preferred_source"] = preferred_source
+    curated["clue_definitions"] = clue_definitions
+    curated["clue_sources"] = clue_sources
+    curated["clue_scores"] = clue_scores
+    curated["curation_notes"] = curation_notes
+
+    reasons = review_reasons(curated, candidates)
+    severe = {"no_viable_definition", "proper_noun_collision", "candidate_mentions_answer"}
+    alpha_ready = bool(preferred_definition) and not severe.intersection(reasons)
+    curated["alpha_ready"] = alpha_ready
+    curated["review_reasons"] = reasons
+
+    review_item = None
+    if reasons:
+        review_item = {
+            "form": curated.get("form"),
+            "lemma": curated.get("lemma"),
+            "pos": curated.get("pos"),
+            "topics": curated.get("topics"),
+            "topic_suggestions": curated.get("topic_suggestions"),
+            "preferred_definition": preferred_definition,
+            "preferred_source": preferred_source,
+            "clue_definitions": clue_definitions,
+            "review_reasons": reasons,
+            "semantic_glosses": (curated.get("semantic") or {}).get("glosses", []),
+            "senses": curated.get("senses", []),
+            "babelnet_status": (curated.get("babelnet") or {}).get("status"),
+            "babelnet_best_synset": (curated.get("babelnet") or {}).get("best_synset"),
+            "wiktextract_status": (curated.get("wiktextract") or {}).get("status"),
+            "wiktextract": curated.get("wiktextract"),
+            "candidate_pool": [
+                {
+                    "text": candidate["text"],
+                    "source": candidate["source"],
+                    "family": candidate["family"],
+                    "confidence": candidate["confidence"],
+                    "priority": candidate["priority"],
+                }
+                for candidate in candidates[:12]
+            ],
+        }
+
+    return curated, review_item
+
+
+def build_curated_lexicon(args: argparse.Namespace) -> Tuple[Dict[str, object], Dict[str, object]]:
+    payload = load_json(args.input)
+    if not isinstance(payload, dict) or "entries" not in payload:
+        raise ValueError(f"Lessico refined non valido: {args.input}")
+
+    curated_entries: List[Dict[str, object]] = []
+    review_entries: List[Dict[str, object]] = []
+
+    for entry in payload.get("entries", []) or []:
+        if not isinstance(entry, dict):
+            continue
+        curated, review_item = curate_entry(entry)
+        curated_entries.append(curated)
+        if review_item:
+            review_entries.append(review_item)
+
+    if args.max_review > 0:
+        review_entries = review_entries[: args.max_review]
+
+    curated_payload = {
+        "meta": {
+            "language": "it",
+            "version": 1,
+            "base_lexicon": args.input.name,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "entry_count": len(curated_entries),
+            "alpha_ready_count": sum(1 for item in curated_entries if item.get("alpha_ready")),
+            "review_count": len(review_entries),
+        },
+        "entries": curated_entries,
+    }
+
+    review_payload = {
+        "meta": {
+            "language": "it",
+            "version": 1,
+            "base_lexicon": args.input.name,
+            "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "entry_count": len(review_entries),
+        },
+        "entries": review_entries,
+    }
+
+    return curated_payload, review_payload
+
+
+def main() -> None:
+    args = parse_args()
+    curated_payload, review_payload = build_curated_lexicon(args)
+    write_json(args.output, curated_payload)
+    write_json(args.review_output, review_payload)
+    print(f"Lessico curated generato: {args.output}")
+    print(f"Voci totali: {curated_payload['meta']['entry_count']}")
+    print(f"Voci alpha_ready: {curated_payload['meta']['alpha_ready_count']}")
+    print(f"Voci da revisionare: {review_payload['meta']['entry_count']}")
+    print(f"File review generato: {args.review_output}")
+
+
+if __name__ == "__main__":
+    main()
--- a/enrich_review_from_wiktextract_file.py
+++ b/enrich_review_from_wiktextract_file.py
@@ -0,0 +1,492 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+
+from refine_lexicon_topics import REFINED_LEXICON_OUTPUT_PATH
+
+
+# Default for --review: the review queue stored next to this script.
+REVIEW_INPUT_PATH = Path(__file__).with_name("to_be_review.json")
+# Default for --wiktextract: the raw JSONL dump read offline.
+WIKTEXTRACT_INPUT_PATH = Path(__file__).with_name("raw-wiktextract-data.jsonl")
+# Default for --output: the refined lexicon with the wiktextract block added.
+WIKTEXTRACT_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_refined_plus_wiktextract.json")
+# Default for --index-cache: the cached word -> line-offset index of the dump.
+WIKTEXTRACT_INDEX_CACHE_PATH = Path(__file__).with_name(".wiktextract_it_index.json")
+
+# Default for --review-reasons: reasons that make a review entry a target.
+DEFAULT_REVIEW_REASONS = {"no_viable_definition", "only_general_topics", "babelnet_ambiguous"}
+
+POS_MAP = {
+    "noun": "NOUN",
+    "adj": "ADJ",
+    "adj": "ADJ",
+    "verb": "VERB",
+    "adv": "ADV",
+    "prep": "PREP",
+    "conj": "CONJ",
+    "pron": "PRON",
+    "intj": "INTJ",
+}
+
+# Lowercase sense topic labels -> topic names written to the lexicon.
+# sense_topics() ignores every label that is not a key of this table.
+TOPIC_MAP = {
+    "christianity": "religion",
+    "religion": "religion",
+    "history": "history",
+    "agriculture": "agriculture",
+    "engineering": "technology",
+    "mechanics": "technology",
+    "technology": "technology",
+    "medicine": "health",
+    "geography": "geography",
+    "biology": "nature",
+    "aeronautics": "transport",
+}
+
+# Lowercase category names -> topic names. sense_topics() looks categories up
+# here after stripping and lower-casing them; unknown categories are ignored.
+CATEGORY_TOPIC_HINTS = {
+    "religione-it": "religion",
+    "cristianesimo-it": "religion",
+    "storia-it": "history",
+    "agricoltura-it": "agriculture",
+    "medicina-it": "health",
+    "ingegneria-it": "technology",
+    "meccanica-it": "technology",
+    "tecnologia-it": "technology",
+    "geografia-it": "geography",
+    "biologia-it": "nature",
+    "aeronautica-it": "transport",
+}
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Arricchisce il lessico refined leggendo offline il file raw-wiktextract-data.jsonl, "
+            "senza effettuare richieste di rete."
+        )
+    )
+    parser.add_argument(
+        "--input",
+        type=Path,
+        default=REFINED_LEXICON_OUTPUT_PATH,
+        help="Lessico refined di partenza.",
+    )
+    parser.add_argument(
+        "--review",
+        type=Path,
+        default=REVIEW_INPUT_PATH,
+        help="File to_be_review.json da usare per selezionare i lemmi prioritari.",
+    )
+    parser.add_argument(
+        "--wiktextract",
+        type=Path,
+        default=WIKTEXTRACT_INPUT_PATH,
+        help="File JSONL raw estratto da Wiktionary.",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=WIKTEXTRACT_OUTPUT_PATH,
+        help="Lessico refined con blocco wiktextract aggiunto.",
+    )
+    parser.add_argument(
+        "--index-cache",
+        type=Path,
+        default=WIKTEXTRACT_INDEX_CACHE_PATH,
+        help="Cache dell'indice lemmi->righe del JSONL per velocizzare i rilanci.",
+    )
+    parser.add_argument(
+        "--word-limit",
+        type=int,
+        default=0,
+        help="Limite massimo di parole da elaborare. 0 = tutte le candidate.",
+    )
+    parser.add_argument(
+        "--words",
+        default="",
+        help="Lista separata da virgole di lemmi specifici da arricchire.",
+    )
+    parser.add_argument(
+        "--review-reasons",
+        default=",".join(sorted(DEFAULT_REVIEW_REASONS)),
+        help="Motivi del file review da trattare con priorita, separati da virgole.",
+    )
+    parser.add_argument(
+        "--skip-existing",
+        action="store_true",
+        help="Salta le voci che nel lessico di input hanno gia un blocco wiktextract utile.",
+    )
+    return parser.parse_args()
+
+
+def load_json(path: Path, default: object) -> object:
+    if not path.exists():
+        return default
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def write_json(path: Path, payload: object) -> None:
+    path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+def parse_csv_set(value: str) -> set[str]:
+    return {item.strip().lower() for item in str(value or "").split(",") if item.strip()}
+
+
+def entry_key(entry: Dict[str, object]) -> Tuple[str, str]:
+    form = str(entry.get("normalized_form") or entry.get("form") or "").strip().lower()
+    pos = str(entry.get("pos") or "").strip().upper()
+    return form, pos
+
+
+def load_or_build_index(jsonl_path: Path, index_cache_path: Path) -> Dict[str, List[int]]:
+    cached = load_json(index_cache_path, {})
+    expected_meta = {
+        "source": str(jsonl_path.resolve()),
+        "size": jsonl_path.stat().st_size if jsonl_path.exists() else 0,
+        "mtime": jsonl_path.stat().st_mtime if jsonl_path.exists() else 0,
+    }
+    if (
+        isinstance(cached, dict)
+        and cached.get("meta") == expected_meta
+        and isinstance(cached.get("index"), dict)
+    ):
+        return {str(key): list(value) for key, value in cached["index"].items()}
+
+    index: Dict[str, List[int]] = {}
+    with jsonl_path.open("r", encoding="utf-8") as handle:
+        while True:
+            offset = handle.tell()
+            line = handle.readline()
+            if not line:
+                break
+            raw = line.rstrip("\n")
+            if not raw:
+                continue
+            obj = json.loads(raw)
+            if obj.get("lang_code") != "it":
+                continue
+            word = str(obj.get("word", "")).strip().lower()
+            if word:
+                index.setdefault(word, []).append(offset)
+
+    write_json(index_cache_path, {"meta": expected_meta, "index": index})
+    return index
+
+
+def read_jsonl_objects_at_offsets(jsonl_path: Path, offsets: Sequence[int]) -> List[Dict[str, object]]:
+    objects: List[Dict[str, object]] = []
+    with jsonl_path.open("r", encoding="utf-8") as handle:
+        for offset in offsets:
+            handle.seek(offset)
+            line = handle.readline()
+            if not line:
+                continue
+            objects.append(json.loads(line))
+    return objects
+
+
+def map_pos(value: str) -> str:
+    normalized = str(value or "").strip().lower()
+    return POS_MAP.get(normalized, normalized.upper() if normalized else "")
+
+
+def normalize_text(text: str) -> str:
+    value = str(text or "").strip()
+    value = re.sub(r"\s+", " ", value)
+    return value
+
+
+def sense_topics(sense: Dict[str, object], categories: Sequence[str]) -> List[str]:
+    topics = set()
+    for topic in sense.get("topics", []) or []:
+        normalized = TOPIC_MAP.get(str(topic).strip().lower())
+        if normalized:
+            topics.add(normalized)
+    for category in categories:
+        normalized = CATEGORY_TOPIC_HINTS.get(str(category).strip().lower())
+        if normalized:
+            topics.add(normalized)
+    return sorted(topics)
+
+
+def word_level_topics(entries: Sequence[Dict[str, object]], categories: Sequence[str]) -> List[str]:
+    topics = set()
+    for entry in entries:
+        for sense in entry.get("senses", []) or []:
+            if isinstance(sense, dict):
+                topics.update(sense_topics(sense, categories))
+    return sorted(topics)
+
+
+def grammar_hints(entries: Sequence[Dict[str, object]]) -> List[str]:
+    hints = set()
+    for entry in entries:
+        pos = str(entry.get("pos", "")).lower()
+        tags = [str(tag).lower() for tag in entry.get("tags", []) or []]
+        if pos == "verb" and "form-of" in tags:
+            hints.add("voce_verbale")
+        if pos == "noun":
+            for sense in entry.get("senses", []) or []:
+                if not isinstance(sense, dict):
+                    continue
+                for gloss in sense.get("glosses", []) or []:
+                    gloss_text = str(gloss).lower()
+                    if "diminutivo" in gloss_text:
+                        hints.add("diminutivo")
+                    if "accrescitivo" in gloss_text:
+                        hints.add("accrescitivo")
+                    if "peggiorativo" in gloss_text:
+                        hints.add("peggiorativo")
+        for sense in entry.get("senses", []) or []:
+            if not isinstance(sense, dict):
+                continue
+            for gloss in sense.get("glosses", []) or []:
+                gloss_text = str(gloss).lower()
+                if "congiuntivo" in gloss_text:
+                    hints.add("congiuntivo")
+                if "imperativo" in gloss_text:
+                    hints.add("imperativo")
+                if "plurale" in gloss_text:
+                    hints.add("plurale")
+    return sorted(hints)
+
+
+def simplify_entry(obj: Dict[str, object]) -> Dict[str, object]:
+    categories = [normalize_text(item) for item in obj.get("categories", []) or [] if item]
+    senses = []
+    for sense in obj.get("senses", []) or []:
+        if not isinstance(sense, dict):
+            continue
+        glosses = [normalize_text(item) for item in sense.get("glosses", []) or [] if normalize_text(item)]
+        if not glosses:
+            continue
+        senses.append(
+            {
+                "glosses": glosses,
+                "examples": [
+                    normalize_text(example.get("text", ""))
+                    for example in sense.get("examples", []) or []
+                    if isinstance(example, dict) and normalize_text(example.get("text", ""))
+                ],
+                "topics": sense_topics(sense, categories),
+                "tags": [str(tag) for tag in sense.get("tags", []) or [] if tag],
+                "categories": [normalize_text(item) for item in sense.get("categories", []) or [] if item],
+            }
+        )
+    return {
+        "word": obj.get("word"),
+        "lang": obj.get("lang"),
+        "lang_code": obj.get("lang_code"),
+        "pos": map_pos(str(obj.get("pos", ""))),
+        "pos_title": obj.get("pos_title"),
+        "tags": [str(tag) for tag in obj.get("tags", []) or [] if tag],
+        "categories": categories,
+        "senses": senses,
+        "synonyms": [item for item in obj.get("synonyms", []) or [] if isinstance(item, dict) and item.get("word")],
+        "related": [item for item in obj.get("related", []) or [] if isinstance(item, dict) and item.get("word")],
+    }
+
+
+def choose_best_entries(refined_entry: Dict[str, object], candidates: Sequence[Dict[str, object]]) -> List[Dict[str, object]]:
+    target_pos = str(refined_entry.get("pos", "")).upper()
+    exact = [candidate for candidate in candidates if str(candidate.get("pos", "")).upper() == target_pos]
+    if exact:
+        return exact
+    return list(candidates)
+
+
+def wiktextract_already_useful(entry: Dict[str, object]) -> bool:
+    payload = entry.get("wiktextract", {})
+    if not isinstance(payload, dict):
+        return False
+    status = str(payload.get("status", "")).lower()
+    if status == "enriched" and (payload.get("definitions") or payload.get("entries")):
+        return True
+    if status in {"missing", "no_match"}:
+        return True
+    return False
+
+
+def select_targets(
+    refined_payload: Dict[str, object],
+    review_payload: Dict[str, object],
+    review_reasons: set[str],
+    explicit_words: set[str],
+    word_limit: int,
+    skip_existing: bool,
+) -> Tuple[List[Dict[str, object]], int]:
+    refined_entries = [entry for entry in refined_payload.get("entries", []) or [] if isinstance(entry, dict)]
+    refined_by_word = {str(entry.get("form", "")).lower(): entry for entry in refined_entries if entry.get("form")}
+
+    if explicit_words:
+        selected = []
+        skipped_existing_count = 0
+        for word in explicit_words:
+            entry = refined_by_word.get(word)
+            if entry is None:
+                continue
+            if skip_existing and wiktextract_already_useful(entry):
+                skipped_existing_count += 1
+                continue
+            selected.append(entry)
+        selected = selected[:word_limit] if word_limit > 0 else selected
+        return selected, skipped_existing_count
+
+    review_entries = [entry for entry in review_payload.get("entries", []) or [] if isinstance(entry, dict)]
+    selected_words: List[str] = []
+    seen = set()
+    skipped_existing_count = 0
+
+    for review_entry in review_entries:
+        word = str(review_entry.get("form", "")).strip().lower()
+        if not word or word in seen:
+            continue
+        reasons = {str(item).lower() for item in review_entry.get("review_reasons", []) or []}
+        refined = refined_by_word.get(word)
+        if refined is None:
+            continue
+        if skip_existing and wiktextract_already_useful(refined):
+            skipped_existing_count += 1
+            continue
+        babelnet_status = str((refined.get("babelnet") or {}).get("status", "")).lower()
+        if reasons.intersection(review_reasons) or babelnet_status == "no_match":
+            selected_words.append(word)
+            seen.add(word)
+            if word_limit > 0 and len(selected_words) >= word_limit:
+                break
+
+    return [refined_by_word[word] for word in selected_words if word in refined_by_word], skipped_existing_count
+
+
+def wiktextract_payload_for_entry(refined_entry: Dict[str, object], matches: Sequence[Dict[str, object]]) -> Dict[str, object]:
+    if not matches:
+        return {
+            "status": "missing",
+            "matched": False,
+            "definitions": [],
+            "entries": [],
+            "topic_hints": [],
+            "grammar_hints": [],
+        }
+
+    selected_entries = choose_best_entries(refined_entry, matches)
+    definitions = []
+    for item in selected_entries:
+        for sense in item.get("senses", []) or []:
+            if not isinstance(sense, dict):
+                continue
+            definitions.extend(sense.get("glosses", []) or [])
+    definitions = [normalize_text(item) for item in definitions if normalize_text(item)]
+
+    all_categories = []
+    for item in selected_entries:
+        all_categories.extend(item.get("categories", []) or [])
+
+    return {
+        "status": "enriched" if definitions else "entries_without_definitions",
+        "matched": bool(definitions),
+        "definitions": definitions,
+        "entries": selected_entries,
+        "topic_hints": word_level_topics(selected_entries, all_categories),
+        "grammar_hints": grammar_hints(selected_entries),
+        "categories": sorted(set(normalize_text(item) for item in all_categories if normalize_text(item))),
+    }
+
+
+def enrich_from_wiktextract(args: argparse.Namespace) -> Dict[str, object]:
+    """Enrich the selected entries from the offline dump and write the result.
+
+    Loads the refined lexicon and the review file, selects the targets,
+    attaches a ``wiktextract`` block to each of them, merges them back into
+    the full lexicon and writes it to ``args.output``. Progress is printed
+    for every target.
+
+    Args:
+        args: Parsed command-line options (see ``parse_args``).
+
+    Returns:
+        Summary dict with ``target_count``, ``skipped_existing_count``,
+        ``matched_count``, ``missing_count`` and ``output``.
+
+    Raises:
+        ValueError: If the refined lexicon is not a JSON object with an
+            ``entries`` key.
+    """
+    refined_payload = load_json(args.input, {"entries": []})
+    if not isinstance(refined_payload, dict) or "entries" not in refined_payload:
+        raise ValueError(f"Lessico refined non valido: {args.input}")
+
+    # An unusable review file is treated as an empty one.
+    review_payload = load_json(args.review, {"entries": []})
+    if not isinstance(review_payload, dict):
+        review_payload = {"entries": []}
+
+    targets, skipped_existing_count = select_targets(
+        refined_payload,
+        review_payload,
+        parse_csv_set(args.review_reasons),
+        parse_csv_set(args.words),
+        args.word_limit,
+        args.skip_existing,
+    )
+
+    print(
+        f"Target selezionati: {len(targets)}"
+        + (f" | già saltati per wiktextract esistente: {skipped_existing_count}" if args.skip_existing else "")
+    )
+
+    index = load_or_build_index(args.wiktextract, args.index_cache)
+    # Copy of the whole lexicon keyed by (form, pos); enriched targets
+    # overwrite their own slot below.
+    refined_index = {
+        entry_key(entry): deepcopy(entry)
+        for entry in refined_payload.get("entries", []) or []
+        if isinstance(entry, dict)
+    }
+
+    matched_count = 0
+    missing_count = 0
+
+    for idx, entry in enumerate(targets, start=1):
+        updated = deepcopy(entry)
+        word = str(entry.get("form", "")).strip().lower()
+        offsets = index.get(word, [])
+        objects = [simplify_entry(obj) for obj in read_jsonl_objects_at_offsets(args.wiktextract, offsets)]
+        payload = wiktextract_payload_for_entry(updated, objects)
+        updated["wiktextract"] = payload
+        updated["wiktextract_generated_at"] = datetime.now().astimezone().isoformat(timespec="seconds")
+        refined_index[entry_key(updated)] = updated
+
+        # Every payload whose "matched" flag is false counts as missing.
+        if payload.get("matched"):
+            matched_count += 1
+        else:
+            missing_count += 1
+
+        print(
+            f"[{idx}/{len(targets)}] {word}: "
+            f"status={payload.get('status')} "
+            f"def={len(payload.get('definitions', []))} "
+            f"topics={len(payload.get('topic_hints', []))} "
+            f"entries={len(payload.get('entries', []))}"
+        )
+
+    merged_entries = list(refined_index.values())
+    # Sorted by the raw normalized_form and pos fields (an entry without
+    # normalized_form sorts under the empty string).
+    merged_entries.sort(key=lambda item: (str(item.get("normalized_form", "")), str(item.get("pos", ""))))
+
+    merged_payload = {
+        "meta": {
+            # Keep the input lexicon's meta and add the counters of this run.
+            **(refined_payload.get("meta", {}) if isinstance(refined_payload.get("meta"), dict) else {}),
+            "wiktextract_source": str(args.wiktextract),
+            "wiktextract_generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+            "wiktextract_target_count": len(targets),
+            "wiktextract_skipped_existing_count": skipped_existing_count,
+            "wiktextract_matched_count": matched_count,
+            "wiktextract_missing_count": missing_count,
+        },
+        "entries": merged_entries,
+    }
+
+    write_json(args.output, merged_payload)
+
+    return {
+        "target_count": len(targets),
+        "skipped_existing_count": skipped_existing_count,
+        "matched_count": matched_count,
+        "missing_count": missing_count,
+        "output": str(args.output),
+    }
+
+
+def main() -> None:
+    args = parse_args()
+    result = enrich_from_wiktextract(args)
+    print(f"Lessico con Wiktextract generato: {result['output']}")
+    print(f"Voci trattate: {result['target_count']}")
+    print(f"Voci già saltate: {result['skipped_existing_count']}")
+    print(f"Match Wiktextract: {result['matched_count']}")
+    print(f"Senza match Wiktextract: {result['missing_count']}")
+
+
+if __name__ == "__main__":
+    main()
--- a/enrich_review_from_wiktionary.py
+++ b/enrich_review_from_wiktionary.py
@@ -0,0 +1,678 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import time
+import urllib.parse
+import urllib.request
+import urllib.error
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+
+from refine_lexicon_topics import REFINED_LEXICON_OUTPUT_PATH
+
+
+# Default file locations, all resolved next to this script.
+REVIEW_INPUT_PATH = Path(__file__).with_name("to_be_review.json")
+WIKTIONARY_CACHE_PATH = Path(__file__).with_name(".wiktionary_cache.json")
+WIKTIONARY_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_refined_plus_wiktionary.json")
+# Default value of the --api-url option.
+WIKTIONARY_API_URL = "https://it.wiktionary.org/w/api.php"
+
+# Default value of the --review-reasons option (joined with commas in parse_args).
+DEFAULT_REVIEW_REASONS = {"no_viable_definition", "only_general_topics", "babelnet_ambiguous"}
+
+# Lowercase part-of-speech labels mapped to the upper-case POS tags stored on parsed entries.
+# detect_pos_from_heading() matches these labels as substrings of a heading and
+# parse_template_marker() as prefixes of a template marker; both try the longest label first.
+POS_ALIASES = {
+    "sostantivo": "NOUN",
+    "nome": "NOUN",
+    "sost": "NOUN",
+    "aggettivo": "ADJ",
+    "agg": "ADJ",
+    "verbo": "VERB",
+    "verb": "VERB",
+    "verb form": "VERB_FORM",
+    "avverbio": "ADV",
+    "avv": "ADV",
+    "preposizione": "PREP",
+    "prep": "PREP",
+    "congiunzione": "CONJ",
+    "cong": "CONJ",
+    "pronome": "PRON",
+    "pron": "PRON",
+    "articolo": "ART",
+    "interiezione": "INTJ",
+    "inter": "INTJ",
+    "locuzione": "PHRASE",
+    "loc": "PHRASE",
+}
+
+# Topic name -> lowercase substrings; infer_topics() reports a topic when any of its
+# substrings occurs in the lower-cased definitions and categories.
+TOPIC_KEYWORDS = {
+    "religion": ("religione", "cattolic", "sacro", "sacra", "devozion", "scapolare", "abbazia", "monastero"),
+    "clothing": ("abito", "vestito", "vestit", "abbigliamento", "indumento", "stoffa"),
+    "grammar": ("diminutivo", "voce verbale", "congiuntivo", "plurale", "singolare", "grammatica", "verbo"),
+    "geography": ("comune", "paese", "regione", "provincia", "citta", "localita", "frazione"),
+    "transport": ("veicolo", "motore", "treno", "aereo", "trasporto", "nave", "imbarcazione"),
+    "health": ("medicina", "ospedale", "malattia", "cura", "feriti", "ammalati", "sanitario"),
+}
+
+# Lowercase substrings that infer_grammar_hints() looks for in the definitions and in the
+# raw section text; each one found is reported verbatim as a grammar hint.
+GRAMMAR_KEYWORDS = (
+    "diminutivo",
+    "accrescitivo",
+    "peggiorativo",
+    "alterato",
+    "voce verbale",
+    "congiuntivo",
+    "participio",
+    "plurale",
+    "singolare",
+    "maschile",
+    "femminile",
+)
+
+
+def parse_args() -> argparse.Namespace:
+    """Declare the command-line options of the Wiktionary enrichment and parse them.
+
+    Returns:
+        The parsed namespace. Besides the input/review/output/cache paths it carries
+        the batch controls (``word_limit``, ``sleep``, ``save_every``), the HTTP 429
+        policy (``retry_429``, ``backoff_429``, ``stop_on_429``), the target filters
+        (``words``, ``review_reasons``, ``skip_existing``) and ``api_url``.
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Arricchisce le voci problematiche del lessico refined con definizioni e metadati "
+            "estratti da it.wiktionary.org."
+        )
+    )
+    parser.add_argument(
+        "--input",
+        type=Path,
+        default=REFINED_LEXICON_OUTPUT_PATH,
+        help="Lessico refined di partenza.",
+    )
+    parser.add_argument(
+        "--review",
+        type=Path,
+        default=REVIEW_INPUT_PATH,
+        help="File to_be_review.json da usare per selezionare le voci prioritarie.",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=WIKTIONARY_OUTPUT_PATH,
+        help="Nuovo lessico con blocco wiktionary aggiunto.",
+    )
+    parser.add_argument(
+        "--cache",
+        type=Path,
+        default=WIKTIONARY_CACHE_PATH,
+        help="Cache locale delle risposte Wiktionary.",
+    )
+    parser.add_argument(
+        "--word-limit",
+        type=int,
+        default=0,
+        help="Limite massimo di parole da elaborare. 0 = tutte le candidate.",
+    )
+    parser.add_argument(
+        "--sleep",
+        type=float,
+        default=1.0,
+        help="Pausa tra le richieste HTTP a Wiktionary.",
+    )
+    parser.add_argument(
+        "--save-every",
+        type=int,
+        default=25,
+        help="Salva cache e output ogni N parole elaborate per non perdere progresso.",
+    )
+    parser.add_argument(
+        "--retry-429",
+        type=int,
+        default=3,
+        help="Numero massimo di tentativi aggiuntivi se Wiktionary risponde HTTP 429.",
+    )
+    parser.add_argument(
+        "--backoff-429",
+        type=float,
+        default=30.0,
+        help="Secondi di attesa iniziali dopo un HTTP 429; raddoppiano a ogni nuovo tentativo.",
+    )
+    parser.add_argument(
+        "--stop-on-429",
+        action="store_true",
+        help="Se attivo, al primo HTTP 429 salva lo stato e interrompe il batch senza altri tentativi.",
+    )
+    parser.add_argument(
+        "--words",
+        default="",
+        help="Lista separata da virgole di lemmi specifici da arricchire.",
+    )
+    parser.add_argument(
+        "--review-reasons",
+        default=",".join(sorted(DEFAULT_REVIEW_REASONS)),
+        help="Motivi del file review da trattare con priorita, separati da virgole.",
+    )
+    parser.add_argument(
+        "--api-url",
+        default=WIKTIONARY_API_URL,
+        help="Endpoint MediaWiki Action API di Wiktionary.",
+    )
+    parser.add_argument(
+        "--skip-existing",
+        action="store_true",
+        help="Salta le voci che nel lessico di input hanno già un blocco wiktionary con stato utile.",
+    )
+    return parser.parse_args()
+
+
+def load_json(path: Path, default: object) -> object:
+    """Return the JSON document stored at *path*, or *default* when the file is absent."""
+    if path.exists():
+        raw_text = path.read_text(encoding="utf-8")
+        return json.loads(raw_text)
+    return default
+
+
+def write_json(path: Path, payload: object) -> None:
+    """Serialise *payload* as indented UTF-8 JSON and store it at *path*.
+
+    The text is first written to a sibling ``<name>.tmp`` file and then renamed over
+    *path*, so an interruption in the middle of a write (for example Ctrl-C during a
+    long batch) cannot leave a truncated cache or lexicon behind.
+    """
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    staging = path.with_name(f"{path.name}.tmp")
+    staging.write_text(serialized, encoding="utf-8")
+    # Same-directory rename: the destination is replaced in a single step.
+    staging.replace(path)
+
+
+def parse_csv_set(value: str) -> set[str]:
+    """Split a comma-separated string into a set of trimmed, lower-cased, non-empty items."""
+    trimmed = (piece.strip() for piece in str(value or "").split(","))
+    return {piece.lower() for piece in trimmed if piece}
+
+
+def entry_key(entry: Dict[str, object]) -> Tuple[str, str]:
+    """Return the ``(form, pos)`` identity of a lexicon entry.
+
+    The form comes from ``normalized_form`` (falling back to ``form``) and is
+    trimmed and lower-cased; the POS is trimmed and upper-cased. Missing or empty
+    values become empty strings.
+    """
+    raw_form = entry.get("normalized_form") or entry.get("form") or ""
+    raw_pos = entry.get("pos") or ""
+    return str(raw_form).strip().lower(), str(raw_pos).strip().upper()
+
+
+def fetch_wikitext(title: str, api_url: str) -> Dict[str, object]:
+    """Download the current wikitext of page *title* through the MediaWiki Action API.
+
+    Returns:
+        ``{"status": "missing"}`` (plus ``title`` when the API reports a page) if the
+        page does not exist; otherwise a dict with ``status`` (``"ok"`` or ``"empty"``
+        when no content was returned), ``title``, ``pageid`` and ``wikitext``.
+
+    Raises:
+        urllib.error.HTTPError / urllib.error.URLError: on transport failures.
+    """
+    query_string = urllib.parse.urlencode(
+        {
+            "action": "query",
+            "prop": "revisions",
+            "titles": title,
+            "rvprop": "content",
+            "rvslots": "main",
+            "formatversion": "2",
+            "format": "json",
+        }
+    )
+    request = urllib.request.Request(
+        f"{api_url}?{query_string}",
+        headers={
+            "User-Agent": "cruciverba-alpha/0.1 (local lexical enrichment)",
+            "Accept": "application/json",
+        },
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        payload = json.loads(response.read().decode("utf-8"))
+
+    page_list = (payload.get("query") or {}).get("pages") or []
+    if not page_list:
+        return {"status": "missing"}
+
+    first_page = page_list[0]
+    if first_page.get("missing"):
+        return {"status": "missing", "title": first_page.get("title", title)}
+
+    # The text of the latest revision lives in revisions[0].slots.main.content.
+    wikitext = ""
+    revision_list = first_page.get("revisions") or []
+    if revision_list:
+        main_slot = (revision_list[0].get("slots") or {}).get("main") or {}
+        wikitext = str(main_slot.get("content") or "")
+
+    return {
+        "status": "ok" if wikitext else "empty",
+        "title": first_page.get("title", title),
+        "pageid": first_page.get("pageid"),
+        "wikitext": wikitext,
+    }
+
+
+def fetch_wikitext_with_retry(title: str, args: argparse.Namespace) -> Dict[str, object]:
+    """Call ``fetch_wikitext`` and retry on HTTP 429 with exponentially growing pauses.
+
+    The first pause lasts ``args.backoff_429`` seconds (at least 1) and doubles after
+    every retry; at most ``args.retry_429`` retries are made.
+
+    Raises:
+        urllib.error.HTTPError: immediately for any status other than 429, for a 429
+            when ``args.stop_on_429`` is set, or once the retries are exhausted.
+    """
+    pause = max(1.0, float(args.backoff_429))
+    retries_used = 0
+    while True:
+        try:
+            return fetch_wikitext(title, args.api_url)
+        except urllib.error.HTTPError as exc:
+            may_retry = exc.code == 429 and not args.stop_on_429
+            if not may_retry or retries_used >= max(0, int(args.retry_429)):
+                raise
+        retries_used += 1
+        print(f"[429] {title}: attendo {pause:.1f}s prima del tentativo {retries_used}/{args.retry_429}")
+        time.sleep(pause)
+        pause *= 2
+
+
+def normalize_heading(text: str) -> str:
+    """Return a lower-cased, wikicode-free version of a heading.
+
+    The language marker ``{{-it-}}`` (with any inner spaces removed) is returned
+    as-is, because stripping templates would otherwise erase it.
+    """
+    compact = str(text or "").strip().lower().replace(" ", "")
+    if compact == "{{-it-}}":
+        return compact
+    return strip_wikicode(text).strip().lower()
+
+
+def extract_italian_section(wikitext: str) -> str:
+    """Return the body of the Italian language section of a page, or ``""`` if absent.
+
+    The section starts after the first level-2 heading that is ``{{-it-}}``,
+    ``Italiano`` or ``it`` and ends at the next level-2 heading (or at the end of
+    the page).
+    """
+    # Match level-2 headings only. Without the (?!=) / (?<!=) guards the pattern also
+    # matches "=== ... ===" lines, which would cut the section at its first
+    # sub-heading and hide the POS headings parse_wiktionary_section() looks for.
+    section_pattern = re.compile(r"^==(?!=)\s*(.*?)\s*(?<!=)==\s*$", re.MULTILINE)
+    matches = list(section_pattern.finditer(wikitext))
+    for index, match in enumerate(matches):
+        # normalize_heading() returns "{{-it-}}" unchanged for the language marker.
+        heading = normalize_heading(match.group(1))
+        if heading in {"{{-it-}}", "italiano", "it"}:
+            start = match.end()
+            end = matches[index + 1].start() if index + 1 < len(matches) else len(wikitext)
+            return wikitext[start:end]
+    return ""
+
+
+def strip_templates(text: str) -> str:
+    """Remove ``{{...}}`` templates from *text*, working from the innermost outwards.
+
+    A template with arguments is replaced by everything after its first ``|``; a
+    template without arguments is dropped. The two substitutions are repeated until
+    the text stops changing.
+    """
+    with_arguments = re.compile(r"\{\{([^{}|]+)\|([^{}]+?)\}\}")
+    without_arguments = re.compile(r"\{\{[^{}]+\}\}")
+    result = text
+    while True:
+        reduced = without_arguments.sub("", with_arguments.sub(r"\2", result))
+        if reduced == result:
+            return reduced
+        result = reduced
+
+
+def strip_wikicode(text: str) -> str:
+    """Reduce a wikitext fragment to plain text.
+
+    In order: HTML comments, ``<ref>`` blocks and remaining tags become spaces,
+    templates are stripped, wiki links are replaced by their visible text,
+    bold/italic quotes and ``&nbsp;`` are removed, whitespace is collapsed and
+    leading/trailing `` .;:-`` characters are trimmed.
+    """
+    cleaned = str(text or "")
+
+    # HTML-like markup: the order matters (<ref> blocks before generic tags).
+    markup_rules = (
+        (r"<!--.*?-->", re.DOTALL),
+        (r"<ref[^>]*>.*?</ref>", re.DOTALL),
+        (r"<[^>]+>", 0),
+    )
+    for pattern, flags in markup_rules:
+        cleaned = re.sub(pattern, " ", cleaned, flags=flags)
+
+    cleaned = strip_templates(cleaned)
+
+    # Piped links keep their label, plain links keep their target.
+    link_rules = (
+        (r"\[\[([^|\]]+)\|([^\]]+)\]\]", r"\2"),
+        (r"\[\[([^\]]+)\]\]", r"\1"),
+    )
+    for pattern, replacement in link_rules:
+        cleaned = re.sub(pattern, replacement, cleaned)
+
+    for token, replacement in (("'''", ""), ("''", ""), ("&nbsp;", " ")):
+        cleaned = cleaned.replace(token, replacement)
+
+    return re.sub(r"\s+", " ", cleaned).strip(" .;:-")
+
+
+def infer_topics(definitions: Sequence[str], categories: Sequence[str]) -> List[str]:
+    """Return the sorted topics whose keywords occur in the definitions or categories.
+
+    A topic from ``TOPIC_KEYWORDS`` is reported when at least one of its keywords is
+    a substring of the lower-cased, space-joined definitions and categories.
+    """
+    # Unpack instead of ``definitions + list(categories)``: the concatenation raises
+    # TypeError whenever *definitions* is a tuple or another non-list sequence.
+    haystack = " ".join([*definitions, *categories]).lower()
+    return sorted(
+        topic
+        for topic, keywords in TOPIC_KEYWORDS.items()
+        if any(keyword in haystack for keyword in keywords)
+    )
+
+
+def infer_grammar_hints(definitions: Sequence[str], raw_section: str) -> List[str]:
+    """Return the sorted ``GRAMMAR_KEYWORDS`` found in the definitions or the raw section.
+
+    Matching is a case-insensitive substring search over the joined definitions
+    followed by the raw section text.
+    """
+    haystack = f"{' '.join(definitions)} {raw_section}".lower()
+    return sorted({keyword for keyword in GRAMMAR_KEYWORDS if keyword in haystack})
+
+
+def detect_pos_from_heading(heading: str) -> Optional[str]:
+    """Map a section heading to a POS tag, or ``None`` when no alias matches.
+
+    The heading is normalised and every ``POS_ALIASES`` label is tried as a
+    substring, longest label first, so that e.g. "avverbio" wins over "verb".
+    """
+    normalized = normalize_heading(heading)
+    if not normalized:
+        return None
+    labels_longest_first = sorted(POS_ALIASES, key=len, reverse=True)
+    return next(
+        (POS_ALIASES[label] for label in labels_longest_first if label in normalized),
+        None,
+    )
+
+
+def parse_template_marker(line: str) -> Tuple[Optional[str], Optional[str]]:
+    """Classify a line made of a single ``{{-xxx-}}`` section template.
+
+    Returns:
+        ``(None, None)`` when the line is not such a template; otherwise one of
+        ``("language", "it")``, ``("pos", <POS tag>)`` or ``("subsection", <name>)``.
+        Synonym markers are canonicalised to the name ``"sinonimi"``, every other
+        subsection keeps its lower-cased marker as name.
+    """
+    match = re.match(r"^\{\{-([^{}|]+?)-?(?:\|.*)?\}\}$", line.strip(), flags=re.IGNORECASE)
+    if not match:
+        return None, None
+    marker = match.group(1).strip().lower()
+    if marker == "it":
+        return "language", "it"
+    # {{-pron-}} is the pronunciation subsection on it.wiktionary. It has to be
+    # checked before the POS aliases, whose "pron" prefix would otherwise turn it
+    # into a spurious pronoun entry.
+    if marker == "pron":
+        return "subsection", marker
+    # it.wiktionary spells the synonyms marker {{-sin-}}; the caller recognises the
+    # subsection by the canonical name "sinonimi".
+    if marker == "sin" or marker.startswith("sinon"):
+        return "subsection", "sinonimi"
+    # Longest alias first, so that e.g. "verb form" wins over "verb".
+    for label in sorted(POS_ALIASES, key=len, reverse=True):
+        if marker.startswith(label):
+            return "pos", POS_ALIASES[label]
+    return "subsection", marker
+
+
+def parse_wiktionary_section(section_text: str) -> Dict[str, object]:
+    """Parse the wikitext of one language section into part-of-speech entries.
+
+    The text is scanned line by line. A POS template marker or a level-3 heading
+    naming a POS opens a new entry; the lines that follow add definitions,
+    examples and synonyms to the entry currently open.
+
+    Returns:
+        A dict with ``entries`` (one dict per POS with ``pos``, ``heading``,
+        ``definitions``, ``examples``, ``synonyms``), ``categories`` (sorted, without
+        duplicates or empty names), ``definitions`` (all entry definitions in order),
+        ``topic_hints`` and ``grammar_hints``.
+    """
+    lines = section_text.splitlines()
+    entries: List[Dict[str, object]] = []
+    categories: List[str] = []
+    current: Optional[Dict[str, object]] = None
+    current_subsection = ""
+
+    # Only === and ==== headings are recognised inside a section.
+    heading_pattern = re.compile(r"^(={3,4})\s*(.*?)\s*\1\s*$")
+
+    for raw_line in lines:
+        line = raw_line.rstrip()
+        if not line:
+            continue
+
+        # Category links are collected wherever they appear; the line is still
+        # processed by the checks below.
+        for category_match in re.findall(r"\[\[Categoria:([^\]]+)\]\]", line):
+            categories.append(strip_wikicode(category_match))
+
+        marker_kind, marker_value = parse_template_marker(line)
+        if marker_kind == "pos":
+            # A POS template opens a new entry and resets the subsection.
+            current = {
+                "pos": marker_value,
+                "heading": marker_value,
+                "definitions": [],
+                "examples": [],
+                "synonyms": [],
+            }
+            entries.append(current)
+            current_subsection = ""
+            continue
+        if marker_kind == "subsection":
+            current_subsection = str(marker_value or "")
+            continue
+
+        heading_match = heading_pattern.match(line)
+        if heading_match:
+            level = len(heading_match.group(1))
+            heading = heading_match.group(2)
+            if level == 3:
+                pos = detect_pos_from_heading(heading)
+                if pos:
+                    current = {
+                        "pos": pos,
+                        "heading": strip_wikicode(heading),
+                        "definitions": [],
+                        "examples": [],
+                        "synonyms": [],
+                    }
+                    entries.append(current)
+                    current_subsection = ""
+                    continue
+            # Any other heading only switches the current subsection.
+            current_subsection = normalize_heading(heading)
+            continue
+
+        # Content found before the first POS entry is ignored.
+        if current is None:
+            continue
+
+        stripped = line.lstrip()
+        # "#" lines are definitions, except "#:" / "#*" (examples, handled below)
+        # and "#;" lines, which match none of the branches and are dropped.
+        if stripped.startswith("#") and not stripped.startswith(("#:", "#*", "#;")):
+            definition = strip_wikicode(stripped.lstrip("#").strip())
+            if definition:
+                current["definitions"].append(definition)
+            continue
+
+        if stripped.startswith("#:") or stripped.startswith("#*"):
+            example = strip_wikicode(stripped[2:].strip())
+            if example:
+                current["examples"].append(example)
+            continue
+
+        # Bullet lines count as synonyms only under a subsection whose name starts
+        # with "sinonim".
+        if current_subsection.startswith("sinonim") and stripped.startswith("*"):
+            synonym = strip_wikicode(stripped.lstrip("*").strip())
+            if synonym:
+                current["synonyms"].append(synonym)
+
+    flat_definitions = [definition for entry in entries for definition in entry["definitions"]]
+    topic_hints = infer_topics(flat_definitions, categories)
+    grammar_hints = infer_grammar_hints(flat_definitions, section_text)
+
+    return {
+        "entries": entries,
+        "categories": sorted(set(filter(None, categories))),
+        "definitions": flat_definitions,
+        "topic_hints": topic_hints,
+        "grammar_hints": grammar_hints,
+    }
+
+
+def wiktionary_payload_for_entry(entry: Dict[str, object], api_response: Dict[str, object]) -> Dict[str, object]:
+    """Build the ``wiktionary`` block stored on a lexicon entry from an API response.
+
+    Returns:
+        A dict that always carries ``status``, ``matched``, ``page_title``,
+        ``source_url``, ``definitions``, ``entries``, ``topic_hints``,
+        ``grammar_hints`` and ``categories``. ``status`` is the API status when it is
+        not ``"ok"``, ``"no_italian_section"`` when the page has no Italian section,
+        ``"section_without_definitions"`` when nothing could be parsed and
+        ``"enriched"`` otherwise; in the last two cases ``pageid`` and
+        ``raw_excerpt`` (first 4000 characters of the section) are included too.
+    """
+    status = str(api_response.get("status", "missing"))
+    page_title = api_response.get("title") or entry.get("form")
+    # Built from the same title as ``page_title`` in every branch, so the URL and
+    # the title of one payload always point to the same page.
+    source_url = f"https://it.wiktionary.org/wiki/{urllib.parse.quote(str(api_response.get('title') or entry.get('form', '')))}"
+
+    def unmatched(reason: str) -> Dict[str, object]:
+        # Payload used whenever no Italian content could be extracted.
+        return {
+            "status": reason,
+            "matched": False,
+            "page_title": page_title,
+            "source_url": source_url,
+            "definitions": [],
+            "entries": [],
+            "topic_hints": [],
+            "grammar_hints": [],
+            "categories": [],
+        }
+
+    if status != "ok":
+        return unmatched(status)
+
+    italian_section = extract_italian_section(str(api_response.get("wikitext") or ""))
+    if not italian_section:
+        return unmatched("no_italian_section")
+
+    parsed = parse_wiktionary_section(italian_section)
+    matched = bool(parsed["definitions"])
+
+    return {
+        "status": "enriched" if matched else "section_without_definitions",
+        "matched": matched,
+        "page_title": page_title,
+        "pageid": api_response.get("pageid"),
+        "source_url": source_url,
+        "definitions": parsed["definitions"],
+        "entries": parsed["entries"],
+        "topic_hints": parsed["topic_hints"],
+        "grammar_hints": parsed["grammar_hints"],
+        "categories": parsed["categories"],
+        "raw_excerpt": italian_section[:4000],
+    }
+
+
+def select_targets(
+    refined_payload: Dict[str, object],
+    review_payload: Dict[str, object],
+    review_reasons: set[str],
+    explicit_words: set[str],
+    word_limit: int,
+    skip_existing: bool,
+) -> Tuple[List[Dict[str, object]], int]:
+    """Choose the refined-lexicon entries that have to be enriched.
+
+    With *explicit_words* only those words are considered (in alphabetical order).
+    Otherwise the review file is walked in order and a word is selected when one of
+    its ``review_reasons`` is in *review_reasons* or when its refined entry has
+    ``babelnet.status == "no_match"``. Words missing from the refined lexicon are
+    ignored. A positive *word_limit* caps the number of selected entries.
+
+    Returns:
+        ``(selected_entries, skipped_existing_count)``, where the count is the number
+        of candidates dropped because *skip_existing* is set and
+        ``wiktionary_already_useful`` accepted their existing block.
+    """
+    refined_entries = [entry for entry in refined_payload.get("entries", []) or [] if isinstance(entry, dict)]
+    # Indexed by lower-cased form only: when several entries share a form, the last
+    # one in the lexicon is the one that gets selected.
+    refined_by_word = {str(entry.get("form", "")).lower(): entry for entry in refined_entries if entry.get("form")}
+
+    if explicit_words:
+        selected = []
+        skipped_existing_count = 0
+        # Sorted so that the processing order, and the subset kept by word_limit,
+        # are the same on every run (set iteration order is not stable).
+        for word in sorted(explicit_words):
+            entry = refined_by_word.get(word)
+            if entry is None:
+                continue
+            if skip_existing and wiktionary_already_useful(entry):
+                skipped_existing_count += 1
+                continue
+            selected.append(entry)
+        selected = selected[:word_limit] if word_limit > 0 else selected
+        return selected, skipped_existing_count
+
+    review_entries = [entry for entry in review_payload.get("entries", []) or [] if isinstance(entry, dict)]
+    selected_words: List[str] = []
+    seen = set()
+    skipped_existing_count = 0
+
+    for review_entry in review_entries:
+        word = str(review_entry.get("form", "")).strip().lower()
+        if not word or word in seen:
+            continue
+        reasons = {str(item).lower() for item in review_entry.get("review_reasons", []) or []}
+        refined = refined_by_word.get(word)
+        if refined is None:
+            continue
+        if skip_existing and wiktionary_already_useful(refined):
+            skipped_existing_count += 1
+            continue
+        babelnet_status = str((refined.get("babelnet") or {}).get("status", "")).lower()
+        if reasons.intersection(review_reasons) or babelnet_status == "no_match":
+            selected_words.append(word)
+            seen.add(word)
+            if word_limit > 0 and len(selected_words) >= word_limit:
+                break
+
+    return [refined_by_word[word] for word in selected_words if word in refined_by_word], skipped_existing_count
+
+
+def wiktionary_already_useful(entry: Dict[str, object]) -> bool:
+    """Tell whether *entry* already carries a conclusive ``wiktionary`` block.
+
+    Conclusive means either an ``enriched`` block with definitions or entries, or a
+    definitive negative status (``missing``, ``no_italian_section``,
+    ``section_without_definitions``, ``empty``). The status is compared
+    case-insensitively.
+    """
+    block = entry.get("wiktionary", {})
+    if not isinstance(block, dict):
+        return False
+    status = str(block.get("status", "")).lower()
+    if status in {"missing", "no_italian_section", "section_without_definitions", "empty"}:
+        return True
+    return status == "enriched" and bool(block.get("definitions") or block.get("entries"))
+
+
+def enrich_from_wiktionary(args: argparse.Namespace) -> Dict[str, object]:
+    """Attach a ``wiktionary`` block to the selected entries of the refined lexicon.
+
+    For every target the page wikitext is taken from the local cache or fetched from
+    the API, parsed, and stored on a copy of the entry. Cache and merged lexicon are
+    written every ``args.save_every`` processed words and at the end.
+
+    Returns:
+        Run statistics: ``target_count``, ``processed_count``,
+        ``skipped_existing_count``, ``cache_hits``, ``network_calls``,
+        ``network_attempts``, ``output``, ``stopped_reason`` and ``stop_word``. When
+        the API keeps answering HTTP 429 the batch stops early and the last two
+        fields describe where.
+
+    Raises:
+        ValueError: if ``args.input`` is not a dict with an ``entries`` key.
+        urllib.error.HTTPError, OSError, ValueError: any other fetch failure is
+            re-raised, after the progress made so far has been saved.
+    """
+    refined_payload = load_json(args.input, {"entries": []})
+    if not isinstance(refined_payload, dict) or "entries" not in refined_payload:
+        raise ValueError(f"Lessico refined non valido: {args.input}")
+
+    review_payload = load_json(args.review, {"entries": []})
+    if not isinstance(review_payload, dict):
+        review_payload = {"entries": []}
+
+    cache = load_json(args.cache, {})
+    if not isinstance(cache, dict):
+        cache = {}
+
+    targets, skipped_existing_count = select_targets(
+        refined_payload,
+        review_payload,
+        parse_csv_set(args.review_reasons),
+        parse_csv_set(args.words),
+        args.word_limit,
+        args.skip_existing,
+    )
+
+    enriched_entries = []
+    cache_hits = 0
+    network_calls = 0
+    network_attempts = 0
+    processed_count = 0
+    stopped_reason = None
+    stop_word = None
+
+    print(
+        f"Target selezionati: {len(targets)}"
+        + (f" | già saltati per wiktionary esistente: {skipped_existing_count}" if args.skip_existing else "")
+    )
+
+    def persist_progress() -> None:
+        # Write the cache and the lexicon merged with the entries enriched so far.
+        refined_index = {
+            entry_key(entry): entry
+            for entry in refined_payload.get("entries", []) or []
+            if isinstance(entry, dict)
+        }
+        for item in enriched_entries:
+            refined_index[entry_key(item)] = item
+
+        merged_entries = list(refined_index.values())
+        merged_entries.sort(key=lambda item: (str(item.get("normalized_form", "")), str(item.get("pos", ""))))
+
+        merged_payload = {
+            "meta": {
+                **(refined_payload.get("meta", {}) if isinstance(refined_payload.get("meta"), dict) else {}),
+                "wiktionary_source": args.api_url,
+                "wiktionary_generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+                "wiktionary_target_count": len(targets),
+                "wiktionary_processed_count": processed_count,
+                "wiktionary_skipped_existing_count": skipped_existing_count,
+                "wiktionary_cache_hits": cache_hits,
+                "wiktionary_network_calls": network_calls,
+                "wiktionary_network_attempts": network_attempts,
+                "wiktionary_stopped_reason": stopped_reason,
+                "wiktionary_stop_word": stop_word,
+            },
+            "entries": merged_entries,
+        }
+
+        write_json(args.cache, cache)
+        write_json(args.output, merged_payload)
+
+    def build_result() -> Dict[str, object]:
+        # Snapshot of the counters at the moment the run ends or stops.
+        return {
+            "target_count": len(targets),
+            "processed_count": processed_count,
+            "skipped_existing_count": skipped_existing_count,
+            "cache_hits": cache_hits,
+            "network_calls": network_calls,
+            "network_attempts": network_attempts,
+            "output": str(args.output),
+            "stopped_reason": stopped_reason,
+            "stop_word": stop_word,
+        }
+
+    for index, entry in enumerate(targets, start=1):
+        updated = deepcopy(entry)
+        word = str(entry.get("form", "")).strip()
+        cache_key = word.lower()
+
+        if cache_key in cache:
+            api_response = cache[cache_key]
+            cache_hits += 1
+        else:
+            try:
+                network_attempts += 1
+                api_response = fetch_wikitext_with_retry(word, args)
+            except urllib.error.HTTPError as exc:
+                stop_word = word
+                if exc.code == 429:
+                    stopped_reason = f"http_429_after_{processed_count}_words"
+                    print(f"[STOP] Wiktionary ha risposto 429 su '{word}'. Salvo il progresso e interrompo il batch.")
+                    persist_progress()
+                    return build_result()
+                # Any other HTTP error still aborts the run, but the responses
+                # fetched since the last periodic save are written out first.
+                stopped_reason = f"http_{exc.code}_after_{processed_count}_words"
+                persist_progress()
+                raise
+            except (OSError, ValueError) as exc:
+                # Connection failures, timeouts and undecodable responses: save
+                # what has been gathered so far, then let the error propagate.
+                stop_word = word
+                stopped_reason = f"{type(exc).__name__}_after_{processed_count}_words"
+                persist_progress()
+                raise
+            cache[cache_key] = api_response
+            network_calls += 1
+            # Pause only after real network calls; cache hits are not throttled.
+            if args.sleep > 0:
+                time.sleep(args.sleep)
+
+        updated["wiktionary"] = wiktionary_payload_for_entry(updated, api_response)
+        updated["wiktionary_generated_at"] = datetime.now().astimezone().isoformat(timespec="seconds")
+        enriched_entries.append(updated)
+        processed_count += 1
+        print(
+            f"[{index}/{len(targets)}] {word}: "
+            f"status={updated['wiktionary'].get('status')} "
+            f"def={len(updated['wiktionary'].get('definitions', []))} "
+            f"topics={len(updated['wiktionary'].get('topic_hints', []))}"
+        )
+        if args.save_every > 0 and processed_count % int(args.save_every) == 0:
+            persist_progress()
+            print(f"[save] progresso salvato dopo {processed_count} parole")
+
+    persist_progress()
+
+    return build_result()
+
+
+def main() -> None:
+    """Run the Wiktionary enrichment from the command line and print a summary."""
+    summary = enrich_from_wiktionary(parse_args())
+    print(f"Lessico con Wiktionary generato: {summary['output']}")
+    total = summary["target_count"]
+    processed = summary.get("processed_count", total)
+    print(f"Voci trattate: {processed}/{total}")
+    if "skipped_existing_count" in summary:
+        print(f"Voci già saltate: {summary['skipped_existing_count']}")
+    for label, key in (("Cache hit", "cache_hits"), ("Chiamate rete", "network_calls")):
+        print(f"{label}: {summary[key]}")
+    if "network_attempts" in summary:
+        print(f"Tentativi di rete: {summary['network_attempts']}")
+    # These two lines appear only when the batch stopped early.
+    for label, key in (("Batch interrotto", "stopped_reason"), ("Ultima parola bloccante", "stop_word")):
+        value = summary.get(key)
+        if value:
+            print(f"{label}: {value}")
+
+
+# Run the command-line entry point only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/main.py
+++ b/main.py
@@ -2,9 +2,14 @@ from __future__ import annotations

 import argparse
 import json
+import os
+import random
 from pathlib import Path
+from types import SimpleNamespace
 from typing import Dict, List

+from build_babelnet_enrichment import BABELNET_ENV_KEY, BABELNET_OUTPUT_PATH, BABELNET_LOCAL_KEY_PATH, load_babelnet_api_key
+from build_enriched_lexicon import ENRICHED_LEXICON_OUTPUT_PATH
 from build_vocabulary import (
    FILTERED_OUTPUT_PATH,
    METADATA_OUTPUT_PATH,
@@ -13,6 +18,7 @@ from build_vocabulary import (
 )
 from build_lexicon import LEXICON_OUTPUT_PATH, build_lexicon
 from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH, build_semantic_lexicon
+from clue_generator import generate_clues, load_enriched_entries
 from crossword_filler import CrosswordFiller, load_vocabulary, load_vocabulary_metadata
 from crossword_generator import CrosswordGenerator, WORDS, render_grid

@@ -26,6 +32,14 @@ DIFFICULTY_ALIASES: Dict[str, int] = {

 DEFAULT_TOPIC = "general"
 DEFAULT_INITIAL_WORD_COUNT = len(WORDS)
+# Lexicon file names considered when --lexicon is omitted.
+# NOTE(review): the --lexicon help text says the most advanced available lexicon is
+# preferred, so this looks like a most-preferred-first list; confirm where it is consumed.
+DEFAULT_RUNTIME_LEXICON_CANDIDATES = (
+    "lexicon_it_curated_llm_aggressive.json",
+    "lexicon_it_curated_llm.json",
+    "lexicon_it_curated.json",
+    "lexicon_it_refined_plus_wiktextract.json",
+    ENRICHED_LEXICON_OUTPUT_PATH.name,
+    SEMANTIC_LEXICON_OUTPUT_PATH.name,
+)
 ABSTRACTISH_SUFFIXES = ("zione", "zioni", "mento", "menti", "ita", "ezza", "anza", "enza", "ismo")
 FILL_ALLOWED_POS = {"NOUN", "VERB", "ADJ", "ADV", "PREP", "CONJ"}
 GENERAL_FILL_MIN_QUALITY = 6
@@ -92,6 +106,8 @@ TOPIC_SEED_BLOCKED_SUBSTRINGS: Dict[str, tuple[str, ...]] = {
    ),
 }

+ACTIVE_LEXICON_PATH: Path | None = None
+

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Generatore e filler di cruciverba.")
@@ -115,6 +131,23 @@ def parse_args() -> argparse.Namespace:
        action="store_true",
        help="Rigenera `lexicon_it_semantic.json` arricchendo il lessico con IWN-OMW/ItalWordNet.",
    )
+    parser.add_argument(
+        "--babelnet-enrich",
+        action="store_true",
+        help="Prima di generare il cruciverba arricchisce incrementalmente il lessico con BabelNet.",
+    )
+    parser.add_argument(
+        "--babelnet-limit",
+        type=int,
+        default=20,
+        help="Numero massimo di parole da interrogare su BabelNet in questa esecuzione.",
+    )
+    parser.add_argument(
+        "--babelnet-sleep",
+        type=float,
+        default=0.2,
+        help="Pausa in secondi tra richieste BabelNet consecutive.",
+    )
    parser.add_argument(
        "--vocabulary",
        type=Path,
@@ -159,7 +192,13 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument(
        "--topic",
        default=DEFAULT_TOPIC,
-        help="Tema del cruciverba. Attualmente supporta i topic presenti nel lessico, ad esempio: general, nature, animals, actions, abstract.",
+        help="Tema del cruciverba. Puoi indicare un topic o una lista separata da virgole, es. transport,nature,ecology. Se lasci general, i topic possono essere scelti dal lessico con --max-topics.",
+    )
+    parser.add_argument(
+        "--max-topics",
+        type=int,
+        default=1,
+        help="Numero massimo di topic casuali da scegliere dal lessico arricchito quando --topic e' general. Massimo consigliato: 3.",
    )
    parser.add_argument(
        "--initial-word-count",
@@ -173,6 +212,26 @@ def parse_args() -> argparse.Namespace:
        default=DEFAULT_THEMED_FILL_WORD_COUNT,
        help="Numero massimo indicativo di parole aggiunte dal filler da mantenere fortemente legate al tema.",
    )
+    parser.add_argument(
+        "--definitions",
+        action="store_true",
+        help="Genera e stampa le definizioni per le parole inserite nel cruciverba.",
+    )
+    parser.add_argument(
+        "--lexicon",
+        type=Path,
+        default=None,
+        help=(
+            "File lessicale da usare durante l'esecuzione. Se omesso, il programma usa il lessico "
+            "piu avanzato disponibile, preferendo lexicon_it_curated_llm_aggressive.json."
+        ),
+    )
+    parser.add_argument(
+        "--definition-babelnet-limit",
+        type=int,
+        default=20,
+        help="Numero massimo di parole del cruciverba da arricchire al volo con BabelNet per generare definizioni.",
+    )
    return parser.parse_args()


@@ -222,6 +281,220 @@ def ensure_semantic_lexicon(args: argparse.Namespace) -> None:
    print(f"- match semantici: {matched}")


+def ensure_babelnet_enrichment(args: argparse.Namespace) -> None:
+    """Run the incremental BabelNet enrichment for the guiding topic and print a report.
+
+    Returns immediately unless ``args.babelnet_enrich`` is set; prints a notice
+    and returns when ``args.babelnet_limit`` is not positive.
+    """
+    if not args.babelnet_enrich:
+        return
+    if args.babelnet_limit <= 0:
+        print("BabelNet enrichment saltato: --babelnet-limit <= 0")
+        return
+
+    # Imported here so the enricher module is only loaded when it is used.
+    from babelnet_incremental_enricher import run_incremental_enrichment
+
+    api_key = load_babelnet_api_key()
+    guide_topic = primary_topic(args.topic)
+    settings = {
+        "api_key": api_key,
+        "topic": guide_topic,
+        "difficulty": args.difficulty,
+        "limit": args.babelnet_limit,
+        "sleep": args.babelnet_sleep,
+        "semantic": SEMANTIC_LEXICON_OUTPUT_PATH,
+        "babelnet": BABELNET_OUTPUT_PATH,
+        "enriched": ENRICHED_LEXICON_OUTPUT_PATH,
+        "dry_run": False,
+        "retry_no_match": False,
+    }
+    request = SimpleNamespace(**settings)
+
+    header_lines = (
+        "Arricchimento BabelNet incrementale",
+        f"- tema guida: {guide_topic}",
+        f"- topic attivi: {args.topic}",
+        f"- limite parole: {args.babelnet_limit}",
+        f"- chiave: {BABELNET_ENV_KEY} oppure {BABELNET_LOCAL_KEY_PATH.name}",
+    )
+    for line in header_lines:
+        print(line)
+
+    report = run_incremental_enrichment(request)
+
+    print("Riepilogo BabelNet")
+    print(f"- parole interrogate: {report['selected_count']}")
+    print(f"- chiamate API reali: {report['api_call_count']}")
+    print(f"- risposte da cache: {report['cache_hit_count']}")
+    print(f"- match: {report['matched_count']}")
+    for log in report["word_logs"]:
+        # One line per queried word with its API/cache counters.
+        print(
+            f"  {log['word']}: api_calls={log['api_calls']}, "
+            f"cache_hits={log['cache_hits']}, risposta={log['responses'] > 0}, "
+            f"match={log['matched']}, synsets={log['synsets']}"
+        )
+    print()
+
+
+def enrich_words_for_definitions(args: argparse.Namespace, words: List[str]) -> None:
+    """Enrich the crossword's own words through BabelNet before clue generation.
+
+    Returns immediately unless ``args.definitions`` is set; prints a notice and
+    returns when ``args.definition_babelnet_limit`` is not positive. ``words``
+    is forwarded to the enricher as the explicit word list.
+    """
+    if not args.definitions:
+        return
+    if args.definition_babelnet_limit <= 0:
+        print("Arricchimento BabelNet per definizioni saltato: --definition-babelnet-limit <= 0")
+        return
+
+    # Imported here so the enricher module is only loaded when it is used.
+    from babelnet_incremental_enricher import run_incremental_enrichment
+
+    settings = {
+        "api_key": load_babelnet_api_key(),
+        "topic": primary_topic(args.topic),
+        "difficulty": args.difficulty,
+        "limit": args.definition_babelnet_limit,
+        "sleep": args.babelnet_sleep,
+        "semantic": SEMANTIC_LEXICON_OUTPUT_PATH,
+        "babelnet": BABELNET_OUTPUT_PATH,
+        "enriched": ENRICHED_LEXICON_OUTPUT_PATH,
+        "dry_run": False,
+        "retry_no_match": False,
+        "words": words,
+    }
+    request = SimpleNamespace(**settings)
+
+    distinct_word_count = len(set(words))
+    print()
+    print("Arricchimento BabelNet per definizioni")
+    print(f"- parole nel cruciverba: {distinct_word_count}")
+    print(f"- limite parole: {args.definition_babelnet_limit}")
+
+    report = run_incremental_enrichment(request)
+
+    print("Riepilogo BabelNet definizioni")
+    print(f"- parole interrogate: {report['selected_count']}")
+    print(f"- chiamate API reali: {report['api_call_count']}")
+    print(f"- risposte da cache: {report['cache_hit_count']}")
+    print(f"- match: {report['matched_count']}")
+    for log in report["word_logs"]:
+        # One line per queried word with its API/cache counters.
+        print(
+            f"  {log['word']}: api_calls={log['api_calls']}, "
+            f"cache_hits={log['cache_hits']}, risposta={log['responses'] > 0}, "
+            f"match={log['matched']}, synsets={log['synsets']}"
+        )
+
+
+def placement_words(placements) -> List[str]:
+    """Return the ``word`` attribute of every placement, in placement order."""
+    words: List[str] = []
+    for item in placements:
+        words.append(item.word)
+    return words
+
+
+def print_definitions(args: argparse.Namespace, state) -> None:
+    """Print one clue line per placed word, followed by the alpha diagnostics.
+
+    Does nothing unless ``args.definitions`` is set. Entries are loaded from
+    the runtime lexicon resolved from ``args.lexicon``.
+    """
+    if not args.definitions:
+        return
+
+    lexicon_path = resolve_runtime_lexicon_path(args.lexicon)
+    entries = load_enriched_entries(lexicon_path)
+    guide_topic = primary_topic(args.topic)
+    clues = generate_clues(state.placements, entries, guide_topic, args.difficulty)
+
+    print()
+    print("Definizioni:")
+    for clue in clues:
+        location = f"{clue.number:>2}. {clue.direction} ({clue.x}, {clue.y}) "
+        content = f"[{clue.source}] {clue.text} -> {clue.word.upper()}"
+        print(location + content)
+    print_alpha_diagnostics(args, state, entries)
+
+
+def word_is_on_topic(entry: Dict[str, object], topic: str) -> bool:
+    """Tell whether a lexicon entry belongs to ``topic``.
+
+    ``topic`` may be a single topic or a comma-separated list; with a list the
+    entry is on topic when it matches any item. The default topic accepts
+    every entry.
+
+    Checks, in order: the entry's ``topics``, its ``semantic.semantic_topics``,
+    the BabelNet ``best_synset`` (same topic and ``topic_score`` >= 40), and
+    finally ``strong_topic_relevance``.
+
+    Null ``topics``/``semantic_topics`` values are treated as empty, the
+    BabelNet topic is compared case-insensitively, and ``topic_score`` is
+    parsed as a float so values such as ``"45.5"`` are not discarded.
+    """
+    active_topics = parse_topics(topic)
+    if len(active_topics) > 1:
+        return any(word_is_on_topic(entry, item) for item in active_topics)
+
+    normalized_topic = active_topics[0]
+    if normalized_topic == DEFAULT_TOPIC:
+        return True
+
+    # ``or []`` protects against an explicit null stored in the lexicon JSON.
+    topics = {str(item).lower() for item in entry.get("topics") or [] if item}
+    if normalized_topic in topics:
+        return True
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        semantic_topics = {
+            str(item).lower() for item in semantic.get("semantic_topics") or [] if item
+        }
+        if normalized_topic in semantic_topics:
+            return True
+
+    babelnet = entry.get("babelnet", {})
+    if isinstance(babelnet, dict):
+        best_synset = babelnet.get("best_synset", {})
+        if isinstance(best_synset, dict):
+            try:
+                topic_score = float(best_synset.get("topic_score") or 0)
+            except (TypeError, ValueError):
+                topic_score = 0.0
+            best_topic = str(best_synset.get("topic") or "").strip().lower()
+            if best_topic == normalized_topic and topic_score >= 40:
+                return True
+
+    try:
+        return strong_topic_relevance(entry, normalized_topic) > 0
+    except Exception:
+        # Kept deliberately broad: this check only feeds diagnostics, so a
+        # malformed entry is reported as off topic instead of aborting the run.
+        return False
+
+
+def pos_label(pos: str) -> str:
+    """Map a part-of-speech tag to its Italian plural label.
+
+    The tag is upper-cased before lookup; unknown tags map to ``"altri"``.
+    """
+    tag = str(pos).upper()
+    known_labels = (
+        ("NOUN", "sostantivi"),
+        ("ADJ", "aggettivi"),
+        ("VERB", "verbi"),
+        ("ADV", "avverbi"),
+        ("PREP", "preposizioni"),
+        ("CONJ", "congiunzioni"),
+    )
+    for code, label in known_labels:
+        if tag == code:
+            return label
+    return "altri"
+
+
+def print_alpha_diagnostics(args: argparse.Namespace, state, entries: Dict[str, Dict[str, object]]) -> None:
+    """Print grid-fill, topic-coverage and part-of-speech statistics for a schema.
+
+    Words are de-duplicated case-insensitively (first occurrence kept) and
+    looked up in ``entries`` by their lower-cased form. A word without an
+    entry is counted as off topic and under the ``altri`` part of speech.
+    """
+    pos_labels = ("sostantivi", "aggettivi", "verbi", "avverbi", "preposizioni", "congiunzioni", "altri")
+
+    unique_words = list(dict.fromkeys(word.lower() for word in placement_words(state.placements)))
+    active_topics = parse_topics(args.topic)
+
+    total_cells = state.area()
+    filled_cells = len(state.grid)
+    empty_cells = total_cells - filled_cells
+    if total_cells:
+        empty_ratio = empty_cells / total_cells
+        filled_ratio = filled_cells / total_cells
+    else:
+        # An empty grid has no meaningful ratio; report zero instead of dividing.
+        empty_ratio = 0.0
+        filled_ratio = 0.0
+    target_empty_cells = round(total_cells * args.target_empty_ratio)
+    target_delta = empty_cells - target_empty_cells
+
+    on_topic: List[str] = []
+    off_topic: List[str] = []
+    per_topic = {name: 0 for name in active_topics if name != DEFAULT_TOPIC}
+    pos_counts = dict.fromkeys(pos_labels, 0)
+
+    for word in unique_words:
+        entry = entries.get(word, {})
+        label = pos_label(str(entry.get("pos", "")))
+        pos_counts[label] = pos_counts.get(label, 0) + 1
+        if not entry or not word_is_on_topic(entry, args.topic):
+            off_topic.append(word)
+            continue
+        on_topic.append(word)
+        # A word may count towards several of the requested topics.
+        for selected_topic in per_topic:
+            if word_is_on_topic(entry, selected_topic):
+                per_topic[selected_topic] += 1
+
+    print()
+    print("Diagnostica alpha:")
+    print(f"- parole uniche nello schema: {len(unique_words)}")
+    print(f"- celle totali: {total_cells}")
+    print(f"- celle riempite: {filled_cells} ({filled_ratio * 100:.1f}%)")
+    print(f"- celle vuote: {empty_cells} ({empty_ratio * 100:.1f}%)")
+    print(f"- target celle vuote: {target_empty_cells} ({args.target_empty_ratio * 100:.1f}%)")
+    if target_delta == 0:
+        print("- distanza dal target: centrato")
+    elif target_delta > 0:
+        print(f"- distanza dal target: {target_delta} celle vuote in piu del target")
+    else:
+        print(f"- distanza dal target: {-target_delta} celle vuote in meno del target")
+    print(f"- topic richiesti: {', '.join(active_topics)}")
+    print(f"- parole in tema: {len(on_topic)}")
+    print(f"- parole fuori tema o non classificate: {len(off_topic)}")
+    if per_topic:
+        print("- distribuzione topic:")
+        for selected_topic, count in per_topic.items():
+            print(f"  {selected_topic}: {count}")
+    if on_topic:
+        print(f"- elenco in tema: {', '.join(on_topic)}")
+    if off_topic:
+        print(f"- elenco fuori tema/non classificate: {', '.join(off_topic)}")
+    print("- parti del discorso:")
+    for label in pos_labels:
+        print(f"  {label}: {pos_counts.get(label, 0)}")
+
+
 def parse_difficulty(value: str) -> int:
    text = str(value).strip().lower()
    if text in DIFFICULTY_ALIASES:
@@ -243,7 +516,30 @@ def load_selected_vocabulary(path: Path | None) -> List[str]:
    return path.read_text(encoding="utf-8").splitlines()


-def load_semantic_payload() -> Dict[str, object]:
+def resolve_runtime_lexicon_path(requested: Path | None) -> Path:
+    """Choose the lexicon file used at runtime and remember it in ``ACTIVE_LEXICON_PATH``.
+
+    An explicit ``requested`` path wins; a relative one is resolved against
+    the directory of this script. Without a request, the previously
+    remembered path is reused, otherwise the first existing file among
+    ``DEFAULT_RUNTIME_LEXICON_CANDIDATES`` is taken, falling back to
+    ``ENRICHED_LEXICON_OUTPUT_PATH``.
+
+    Raises:
+        SystemExit: when an explicitly requested lexicon does not exist.
+    """
+    global ACTIVE_LEXICON_PATH
+    script_dir = Path(__file__).resolve().parent
+
+    if requested is not None:
+        explicit = requested if requested.is_absolute() else script_dir / requested
+        if not explicit.exists():
+            raise SystemExit(f"Il lessico specificato con --lexicon non esiste: {explicit}")
+        ACTIVE_LEXICON_PATH = explicit
+        return explicit
+
+    if ACTIVE_LEXICON_PATH is None:
+        candidate_paths = (script_dir / name for name in DEFAULT_RUNTIME_LEXICON_CANDIDATES)
+        ACTIVE_LEXICON_PATH = next(
+            (path for path in candidate_paths if path.exists()),
+            ENRICHED_LEXICON_OUTPUT_PATH,
+        )
+    return ACTIVE_LEXICON_PATH
+
+
+def load_semantic_payload(path: Path | None = None) -> Dict[str, object]:
+    runtime_path = resolve_runtime_lexicon_path(path)
+    if runtime_path.exists():
+        return json.loads(runtime_path.read_text(encoding="utf-8"))
    if not SEMANTIC_LEXICON_OUTPUT_PATH.exists():
        lexicon = build_semantic_lexicon()
        SEMANTIC_LEXICON_OUTPUT_PATH.write_text(
@@ -253,6 +549,74 @@ def load_semantic_payload() -> Dict[str, object]:
    return json.loads(SEMANTIC_LEXICON_OUTPUT_PATH.read_text(encoding="utf-8"))


+def parse_topics(value: str) -> List[str]:
+    """Split a comma-separated topic string into unique lower-case topic names.
+
+    Blank items are dropped and duplicates removed, keeping first-occurrence
+    order. A falsy ``value`` or a result with no topics yields
+    ``[DEFAULT_TOPIC]``.
+    """
+    pieces = (chunk.strip().lower() for chunk in str(value or DEFAULT_TOPIC).split(","))
+    unique = list(dict.fromkeys(piece for piece in pieces if piece))
+    return unique or [DEFAULT_TOPIC]
+
+
+def primary_topic(value: str) -> str:
+    """Return the first topic of a (possibly comma-separated) topic string."""
+    first_topic, *_ = parse_topics(value)
+    return first_topic
+
+
+def available_topics_from_lexicon(payload: Dict[str, object], *, min_words: int = 5) -> List[str]:
+    """List, alphabetically, the topics carried by at least ``min_words`` usable entries.
+
+    Only dict entries flagged ``allowed_in_crossword`` are counted. The
+    default topic, ``abstract`` and ``actions`` are never returned.
+    """
+    skipped = {DEFAULT_TOPIC, "abstract", "actions"}
+    tally: Dict[str, int] = {}
+    for entry in payload.get("entries", []) or []:
+        if isinstance(entry, dict) and entry.get("allowed_in_crossword", False):
+            for raw_topic in entry.get("topics", []) or []:
+                name = str(raw_topic).strip().lower()
+                if name and name not in skipped:
+                    tally[name] = tally.get(name, 0) + 1
+    frequent = [name for name in tally if tally[name] >= min_words]
+    frequent.sort()
+    return frequent
+
+
+def resolve_topics(args: argparse.Namespace, difficulty_level: int) -> List[str]:
+    """Decide the active topics for this run and record them on ``args``.
+
+    Side effects: ``args.topic`` is rewritten as the comma-separated list of
+    selected topics and ``args.topic_seed_counts`` is set to a mapping
+    topic -> number of seed words returned by ``select_initial_words``.
+
+    * Explicit ``--topic`` list: kept as given, capped at three topics.
+      ``--max-topics`` is documented as governing only the random choice made
+      when the topic is the default one, so it no longer truncates an
+      explicit list (with its default of 1 it reduced ``a,b,c`` to ``a``).
+    * Default topic with ``--max-topics`` <= 1: the default topic is kept.
+    * Default topic with ``--max-topics`` > 1: up to that many lexicon topics
+      having at least one seed word are drawn, shuffled with ``args.seed``.
+
+    Returns:
+        The selected topic names.
+    """
+    topic_cap = 3  # upper bound on simultaneously active topics
+    requested = parse_topics(args.topic)
+    max_topics = max(1, min(topic_cap, int(args.max_topics)))
+    if requested != [DEFAULT_TOPIC]:
+        selected = requested[:topic_cap]
+        args.topic = ",".join(selected)
+        args.topic_seed_counts = {
+            topic: len(select_initial_words(difficulty_level, topic, args.initial_word_count))
+            for topic in selected
+        }
+        return selected
+
+    if max_topics <= 1:
+        args.topic = DEFAULT_TOPIC
+        args.topic_seed_counts = {}
+        return [DEFAULT_TOPIC]
+
+    # Keep only lexicon topics that can actually provide seed words.
+    candidates = []
+    for candidate in available_topics_from_lexicon(load_semantic_payload(), min_words=1):
+        available = len(select_initial_words(difficulty_level, candidate, args.initial_word_count))
+        if available > 0:
+            candidates.append((candidate, available))
+    if not candidates:
+        args.topic = DEFAULT_TOPIC
+        args.topic_seed_counts = {}
+        return [DEFAULT_TOPIC]
+
+    rng = random.Random(args.seed)
+    rng.shuffle(candidates)
+    selected_pairs = candidates[:max_topics]
+    selected = [topic for topic, _ in selected_pairs]
+    args.topic = ",".join(selected)
+    args.topic_seed_counts = dict(selected_pairs)
+    return selected
+
+
 def entry_topics(entry: Dict[str, object]) -> tuple[set[str], set[str]]:
    topics = {str(item).lower() for item in entry.get("topics", [])}
    semantic_topics = {
@@ -271,6 +635,10 @@ def matches_topic_roots(word: str, selected_topic: str) -> bool:


 def topic_relevance(entry: Dict[str, object], topic: str) -> int:
+    active_topics = parse_topics(topic)
+    if len(active_topics) > 1:
+        return max(topic_relevance(entry, item) for item in active_topics)
+
    selected_topic = topic.strip().lower()
    if selected_topic == DEFAULT_TOPIC:
        return 20
@@ -295,6 +663,10 @@ def topic_relevance(entry: Dict[str, object], topic: str) -> int:


 def strong_topic_relevance(entry: Dict[str, object], topic: str) -> int:
+    active_topics = parse_topics(topic)
+    if len(active_topics) > 1:
+        return max(strong_topic_relevance(entry, item) for item in active_topics)
+
    selected_topic = topic.strip().lower()
    if selected_topic == DEFAULT_TOPIC:
        return 20
@@ -341,7 +713,7 @@ def is_general_fill_support(entry: Dict[str, object]) -> bool:

 def load_filtered_entries(level: int, topic: str) -> List[Dict[str, object]]:
    payload = load_semantic_payload()
-    normalized_topic = topic.strip().lower()
+    normalized_topic = ",".join(parse_topics(topic))

    eligible = [
        entry
@@ -400,6 +772,40 @@ def load_semantic_metadata_for_vocabulary(words: List[str], topic: str) -> Dict[


 def select_initial_words(level: int, topic: str, count: int) -> List[str]:
+    active_topics = parse_topics(topic)
+    if len(active_topics) > 1:
+        topic_pools = {
+            selected_topic: select_initial_words(level, selected_topic, count)
+            for selected_topic in active_topics
+        }
+        selected: List[str] = []
+        indexes = {selected_topic: 0 for selected_topic in active_topics}
+
+        while len(selected) < count:
+            progressed = False
+            for selected_topic in active_topics:
+                pool = topic_pools.get(selected_topic, [])
+                while indexes[selected_topic] < len(pool) and pool[indexes[selected_topic]] in selected:
+                    indexes[selected_topic] += 1
+                if indexes[selected_topic] >= len(pool):
+                    continue
+                selected.append(pool[indexes[selected_topic]])
+                indexes[selected_topic] += 1
+                progressed = True
+                if len(selected) >= count:
+                    break
+            if not progressed:
+                break
+
+        if len(selected) < count:
+            fallback = select_initial_words(level, DEFAULT_TOPIC, count)
+            for word in fallback:
+                if word not in selected:
+                    selected.append(word)
+                if len(selected) >= count:
+                    break
+        return selected[:count]
+
    payload = load_semantic_payload()
    normalized_topic = topic.strip().lower()
    abstract_like_topics = {"abstract", "actions"}
@@ -408,6 +814,10 @@ def select_initial_words(level: int, topic: str, count: int) -> List[str]:
        topics, semantic_topics = entry_topics(entry)
        return selected_topic in topics

+    def semantic_matches(entry: Dict[str, object], selected_topic: str) -> bool:
+        topics, semantic_topics = entry_topics(entry)
+        return selected_topic in semantic_topics and selected_topic not in topics
+
    def word_score(entry: Dict[str, object], selected_topic: str) -> tuple[int, int, int, int, int, int, str]:
        topics, semantic_topics = entry_topics(entry)
        quality = int(entry.get("quality_score", 0))
@@ -479,6 +889,33 @@ def select_initial_words(level: int, topic: str, count: int) -> List[str]:
            return False
        return True

+    def is_semantic_seed_friendly(entry: Dict[str, object], selected_topic: str) -> bool:
+        word = str(entry.get("form", ""))
+        pos = str(entry.get("pos", ""))
+        topics, semantic_topics = entry_topics(entry)
+        if selected_topic not in semantic_topics:
+            return False
+        if len(word) < 4 or len(word) > 13:
+            return False
+        if pos not in {"NOUN", "ADJ", "VERB"}:
+            return False
+        if word.endswith(ABSTRACTISH_SUFFIXES):
+            return False
+        if "abstract" in topics:
+            return False
+        blocked_substrings = TOPIC_SEED_BLOCKED_SUBSTRINGS.get(selected_topic, ())
+        if any(part in word for part in blocked_substrings):
+            return False
+        required_substrings = TOPIC_SEED_REQUIRED_SUBSTRINGS.get(selected_topic)
+        if (
+            selected_topic in CONCRETE_TOPICS
+            and required_substrings
+            and selected_topic != DEFAULT_TOPIC
+            and not any(part in word for part in required_substrings)
+        ):
+            return False
+        return True
+
    def overlap_score(left: str, right: str) -> int:
        shared = set(left) & set(right)
        return sum(min(left.count(ch), right.count(ch)) for ch in shared)
@@ -548,6 +985,20 @@ def select_initial_words(level: int, topic: str, count: int) -> List[str]:
    relaxed_pool = sorted(pool, key=lambda entry: word_score(entry, normalized_topic), reverse=True)

    selected = pick_seed_set(strict_pool, normalized_topic, count)
+    if len(selected) < count and normalized_topic != DEFAULT_TOPIC:
+        semantic_pool = [
+            entry
+            for entry in eligible
+            if semantic_matches(entry, normalized_topic)
+            and is_semantic_seed_friendly(entry, normalized_topic)
+        ]
+        semantic_selected = pick_seed_set(semantic_pool, normalized_topic, count)
+        for word in semantic_selected:
+            if word not in selected:
+                selected.append(word)
+            if len(selected) >= count:
+                break
+
    if len(selected) < count and normalized_topic == DEFAULT_TOPIC:
        relaxed_selected = pick_seed_set(relaxed_pool, normalized_topic, count)
        for word in relaxed_selected:
@@ -569,10 +1020,13 @@ def select_initial_words(level: int, topic: str, count: int) -> List[str]:

 def main() -> None:
    args = parse_args()
+    args.lexicon = resolve_runtime_lexicon_path(args.lexicon)
    ensure_vocabulary(args)
    ensure_lexicon(args)
    ensure_semantic_lexicon(args)
    difficulty_level = parse_difficulty(args.difficulty)
+    active_topics = resolve_topics(args, difficulty_level)
+    ensure_babelnet_enrichment(args)
    initial_words = select_initial_words(difficulty_level, args.topic, args.initial_word_count)

    generator = CrosswordGenerator(
@@ -590,7 +1044,13 @@ def main() -> None:
    print(f"Intersezioni: {initial_state.intersections}")
    print(f"Dimensioni: {initial_state.width()} x {initial_state.height()} (diff={initial_state.shape_difference()})")
    print(f"Difficolta filler: {args.difficulty} -> livello {difficulty_level}")
-    print(f"Tema filler: {args.topic}")
+    print(f"Topic attivi: {', '.join(active_topics)}")
+    print(f"Lessico runtime: {args.lexicon.name}")
+    if getattr(args, "topic_seed_counts", None):
+        print(
+            "Parole-seme disponibili per topic: "
+            + ", ".join(f"{topic}={count}" for topic, count in args.topic_seed_counts.items())
+        )
    if args.seed is not None:
        print(f"Seed: {args.seed}")
    print()
@@ -600,6 +1060,9 @@ def main() -> None:
    print(", ".join(initial_words))

    if args.skip_fill:
+        initial_words_for_clues = [placement.word for placement in initial_state.placements]
+        enrich_words_for_definitions(args, initial_words_for_clues)
+        print_definitions(args, initial_state)
        return

    vocabulary = load_selected_vocabulary(args.vocabulary) if args.vocabulary else load_filtered_vocabulary(difficulty_level, args.topic)
@@ -632,6 +1095,10 @@ def main() -> None:
            direction = "orizzontale" if placement.direction == "H" else "verticale"
            print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}")

+    final_words = [placement.word for placement in final_state.placements]
+    enrich_words_for_definitions(args, final_words)
+    print_definitions(args, final_state)
+

 if __name__ == "__main__":
    main()
--- a/refine_lexicon_topics.py
+++ b/refine_lexicon_topics.py
@@ -0,0 +1,473 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+from build_enriched_lexicon import ENRICHED_LEXICON_OUTPUT_PATH
+
+
+# Default destination of the refined lexicon: a file placed next to this script.
+REFINED_LEXICON_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_refined.json")
+
+# Topic name -> lower-case keyword fragments. infer_topic_scores() counts how
+# often each fragment occurs in an entry's flattened text to score the topic.
+# Entries may be whole words, multi-word phrases or truncated stems
+# (e.g. "ecclesiast", "trasport").
+TOPIC_KEYWORDS: Dict[str, Tuple[str, ...]] = {
+    "religion": (
+        "abbazia",
+        "abate",
+        "arcivescovo",
+        "cappella",
+        "cardinale",
+        "chiesa",
+        "clero",
+        "convento",
+        "diocesi",
+        "ecclesiast",
+        "fede",
+        "frate",
+        "mistica",
+        "monaco",
+        "monastero",
+        "parrocchia",
+        "prete",
+        "religion",
+        "sacerdot",
+        "santo",
+        "vescovo",
+    ),
+    "ecclesiastical_hierarchy": (
+        "abate",
+        "arcivescovo",
+        "carica ecclesiastica",
+        "cardinale",
+        "clero",
+        "dignità ecclesiastica",
+        "ecclesiast",
+        "ordinazione",
+        "parroco",
+        "patriarca",
+        "pontefice",
+        "prete",
+        "priore",
+        "superiore del monastero",
+        "vescovo",
+    ),
+    "honorific_title": (
+        "carica",
+        "epiteto",
+        "nobile",
+        "onore",
+        "onorific",
+        "titolo",
+    ),
+    "mysticism": (
+        "asceta",
+        "contemplazione",
+        "estasi",
+        "mistica",
+        "mistico",
+        "monachesimo",
+        "spiritual",
+    ),
+    "geography": (
+        "borgo",
+        "città",
+        "comune",
+        "frazione",
+        "geografia",
+        "isola",
+        "località",
+        "paese",
+        "provincia",
+        "regione",
+        "stato",
+        "toponimo",
+        "valle",
+    ),
+    "transport": (
+        "aereo",
+        "aeroplano",
+        "auto",
+        "autobus",
+        "autocarro",
+        "barca",
+        "bicicletta",
+        "imbarcazione",
+        "locomotiva",
+        "motore",
+        "nave",
+        "pista",
+        "porto",
+        "stazione",
+        "traghetto",
+        "treno",
+        "trasport",
+        "veicolo",
+        "viaggio",
+    ),
+    "nature": (
+        "acqua",
+        "albero",
+        "animale",
+        "bosco",
+        "fiore",
+        "fiume",
+        "foresta",
+        "mare",
+        "montagna",
+        "natura",
+        "pianta",
+        "terra",
+    ),
+    "health": (
+        "ambulanza",
+        "anemia",
+        "cura",
+        "farmaco",
+        "malattia",
+        "medic",
+        "ospedale",
+        "paziente",
+        "salute",
+        "soccorso",
+        "terapia",
+    ),
+    "war": (
+        "arma",
+        "artiglieria",
+        "assalto",
+        "battaglia",
+        "bombard",
+        "esercito",
+        "fortezza",
+        "guerra",
+        "militare",
+        "soldato",
+        "trincea",
+    ),
+}
+
+# Function words (mostly Italian, plus "and"/"the") that cleanup_tag() rejects
+# when a slugified tag consists of nothing else.
+TAG_STOPWORDS = {
+    "and",
+    "con",
+    "da",
+    "dei",
+    "del",
+    "della",
+    "delle",
+    "dello",
+    "di",
+    "e",
+    "il",
+    "in",
+    "la",
+    "le",
+    "lo",
+    "nel",
+    "nella",
+    "per",
+    "su",
+    "the",
+    "un",
+    "una",
+}
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line of the lexicon refinement script.
+
+    Options: ``--input`` and ``--output`` (paths), ``--replace-general``
+    (flag) and ``--min-topic-score`` (int, default 40).
+    """
+    cli = argparse.ArgumentParser(
+        description="Genera un lessico raffinato con campi aggiuntivi per topic, tag semantici e sensi."
+    )
+    option_specs = (
+        (
+            "--input",
+            {
+                "type": Path,
+                "default": ENRICHED_LEXICON_OUTPUT_PATH,
+                "help": "File lessicale di partenza, tipicamente lexicon_it_enriched.json.",
+            },
+        ),
+        (
+            "--output",
+            {
+                "type": Path,
+                "default": REFINED_LEXICON_OUTPUT_PATH,
+                "help": "Nuovo file lessicale raffinato da generare.",
+            },
+        ),
+        (
+            "--replace-general",
+            {
+                "action": "store_true",
+                "help": "Se attivo, sostituisce topic=['general'] con i topic suggeriti quando la confidenza e alta.",
+            },
+        ),
+        (
+            "--min-topic-score",
+            {
+                "type": int,
+                "default": 40,
+                "help": "Punteggio minimo per promuovere un topic suggerito nei topics finali.",
+            },
+        ),
+    )
+    for flag, options in option_specs:
+        cli.add_argument(flag, **options)
+    return cli.parse_args()
+
+
+def load_json(path: Path) -> Dict[str, object]:
+    """Read ``path`` as UTF-8 text and return the decoded JSON document."""
+    with path.open("r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def write_json(path: Path, payload: Dict[str, object]) -> None:
+    """Serialise ``payload`` as indented, non-ASCII-escaped JSON into ``path`` (UTF-8)."""
+    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
+    with path.open("w", encoding="utf-8") as handle:
+        handle.write(serialized)
+
+
+def dedupe(items: Iterable[str]) -> List[str]:
+    """Return the stripped, non-empty items without case-insensitive duplicates.
+
+    Every item is converted with ``str`` and stripped; the first spelling of
+    each value is kept and the original order is preserved.
+    """
+    first_spelling: Dict[str, str] = {}
+    for raw in items:
+        label = str(raw).strip()
+        if label:
+            # setdefault keeps the spelling seen first for a given lower-case key.
+            first_spelling.setdefault(label.lower(), label)
+    return list(first_spelling.values())
+
+
+def slugify_tag(text: str) -> str:
+    """Turn free text into a lower-case ``snake_case`` ASCII slug.
+
+    Diacritics are folded to their base letter before slugging, so Italian
+    words keep all their letters ("città" -> "citta") instead of losing the
+    accented character. Every run of other characters becomes a single
+    underscore; leading and trailing underscores are removed.
+    """
+    import unicodedata
+
+    # NFKD splits "à" into "a" + combining accent; dropping the combining
+    # marks leaves the plain base letters.
+    decomposed = unicodedata.normalize("NFKD", text.strip().lower())
+    folded = "".join(char for char in decomposed if not unicodedata.combining(char))
+    return re.sub(r"[^a-z0-9]+", "_", folded).strip("_")
+
+
+def cleanup_tag(tag: str) -> str:
+    """Slugify ``tag``; return ``""`` for stopwords and for slugs of one character or less."""
+    slug = slugify_tag(tag)
+    if len(slug) > 1 and slug not in TAG_STOPWORDS:
+        return slug
+    return ""
+
+
+def flatten_text(entry: Dict[str, object]) -> str:
+    """Concatenate every descriptive string of an entry into one lower-case text.
+
+    Sources, in order: ``topics``; the semantic block (topics, glosses, then
+    each synset's definition and lemmas); the BabelNet block (synset refs,
+    then glosses/categories/domains/senses of the best synset and of every
+    listed synset). Parts are joined with single spaces.
+    """
+
+    def strings(container: Dict[str, object], key: str) -> List[str]:
+        # Missing or null lists are treated as empty.
+        return [str(item) for item in container.get(key, []) or []]
+
+    synset_fields = ("glosses", "categories", "domains", "senses")
+    parts: List[str] = strings(entry, "topics")
+
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        parts += strings(semantic, "semantic_topics")
+        parts += strings(semantic, "glosses")
+        for synset in semantic.get("synsets", []) or []:
+            if not isinstance(synset, dict):
+                continue
+            parts.append(str(synset.get("definition", "")))
+            parts += strings(synset, "lemmas")
+
+    babelnet = entry.get("babelnet", {})
+    if isinstance(babelnet, dict):
+        parts += strings(babelnet, "synset_refs")
+        best_synset = babelnet.get("best_synset", {})
+        described = [best_synset] if isinstance(best_synset, dict) else []
+        described += [item for item in babelnet.get("synsets", []) or [] if isinstance(item, dict)]
+        for synset in described:
+            for field in synset_fields:
+                parts += strings(synset, field)
+
+    return " ".join(parts).lower()
+
+
+def infer_topic_scores(entry: Dict[str, object]) -> Dict[str, int]:
+    """Score each known topic by keyword hits in the entry's flattened text.
+
+    Every hit is worth 12 points and a topic's score is capped at 100; topics
+    without hits are omitted.
+
+    A keyword only counts when it starts a word (it may be a stem such as
+    "trasport"). Plain substring counting credited unrelated words that
+    merely contain a keyword ("farmaco" for "arma", "amare" for "mare") and
+    counted "arcivescovo" a second time for "vescovo".
+    """
+    text = flatten_text(entry)
+    scores: Dict[str, int] = {}
+    for topic, keywords in TOPIC_KEYWORDS.items():
+        hits = 0
+        for keyword in keywords:
+            needle = keyword.lower()
+            if needle not in text:
+                # Cheap substring pre-check; most keywords are absent.
+                continue
+            # Reject matches preceded by a letter or digit. Underscores are
+            # allowed before a keyword so that category names such as
+            # "comuni_della_provincia" still match "provincia".
+            hits += len(re.findall(r"(?<![^\W_])" + re.escape(needle), text))
+        if hits:
+            scores[topic] = min(12 * hits, 100)
+    return scores
+
+
+def collect_semantic_tags(entry: Dict[str, object]) -> List[str]:
+    """Gather the cleaned, de-duplicated semantic tags of an entry.
+
+    Raw tags come from ``topics``, the semantic topics and relation terms,
+    and the categories/domains of the BabelNet best synset and listed
+    synsets. Each one goes through ``cleanup_tag`` before de-duplication.
+    """
+
+    def strings(container: Dict[str, object], key: str) -> List[str]:
+        # Missing or null lists are treated as empty.
+        return [str(item) for item in container.get(key, []) or []]
+
+    raw_tags: List[str] = strings(entry, "topics")
+
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        raw_tags += strings(semantic, "semantic_topics")
+        relation_terms = semantic.get("raw_relation_terms", {}) or {}
+        for group in relation_terms.values():
+            raw_tags += [str(item) for item in group or []]
+
+    babelnet = entry.get("babelnet", {})
+    if isinstance(babelnet, dict):
+        best_synset = babelnet.get("best_synset", {})
+        tagged = [best_synset] if isinstance(best_synset, dict) else []
+        tagged += [item for item in babelnet.get("synsets", []) or [] if isinstance(item, dict)]
+        for synset in tagged:
+            raw_tags += strings(synset, "categories")
+            raw_tags += strings(synset, "domains")
+
+    return [tag for tag in dedupe(map(cleanup_tag, raw_tags)) if tag]
+
+
+def collect_senses(entry: Dict[str, object], topic_scores: Dict[str, int]) -> List[Dict[str, object]]:
+    """Build the list of sense records for an entry.
+
+    One record is produced for every semantic synset with a non-empty
+    definition (fixed confidence 0.7) and one for the BabelNet best synset
+    when it has an id and at least one gloss. Topics scoring >= 50 in
+    ``topic_scores`` are attached to every record.
+
+    The BabelNet confidence is ``topic_score / 100`` clamped to [0.4, 0.95];
+    a missing, null or non-numeric ``topic_score`` counts as 0 instead of
+    raising. A null ``topic`` is ignored rather than stored as ``"None"``.
+    """
+    strong_topics = [topic for topic, score in topic_scores.items() if score >= 50]
+    senses: List[Dict[str, object]] = []
+
+    semantic = entry.get("semantic", {})
+    if isinstance(semantic, dict):
+        semantic_topics = list(semantic.get("semantic_topics", []) or [])
+        for synset in semantic.get("synsets", []) or []:
+            if not isinstance(synset, dict):
+                continue
+            definition = str(synset.get("definition", "")).strip()
+            if not definition:
+                continue
+            senses.append(
+                {
+                    "source": "semantic",
+                    "id": synset.get("id"),
+                    "definition": definition,
+                    "lemmas": dedupe(str(item) for item in synset.get("lemmas", []) or []),
+                    "topics": dedupe(semantic_topics + strong_topics),
+                    "confidence": 0.7,
+                }
+            )
+
+    babelnet = entry.get("babelnet", {})
+    if isinstance(babelnet, dict):
+        best_synset = babelnet.get("best_synset", {})
+        if isinstance(best_synset, dict) and best_synset.get("id"):
+            glosses = [str(item).strip() for item in best_synset.get("glosses", []) or [] if str(item).strip()]
+            if glosses:
+                try:
+                    topic_score = float(best_synset.get("topic_score") or 0)
+                except (TypeError, ValueError):
+                    topic_score = 0.0
+                # ``or ""`` keeps a null topic from being stringified to "None".
+                best_topic = str(best_synset.get("topic") or "").strip()
+                senses.append(
+                    {
+                        "source": "babelnet",
+                        "id": best_synset.get("id"),
+                        "definition": glosses[0],
+                        "lemmas": dedupe(str(item) for item in best_synset.get("senses", []) or []),
+                        "topics": dedupe([best_topic] + strong_topics),
+                        "confidence": round(min(max(topic_score / 100.0, 0.4), 0.95), 2),
+                    }
+                )
+
+    return senses
+
+
+def collect_geo_tags(entry: Dict[str, object]) -> List[str]:
+    """Return geographic hint tags derived from the entry's BabelNet categories.
+
+    One ``"toponym_possible"`` tag is collected for every category of every
+    synset dictionary in ``entry["babelnet"]["synsets"]`` whose lower-cased
+    text contains one of the geographic markers; the collected list is then
+    passed through ``dedupe``.
+
+    Args:
+        entry: Lexicon entry dictionary.
+
+    Returns:
+        The deduplicated tag list (built from an empty list when ``babelnet``
+        is not a dictionary).
+    """
+    geo_markers = ("comuni_", "province_", "regioni_", "città", "paesi", "località")
+    babelnet = entry.get("babelnet", {})
+    synsets = (babelnet.get("synsets", []) or []) if isinstance(babelnet, dict) else []
+
+    # Flatten all categories of the well-formed synsets into lower-cased text.
+    category_texts = (
+        str(category).lower()
+        for synset in synsets
+        if isinstance(synset, dict)
+        for category in synset.get("categories", []) or []
+    )
+    hits: List[str] = [
+        "toponym_possible" for text in category_texts if any(marker in text for marker in geo_markers)
+    ]
+    return dedupe(hits)
+
+
+def collect_name_tags(entry: Dict[str, object]) -> List[str]:
+    """Return name-related tags for the entry.
+
+    The only tag produced is ``"capitalized_form"``, emitted when the first
+    character of the stringified ``form`` value is uppercase.
+
+    Args:
+        entry: Lexicon entry dictionary.
+
+    Returns:
+        The tag list after passing through ``dedupe``.
+    """
+    initial = str(entry.get("form", ""))[:1]
+    found: List[str] = ["capitalized_form"] if initial.isupper() else []
+    return dedupe(found)
+
+
+def should_review(entry: Dict[str, object], topic_scores: Dict[str, int], senses: List[Dict[str, object]]) -> bool:
+    """Decide whether an entry should be flagged for human review.
+
+    The entry is flagged when at least one of these holds:
+
+    * its lower-cased topics are exactly ``["general"]`` and no topic scores
+      50 or more;
+    * its BabelNet status is ``"ambiguous"`` and the best topic score is
+      below 50;
+    * it has three or more senses and two or more topics scoring 50 or more.
+
+    Args:
+        entry: Lexicon entry dictionary; ``topics`` and ``babelnet`` are read.
+        topic_scores: Mapping from topic name to its score.
+        senses: Sense records collected for the entry.
+
+    Returns:
+        ``True`` when the entry needs review, ``False`` otherwise.
+    """
+    existing_topics = [str(topic).lower() for topic in entry.get("topics", []) or []]
+    best_score = max(topic_scores.values(), default=0)
+    strong_topics = [topic for topic, score in topic_scores.items() if score >= 50]
+
+    # A non-dictionary ``babelnet`` value is treated as "no status", matching
+    # the isinstance guards used by the other collectors, instead of raising
+    # AttributeError on ``.get``.
+    babelnet = entry.get("babelnet")
+    babelnet_status = str(babelnet.get("status", "")) if isinstance(babelnet, dict) else ""
+
+    if existing_topics == ["general"] and not strong_topics:
+        return True
+    if babelnet_status == "ambiguous" and best_score < 50:
+        return True
+    return len(senses) >= 3 and len(strong_topics) >= 2
+
+
+def promoted_topics(
+    existing_topics: List[str], topic_scores: Dict[str, int], replace_general: bool, min_topic_score: int
+) -> List[str]:
+    """Merge the entry's current topics with the topics inferred from scores.
+
+    Inferred topics are those scoring at least ``min_topic_score``, ordered by
+    descending score and then by name.
+
+    Args:
+        existing_topics: Topics currently assigned to the entry.
+        topic_scores: Mapping from topic name to its score.
+        replace_general: When true, an entry whose deduplicated topics are
+            exactly ``["general"]`` gets the inferred topics instead, provided
+            at least one was inferred.
+        min_topic_score: Minimum score for a topic to be inferred.
+
+    Returns:
+        The inferred topics alone (replacement case), otherwise the
+        deduplicated existing topics followed by the inferred ones.
+    """
+    ranked = sorted(topic_scores.items(), key=lambda item: (-item[1], item[0]))
+    inferred = [topic for topic, score in ranked if score >= min_topic_score]
+    existing_clean = dedupe(existing_topics)
+
+    only_general = existing_clean == ["general"]
+    if replace_general and only_general and inferred:
+        return inferred
+
+    merged = existing_clean + inferred
+    return dedupe(merged)
+
+
+def refine_entry(entry: Dict[str, object], replace_general: bool, min_topic_score: int) -> Dict[str, object]:
+    """Return an enriched deep copy of a lexicon entry.
+
+    The copy gets promoted ``topics`` plus the derived keys ``semantic_tags``,
+    ``senses``, ``topic_confidence``, ``topic_suggestions``, ``geo_tags``,
+    ``name_tags`` and ``needs_review``.
+
+    Args:
+        entry: Source entry; it is deep-copied before anything is written.
+        replace_general: Forwarded to ``promoted_topics``.
+        min_topic_score: Forwarded to ``promoted_topics``.
+
+    Returns:
+        The refined copy of ``entry``.
+    """
+    refined = deepcopy(entry)
+
+    # Every derived value is computed on the copy before any key is written.
+    topic_scores = infer_topic_scores(refined)
+    semantic_tags = collect_semantic_tags(refined)
+    senses = collect_senses(refined, topic_scores)
+    geo_tags = collect_geo_tags(refined)
+    name_tags = collect_name_tags(refined)
+    current_topics = [str(topic) for topic in refined.get("topics", []) or []]
+
+    refined["topics"] = promoted_topics(current_topics, topic_scores, replace_general, min_topic_score)
+
+    # Highest score first, ties broken alphabetically by topic name.
+    ranked = sorted(topic_scores.items(), key=lambda item: (-item[1], item[0]))
+    refined.update(
+        {
+            "semantic_tags": semantic_tags,
+            "senses": senses,
+            "topic_confidence": topic_scores,
+            "topic_suggestions": [topic for topic, _score in ranked],
+            "geo_tags": geo_tags,
+            "name_tags": name_tags,
+        }
+    )
+
+    # Evaluated last, so should_review sees the promoted topics.
+    refined["needs_review"] = should_review(refined, topic_scores, senses)
+    return refined
+
+
+def build_refined_lexicon(args: argparse.Namespace) -> Dict[str, object]:
+    """Load the input lexicon, refine each entry and wrap the result with metadata.
+
+    Args:
+        args: Parsed command-line namespace; ``input``, ``replace_general``
+            and ``min_topic_score`` are read here.
+
+    Returns:
+        A dictionary with a ``meta`` section (counts, options used, timestamp)
+        and the list of refined ``entries``.
+
+    Raises:
+        ValueError: If the loaded payload is not a dictionary containing an
+            ``entries`` key.
+    """
+    payload = load_json(args.input)
+    has_entries = isinstance(payload, dict) and "entries" in payload
+    if not has_entries:
+        raise ValueError(f"Lessico di input non valido: {args.input}")
+
+    refined_entries: List[Dict[str, object]] = []
+    for entry in payload.get("entries", []) or []:
+        # Items that are not dictionaries are left out of the output.
+        if isinstance(entry, dict):
+            refined_entries.append(refine_entry(entry, args.replace_general, args.min_topic_score))
+
+    review_count = 0
+    topicful_count = 0
+    for refined in refined_entries:
+        if refined.get("needs_review"):
+            review_count += 1
+        if len(refined.get("topic_suggestions", []) or []) > 0:
+            topicful_count += 1
+
+    meta: Dict[str, object] = {
+        "language": "it",
+        "version": 1,
+        "base_lexicon": args.input.name,
+        "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
+        "entry_count": len(refined_entries),
+        "replace_general": args.replace_general,
+        "min_topic_score": args.min_topic_score,
+        "review_count": review_count,
+        "topicful_count": topicful_count,
+    }
+    return {"meta": meta, "entries": refined_entries}
+
+
+def main() -> None:
+    """Run the refinement from the command line.
+
+    Parses the arguments, builds the refined lexicon, writes it to
+    ``args.output`` and prints a short summary of the resulting counts.
+    """
+    args = parse_args()
+    payload = build_refined_lexicon(args)
+    write_json(args.output, payload)
+
+    summary = (
+        f"Lessico raffinato generato: {args.output}",
+        f"Voci totali: {payload['meta']['entry_count']}",
+        f"Voci con suggerimenti di topic: {payload['meta']['topicful_count']}",
+        f"Voci marcate needs_review: {payload['meta']['review_count']}",
+    )
+    for line in summary:
+        print(line)
+
+
+# Execute only when run as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/run_babelnet_daily_batch.bat
+++ b/run_babelnet_daily_batch.bat
@@ -0,0 +1,5 @@
+@echo off
+rem Launcher for babelnet_daily_batch.py with fixed call limits and pause.
+rem Comments are placed after "echo off" so they are not printed to the console.
+rem setlocal/endlocal keep environment changes local to this script run.
+setlocal
+rem Switch to the directory that contains this script (the /d flag also changes
+rem the drive) so the relative Python script path below resolves.
+cd /d "%~dp0"
+rem NOTE(review): the limits presumably cap total and per-key API calls and the
+rem sleep value is presumably a pause in seconds; confirm in babelnet_daily_batch.py.
+python babelnet_daily_batch.py --api-call-limit 1900 --per-key-api-call-limit 950 --sleep 0.2
+endlocal