# Listing header captured from the repository file viewer:
#   file: cruciverba_1/main.py
#   size: 265 lines, 8.7 KiB
#   language: Python

from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Dict, List
from build_vocabulary import (
FILTERED_OUTPUT_PATH,
METADATA_OUTPUT_PATH,
OUTPUT_PATH,
build_vocabulary,
)
from build_lexicon import LEXICON_OUTPUT_PATH, build_lexicon
from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH, build_semantic_lexicon
from crossword_filler import CrosswordFiller, load_vocabulary, load_vocabulary_metadata
from crossword_generator import CrosswordGenerator, WORDS, render_grid
# Textual names accepted by --difficulty, mapped to the numeric level used when
# filtering the lexicon. Level 3 has no alias here, so it can only be requested
# by passing the number itself.
DIFFICULTY_ALIASES: Dict[str, int] = dict(
    easy=1,
    medium=2,
    hard=4,
    expert=5,
)

# Topic used when --topic is omitted, and the fallback topic when the requested
# one yields no words.
DEFAULT_TOPIC = "general"
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the crossword generator/filler.

    Args:
        argv: Argument strings to parse instead of the process command line.
            With the default ``None`` argparse reads ``sys.argv[1:]``, exactly
            as before; passing an explicit list lets tests and other entry
            points drive the parser without touching ``sys.argv``.

    Returns:
        The populated namespace. ``difficulty`` is left as the raw string
        typed by the user and ``vocabulary`` is a ``Path`` or ``None``.
    """
    parser = argparse.ArgumentParser(description="Generatore e filler di cruciverba.")

    # Flags that force regeneration of the on-disk lexical resources.
    parser.add_argument(
        "--build-vocabulary",
        action="store_true",
        help="Rigenera i file lessicali intermedi: vocabolario esteso, filtrato e metadati.",
    )
    parser.add_argument(
        "--build-lexicon",
        action="store_true",
        help="Rigenera `lexicon_it.json` prima dell'esecuzione.",
    )
    parser.add_argument(
        "--skip-fill",
        action="store_true",
        help="Genera solo la griglia iniziale e salta il riempimento con il filler.",
    )
    parser.add_argument(
        "--build-semantic-lexicon",
        action="store_true",
        help="Rigenera `lexicon_it_semantic.json` arricchendo il lessico con IWN-OMW/ItalWordNet.",
    )

    # Filler inputs.
    parser.add_argument(
        "--vocabulary",
        type=Path,
        default=None,
        help="Percorso opzionale a un vocabolario testuale personalizzato da usare al posto di quello di default.",
    )
    parser.add_argument(
        "--target-empty-ratio",
        type=float,
        default=1 / 6,
        help="Rapporto target di celle vuote residue dopo il filler. Esempio: 0.1667 lascia circa un sesto di celle vuote.",
    )

    # Tuning knobs of the initial grid generation.
    parser.add_argument(
        "--time-limit",
        type=float,
        default=8.0,
        help="Tempo massimo in secondi per la fase di generazione iniziale della griglia.",
    )
    parser.add_argument(
        "--max-candidates",
        type=int,
        default=12,
        help="Numero massimo di candidati esplorati per parola nella generazione iniziale.",
    )
    parser.add_argument(
        "--diffxy",
        type=int,
        default=7,
        help="Differenza massima preferita tra larghezza e altezza della griglia iniziale.",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Seed casuale per ottenere varianti riproducibili del cruciverba: stesso seed, stesso risultato.",
    )

    # Kept as free text here; the value is validated separately so that both
    # aliases and plain numbers are accepted.
    parser.add_argument(
        "--difficulty",
        default="medium",
        help="Difficolta lessicale del filler. Alias testuali: easy, medium, hard, expert. Internamente mappati a livelli numerici 1-5.",
    )
    parser.add_argument(
        "--topic",
        default=DEFAULT_TOPIC,
        help="Tema del cruciverba. Attualmente supporta i topic presenti nel lessico, ad esempio: general, nature, animals, actions, abstract.",
    )
    return parser.parse_args(argv)
def ensure_vocabulary(args: argparse.Namespace) -> None:
    """Regenerate the vocabulary files when asked to, or when any is missing.

    The rebuild runs if ``args.build_vocabulary`` is set, or if either the
    filtered vocabulary file or the metadata file does not exist. After a
    rebuild a short summary is printed; otherwise the function is a no-op.
    """
    # Nothing to do only when the rebuild was not requested AND both
    # artefacts are already on disk.
    if (
        not args.build_vocabulary
        and FILTERED_OUTPUT_PATH.exists()
        and METADATA_OUTPUT_PATH.exists()
    ):
        return

    summary = build_vocabulary()

    print("Vocabolario rigenerato")
    print(f"- esteso: {OUTPUT_PATH}")
    print(f"- filtrato: {FILTERED_OUTPUT_PATH}")
    print(f"- metadati: {METADATA_OUTPUT_PATH}")
    print(f"- parole estese: {summary['extended_words']}")
    print(f"- parole filtrate: {summary['filtered_words']}")
def ensure_lexicon(args: argparse.Namespace) -> None:
    """Rebuild and save the lexicon JSON when requested or when it is absent.

    The lexicon returned by ``build_lexicon()`` is serialised as indented,
    non-ASCII-escaped JSON to ``LEXICON_OUTPUT_PATH`` and a short report is
    printed. Nothing happens if the file exists and ``args.build_lexicon``
    is not set.
    """
    if not args.build_lexicon and LEXICON_OUTPUT_PATH.exists():
        return

    lexicon = build_lexicon()
    serialized = json.dumps(lexicon, ensure_ascii=False, indent=2)
    LEXICON_OUTPUT_PATH.write_text(serialized, encoding="utf-8")

    print("Lessico rigenerato")
    print(f"- file: {LEXICON_OUTPUT_PATH}")
    print(f"- voci: {lexicon['meta']['entry_count']}")
def ensure_semantic_lexicon(args: argparse.Namespace) -> None:
    """Rebuild and save the semantic lexicon when requested or when absent.

    The result of ``build_semantic_lexicon()`` is written as indented JSON to
    ``SEMANTIC_LEXICON_OUTPUT_PATH``. The printed report includes how many
    entries carry a truthy ``semantic.matched`` value.
    """
    if not args.build_semantic_lexicon and SEMANTIC_LEXICON_OUTPUT_PATH.exists():
        return

    lexicon = build_semantic_lexicon()
    serialized = json.dumps(lexicon, ensure_ascii=False, indent=2)
    SEMANTIC_LEXICON_OUTPUT_PATH.write_text(serialized, encoding="utf-8")

    # Count the entries flagged as semantically matched; entries without a
    # "semantic" section count as unmatched.
    matched = 0
    for entry in lexicon["entries"]:
        if entry.get("semantic", {}).get("matched"):
            matched += 1

    print("Lessico semantico rigenerato")
    print(f"- file: {SEMANTIC_LEXICON_OUTPUT_PATH}")
    print(f"- voci: {lexicon['meta']['entry_count']}")
    print(f"- match semantici: {matched}")
def parse_difficulty(value: str) -> int:
    """Translate a ``--difficulty`` value into a numeric level.

    Args:
        value: Either one of the aliases in ``DIFFICULTY_ALIASES`` (matched
            case-insensitively, ignoring surrounding whitespace) or an
            integer written as text.

    Returns:
        The alias's mapped level, or the parsed integer when it lies in 1..5.

    Raises:
        SystemExit: If the value is neither a known alias nor an integer, or
            if the integer is outside 1..5.
    """
    normalized = str(value).strip().lower()

    alias_level = DIFFICULTY_ALIASES.get(normalized)
    if alias_level is not None:
        return alias_level

    try:
        numeric = int(normalized)
    except ValueError as exc:
        raise SystemExit(
            "Valore non valido per --difficulty. Usa easy, medium, hard, expert oppure un intero tra 1 e 5."
        ) from exc

    if numeric < 1 or numeric > 5:
        raise SystemExit("Il valore numerico di --difficulty deve essere compreso tra 1 e 5.")
    return numeric
def load_selected_vocabulary(path: Path | None) -> List[str]:
if path is None:
return load_vocabulary()
return path.read_text(encoding="utf-8").splitlines()
def load_filtered_vocabulary(level: int, topic: str) -> List[str]:
    """Select lexicon words by crossword eligibility, difficulty and topic.

    The lexicon JSON is read from ``LEXICON_OUTPUT_PATH`` (and built first if
    the file is missing). An entry is kept when ``allowed_in_crossword`` is
    truthy, its ``difficulty_word`` (5 when absent) does not exceed ``level``
    and the requested topic appears among its lower-cased ``topics``.

    Args:
        level: Highest word difficulty to accept.
        topic: Requested topic; compared after stripping and lower-casing.

    Returns:
        The ``form`` of every matching entry, in lexicon order. If the
        requested topic matches nothing and is not ``DEFAULT_TOPIC``, the
        words for ``DEFAULT_TOPIC`` are returned instead.
    """
    if not LEXICON_OUTPUT_PATH.exists():
        LEXICON_OUTPUT_PATH.write_text(
            json.dumps(build_lexicon(), ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    raw_json = LEXICON_OUTPUT_PATH.read_text(encoding="utf-8")
    payload = json.loads(raw_json)
    wanted_topic = topic.strip().lower()

    def collect(selected_topic: str) -> List[str]:
        # Checks run in the same order for every entry: eligibility first,
        # then difficulty, then topic membership.
        forms: List[str] = []
        for entry in payload.get("entries", []):
            if not entry.get("allowed_in_crossword", False):
                continue
            if int(entry.get("difficulty_word", 5)) > level:
                continue
            entry_topics = [str(item).lower() for item in entry.get("topics", [])]
            if selected_topic in entry_topics:
                forms.append(entry["form"])
        return forms

    primary = collect(wanted_topic)
    if primary or wanted_topic == DEFAULT_TOPIC:
        return primary
    # Requested topic produced nothing: fall back to the default topic.
    return collect(DEFAULT_TOPIC)
def main() -> None:
    """Run the command-line workflow: build resources, generate, then fill.

    Steps, in order:
      1. Parse the options and validate ``--difficulty``.
      2. Make sure the vocabulary, lexicon and semantic lexicon files exist
         (rebuilding them when requested).
      3. Generate the initial grid from ``WORDS`` and print it.
      4. Unless ``--skip-fill`` is given, fill the grid with either the custom
         vocabulary or the lexicon filtered by difficulty and topic, then
         print the final grid and the words the filler added.

    Raises:
        SystemExit: If ``--difficulty`` is not a valid alias or level.
    """
    args = parse_args()

    # Validate the difficulty before the rebuild steps below, so a mistyped
    # value is reported immediately instead of after files were regenerated.
    difficulty_level = parse_difficulty(args.difficulty)

    ensure_vocabulary(args)
    ensure_lexicon(args)
    ensure_semantic_lexicon(args)

    generator = CrosswordGenerator(
        WORDS,
        diffxy=args.diffxy,
        time_limit_seconds=args.time_limit,
        max_candidates_per_word=args.max_candidates,
        seed=args.seed,
    )
    initial_state = generator.solve()

    print("Griglia iniziale")
    print(f"Parole inserite: {initial_state.placed_words}/{len(generator.words)}")
    print(f"Intersezioni: {initial_state.intersections}")
    print(f"Dimensioni: {initial_state.width()} x {initial_state.height()} (diff={initial_state.shape_difference()})")
    print(f"Difficolta filler: {args.difficulty} -> livello {difficulty_level}")
    print(f"Tema filler: {args.topic}")
    if args.seed is not None:
        print(f"Seed: {args.seed}")
    print()
    print(render_grid(initial_state.grid, initial_state.placements))

    if args.skip_fill:
        return

    # A custom vocabulary file bypasses the difficulty/topic filtering.
    if args.vocabulary is not None:
        vocabulary = load_selected_vocabulary(args.vocabulary)
    else:
        vocabulary = load_filtered_vocabulary(difficulty_level, args.topic)
    metadata = load_vocabulary_metadata()

    filler = CrosswordFiller(
        initial_state,
        vocabulary,
        target_empty_ratio=args.target_empty_ratio,
        vocabulary_metadata=metadata,
        seed=args.seed,
    )
    final_state = filler.fill()

    print()
    print("Griglia riempita")
    print(f"Parole totali: {final_state.placed_words}")
    print(f"Intersezioni totali: {final_state.intersections}")
    print(f"Dimensioni: {final_state.width()} x {final_state.height()} (diff={final_state.shape_difference()})")
    print()
    print(render_grid(final_state.grid, final_state.placements))

    if filler.added_words:
        print()
        print("Parole aggiunte dal filler:")
        for index, placement in enumerate(filler.added_words, start=1):
            direction = "orizzontale" if placement.direction == "H" else "verticale"
            print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}")


if __name__ == "__main__":
    main()