feat: aggiunge CLI unificata, build vocabolario e filtro lessicale
This commit is contained in:
451
crossword_filler.py
Normal file
451
crossword_filler.py
Normal file
@@ -0,0 +1,451 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import time
|
||||
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from crossword_generator import (
|
||||
DIFFXY,
|
||||
HORIZONTAL,
|
||||
VERTICAL,
|
||||
CrosswordGenerator,
|
||||
CrosswordState,
|
||||
Placement,
|
||||
WORDS,
|
||||
render_grid,
|
||||
)
|
||||
|
||||
|
||||
TARGET_EMPTY_RATIO = 1 / 6
|
||||
MIN_WORD_LENGTH = 2
|
||||
MAX_NO_PROGRESS_STEPS = 150
|
||||
MAX_SLOT_CANDIDATES = 8
|
||||
EXTENDED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_esteso.txt")
|
||||
FILTERED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_filtrato.txt")
|
||||
METADATA_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_metadata.json")
|
||||
VOCAB_PATH = (
|
||||
FILTERED_VOCAB_PATH
|
||||
if FILTERED_VOCAB_PATH.exists()
|
||||
else EXTENDED_VOCAB_PATH
|
||||
if EXTENDED_VOCAB_PATH.exists()
|
||||
else Path(__file__).with_name("vocaboli_it.txt")
|
||||
)
|
||||
|
||||
Coordinate = Tuple[int, int]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FillSlot:
|
||||
x: int
|
||||
y: int
|
||||
direction: str
|
||||
length: int
|
||||
pattern: str
|
||||
fixed_letters: int
|
||||
empty_cells: int
|
||||
candidate_count: int
|
||||
|
||||
@property
|
||||
def cells(self) -> List[Coordinate]:
|
||||
if self.direction == HORIZONTAL:
|
||||
return [(self.x + offset, self.y) for offset in range(self.length)]
|
||||
return [(self.x, self.y + offset) for offset in range(self.length)]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FillCandidate:
|
||||
word: str
|
||||
slot: FillSlot
|
||||
new_letters: int
|
||||
reused_letters: int
|
||||
local_score: Tuple[int, int, int]
|
||||
|
||||
|
||||
class CrosswordFiller:
|
||||
def __init__(
|
||||
self,
|
||||
state: CrosswordState,
|
||||
vocabulary: Sequence[str],
|
||||
*,
|
||||
target_empty_ratio: float = TARGET_EMPTY_RATIO,
|
||||
vocabulary_metadata: Optional[Dict[str, Dict[str, object]]] = None,
|
||||
) -> None:
|
||||
self.state = state.copy()
|
||||
self.initial_state = state.copy()
|
||||
self.target_empty_ratio = target_empty_ratio
|
||||
self.used_words: Set[str] = {placement.word for placement in self.state.placements}
|
||||
self.added_words: List[Placement] = []
|
||||
self.vocabulary = self._normalize_vocabulary(vocabulary)
|
||||
self.words_by_length = self._index_vocabulary(self.vocabulary)
|
||||
self.vocabulary_metadata = vocabulary_metadata or {}
|
||||
self.bounds = self._compute_bounds(self.state.grid)
|
||||
self.total_cells = self._area(self.bounds)
|
||||
self.target_empty_cells = max(0, int(round(self.total_cells * self.target_empty_ratio)))
|
||||
self.nodes_visited = 0
|
||||
self.last_spinner_update = 0.0
|
||||
self.spinner_frames = ["-", "/", "|", "\\"]
|
||||
self.spinner_index = 0
|
||||
self.started_at = 0.0
|
||||
self.last_word = "-"
|
||||
|
||||
@staticmethod
|
||||
def _normalize_vocabulary(words: Sequence[str]) -> List[str]:
|
||||
normalized: List[str] = []
|
||||
seen: Set[str] = set()
|
||||
for word in words:
|
||||
clean = word.strip().lower()
|
||||
if len(clean) < MIN_WORD_LENGTH or not clean.isalpha() or clean in seen:
|
||||
continue
|
||||
normalized.append(clean)
|
||||
seen.add(clean)
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
def _index_vocabulary(words: Sequence[str]) -> Dict[int, List[str]]:
|
||||
index: Dict[int, List[str]] = {}
|
||||
for word in words:
|
||||
index.setdefault(len(word), []).append(word)
|
||||
return index
|
||||
|
||||
@staticmethod
|
||||
def _compute_bounds(grid: Dict[Coordinate, str]) -> Tuple[int, int, int, int]:
|
||||
xs = [x for x, _ in grid]
|
||||
ys = [y for _, y in grid]
|
||||
return min(xs), min(ys), max(xs), max(ys)
|
||||
|
||||
@staticmethod
|
||||
def _area(bounds: Tuple[int, int, int, int]) -> int:
|
||||
x_min, y_min, x_max, y_max = bounds
|
||||
return (x_max - x_min + 1) * (y_max - y_min + 1)
|
||||
|
||||
def fill(self) -> CrosswordState:
|
||||
self.started_at = time.perf_counter()
|
||||
self.last_spinner_update = self.started_at
|
||||
no_progress_steps = 0
|
||||
|
||||
while self.empty_cells_count() > self.target_empty_cells and no_progress_steps < MAX_NO_PROGRESS_STEPS:
|
||||
self.nodes_visited += 1
|
||||
slots = self._collect_slots()
|
||||
self._tick_spinner(slots_count=len(slots))
|
||||
|
||||
if not slots:
|
||||
break
|
||||
|
||||
progress = False
|
||||
for slot in slots[:MAX_SLOT_CANDIDATES]:
|
||||
candidate = self._best_candidate_for_slot(slot)
|
||||
if candidate is None:
|
||||
continue
|
||||
self._apply_candidate(candidate)
|
||||
progress = True
|
||||
no_progress_steps = 0
|
||||
break
|
||||
|
||||
if not progress:
|
||||
no_progress_steps += 1
|
||||
|
||||
self._clear_spinner()
|
||||
return self.state
|
||||
|
||||
def empty_cells_count(self) -> int:
|
||||
x_min, y_min, x_max, y_max = self.bounds
|
||||
empty = 0
|
||||
for y in range(y_min, y_max + 1):
|
||||
for x in range(x_min, x_max + 1):
|
||||
if (x, y) not in self.state.grid:
|
||||
empty += 1
|
||||
return empty
|
||||
|
||||
def coverage_ratio(self) -> float:
|
||||
return 1.0 - (self.empty_cells_count() / self.total_cells)
|
||||
|
||||
def _collect_slots(self) -> List[FillSlot]:
|
||||
slots: List[FillSlot] = []
|
||||
x_min, y_min, x_max, y_max = self.bounds
|
||||
for y in range(y_min, y_max + 1):
|
||||
for x in range(x_min, x_max + 1):
|
||||
if (x, y) in self.state.grid:
|
||||
continue
|
||||
for direction in (HORIZONTAL, VERTICAL):
|
||||
slots.extend(self._slots_from_start(x, y, direction))
|
||||
|
||||
unique: Dict[Tuple[int, int, str, int], FillSlot] = {}
|
||||
for slot in slots:
|
||||
key = (slot.x, slot.y, slot.direction, slot.length)
|
||||
current = unique.get(key)
|
||||
if current is None or self._slot_priority(slot) > self._slot_priority(current):
|
||||
unique[key] = slot
|
||||
|
||||
collected = list(unique.values())
|
||||
collected.sort(key=self._slot_priority, reverse=True)
|
||||
return collected
|
||||
|
||||
def _slots_from_start(self, x: int, y: int, direction: str) -> Iterable[FillSlot]:
|
||||
dx, dy = (1, 0) if direction == HORIZONTAL else (0, 1)
|
||||
x_min, y_min, x_max, y_max = self.bounds
|
||||
|
||||
prev_cell = (x - dx, y - dy)
|
||||
if self._inside_bounds(prev_cell) and prev_cell in self.state.grid:
|
||||
return []
|
||||
|
||||
max_length = 0
|
||||
cursor_x = x
|
||||
cursor_y = y
|
||||
while x_min <= cursor_x <= x_max and y_min <= cursor_y <= y_max:
|
||||
max_length += 1
|
||||
cursor_x += dx
|
||||
cursor_y += dy
|
||||
|
||||
slots: List[FillSlot] = []
|
||||
for length in range(max_length, MIN_WORD_LENGTH - 1, -1):
|
||||
end_cell = (x + dx * length, y + dy * length)
|
||||
if self._inside_bounds(end_cell) and end_cell in self.state.grid:
|
||||
continue
|
||||
|
||||
pattern_chars: List[str] = []
|
||||
fixed_letters = 0
|
||||
empty_cells = 0
|
||||
for offset in range(length):
|
||||
cell = (x + dx * offset, y + dy * offset)
|
||||
letter = self.state.grid.get(cell)
|
||||
if letter is None:
|
||||
pattern_chars.append(".")
|
||||
empty_cells += 1
|
||||
else:
|
||||
pattern_chars.append(letter)
|
||||
fixed_letters += 1
|
||||
|
||||
if empty_cells == 0:
|
||||
continue
|
||||
|
||||
pattern = "".join(pattern_chars)
|
||||
candidate_count = self._count_candidates(pattern)
|
||||
if candidate_count == 0:
|
||||
continue
|
||||
|
||||
slots.append(
|
||||
FillSlot(
|
||||
x=x,
|
||||
y=y,
|
||||
direction=direction,
|
||||
length=length,
|
||||
pattern=pattern,
|
||||
fixed_letters=fixed_letters,
|
||||
empty_cells=empty_cells,
|
||||
candidate_count=candidate_count,
|
||||
)
|
||||
)
|
||||
|
||||
return slots
|
||||
|
||||
def _slot_priority(self, slot: FillSlot) -> Tuple[int, int, int, int, int]:
|
||||
return (
|
||||
slot.fixed_letters,
|
||||
-slot.candidate_count,
|
||||
slot.length,
|
||||
-slot.empty_cells,
|
||||
1 if slot.direction == HORIZONTAL else 0,
|
||||
)
|
||||
|
||||
def _count_candidates(self, pattern: str) -> int:
|
||||
count = 0
|
||||
for word in self.words_by_length.get(len(pattern), []):
|
||||
if word in self.used_words:
|
||||
continue
|
||||
if self._matches_pattern(word, pattern):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
@staticmethod
|
||||
def _matches_pattern(word: str, pattern: str) -> bool:
|
||||
return all(p == "." or p == w for w, p in zip(word, pattern))
|
||||
|
||||
def _best_candidate_for_slot(self, slot: FillSlot) -> Optional[FillCandidate]:
|
||||
candidates: List[FillCandidate] = []
|
||||
for word in self.words_by_length.get(slot.length, []):
|
||||
if word in self.used_words or not self._matches_pattern(word, slot.pattern):
|
||||
continue
|
||||
if not self._placement_is_valid(slot, word):
|
||||
continue
|
||||
new_letters = sum(1 for cell in slot.cells if cell not in self.state.grid)
|
||||
reused_letters = slot.fixed_letters
|
||||
local_score = (
|
||||
reused_letters,
|
||||
new_letters,
|
||||
self._word_quality(word),
|
||||
len(set(word)),
|
||||
)
|
||||
candidates.append(
|
||||
FillCandidate(
|
||||
word=word,
|
||||
slot=slot,
|
||||
new_letters=new_letters,
|
||||
reused_letters=reused_letters,
|
||||
local_score=local_score,
|
||||
)
|
||||
)
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
candidates.sort(key=lambda item: item.local_score, reverse=True)
|
||||
return candidates[0]
|
||||
|
||||
def _word_quality(self, word: str) -> int:
|
||||
metadata = self.vocabulary_metadata.get(word)
|
||||
if not metadata:
|
||||
return 0
|
||||
try:
|
||||
return int(metadata.get("quality", 0))
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
def _placement_is_valid(self, slot: FillSlot, word: str) -> bool:
|
||||
dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1)
|
||||
before = (slot.x - dx, slot.y - dy)
|
||||
after = (slot.x + dx * slot.length, slot.y + dy * slot.length)
|
||||
if self._inside_bounds(before) and before in self.state.grid:
|
||||
return False
|
||||
if self._inside_bounds(after) and after in self.state.grid:
|
||||
return False
|
||||
|
||||
intersects_existing = False
|
||||
for offset, cell in enumerate(slot.cells):
|
||||
current = self.state.grid.get(cell)
|
||||
letter = word[offset]
|
||||
if current is not None and current != letter:
|
||||
return False
|
||||
if current == letter:
|
||||
intersects_existing = True
|
||||
|
||||
return intersects_existing or slot.fixed_letters == 0
|
||||
|
||||
def _apply_candidate(self, candidate: FillCandidate) -> None:
|
||||
slot = candidate.slot
|
||||
dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1)
|
||||
intersections = 0
|
||||
for offset, letter in enumerate(candidate.word):
|
||||
cell = (slot.x + dx * offset, slot.y + dy * offset)
|
||||
if cell in self.state.grid:
|
||||
intersections += 1
|
||||
self.state.grid[cell] = letter
|
||||
|
||||
placement = Placement(
|
||||
word=candidate.word,
|
||||
x=slot.x,
|
||||
y=slot.y,
|
||||
direction=slot.direction,
|
||||
intersections=intersections,
|
||||
)
|
||||
self.state.placements.append(placement)
|
||||
self.state.intersections += intersections
|
||||
self.used_words.add(candidate.word)
|
||||
self.added_words.append(placement)
|
||||
self.last_word = candidate.word
|
||||
print(
|
||||
f"\n[fill] inserita '{candidate.word}' "
|
||||
f"in {slot.direction} da ({slot.x}, {slot.y}), "
|
||||
f"nuove={candidate.new_letters}, intersezioni={intersections}, "
|
||||
f"copertura={self.coverage_ratio() * 100:.1f}%",
|
||||
file=sys.stderr,
|
||||
flush=True,
|
||||
)
|
||||
|
||||
def _inside_bounds(self, cell: Coordinate) -> bool:
|
||||
x_min, y_min, x_max, y_max = self.bounds
|
||||
x, y = cell
|
||||
return x_min <= x <= x_max and y_min <= y <= y_max
|
||||
|
||||
def _tick_spinner(self, *, slots_count: int) -> None:
|
||||
now = time.perf_counter()
|
||||
if now - self.last_spinner_update < 0.08:
|
||||
return
|
||||
|
||||
frame = self.spinner_frames[self.spinner_index]
|
||||
elapsed = now - self.started_at
|
||||
message = (
|
||||
f"\r{frame} fill... "
|
||||
f"slot={slots_count} "
|
||||
f"vuote={self.empty_cells_count()}/{self.total_cells} "
|
||||
f"target={self.target_empty_cells} "
|
||||
f"aggiunte={len(self.added_words)} "
|
||||
f"ultima={self.last_word} "
|
||||
f"t={elapsed:0.1f}s"
|
||||
)
|
||||
print(message, end="", file=sys.stderr, flush=True)
|
||||
self.spinner_index = (self.spinner_index + 1) % len(self.spinner_frames)
|
||||
self.last_spinner_update = now
|
||||
|
||||
@staticmethod
|
||||
def _clear_spinner() -> None:
|
||||
print("\r" + " " * 140 + "\r", end="", file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
def load_vocabulary(path: Path = VOCAB_PATH) -> List[str]:
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"Vocabolario non trovato: {path}")
|
||||
return path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
|
||||
def load_vocabulary_metadata(path: Path = METADATA_VOCAB_PATH) -> Dict[str, Dict[str, object]]:
|
||||
if not path.exists():
|
||||
return {}
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def summarize_fill(initial_state: CrosswordState, final_state: CrosswordState) -> str:
|
||||
initial_bounds = (
|
||||
initial_state.width(),
|
||||
initial_state.height(),
|
||||
initial_state.shape_difference(),
|
||||
)
|
||||
final_bounds = (
|
||||
final_state.width(),
|
||||
final_state.height(),
|
||||
final_state.shape_difference(),
|
||||
)
|
||||
return (
|
||||
f"Riempimento completato\n"
|
||||
f"Parole iniziali: {initial_state.placed_words}\n"
|
||||
f"Parole finali: {final_state.placed_words}\n"
|
||||
f"Intersezioni finali: {final_state.intersections}\n"
|
||||
f"Dimensioni iniziali: {initial_bounds[0]}x{initial_bounds[1]} (diff={initial_bounds[2]})\n"
|
||||
f"Dimensioni finali: {final_bounds[0]}x{final_bounds[1]} (diff={final_bounds[2]})\n"
|
||||
f"Celle vuote residue: {sum(1 for _ in iter_empty_cells(final_state))}"
|
||||
)
|
||||
|
||||
|
||||
def iter_empty_cells(state: CrosswordState) -> Iterable[Coordinate]:
|
||||
x_min, y_min, x_max, y_max = state.bounds()
|
||||
for y in range(y_min, y_max + 1):
|
||||
for x in range(x_min, x_max + 1):
|
||||
if (x, y) not in state.grid:
|
||||
yield (x, y)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
vocabulary = load_vocabulary()
|
||||
vocabulary_metadata = load_vocabulary_metadata()
|
||||
|
||||
generator = CrosswordGenerator(WORDS, diffxy=DIFFXY)
|
||||
initial_state = generator.solve()
|
||||
|
||||
filler = CrosswordFiller(initial_state, vocabulary, vocabulary_metadata=vocabulary_metadata)
|
||||
final_state = filler.fill()
|
||||
|
||||
print(summarize_fill(initial_state, final_state))
|
||||
print()
|
||||
print(render_grid(final_state.grid, final_state.placements))
|
||||
if filler.added_words:
|
||||
print()
|
||||
print("Parole aggiunte dal filler:")
|
||||
for index, placement in enumerate(filler.added_words, start=1):
|
||||
direction = "orizzontale" if placement.direction == HORIZONTAL else "verticale"
|
||||
print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user