from __future__ import annotations from dataclasses import dataclass import json from pathlib import Path import sys import time from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple from crossword_generator import ( DIFFXY, HORIZONTAL, VERTICAL, CrosswordGenerator, CrosswordState, Placement, WORDS, render_grid, ) TARGET_EMPTY_RATIO = 1 / 6 MIN_WORD_LENGTH = 2 MAX_NO_PROGRESS_STEPS = 150 MAX_SLOT_CANDIDATES = 8 EXTENDED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_esteso.txt") FILTERED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_filtrato.txt") METADATA_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_metadata.json") VOCAB_PATH = ( FILTERED_VOCAB_PATH if FILTERED_VOCAB_PATH.exists() else EXTENDED_VOCAB_PATH if EXTENDED_VOCAB_PATH.exists() else Path(__file__).with_name("vocaboli_it.txt") ) Coordinate = Tuple[int, int] @dataclass(frozen=True) class FillSlot: x: int y: int direction: str length: int pattern: str fixed_letters: int empty_cells: int candidate_count: int @property def cells(self) -> List[Coordinate]: if self.direction == HORIZONTAL: return [(self.x + offset, self.y) for offset in range(self.length)] return [(self.x, self.y + offset) for offset in range(self.length)] @dataclass(frozen=True) class FillCandidate: word: str slot: FillSlot new_letters: int reused_letters: int local_score: Tuple[int, int, int] class CrosswordFiller: def __init__( self, state: CrosswordState, vocabulary: Sequence[str], *, target_empty_ratio: float = TARGET_EMPTY_RATIO, vocabulary_metadata: Optional[Dict[str, Dict[str, object]]] = None, ) -> None: self.state = state.copy() self.initial_state = state.copy() self.target_empty_ratio = target_empty_ratio self.used_words: Set[str] = {placement.word for placement in self.state.placements} self.added_words: List[Placement] = [] self.vocabulary = self._normalize_vocabulary(vocabulary) self.words_by_length = self._index_vocabulary(self.vocabulary) self.vocabulary_metadata = vocabulary_metadata or {} self.bounds = self._compute_bounds(self.state.grid) self.total_cells = self._area(self.bounds) self.target_empty_cells = max(0, int(round(self.total_cells * self.target_empty_ratio))) self.nodes_visited = 0 self.last_spinner_update = 0.0 self.spinner_frames = ["-", "/", "|", "\\"] self.spinner_index = 0 self.started_at = 0.0 self.last_word = "-" @staticmethod def _normalize_vocabulary(words: Sequence[str]) -> List[str]: normalized: List[str] = [] seen: Set[str] = set() for word in words: clean = word.strip().lower() if len(clean) < MIN_WORD_LENGTH or not clean.isalpha() or clean in seen: continue normalized.append(clean) seen.add(clean) return normalized @staticmethod def _index_vocabulary(words: Sequence[str]) -> Dict[int, List[str]]: index: Dict[int, List[str]] = {} for word in words: index.setdefault(len(word), []).append(word) return index @staticmethod def _compute_bounds(grid: Dict[Coordinate, str]) -> Tuple[int, int, int, int]: xs = [x for x, _ in grid] ys = [y for _, y in grid] return min(xs), min(ys), max(xs), max(ys) @staticmethod def _area(bounds: Tuple[int, int, int, int]) -> int: x_min, y_min, x_max, y_max = bounds return (x_max - x_min + 1) * (y_max - y_min + 1) def fill(self) -> CrosswordState: self.started_at = time.perf_counter() self.last_spinner_update = self.started_at no_progress_steps = 0 while self.empty_cells_count() > self.target_empty_cells and no_progress_steps < MAX_NO_PROGRESS_STEPS: self.nodes_visited += 1 slots = self._collect_slots() self._tick_spinner(slots_count=len(slots)) if not slots: break progress = False for slot in slots[:MAX_SLOT_CANDIDATES]: candidate = self._best_candidate_for_slot(slot) if candidate is None: continue self._apply_candidate(candidate) progress = True no_progress_steps = 0 break if not progress: no_progress_steps += 1 self._clear_spinner() return self.state def empty_cells_count(self) -> int: x_min, y_min, x_max, y_max = self.bounds empty = 0 for y in range(y_min, y_max + 1): for x in range(x_min, x_max + 1): if (x, y) not in self.state.grid: empty += 1 return empty def coverage_ratio(self) -> float: return 1.0 - (self.empty_cells_count() / self.total_cells) def _collect_slots(self) -> List[FillSlot]: slots: List[FillSlot] = [] x_min, y_min, x_max, y_max = self.bounds for y in range(y_min, y_max + 1): for x in range(x_min, x_max + 1): if (x, y) in self.state.grid: continue for direction in (HORIZONTAL, VERTICAL): slots.extend(self._slots_from_start(x, y, direction)) unique: Dict[Tuple[int, int, str, int], FillSlot] = {} for slot in slots: key = (slot.x, slot.y, slot.direction, slot.length) current = unique.get(key) if current is None or self._slot_priority(slot) > self._slot_priority(current): unique[key] = slot collected = list(unique.values()) collected.sort(key=self._slot_priority, reverse=True) return collected def _slots_from_start(self, x: int, y: int, direction: str) -> Iterable[FillSlot]: dx, dy = (1, 0) if direction == HORIZONTAL else (0, 1) x_min, y_min, x_max, y_max = self.bounds prev_cell = (x - dx, y - dy) if self._inside_bounds(prev_cell) and prev_cell in self.state.grid: return [] max_length = 0 cursor_x = x cursor_y = y while x_min <= cursor_x <= x_max and y_min <= cursor_y <= y_max: max_length += 1 cursor_x += dx cursor_y += dy slots: List[FillSlot] = [] for length in range(max_length, MIN_WORD_LENGTH - 1, -1): end_cell = (x + dx * length, y + dy * length) if self._inside_bounds(end_cell) and end_cell in self.state.grid: continue pattern_chars: List[str] = [] fixed_letters = 0 empty_cells = 0 for offset in range(length): cell = (x + dx * offset, y + dy * offset) letter = self.state.grid.get(cell) if letter is None: pattern_chars.append(".") empty_cells += 1 else: pattern_chars.append(letter) fixed_letters += 1 if empty_cells == 0: continue pattern = "".join(pattern_chars) candidate_count = self._count_candidates(pattern) if candidate_count == 0: continue slots.append( FillSlot( x=x, y=y, direction=direction, length=length, pattern=pattern, fixed_letters=fixed_letters, empty_cells=empty_cells, candidate_count=candidate_count, ) ) return slots def _slot_priority(self, slot: FillSlot) -> Tuple[int, int, int, int, int]: return ( slot.fixed_letters, -slot.candidate_count, slot.length, -slot.empty_cells, 1 if slot.direction == HORIZONTAL else 0, ) def _count_candidates(self, pattern: str) -> int: count = 0 for word in self.words_by_length.get(len(pattern), []): if word in self.used_words: continue if self._matches_pattern(word, pattern): count += 1 return count @staticmethod def _matches_pattern(word: str, pattern: str) -> bool: return all(p == "." or p == w for w, p in zip(word, pattern)) def _best_candidate_for_slot(self, slot: FillSlot) -> Optional[FillCandidate]: candidates: List[FillCandidate] = [] for word in self.words_by_length.get(slot.length, []): if word in self.used_words or not self._matches_pattern(word, slot.pattern): continue if not self._placement_is_valid(slot, word): continue new_letters = sum(1 for cell in slot.cells if cell not in self.state.grid) reused_letters = slot.fixed_letters local_score = ( reused_letters, new_letters, self._word_quality(word), len(set(word)), ) candidates.append( FillCandidate( word=word, slot=slot, new_letters=new_letters, reused_letters=reused_letters, local_score=local_score, ) ) if not candidates: return None candidates.sort(key=lambda item: item.local_score, reverse=True) return candidates[0] def _word_quality(self, word: str) -> int: metadata = self.vocabulary_metadata.get(word) if not metadata: return 0 try: return int(metadata.get("quality", 0)) except (TypeError, ValueError): return 0 def _placement_is_valid(self, slot: FillSlot, word: str) -> bool: dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1) before = (slot.x - dx, slot.y - dy) after = (slot.x + dx * slot.length, slot.y + dy * slot.length) if self._inside_bounds(before) and before in self.state.grid: return False if self._inside_bounds(after) and after in self.state.grid: return False intersects_existing = False for offset, cell in enumerate(slot.cells): current = self.state.grid.get(cell) letter = word[offset] if current is not None and current != letter: return False if current == letter: intersects_existing = True return intersects_existing or slot.fixed_letters == 0 def _apply_candidate(self, candidate: FillCandidate) -> None: slot = candidate.slot dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1) intersections = 0 for offset, letter in enumerate(candidate.word): cell = (slot.x + dx * offset, slot.y + dy * offset) if cell in self.state.grid: intersections += 1 self.state.grid[cell] = letter placement = Placement( word=candidate.word, x=slot.x, y=slot.y, direction=slot.direction, intersections=intersections, ) self.state.placements.append(placement) self.state.intersections += intersections self.used_words.add(candidate.word) self.added_words.append(placement) self.last_word = candidate.word print( f"\n[fill] inserita '{candidate.word}' " f"in {slot.direction} da ({slot.x}, {slot.y}), " f"nuove={candidate.new_letters}, intersezioni={intersections}, " f"copertura={self.coverage_ratio() * 100:.1f}%", file=sys.stderr, flush=True, ) def _inside_bounds(self, cell: Coordinate) -> bool: x_min, y_min, x_max, y_max = self.bounds x, y = cell return x_min <= x <= x_max and y_min <= y <= y_max def _tick_spinner(self, *, slots_count: int) -> None: now = time.perf_counter() if now - self.last_spinner_update < 0.08: return frame = self.spinner_frames[self.spinner_index] elapsed = now - self.started_at message = ( f"\r{frame} fill... " f"slot={slots_count} " f"vuote={self.empty_cells_count()}/{self.total_cells} " f"target={self.target_empty_cells} " f"aggiunte={len(self.added_words)} " f"ultima={self.last_word} " f"t={elapsed:0.1f}s" ) print(message, end="", file=sys.stderr, flush=True) self.spinner_index = (self.spinner_index + 1) % len(self.spinner_frames) self.last_spinner_update = now @staticmethod def _clear_spinner() -> None: print("\r" + " " * 140 + "\r", end="", file=sys.stderr, flush=True) def load_vocabulary(path: Path = VOCAB_PATH) -> List[str]: if not path.exists(): raise FileNotFoundError(f"Vocabolario non trovato: {path}") return path.read_text(encoding="utf-8").splitlines() def load_vocabulary_metadata(path: Path = METADATA_VOCAB_PATH) -> Dict[str, Dict[str, object]]: if not path.exists(): return {} return json.loads(path.read_text(encoding="utf-8")) def summarize_fill(initial_state: CrosswordState, final_state: CrosswordState) -> str: initial_bounds = ( initial_state.width(), initial_state.height(), initial_state.shape_difference(), ) final_bounds = ( final_state.width(), final_state.height(), final_state.shape_difference(), ) return ( f"Riempimento completato\n" f"Parole iniziali: {initial_state.placed_words}\n" f"Parole finali: {final_state.placed_words}\n" f"Intersezioni finali: {final_state.intersections}\n" f"Dimensioni iniziali: {initial_bounds[0]}x{initial_bounds[1]} (diff={initial_bounds[2]})\n" f"Dimensioni finali: {final_bounds[0]}x{final_bounds[1]} (diff={final_bounds[2]})\n" f"Celle vuote residue: {sum(1 for _ in iter_empty_cells(final_state))}" ) def iter_empty_cells(state: CrosswordState) -> Iterable[Coordinate]: x_min, y_min, x_max, y_max = state.bounds() for y in range(y_min, y_max + 1): for x in range(x_min, x_max + 1): if (x, y) not in state.grid: yield (x, y) def main() -> None: vocabulary = load_vocabulary() vocabulary_metadata = load_vocabulary_metadata() generator = CrosswordGenerator(WORDS, diffxy=DIFFXY) initial_state = generator.solve() filler = CrosswordFiller(initial_state, vocabulary, vocabulary_metadata=vocabulary_metadata) final_state = filler.fill() print(summarize_fill(initial_state, final_state)) print() print(render_grid(final_state.grid, final_state.placements)) if filler.added_words: print() print("Parole aggiunte dal filler:") for index, placement in enumerate(filler.added_words, start=1): direction = "orizzontale" if placement.direction == HORIZONTAL else "verticale" print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}") if __name__ == "__main__": main()