# File: cruciverba_1/crossword_service.py (355 lines, 14 KiB, Python)

from __future__ import annotations
import json
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import main as engine
from clue_generator import generate_clues, load_enriched_entries
from crossword_filler import CrosswordFiller, load_vocabulary_metadata
from crossword_generator import HORIZONTAL, Placement
def _normalize_topic_list(raw_topic: object) -> List[str]:
if isinstance(raw_topic, list):
topics = [str(item).strip().lower() for item in raw_topic if str(item).strip()]
return topics or [engine.DEFAULT_TOPIC]
return engine.parse_topics(str(raw_topic or engine.DEFAULT_TOPIC))
def _entry_numbering(placements: Iterable[Placement]) -> Tuple[Dict[Tuple[int, int], int], Dict[Tuple[str, int, int, str], int]]:
ordered = sorted(placements, key=lambda item: (item.y, item.x, item.direction))
number_by_start: Dict[Tuple[int, int], int] = {}
placement_numbers: Dict[Tuple[str, int, int, str], int] = {}
next_number = 1
for placement in ordered:
start = (placement.x, placement.y)
if start not in number_by_start:
number_by_start[start] = next_number
next_number += 1
placement_numbers[(placement.word, placement.x, placement.y, placement.direction)] = number_by_start[start]
return number_by_start, placement_numbers
def _clue_lookup(placements: List[Placement], lexicon_path: Path, topic: str, difficulty: str) -> Dict[Tuple[str, int, int, str], Dict[str, object]]:
    """Generate clues for the placements and index them by placement key.

    The lexicon at ``lexicon_path`` is loaded and passed to ``generate_clues``
    together with the primary topic derived from ``topic`` and the requested
    ``difficulty``. Each generated clue is stored under
    ``(word, x, y, direction)`` as a dict holding its ``text`` and ``source``.
    """
    lexicon = load_enriched_entries(lexicon_path)
    generated = generate_clues(placements, lexicon, engine.primary_topic(topic), difficulty)

    indexed: Dict[Tuple[str, int, int, str], Dict[str, object]] = {}
    for clue in generated:
        # A clue direction label starting with "o" or "a" (any case) is mapped
        # to HORIZONTAL; every other label becomes "V".
        label = clue.direction.lower()
        orientation = HORIZONTAL if label.startswith(("o", "a")) else "V"
        indexed[(clue.word, clue.x, clue.y, orientation)] = {
            "text": clue.text,
            "source": clue.source,
        }
    return indexed
def _build_grid_payload(state, placements: List[Placement], placement_ids: Dict[Tuple[str, int, int, str], str]) -> Dict[str, object]:
    """Describe every cell of the grid's bounding box for the response.

    Coordinates are shifted so that the top-left corner of ``state.bounds()``
    becomes row 0 / column 0, and cells are listed row by row. A position with
    no letter in ``state.grid`` is emitted as a "block" cell whose optional
    fields are all None. A position holding a letter is emitted as a "letter"
    cell carrying the upper-cased solution, an empty display string, the clue
    number of an entry starting there (if any) and the ids of the across and
    down entries that cover it.

    ``placement_ids`` must hold an id for every placement, keyed by
    ``(word, x, y, direction)``; a missing key raises ``KeyError``.
    """
    x_min, y_min, x_max, y_max = state.bounds()
    col_count = x_max - x_min + 1
    row_count = y_max - y_min + 1

    # For each covered cell, remember which across / down entry runs through it.
    across_ids: Dict[Tuple[int, int], str] = {}
    down_ids: Dict[Tuple[int, int], str] = {}
    for placed in placements:
        entry_id = placement_ids[(placed.word, placed.x, placed.y, placed.direction)]
        bucket = down_ids if placed.direction != HORIZONTAL else across_ids
        bucket.update({(cx, cy): entry_id for cx, cy in placed.cells})

    start_numbers, _ = _entry_numbering(placements)

    cells = []
    for row in range(row_count):
        for col in range(col_count):
            position = (x_min + col, y_min + row)
            letter = state.grid.get(position)
            # Start from the "block" shape; letter cells override the fields
            # below while keeping the same key order.
            cell = {
                "row": row,
                "col": col,
                "kind": "block",
                "solution": None,
                "display": None,
                "number": None,
                "across_entry_id": None,
                "down_entry_id": None,
                "is_prefilled": False,
            }
            if letter is not None:
                cell.update(
                    kind="letter",
                    solution=letter.upper(),
                    display="",
                    number=start_numbers.get(position),
                    across_entry_id=across_ids.get(position),
                    down_entry_id=down_ids.get(position),
                )
            cells.append(cell)

    return {
        "rows": row_count,
        "cols": col_count,
        "cell_size_hint": 42,
        "cells": cells,
    }
def _build_entries_payload(
placements: List[Placement],
state,
lexicon_entries: Dict[str, Dict[str, object]],
topic: str,
difficulty: str,
) -> Tuple[List[Dict[str, object]], Dict[str, List[Dict[str, object]]]]:
x_min, y_min, _, _ = state.bounds()
clue_lookup = _clue_lookup(placements, engine.resolve_runtime_lexicon_path(None), topic, difficulty)
_, placement_numbers = _entry_numbering(placements)
entries_payload: List[Dict[str, object]] = []
clues_payload = {"across": [], "down": []}
for placement in sorted(placements, key=lambda item: (item.y, item.x, item.direction)):
number = placement_numbers[(placement.word, placement.x, placement.y, placement.direction)]
direction = "across" if placement.direction == HORIZONTAL else "down"
entry_id = ("A" if direction == "across" else "D") + str(number)
clue_data = clue_lookup.get((placement.word, placement.x, placement.y, placement.direction), {})
lexicon_entry = lexicon_entries.get(placement.word.lower(), {})
row = placement.y - y_min
col = placement.x - x_min
cells = [[y - y_min, x - x_min] for x, y in placement.cells]
confidence = 1.0
llm_rescue = lexicon_entry.get("llm_rescue")
if isinstance(llm_rescue, dict):
try:
confidence = float(llm_rescue.get("confidence", 1.0) or 1.0)
except (TypeError, ValueError):
confidence = 1.0
item = {
"entry_id": entry_id,
"number": number,
"direction": direction,
"answer": placement.word.upper(),
"answer_length": len(placement.word),
"row": row,
"col": col,
"cells": cells,
"clue": clue_data.get("text", ""),
"clue_source": clue_data.get("source", "fallback"),
"topics": lexicon_entry.get("topics", []),
"pos": lexicon_entry.get("pos", ""),
"is_seed": True,
"added_by_filler": False,
"confidence": confidence,
}
entries_payload.append(item)
clue_item = {
"number": number,
"entry_id": entry_id,
"text": item["clue"],
"enumeration": len(placement.word),
"topic_match": bool(lexicon_entry and engine.word_is_on_topic(lexicon_entry, topic)),
"source": item["clue_source"],
}
clues_payload["across" if direction == "across" else "down"].append(clue_item)
return entries_payload, clues_payload
def _solution_rows(state) -> List[str]:
x_min, y_min, x_max, y_max = state.bounds()
rows: List[str] = []
for y in range(y_min, y_max + 1):
chars = []
for x in range(x_min, x_max + 1):
chars.append(state.grid.get((x, y), "#").upper())
rows.append("".join(chars))
return rows
def _diagnostics(args, state, entries_by_word: Dict[str, Dict[str, object]], generation_seconds: float) -> Dict[str, object]:
    """Summarise the generated grid for the response's ``diagnostics`` section.

    Words are de-duplicated case-insensitively (first occurrence order) before
    being counted per part-of-speech label and classified as on- or off-topic;
    a word without a lexicon entry counts as off-topic. Cell statistics come
    from ``state.area()`` and the size of ``state.grid``.

    ``filler_words_added`` is always reported as 0 by this function.
    """
    placed = engine.placement_words(state.placements)
    distinct_words = list(dict.fromkeys(word.lower() for word in placed))

    area = state.area()
    filled = len(state.grid)
    empty = area - filled
    ratio = empty / area if area else 0.0

    pos_counts = dict.fromkeys(
        (
            "sostantivi",
            "aggettivi",
            "verbi",
            "avverbi",
            "preposizioni",
            "congiunzioni",
            "altri",
        ),
        0,
    )
    on_topic = 0
    for word in distinct_words:
        entry = entries_by_word.get(word, {})
        label = engine.pos_label(str(entry.get("pos", "")))
        # .get keeps labels outside the predefined set countable as well.
        pos_counts[label] = pos_counts.get(label, 0) + 1
        if entry and engine.word_is_on_topic(entry, args.topic):
            on_topic += 1
    # Every distinct word is either on-topic or off-topic.
    off_topic = len(distinct_words) - on_topic

    return {
        "seed_words_requested": args.initial_word_count,
        "seed_words_placed": state.placed_words,
        "filler_words_added": 0,
        "filled_cells": filled,
        "empty_cells": empty,
        "empty_ratio": round(ratio, 4),
        "target_empty_ratio": args.target_empty_ratio,
        "topic_words": on_topic,
        "off_topic_words": off_topic,
        "pos_counts": pos_counts,
        "generation_seconds": round(generation_seconds, 2),
    }
def generate_crossword_response(request_payload: Dict[str, object]) -> Dict[str, object]:
generator = dict(request_payload.get("generator") or {})
locale = str(generator.get("preferred_output_language", "it") or "it")
request_id = str(request_payload.get("request_id") or uuid.uuid4())
topic_list = _normalize_topic_list(generator.get("topic"))
topic_string = ",".join(topic_list)
lexicon_file = Path(str(generator.get("lexicon_file") or "lexicon_it_curated_llm_aggressive.json"))
args = type("Args", (), {})()
args.build_vocabulary = False
args.build_lexicon = False
args.skip_fill = False
args.build_semantic_lexicon = False
args.babelnet_enrich = False
args.babelnet_limit = 0
args.babelnet_sleep = 0.0
args.vocabulary = None
args.target_empty_ratio = float(generator.get("target_empty_ratio", 1 / 6))
args.time_limit = float(generator.get("time_limit_seconds", 8.0))
args.max_candidates = int(generator.get("max_candidates_per_word", 12))
args.diffxy = int(generator.get("diffxy", 7))
args.seed = generator.get("seed")
args.difficulty = str(generator.get("difficulty", "medium"))
args.topic = topic_string
args.max_topics = 1
args.initial_word_count = int(generator.get("initial_word_count", engine.DEFAULT_INITIAL_WORD_COUNT))
args.themed_fill_count = int(generator.get("themed_fill_count", engine.DEFAULT_THEMED_FILL_WORD_COUNT))
args.definitions = bool(generator.get("definitions_enabled", True))
args.lexicon = engine.resolve_runtime_lexicon_path(lexicon_file)
args.definition_babelnet_limit = 0
args.topic_seed_counts = {}
engine.ensure_vocabulary(args)
engine.ensure_lexicon(args)
engine.ensure_semantic_lexicon(args)
difficulty_level = engine.parse_difficulty(args.difficulty)
active_topics = engine.resolve_topics(args, difficulty_level)
initial_words = engine.select_initial_words(difficulty_level, args.topic, args.initial_word_count)
started = datetime.now()
generator_engine = engine.CrosswordGenerator(
initial_words,
diffxy=args.diffxy,
time_limit_seconds=args.time_limit,
max_candidates_per_word=args.max_candidates,
seed=args.seed,
)
initial_state = generator_engine.solve()
vocabulary = engine.load_filtered_vocabulary(difficulty_level, args.topic)
metadata = load_vocabulary_metadata()
semantic_metadata = engine.load_semantic_metadata_for_vocabulary(vocabulary, args.topic)
filler = CrosswordFiller(
initial_state,
vocabulary,
target_empty_ratio=args.target_empty_ratio,
vocabulary_metadata=metadata,
semantic_metadata=semantic_metadata,
selected_topic=args.topic,
max_themed_fill_words=args.themed_fill_count,
seed=args.seed,
)
final_state = filler.fill()
generation_seconds = (datetime.now() - started).total_seconds()
lexicon_entries = load_enriched_entries(args.lexicon)
placements = list(final_state.placements)
placement_numbers = _entry_numbering(placements)[1]
placement_ids = {
key: (("A" if key[3] == HORIZONTAL else "D") + str(number))
for key, number in placement_numbers.items()
}
entries_payload, clues_payload = _build_entries_payload(placements, final_state, lexicon_entries, args.topic, args.difficulty)
grid_payload = _build_grid_payload(final_state, placements, placement_ids)
diagnostics = _diagnostics(args, final_state, lexicon_entries, generation_seconds)
diagnostics["filler_words_added"] = len(filler.added_words)
topic_label = ", ".join(active_topics)
title_map = {
"it": f"Cruciverba a tema {topic_label}",
"en": f"{topic_label.title()} crossword",
"es": f"Crucigrama sobre {topic_label}",
}
subtitle_map = {
"it": "Generato dal motore Python del backoffice",
"en": "Generated by the Python backoffice engine",
"es": "Generado por el motor Python del backoffice",
}
return {
"schema_version": "1.0",
"request_id": request_id,
"crossword_id": f"cw-{uuid.uuid4().hex[:12]}",
"generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
"status": "ok",
"generator": {
"topic": active_topics,
"difficulty": args.difficulty,
"seed": args.seed,
"runtime_lexicon": args.lexicon.name,
},
"summary": {
"title": title_map.get(locale, title_map["it"]),
"subtitle": subtitle_map.get(locale, subtitle_map["it"]),
"rows": grid_payload["rows"],
"cols": grid_payload["cols"],
"total_words": len(placements),
"intersections": final_state.intersections,
},
"grid": grid_payload,
"entries": entries_payload,
"clues": clues_payload,
"solution": {
"grid_rows": _solution_rows(final_state),
"words": [placement.word.upper() for placement in placements],
},
"diagnostics": diagnostics,
"artifacts": {
"pdf_player": None,
"pdf_solution": None,
"thumbnail": None,
"html_preview": None,
},
}
def main() -> None:
    """Read a JSON request from stdin and write the JSON response to stdout.

    The response is serialised with ``ensure_ascii=False`` so non-ASCII
    characters are written as-is.
    """
    import sys

    request = json.load(sys.stdin)
    serialized = json.dumps(generate_crossword_response(request), ensure_ascii=False)
    sys.stdout.write(serialized)


if __name__ == "__main__":
    main()