355 lines
14 KiB
Python
355 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Tuple
|
|
|
|
import main as engine
|
|
from clue_generator import generate_clues, load_enriched_entries
|
|
from crossword_filler import CrosswordFiller, load_vocabulary_metadata
|
|
from crossword_generator import HORIZONTAL, Placement
|
|
|
|
|
|
def _normalize_topic_list(raw_topic: object) -> List[str]:
    """Turn the request's ``topic`` value into a list of topic names.

    A list input is cleaned item by item: each item is stringified, stripped
    and lower-cased, and items that are empty after stripping are dropped.
    If nothing survives, the engine's default topic is returned as a
    one-element list.

    Any other input (including ``None`` and other falsy values, which are
    replaced by the default topic) is stringified and handed to
    ``engine.parse_topics``.
    """
    if not isinstance(raw_topic, list):
        return engine.parse_topics(str(raw_topic or engine.DEFAULT_TOPIC))

    cleaned = []
    for item in raw_topic:
        text = str(item).strip()
        if text:
            cleaned.append(text.lower())

    if cleaned:
        return cleaned
    return [engine.DEFAULT_TOPIC]
|
|
|
|
|
|
def _entry_numbering(placements: Iterable[Placement]) -> Tuple[Dict[Tuple[int, int], int], Dict[Tuple[str, int, int, str], int]]:
    """Assign clue numbers to placements.

    Placements are visited sorted by ``(y, x, direction)``. Each distinct
    start cell ``(x, y)`` receives the next number, counting from 1, and
    placements that share a start cell share that number.

    Returns:
        A pair ``(number_by_start, placement_numbers)`` where the first maps
        ``(x, y)`` start cells to their number and the second maps
        ``(word, x, y, direction)`` keys to the number of their start cell.
    """
    number_by_start: Dict[Tuple[int, int], int] = {}
    placement_numbers: Dict[Tuple[str, int, int, str], int] = {}

    reading_order = sorted(placements, key=lambda p: (p.y, p.x, p.direction))
    for entry in reading_order:
        origin = (entry.x, entry.y)
        # The default is computed before insertion, so an unseen start cell
        # gets "cells numbered so far + 1"; a known cell keeps its number.
        assigned = number_by_start.setdefault(origin, len(number_by_start) + 1)
        placement_numbers[(entry.word, entry.x, entry.y, entry.direction)] = assigned

    return number_by_start, placement_numbers
|
|
|
|
|
|
def _clue_lookup(placements: List[Placement], lexicon_path: Path, topic: str, difficulty: str) -> Dict[Tuple[str, int, int, str], Dict[str, object]]:
    """Generate clues for the placements and index them by placement key.

    The enriched lexicon is loaded from ``lexicon_path`` and passed to
    ``generate_clues`` together with the primary topic derived from ``topic``
    and the requested ``difficulty``.

    Returns:
        A dict keyed by ``(word, x, y, direction)`` whose values are
        ``{"text": ..., "source": ...}`` taken from each generated clue.
    """
    enriched = load_enriched_entries(lexicon_path)
    main_topic = engine.primary_topic(topic)
    generated = generate_clues(placements, enriched, main_topic, difficulty)

    by_placement: Dict[Tuple[str, int, int, str], Dict[str, object]] = {}
    for clue in generated:
        # Clue directions are textual labels; those starting with "o" or "a"
        # (case-insensitive) are treated as horizontal, everything else as "V".
        # NOTE(review): presumably "orizzontale"/"across" — confirm with clue_generator.
        label = clue.direction.lower()
        if label.startswith(("o", "a")):
            axis = HORIZONTAL
        else:
            axis = "V"
        by_placement[(clue.word, clue.x, clue.y, axis)] = {"text": clue.text, "source": clue.source}

    return by_placement
|
|
|
|
|
|
def _build_grid_payload(state, placements: List[Placement], placement_ids: Dict[Tuple[str, int, int, str], str]) -> Dict[str, object]:
    """Build the ``grid`` section of the response.

    Grid coordinates are rebased so that the minimum corner reported by
    ``state.bounds()`` becomes row 0 / col 0. Cells are emitted row by row;
    a position without a letter in ``state.grid`` becomes a ``"block"`` cell,
    any other position a ``"letter"`` cell carrying its upper-cased solution,
    its clue number (if an entry starts there) and the ids of the across and
    down entries that cover it.

    Args:
        state: Object exposing ``bounds()`` and a ``grid`` mapping of
            ``(x, y)`` to letters.
        placements: Placed words; each provides ``word``, ``x``, ``y``,
            ``direction`` and ``cells``.
        placement_ids: Entry id for every ``(word, x, y, direction)`` key.

    Returns:
        A dict with ``rows``, ``cols``, ``cell_size_hint`` and ``cells``.
    """
    x_min, y_min, x_max, y_max = state.bounds()
    col_count = x_max - x_min + 1
    row_count = y_max - y_min + 1

    # Per-direction maps from absolute cell coordinate to the covering entry id.
    across_ids: Dict[Tuple[int, int], str] = {}
    down_ids: Dict[Tuple[int, int], str] = {}
    for entry in placements:
        owner = placement_ids[(entry.word, entry.x, entry.y, entry.direction)]
        bucket = down_ids
        if entry.direction == HORIZONTAL:
            bucket = across_ids
        for cx, cy in entry.cells:
            bucket[(cx, cy)] = owner

    start_numbers = _entry_numbering(placements)[0]

    def describe_cell(row: int, col: int) -> Dict[str, object]:
        """Return the payload dict for the cell at grid-relative (row, col)."""
        position = (x_min + col, y_min + row)
        letter = state.grid.get(position)
        if letter is None:
            return {
                "row": row,
                "col": col,
                "kind": "block",
                "solution": None,
                "display": None,
                "number": None,
                "across_entry_id": None,
                "down_entry_id": None,
                "is_prefilled": False,
            }
        return {
            "row": row,
            "col": col,
            "kind": "letter",
            "solution": letter.upper(),
            "display": "",
            "number": start_numbers.get(position),
            "across_entry_id": across_ids.get(position),
            "down_entry_id": down_ids.get(position),
            "is_prefilled": False,
        }

    return {
        "rows": row_count,
        "cols": col_count,
        "cell_size_hint": 42,
        "cells": [
            describe_cell(row, col)
            for row in range(row_count)
            for col in range(col_count)
        ],
    }
|
|
|
|
|
|
def _build_entries_payload(
    placements: List[Placement],
    state,
    lexicon_entries: Dict[str, Dict[str, object]],
    topic: str,
    difficulty: str,
) -> Tuple[List[Dict[str, object]], Dict[str, List[Dict[str, object]]]]:
    """Build the ``entries`` list and the ``clues`` mapping of the response.

    Placements are processed sorted by ``(y, x, direction)``. For each one an
    entry dict (id, number, direction, answer, grid-relative position and
    cells, clue text/source, lexicon topics/pos, confidence) is appended to
    the entries list, and a shorter clue dict is appended to either the
    ``"across"`` or the ``"down"`` clue list.

    Args:
        placements: Placed words to describe.
        state: Object whose ``bounds()`` gives the minimum x/y used to rebase
            coordinates to grid-relative rows and columns.
        lexicon_entries: Lexicon metadata keyed by lower-cased word.
        topic: Topic string used for clue generation and topic matching.
        difficulty: Difficulty string forwarded to clue generation.

    Returns:
        ``(entries_payload, clues_payload)``.
    """
    x_min, y_min, _, _ = state.bounds()
    # NOTE(review): the clue lexicon path is resolved from None here rather
    # than from the lexicon file chosen by the caller — confirm this is intended.
    clue_lookup = _clue_lookup(placements, engine.resolve_runtime_lexicon_path(None), topic, difficulty)
    placement_numbers = _entry_numbering(placements)[1]

    entries_payload: List[Dict[str, object]] = []
    clues_payload = {"across": [], "down": []}

    for entry in sorted(placements, key=lambda p: (p.y, p.x, p.direction)):
        key = (entry.word, entry.x, entry.y, entry.direction)
        number = placement_numbers[key]
        if entry.direction == HORIZONTAL:
            direction, prefix = "across", "A"
        else:
            direction, prefix = "down", "D"
        entry_id = prefix + str(number)

        clue_data = clue_lookup.get(key, {})
        lexicon_entry = lexicon_entries.get(entry.word.lower(), {})

        item = {
            "entry_id": entry_id,
            "number": number,
            "direction": direction,
            "answer": entry.word.upper(),
            "answer_length": len(entry.word),
            "row": entry.y - y_min,
            "col": entry.x - x_min,
            # Cells are emitted as [row, col] pairs relative to the grid origin.
            "cells": [[cy - y_min, cx - x_min] for cx, cy in entry.cells],
            "clue": clue_data.get("text", ""),
            "clue_source": clue_data.get("source", "fallback"),
            "topics": lexicon_entry.get("topics", []),
            "pos": lexicon_entry.get("pos", ""),
            # NOTE(review): both flags are constants for every entry, including
            # any words added by the filler — confirm this is intended.
            "is_seed": True,
            "added_by_filler": False,
            "confidence": _rescue_confidence(lexicon_entry),
        }
        entries_payload.append(item)

        clues_payload[direction].append(
            {
                "number": number,
                "entry_id": entry_id,
                "text": item["clue"],
                "enumeration": len(entry.word),
                "topic_match": bool(lexicon_entry and engine.word_is_on_topic(lexicon_entry, topic)),
                "source": item["clue_source"],
            }
        )

    return entries_payload, clues_payload


def _rescue_confidence(lexicon_entry: Dict[str, object]) -> float:
    """Return the ``llm_rescue`` confidence of a lexicon entry, defaulting to 1.0.

    The default applies when ``llm_rescue`` is not a dict, when its
    ``confidence`` is missing or falsy, or when it cannot be converted to float.
    """
    rescue = lexicon_entry.get("llm_rescue")
    if not isinstance(rescue, dict):
        return 1.0
    try:
        return float(rescue.get("confidence", 1.0) or 1.0)
    except (TypeError, ValueError):
        return 1.0
|
|
|
|
|
|
def _solution_rows(state) -> List[str]:
    """Render the solved grid as one string per row.

    Rows run from the minimum to the maximum y of ``state.bounds()``, columns
    from the minimum to the maximum x. Each cell contributes its upper-cased
    letter from ``state.grid``, or ``"#"`` when the cell has no letter.
    """
    x_min, y_min, x_max, y_max = state.bounds()
    columns = range(x_min, x_max + 1)
    return [
        "".join(state.grid.get((x, y), "#").upper() for x in columns)
        for y in range(y_min, y_max + 1)
    ]
|
|
|
|
|
|
def _diagnostics(args, state, entries_by_word: Dict[str, Dict[str, object]], generation_seconds: float) -> Dict[str, object]:
    """Summarise the generated crossword for the ``diagnostics`` section.

    Counts filled and empty cells, classifies each distinct placed word
    (compared lower-cased) by part-of-speech label and by whether it is on
    the requested topic, and echoes a few request settings from ``args``.

    Args:
        args: Settings object; ``topic``, ``initial_word_count`` and
            ``target_empty_ratio`` are read.
        state: Final grid state; ``placements``, ``area()``, ``grid`` and
            ``placed_words`` are read.
        entries_by_word: Lexicon metadata keyed by lower-cased word.
        generation_seconds: Elapsed generation time, rounded to 2 decimals
            in the output.

    Returns:
        The diagnostics dict. ``filler_words_added`` is always 0 here.
    """
    placed = engine.placement_words(state.placements)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct_words = list(dict.fromkeys(word.lower() for word in placed))

    total_cells = state.area()
    filled_cells = len(state.grid)
    empty_cells = total_cells - filled_cells
    if total_cells:
        empty_ratio = empty_cells / total_cells
    else:
        empty_ratio = 0.0

    pos_counts = dict.fromkeys(
        (
            "sostantivi",
            "aggettivi",
            "verbi",
            "avverbi",
            "preposizioni",
            "congiunzioni",
            "altri",
        ),
        0,
    )

    topic_words = 0
    for word in distinct_words:
        entry = entries_by_word.get(word, {})
        label = engine.pos_label(str(entry.get("pos", "")))
        # Labels outside the predefined set are added on the fly.
        pos_counts[label] = pos_counts.get(label, 0) + 1
        # Words without lexicon metadata count as off-topic.
        if entry and engine.word_is_on_topic(entry, args.topic):
            topic_words += 1
    off_topic_words = len(distinct_words) - topic_words

    return {
        "seed_words_requested": args.initial_word_count,
        "seed_words_placed": state.placed_words,
        "filler_words_added": 0,
        "filled_cells": filled_cells,
        "empty_cells": empty_cells,
        "empty_ratio": round(empty_ratio, 4),
        "target_empty_ratio": args.target_empty_ratio,
        "topic_words": topic_words,
        "off_topic_words": off_topic_words,
        "pos_counts": pos_counts,
        "generation_seconds": round(generation_seconds, 2),
    }
|
|
|
|
|
|
def generate_crossword_response(request_payload: Dict[str, object]) -> Dict[str, object]:
    """Generate a crossword from a request payload and return the response dict.

    The ``generator`` section of the payload supplies the options (topic,
    difficulty, seed, limits, lexicon file, output language). The function
    prepares the engine resources, solves the seed grid, runs the filler,
    and assembles grid, entries, clues, solution and diagnostics into a
    JSON-serialisable response.

    Options that are missing or explicitly ``null`` fall back to their
    defaults, except ``seed`` (passed through as given) and
    ``definitions_enabled`` (converted with ``bool``, so ``null`` is False).

    Args:
        request_payload: Parsed request; ``request_id`` and ``generator``
            are read. A missing ``request_id`` is replaced by a new UUID.

    Returns:
        The response dict with ``schema_version`` "1.0" and ``status`` "ok".

    Raises:
        TypeError, ValueError: If a numeric option cannot be converted with
            ``float``/``int``. Errors raised by the engine propagate unchanged.
    """
    import time  # function-scope import, same convention as main()

    generator = dict(request_payload.get("generator") or {})

    def option(key: str, default: object) -> object:
        """Return generator[key], or ``default`` when it is missing or None."""
        value = generator.get(key)
        return default if value is None else value

    requested_language = str(generator.get("preferred_output_language", "it") or "it")
    # Reduce tags such as "EN", "en-US" or "es_MX" to the bare lower-case
    # language code used as key of the title/subtitle maps below.
    locale = requested_language.strip().lower().replace("_", "-").split("-")[0]
    request_id = str(request_payload.get("request_id") or uuid.uuid4())
    topic_list = _normalize_topic_list(generator.get("topic"))
    topic_string = ",".join(topic_list)
    lexicon_file = Path(str(generator.get("lexicon_file") or "lexicon_it_curated_llm_aggressive.json"))

    # Ad-hoc attribute bag passed to the engine helpers in place of parsed
    # command-line arguments.
    args = type("Args", (), {})()
    args.build_vocabulary = False
    args.build_lexicon = False
    args.skip_fill = False
    args.build_semantic_lexicon = False
    args.babelnet_enrich = False
    args.babelnet_limit = 0
    args.babelnet_sleep = 0.0
    args.vocabulary = None
    # option() keeps an explicit JSON null from reaching float()/int()/str().
    args.target_empty_ratio = float(option("target_empty_ratio", 1 / 6))
    args.time_limit = float(option("time_limit_seconds", 8.0))
    args.max_candidates = int(option("max_candidates_per_word", 12))
    args.diffxy = int(option("diffxy", 7))
    args.seed = generator.get("seed")
    args.difficulty = str(option("difficulty", "medium"))
    args.topic = topic_string
    args.max_topics = 1
    args.initial_word_count = int(option("initial_word_count", engine.DEFAULT_INITIAL_WORD_COUNT))
    args.themed_fill_count = int(option("themed_fill_count", engine.DEFAULT_THEMED_FILL_WORD_COUNT))
    args.definitions = bool(generator.get("definitions_enabled", True))
    args.lexicon = engine.resolve_runtime_lexicon_path(lexicon_file)
    args.definition_babelnet_limit = 0
    args.topic_seed_counts = {}

    engine.ensure_vocabulary(args)
    engine.ensure_lexicon(args)
    engine.ensure_semantic_lexicon(args)
    difficulty_level = engine.parse_difficulty(args.difficulty)
    active_topics = engine.resolve_topics(args, difficulty_level)
    initial_words = engine.select_initial_words(difficulty_level, args.topic, args.initial_word_count)

    # Monotonic clock: the measured duration cannot be skewed by wall-clock
    # adjustments happening while the grid is being generated.
    started = time.perf_counter()
    generator_engine = engine.CrosswordGenerator(
        initial_words,
        diffxy=args.diffxy,
        time_limit_seconds=args.time_limit,
        max_candidates_per_word=args.max_candidates,
        seed=args.seed,
    )
    initial_state = generator_engine.solve()

    vocabulary = engine.load_filtered_vocabulary(difficulty_level, args.topic)
    metadata = load_vocabulary_metadata()
    semantic_metadata = engine.load_semantic_metadata_for_vocabulary(vocabulary, args.topic)
    filler = CrosswordFiller(
        initial_state,
        vocabulary,
        target_empty_ratio=args.target_empty_ratio,
        vocabulary_metadata=metadata,
        semantic_metadata=semantic_metadata,
        selected_topic=args.topic,
        max_themed_fill_words=args.themed_fill_count,
        seed=args.seed,
    )
    final_state = filler.fill()
    generation_seconds = time.perf_counter() - started

    lexicon_entries = load_enriched_entries(args.lexicon)
    placements = list(final_state.placements)
    placement_numbers = _entry_numbering(placements)[1]
    # Entry ids are "A<number>" for horizontal placements and "D<number>" otherwise.
    placement_ids = {
        key: (("A" if key[3] == HORIZONTAL else "D") + str(number))
        for key, number in placement_numbers.items()
    }
    entries_payload, clues_payload = _build_entries_payload(placements, final_state, lexicon_entries, args.topic, args.difficulty)
    grid_payload = _build_grid_payload(final_state, placements, placement_ids)
    diagnostics = _diagnostics(args, final_state, lexicon_entries, generation_seconds)
    diagnostics["filler_words_added"] = len(filler.added_words)

    topic_label = ", ".join(active_topics)
    title_map = {
        "it": f"Cruciverba a tema {topic_label}",
        "en": f"{topic_label.title()} crossword",
        "es": f"Crucigrama sobre {topic_label}",
    }
    subtitle_map = {
        "it": "Generato dal motore Python del backoffice",
        "en": "Generated by the Python backoffice engine",
        "es": "Generado por el motor Python del backoffice",
    }

    return {
        "schema_version": "1.0",
        "request_id": request_id,
        "crossword_id": f"cw-{uuid.uuid4().hex[:12]}",
        "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
        "status": "ok",
        "generator": {
            "topic": active_topics,
            "difficulty": args.difficulty,
            "seed": args.seed,
            "runtime_lexicon": args.lexicon.name,
        },
        "summary": {
            # Unsupported languages fall back to the Italian texts.
            "title": title_map.get(locale, title_map["it"]),
            "subtitle": subtitle_map.get(locale, subtitle_map["it"]),
            "rows": grid_payload["rows"],
            "cols": grid_payload["cols"],
            "total_words": len(placements),
            "intersections": final_state.intersections,
        },
        "grid": grid_payload,
        "entries": entries_payload,
        "clues": clues_payload,
        "solution": {
            "grid_rows": _solution_rows(final_state),
            "words": [placement.word.upper() for placement in placements],
        },
        "diagnostics": diagnostics,
        "artifacts": {
            "pdf_player": None,
            "pdf_solution": None,
            "thumbnail": None,
            "html_preview": None,
        },
    }
|
|
|
|
|
|
def main() -> None:
    """Read a JSON request from stdin and write the JSON response to stdout.

    The response is serialised with ``ensure_ascii=False`` and written
    without a trailing newline.
    """
    import sys

    raw_request = sys.stdin.read()
    request = json.loads(raw_request)
    serialized = json.dumps(generate_crossword_response(request), ensure_ascii=False)
    sys.stdout.write(serialized)


# Run the stdin/stdout bridge only when executed as a script.
if __name__ == "__main__":
    main()
|