alpha01 backoffice: crossword engine, lexicon curation and JSON contract

This commit is contained in:
2026-04-29 13:24:04 +02:00
parent a1f8cb8577
commit 47d8957e15
20 changed files with 5985 additions and 16 deletions

View File

@@ -12,11 +12,11 @@ from pathlib import Path
from typing import Dict, Iterable, List, Optional
from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
from main import parse_difficulty
BABELNET_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_babelnet.json")
BABELNET_CACHE_PATH = Path(__file__).with_name(".babelnet_cache.json")
BABELNET_LOCAL_KEY_PATH = Path(__file__).with_name(".babelnet_api_key.local")
BABELNET_API_BASE = "https://babelnet.io/v9"
BABELNET_ENV_KEY = "BABELNET_API_KEY"
@@ -28,14 +28,76 @@ POS_TO_BABELNET = {
}
class BabelNetApiCallLimitReached(RuntimeError):
    """Signal that the configured cap on BabelNet API calls has been hit.

    Raised by ``request_json`` when the ``stats`` mapping carries an
    ``api_call_limit`` and the ``api_calls`` counter has already reached it,
    i.e. before another HTTP request would be issued.
    """
class BabelNetKeyUnavailable(RuntimeError):
    """Signal that the current BabelNet API key cannot be used.

    Raised by ``request_json`` when BabelNet answers with HTTP 403, which the
    caller-facing message describes as an invalid key or an exhausted daily
    quota.
    """
# Symbolic names accepted by --difficulty and the numeric level each one maps
# to. Level 3 has no alias here; it can only be selected numerically.
DIFFICULTY_ALIASES: Dict[str, int] = {
    "easy": 1,
    "medium": 2,
    "hard": 4,
    "expert": 5,
}
def parse_difficulty(value: str) -> int:
    """Convert a ``--difficulty`` argument into a numeric level from 1 to 5.

    The value is stringified, stripped and lower-cased before matching, so
    aliases are case-insensitive and surrounding whitespace is ignored.

    Args:
        value: Either one of the names in ``DIFFICULTY_ALIASES`` or an
            integer written as text.

    Returns:
        The numeric difficulty level.

    Raises:
        SystemExit: If the value is neither a known alias nor an integer, or
            if the integer falls outside the 1..5 range.
    """
    text = str(value).strip().lower()
    if text in DIFFICULTY_ALIASES:
        return DIFFICULTY_ALIASES[text]

    try:
        level = int(text)
    except ValueError as exc:
        raise SystemExit(
            "Valore non valido per --difficulty. Usa easy, medium, hard, expert oppure un intero tra 1 e 5."
        ) from exc

    if not 1 <= level <= 5:
        raise SystemExit("Il valore numerico di --difficulty deve essere compreso tra 1 e 5.")
    return level
def _split_api_keys(text: str) -> List[str]:
keys = []
seen = set()
normalized = text.replace(";", "\n").replace(",", "\n")
for line in normalized.splitlines():
key = line.strip()
if not key or key.startswith("#") or key in seen:
continue
keys.append(key)
seen.add(key)
return keys
def load_babelnet_api_keys() -> List[str]:
    """Return every configured BabelNet API key.

    The environment variable named by ``BABELNET_ENV_KEY`` takes precedence:
    when it is set to a non-empty value, only that value is parsed. Otherwise
    the file at ``BABELNET_LOCAL_KEY_PATH`` is read as UTF-8 and parsed.

    Returns:
        The parsed keys, or an empty list when neither source is available.
    """
    env_value = os.environ.get(BABELNET_ENV_KEY)
    if env_value:
        return _split_api_keys(env_value)

    # Read directly instead of exists() + read_text(): this avoids a second
    # filesystem lookup and the window in which the file could disappear
    # between the check and the read.
    try:
        file_text = BABELNET_LOCAL_KEY_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        return []
    return _split_api_keys(file_text)
def load_babelnet_api_key() -> Optional[str]:
    """Return the first configured BabelNet API key, if any.

    Returns:
        The first entry produced by ``load_babelnet_api_keys``, or ``None``
        when no key is configured.
    """
    keys = load_babelnet_api_keys()
    return keys[0] if keys else None
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Arricchisce lexicon_it_semantic.json usando BabelNet, se disponibile una API key."
)
parser.add_argument(
"--api-key",
default=os.environ.get(BABELNET_ENV_KEY),
help=f"Chiave API BabelNet. In alternativa imposta la variabile ambiente {BABELNET_ENV_KEY}.",
default=load_babelnet_api_key(),
help=(
f"Chiave API BabelNet. In alternativa imposta {BABELNET_ENV_KEY} "
f"o crea {BABELNET_LOCAL_KEY_PATH.name}."
),
)
parser.add_argument(
"--topic",
@@ -78,10 +140,29 @@ def write_json(path: Path, payload: object) -> None:
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]) -> object:
def cache_key(endpoint: str, params: Dict[str, str]) -> str:
    """Build the cache lookup key for a BabelNet request.

    The ``key`` parameter (the API key) is left out, so the result does not
    depend on which API key issued the request. Remaining parameters are
    sorted by name so that argument order does not affect the result.

    Args:
        endpoint: BabelNet endpoint name.
        params: Query parameters of the request, possibly including ``key``.

    Returns:
        ``"<endpoint>?<url-encoded sorted params without key>"``.
    """
    safe_params = sorted(
        (name, value) for name, value in params.items() if name != "key"
    )
    return f"{endpoint}?{urllib.parse.urlencode(safe_params)}"
def request_json(
endpoint: str,
params: Dict[str, str],
cache: Dict[str, object],
stats: Optional[Dict[str, int]] = None,
) -> object:
url = f"{BABELNET_API_BASE}/{endpoint}?{urllib.parse.urlencode(params)}"
if url in cache:
return cache[url]
key = cache_key(endpoint, params)
if key in cache:
if stats is not None:
stats["cache_hits"] = stats.get("cache_hits", 0) + 1
return cache[key]
if stats is not None:
limit = stats.get("api_call_limit")
current = stats.get("api_calls", 0)
if limit is not None and current >= limit:
raise BabelNetApiCallLimitReached("Limite chiamate API BabelNet raggiunto")
request = urllib.request.Request(url, headers={"Accept": "application/json"})
try:
@@ -89,9 +170,14 @@ def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]
payload = json.loads(response.read().decode("utf-8"))
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")
if exc.code == 403:
raise BabelNetKeyUnavailable(f"Chiave BabelNet non valida o limite giornaliero raggiunto: {detail}") from exc
raise RuntimeError(f"Errore BabelNet HTTP {exc.code}: {detail}") from exc
cache[url] = payload
cache[key] = payload
if stats is not None:
stats["api_calls"] = stats.get("api_calls", 0) + 1
stats["responses"] = stats.get("responses", 0) + 1
return payload
@@ -180,7 +266,13 @@ def dedupe(items: Iterable[str]) -> List[str]:
return result
def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object], sleep_seconds: float) -> Dict[str, object]:
def enrich_entry(
entry: Dict[str, object],
api_key: str,
cache: Dict[str, object],
sleep_seconds: float,
stats: Optional[Dict[str, int]] = None,
) -> Dict[str, object]:
word = str(entry.get("form", ""))
pos = POS_TO_BABELNET.get(str(entry.get("pos", "")))
if not pos:
@@ -195,6 +287,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
"key": api_key,
},
cache,
stats,
)
if sleep_seconds:
time.sleep(sleep_seconds)
@@ -215,6 +308,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
"key": api_key,
},
cache,
stats,
)
if sleep_seconds:
time.sleep(sleep_seconds)