alpha01 backoffice: crossword engine, lexicon curation and JSON contract
This commit is contained in:
@@ -12,11 +12,11 @@ from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
from build_semantic_lexicon import SEMANTIC_LEXICON_OUTPUT_PATH
|
||||
from main import parse_difficulty
|
||||
|
||||
|
||||
BABELNET_OUTPUT_PATH = Path(__file__).with_name("lexicon_it_babelnet.json")
|
||||
BABELNET_CACHE_PATH = Path(__file__).with_name(".babelnet_cache.json")
|
||||
BABELNET_LOCAL_KEY_PATH = Path(__file__).with_name(".babelnet_api_key.local")
|
||||
BABELNET_API_BASE = "https://babelnet.io/v9"
|
||||
BABELNET_ENV_KEY = "BABELNET_API_KEY"
|
||||
|
||||
@@ -28,14 +28,76 @@ POS_TO_BABELNET = {
|
||||
}
|
||||
|
||||
|
||||
class BabelNetApiCallLimitReached(RuntimeError):
    """Raised when the configured cap on BabelNet API calls has been reached."""
|
||||
|
||||
|
||||
class BabelNetKeyUnavailable(RuntimeError):
    """Raised when BabelNet rejects the API key (HTTP 403: invalid key or daily quota used up)."""
|
||||
|
||||
# Named difficulty levels accepted in place of a number (there is no alias for 3).
DIFFICULTY_ALIASES: Dict[str, int] = {"easy": 1, "medium": 2, "hard": 4, "expert": 5}


def parse_difficulty(value: str) -> int:
    """Convert a ``--difficulty`` argument into an integer level between 1 and 5.

    The value is stringified, stripped and lower-cased. It may then be either
    one of the names in ``DIFFICULTY_ALIASES`` or an integer literal.

    Raises:
        SystemExit: if the value is neither a known alias nor an integer, or
            if the integer falls outside the 1-5 range.
    """
    normalized = str(value).strip().lower()

    alias_level = DIFFICULTY_ALIASES.get(normalized)
    if alias_level is not None:
        return alias_level

    try:
        numeric_level = int(normalized)
    except ValueError as exc:
        raise SystemExit(
            "Valore non valido per --difficulty. Usa easy, medium, hard, expert oppure un intero tra 1 e 5."
        ) from exc

    if numeric_level < 1 or numeric_level > 5:
        raise SystemExit("Il valore numerico di --difficulty deve essere compreso tra 1 e 5.")
    return numeric_level
|
||||
|
||||
|
||||
def _split_api_keys(text: str) -> List[str]:
|
||||
keys = []
|
||||
seen = set()
|
||||
normalized = text.replace(";", "\n").replace(",", "\n")
|
||||
for line in normalized.splitlines():
|
||||
key = line.strip()
|
||||
if not key or key.startswith("#") or key in seen:
|
||||
continue
|
||||
keys.append(key)
|
||||
seen.add(key)
|
||||
return keys
|
||||
|
||||
|
||||
def load_babelnet_api_keys() -> List[str]:
    """Return the configured BabelNet API keys, in priority order.

    The environment variable named by ``BABELNET_ENV_KEY`` is consulted first.
    If it is unset, empty, or contains no usable key (only whitespace or
    comments), the local file ``BABELNET_LOCAL_KEY_PATH`` is read instead.
    Both sources are parsed with ``_split_api_keys``.

    Returns:
        The list of keys found, or an empty list when neither source
        provides one.
    """
    env_value = os.environ.get(BABELNET_ENV_KEY)
    if env_value:
        env_keys = _split_api_keys(env_value)
        # A variable that is set but yields nothing must not hide the key file.
        if env_keys:
            return env_keys

    # is_file() rather than exists(): a directory at this path is not a key file.
    if BABELNET_LOCAL_KEY_PATH.is_file():
        return _split_api_keys(BABELNET_LOCAL_KEY_PATH.read_text(encoding="utf-8"))
    return []
|
||||
|
||||
|
||||
def load_babelnet_api_key() -> Optional[str]:
    """Return the first key from ``load_babelnet_api_keys()``, or ``None`` if it returns none."""
    return next(iter(load_babelnet_api_keys()), None)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Arricchisce lexicon_it_semantic.json usando BabelNet, se disponibile una API key."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-key",
|
||||
default=os.environ.get(BABELNET_ENV_KEY),
|
||||
help=f"Chiave API BabelNet. In alternativa imposta la variabile ambiente {BABELNET_ENV_KEY}.",
|
||||
default=load_babelnet_api_key(),
|
||||
help=(
|
||||
f"Chiave API BabelNet. In alternativa imposta {BABELNET_ENV_KEY} "
|
||||
f"o crea {BABELNET_LOCAL_KEY_PATH.name}."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--topic",
|
||||
@@ -78,10 +140,29 @@ def write_json(path: Path, payload: object) -> None:
|
||||
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
|
||||
def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]) -> object:
|
||||
def cache_key(endpoint: str, params: Dict[str, str]) -> str:
    """Build the cache lookup key for a request to ``endpoint`` with ``params``.

    The ``"key"`` parameter is left out, so the resulting string does not
    depend on it. The remaining parameters are sorted before being
    URL-encoded, which makes the key independent of their insertion order.
    """
    public_pairs = sorted(
        (name, value) for name, value in params.items() if name != "key"
    )
    query = urllib.parse.urlencode(public_pairs)
    return f"{endpoint}?{query}"
|
||||
|
||||
|
||||
def request_json(
|
||||
endpoint: str,
|
||||
params: Dict[str, str],
|
||||
cache: Dict[str, object],
|
||||
stats: Optional[Dict[str, int]] = None,
|
||||
) -> object:
|
||||
url = f"{BABELNET_API_BASE}/{endpoint}?{urllib.parse.urlencode(params)}"
|
||||
if url in cache:
|
||||
return cache[url]
|
||||
key = cache_key(endpoint, params)
|
||||
if key in cache:
|
||||
if stats is not None:
|
||||
stats["cache_hits"] = stats.get("cache_hits", 0) + 1
|
||||
return cache[key]
|
||||
|
||||
if stats is not None:
|
||||
limit = stats.get("api_call_limit")
|
||||
current = stats.get("api_calls", 0)
|
||||
if limit is not None and current >= limit:
|
||||
raise BabelNetApiCallLimitReached("Limite chiamate API BabelNet raggiunto")
|
||||
|
||||
request = urllib.request.Request(url, headers={"Accept": "application/json"})
|
||||
try:
|
||||
@@ -89,9 +170,14 @@ def request_json(endpoint: str, params: Dict[str, str], cache: Dict[str, object]
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
if exc.code == 403:
|
||||
raise BabelNetKeyUnavailable(f"Chiave BabelNet non valida o limite giornaliero raggiunto: {detail}") from exc
|
||||
raise RuntimeError(f"Errore BabelNet HTTP {exc.code}: {detail}") from exc
|
||||
|
||||
cache[url] = payload
|
||||
cache[key] = payload
|
||||
if stats is not None:
|
||||
stats["api_calls"] = stats.get("api_calls", 0) + 1
|
||||
stats["responses"] = stats.get("responses", 0) + 1
|
||||
return payload
|
||||
|
||||
|
||||
@@ -180,7 +266,13 @@ def dedupe(items: Iterable[str]) -> List[str]:
|
||||
return result
|
||||
|
||||
|
||||
def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object], sleep_seconds: float) -> Dict[str, object]:
|
||||
def enrich_entry(
|
||||
entry: Dict[str, object],
|
||||
api_key: str,
|
||||
cache: Dict[str, object],
|
||||
sleep_seconds: float,
|
||||
stats: Optional[Dict[str, int]] = None,
|
||||
) -> Dict[str, object]:
|
||||
word = str(entry.get("form", ""))
|
||||
pos = POS_TO_BABELNET.get(str(entry.get("pos", "")))
|
||||
if not pos:
|
||||
@@ -195,6 +287,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
|
||||
"key": api_key,
|
||||
},
|
||||
cache,
|
||||
stats,
|
||||
)
|
||||
if sleep_seconds:
|
||||
time.sleep(sleep_seconds)
|
||||
@@ -215,6 +308,7 @@ def enrich_entry(entry: Dict[str, object], api_key: str, cache: Dict[str, object
|
||||
"key": api_key,
|
||||
},
|
||||
cache,
|
||||
stats,
|
||||
)
|
||||
if sleep_seconds:
|
||||
time.sleep(sleep_seconds)
|
||||
|
||||
Reference in New Issue
Block a user