feat: consolida lessico semantico, temi controllati e filler a quota tematica

This commit is contained in:
2026-04-15 15:37:52 +02:00
parent b172b9c04b
commit a1f8cb8577
8 changed files with 14030 additions and 46434 deletions

View File

@@ -9,7 +9,7 @@ from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from build_lexicon import LEXICON_OUTPUT_PATH, infer_topics
from build_lexicon import LEXICON_OUTPUT_PATH
IWN_XML_PATH = Path(__file__).with_name("iwn-omw-main") / "IWN-OMW-main" / "data" / "LMF-XML" / "IWN-OMW_LMF_v1.0.xml"
@@ -356,8 +356,7 @@ def enrich_entry(
][:20]
glosses = dedupe_keep_order(glosses)
semantic_topics = dedupe_keep_order(
list(entry.get("topics", []))
+ semantic_topics_from_text(
semantic_topics_from_text(
glosses
+ synonyms
+ raw_relation_terms.get("hypernym", [])
@@ -365,7 +364,6 @@ def enrich_entry(
+ raw_relation_terms.get("similar", [])
)
)
entry["topics"] = dedupe_keep_order(list(entry.get("topics", [])) + semantic_topics)
entry["semantic"] = {
"source": "iwn-omw",
"matched": True,