2 Commits

24 changed files with 161800 additions and 9 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

243
build_vocabulary.py Normal file
View File

@@ -0,0 +1,243 @@
from __future__ import annotations
import json
import re
import unicodedata
from pathlib import Path
from typing import Dict, Iterable, List, Optional
# Input: JSON file shipped inside the bundled "package" folder; only its keys are
# read as candidate words (see build_vocabulary).
PACKAGE_WORDS_PATH = Path(__file__).with_name("package") / "dist" / "words.json"
# Output: every normalized word, one per line, written next to this script.
OUTPUT_PATH = Path(__file__).with_name("vocaboli_it_esteso.txt")
# Output: the subset of words accepted by is_good_crossword_word.
FILTERED_OUTPUT_PATH = Path(__file__).with_name("vocaboli_it_filtrato.txt")
# Output: per-word tags and quality score produced by categorize_word.
METADATA_OUTPUT_PATH = Path(__file__).with_name("vocaboli_it_metadata.json")
# Normalized words shorter than this are discarded by normalize_word.
MIN_WORD_LENGTH = 2
# Words longer than this are tagged "too_long" by categorize_word.
MAX_WORD_LENGTH = 14
# Italian function words (prepositions, articulated prepositions, conjunctions).
# categorize_word gives these a score bonus; entries are grouped by base word.
COMMON_FUNCTION_WORDS = {
    # a
    "a", "ad", "al", "allo", "ai", "agli", "alla", "alle",
    # con
    "con", "col", "coi",
    # da
    "da", "dal", "dallo", "dai", "dagli", "dalla", "dalle",
    # di
    "di", "del", "dello", "dei", "degli", "della", "delle",
    # e
    "e", "ed",
    # in
    "in", "nel", "nello", "nei", "negli", "nella", "nelle",
    # o
    "o", "od",
    # per
    "per",
    # su
    "su", "sul", "sullo", "sui", "sugli", "sulla", "sulle",
    # tra / fra
    "tra", "fra",
}
# Suffix tables used by categorize_word with str.endswith; each is a tuple so it
# can be passed to endswith directly. Words are accent-stripped before matching,
# hence the unaccented "ita".
COMMON_VERB_SUFFIXES = ("are", "ere", "ire")
COMMON_ADVERB_SUFFIXES = ("mente",)
COMMON_NOUN_SUFFIXES = (
    "zione", "zioni",
    "tore", "tori",
    "trice", "trici",
    "ista", "isti",
    "ismo", "ismi",
    "anza", "enze", "enza",
    "ezza", "ezze",
    "ita",
    "ore", "ori",
)
COMMON_ADJECTIVE_SUFFIXES = (
    "ale", "ali",
    "oso", "osa", "osi", "ose",
    "ivo", "iva", "ivi", "ive",
    "ente", "enti",
)
def maybe_fix_mojibake(text: str) -> str:
    """Undo a UTF-8-read-as-Latin-1 mix-up when the text shows its typical markers.

    Text without an "Ã" or "Â" character is returned untouched. Otherwise the
    text is re-encoded as Latin-1 and decoded as UTF-8; if either step fails the
    original text is returned unchanged.
    """
    looks_garbled = "Ã" in text or "Â" in text
    if looks_garbled:
        try:
            repaired = text.encode("latin-1").decode("utf-8")
        except (UnicodeEncodeError, UnicodeDecodeError):
            repaired = text
        return repaired
    return text
def strip_accents(text: str) -> str:
    """Return *text* with combining marks removed after NFKD decomposition."""
    decomposed = unicodedata.normalize("NFKD", text)
    base_characters = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return "".join(base_characters)
def normalize_word(word: str) -> Optional[str]:
    """Reduce a raw dictionary entry to a plain lowercase a-z word.

    Steps: repair mojibake, trim and lowercase, unify apostrophe variants,
    strip accents, then drop apostrophes, hyphens and spaces.

    Returns:
        The normalized word, or None when the result is shorter than
        MIN_WORD_LENGTH or still contains anything other than a-z.
    """
    # Repair mojibake BEFORE lowercasing: the detector looks for the uppercase
    # markers "Ã"/"Â", which lower() would turn into "ã"/"â" and thereby hide.
    clean = maybe_fix_mojibake(word).strip().lower()
    # Map the typographic apostrophe (U+2019) and the backtick to a plain
    # apostrophe. Searching for an empty string here would match between every
    # character and leave U+2019 untouched, so such words were rejected below.
    clean = clean.replace("\u2019", "'").replace("`", "'")
    clean = strip_accents(clean)
    for separator in ("'", "-", " "):
        clean = clean.replace(separator, "")
    if len(clean) < MIN_WORD_LENGTH:
        return None
    if not re.fullmatch(r"[a-z]+", clean):
        return None
    return clean
def categorize_word(word: str) -> Dict[str, object]:
    """Tag a normalized word and compute its heuristic quality score.

    Bonuses come from function-word membership, suffix families, length class
    and letter variety; penalties from triple letter repeats, runs of four or
    more non-vowels and exceeding MAX_WORD_LENGTH.

    Returns:
        A dict with "tags" (sorted, de-duplicated labels) and "quality"
        (bonus minus penalty).
    """
    labels: List[str] = []
    bonus = 0

    if word in COMMON_FUNCTION_WORDS:
        labels.append("function")
        bonus += 6

    # (suffix tuple, label, points) checked in this fixed order.
    suffix_rules = (
        (COMMON_VERB_SUFFIXES, "verb_infinitive", 4),
        (COMMON_ADVERB_SUFFIXES, "adverb", 3),
        (COMMON_NOUN_SUFFIXES, "noun_like", 3),
        (COMMON_ADJECTIVE_SUFFIXES, "adjective_like", 2),
    )
    for suffixes, label, points in suffix_rules:
        if word.endswith(suffixes):
            labels.append(label)
            bonus += points

    size = len(word)
    if size <= 4:
        labels.append("short")
        bonus += 2
    elif size <= 9:
        labels.append("medium")
        bonus += 3
    else:
        labels.append("long")
        bonus += 1

    if len(set(word)) >= max(4, size // 2):
        labels.append("varied_letters")
        bonus += 2

    malus = 0
    # Any match is a run of at least three identical characters.
    if re.search(r"(.)\1{2,}", word) is not None:
        labels.append("repetition_penalty")
        malus += 3
    if re.search(r"[^aeiou]{4,}", word) is not None:
        labels.append("cluster_penalty")
        malus += 2
    if size > MAX_WORD_LENGTH:
        labels.append("too_long")
        malus += 6

    return {"tags": sorted(set(labels)), "quality": bonus - malus}
def is_good_crossword_word(word: str, meta: Dict[str, object]) -> bool:
    """Decide whether a categorized word belongs in the filtered vocabulary.

    A word is rejected when it is tagged "too_long", when its quality is below
    2, or when it has 13+ letters, is neither a function word nor a verb
    infinitive, and has quality below 5.
    """
    labels = set(meta["tags"])
    quality = int(meta["quality"])
    if "too_long" in labels or quality < 2:
        return False
    exempt = bool(labels & {"function", "verb_infinitive"})
    return not (len(word) >= 13 and not exempt and quality < 5)
def extract_words(raw_words: Iterable[str]) -> List[str]:
    """Normalize every raw word and return the distinct survivors, sorted."""
    cleaned = (normalize_word(raw) for raw in raw_words)
    return sorted({item for item in cleaned if item is not None})
def build_vocabulary(
    source_path: Path = PACKAGE_WORDS_PATH,
    output_path: Path = OUTPUT_PATH,
    filtered_output_path: Path = FILTERED_OUTPUT_PATH,
    metadata_output_path: Path = METADATA_OUTPUT_PATH,
) -> Dict[str, int]:
    """Build the extended vocabulary, the filtered vocabulary and the metadata file.

    Args:
        source_path: JSON file whose top-level keys are the raw words.
        output_path: destination of the full normalized word list.
        filtered_output_path: destination of the words accepted by
            is_good_crossword_word. Previously hard-coded, so a caller passing
            a custom ``output_path`` still overwrote the default filtered file.
        metadata_output_path: destination of the per-word metadata JSON
            (previously hard-coded as well).

    Returns:
        Counts under the keys "extended_words", "filtered_words" and
        "metadata_entries".
    """
    payload = json.loads(source_path.read_text(encoding="utf-8"))
    words = extract_words(payload.keys())
    output_path.write_text("\n".join(words) + "\n", encoding="utf-8")

    metadata = {word: categorize_word(word) for word in words}
    filtered_words = [word for word in words if is_good_crossword_word(word, metadata[word])]
    filtered_output_path.write_text("\n".join(filtered_words) + "\n", encoding="utf-8")
    metadata_output_path.write_text(
        json.dumps(metadata, ensure_ascii=False, indent=2, sort_keys=True),
        encoding="utf-8",
    )
    return {
        "extended_words": len(words),
        "filtered_words": len(filtered_words),
        "metadata_entries": len(metadata),
    }
def main() -> None:
    """Rebuild the vocabulary files with default paths and print a short report."""
    totals = build_vocabulary()
    report = (
        f"Vocabolario esteso: {OUTPUT_PATH}",
        f"Vocabolario filtrato: {FILTERED_OUTPUT_PATH}",
        f"Metadati: {METADATA_OUTPUT_PATH}",
        f"Parole estese: {totals['extended_words']}",
        f"Parole filtrate: {totals['filtered_words']}",
        f"Metadati generati: {totals['metadata_entries']}",
    )
    for line in report:
        print(line)


if __name__ == "__main__":
    main()

55
create_passo3.bat Normal file
View File

@@ -0,0 +1,55 @@
@echo off
rem Commits every change in this folder to the "passo3" branch and pushes it.
rem Usage: create_passo3.bat ["custom commit message"]
rem Exits with code 1 as soon as any git step fails.
setlocal
rem Work from the folder that contains this script.
cd /d "%~dp0"
set "BRANCH_NAME=passo3"
set "COMMIT_MSG=feat: aggiunge CLI unificata, build vocabolario e filtro lessicale"
rem An optional first argument replaces the default commit message.
if not "%~1"=="" (
set "COMMIT_MSG=%~1"
)
echo Repository: %cd%
echo Branch target: %BRANCH_NAME%
echo Commit message: %COMMIT_MSG%
echo.
rem Abort unless the current folder is inside a git work tree.
git rev-parse --is-inside-work-tree >nul 2>nul
if errorlevel 1 (
echo Errore: questa cartella non e' un repository Git.
exit /b 1
)
rem Create the local branch if it does not exist yet, otherwise switch to it.
git show-ref --verify --quiet refs/heads/%BRANCH_NAME%
if errorlevel 1 (
echo Creo il branch %BRANCH_NAME%...
git checkout -b %BRANCH_NAME%
) else (
echo Il branch %BRANCH_NAME% esiste gia', ci passo sopra...
git checkout %BRANCH_NAME%
)
rem Checks the result of whichever checkout command ran above.
if errorlevel 1 exit /b 1
echo.
echo Aggiungo le modifiche...
rem Stages everything under the current folder, including untracked files.
git add .
if errorlevel 1 exit /b 1
echo.
echo Creo il commit...
git commit -m "%COMMIT_MSG%"
rem A failed commit stops the script here, so the push below is skipped.
if errorlevel 1 (
echo.
echo Nessun commit creato. Potrebbe non esserci nulla di nuovo da salvare.
exit /b 1
)
echo.
echo Eseguo il push del branch %BRANCH_NAME%...
rem -u sets origin/%BRANCH_NAME% as the upstream of the local branch.
git push -u origin %BRANCH_NAME%
if errorlevel 1 exit /b 1
echo.
echo Operazione completata con successo.
endlocal

451
crossword_filler.py Normal file
View File

@@ -0,0 +1,451 @@
from __future__ import annotations
from dataclasses import dataclass
import json
from pathlib import Path
import sys
import time
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
from crossword_generator import (
DIFFXY,
HORIZONTAL,
VERTICAL,
CrosswordGenerator,
CrosswordState,
Placement,
WORDS,
render_grid,
)
# Default share of the bounding box that may stay empty; CrosswordFiller.fill
# stops once the empty-cell count is at or below this share of the total cells.
TARGET_EMPTY_RATIO = 1 / 6
# Shortest vocabulary word kept and shortest slot length considered.
MIN_WORD_LENGTH = 2
# Consecutive fill iterations without a placement before the filler gives up.
MAX_NO_PROGRESS_STEPS = 150
# Number of top-priority slots tried in each fill iteration.
MAX_SLOT_CANDIDATES = 8
# Vocabulary files expected next to this module.
EXTENDED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_esteso.txt")
FILTERED_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_filtrato.txt")
METADATA_VOCAB_PATH = Path(__file__).with_name("vocaboli_it_metadata.json")
# Default vocabulary, resolved once at import time: the filtered list if it
# exists, else the extended list if it exists, else "vocaboli_it.txt".
VOCAB_PATH = (
FILTERED_VOCAB_PATH
if FILTERED_VOCAB_PATH.exists()
else EXTENDED_VOCAB_PATH
if EXTENDED_VOCAB_PATH.exists()
else Path(__file__).with_name("vocaboli_it.txt")
)
# Grid cell address as (x, y).
Coordinate = Tuple[int, int]
@dataclass(frozen=True)
class FillSlot:
    """A straight run of cells, starting at (x, y), where a word could be placed."""

    x: int
    y: int
    direction: str
    length: int
    # One character per cell: the fixed letter, or "." for an empty cell.
    pattern: str
    fixed_letters: int
    empty_cells: int
    # Number of unused vocabulary words matching the pattern when the slot was built.
    candidate_count: int

    @property
    def cells(self) -> List[Coordinate]:
        """Coordinates covered by the slot, in reading order."""
        step_x, step_y = (1, 0) if self.direction == HORIZONTAL else (0, 1)
        return [
            (self.x + step_x * index, self.y + step_y * index)
            for index in range(self.length)
        ]
@dataclass(frozen=True)
class FillCandidate:
    """A vocabulary word proposed for a specific slot, with its ranking key."""

    word: str
    slot: FillSlot
    # Cells of the slot that are currently empty.
    new_letters: int
    # Cells of the slot that already hold a letter.
    reused_letters: int
    # Ranking key built by CrosswordFiller._best_candidate_for_slot:
    # (reused letters, new letters, word quality, distinct letters).
    # The annotation previously declared three elements although four are stored.
    local_score: Tuple[int, int, int, int]
class CrosswordFiller:
    """Greedily add vocabulary words to an already generated crossword.

    The filler works on its own copy of the given state and only considers
    slots inside the bounding box of the initial grid. Words are added one at a
    time until the number of empty cells in that box is at or below the target,
    or no slot with a matching unused word remains. Progress is reported on
    stderr.
    """

    def __init__(
        self,
        state: CrosswordState,
        vocabulary: Sequence[str],
        *,
        target_empty_ratio: float = TARGET_EMPTY_RATIO,
        vocabulary_metadata: Optional[Dict[str, Dict[str, object]]] = None,
    ) -> None:
        """Prepare the filler.

        Args:
            state: starting crossword; it is copied, never modified.
            vocabulary: candidate words (cleaned by _normalize_vocabulary).
            target_empty_ratio: share of the bounding box allowed to stay empty.
            vocabulary_metadata: optional per-word dicts; the "quality" entry
                is used to rank candidates.

        Raises:
            ValueError: if the state's grid has no occupied cell.
        """
        self.state = state.copy()
        self.initial_state = state.copy()
        self.target_empty_ratio = target_empty_ratio
        self.used_words: Set[str] = {placement.word for placement in self.state.placements}
        self.added_words: List[Placement] = []
        self.vocabulary = self._normalize_vocabulary(vocabulary)
        self.words_by_length = self._index_vocabulary(self.vocabulary)
        self.vocabulary_metadata = vocabulary_metadata or {}
        self.bounds = self._compute_bounds(self.state.grid)
        self.total_cells = self._area(self.bounds)
        self.target_empty_cells = max(0, int(round(self.total_cells * self.target_empty_ratio)))
        self.nodes_visited = 0
        self.last_spinner_update = 0.0
        self.spinner_frames = ["-", "/", "|", "\\"]
        self.spinner_index = 0
        self.started_at = 0.0
        self.last_word = "-"

    @staticmethod
    def _normalize_vocabulary(words: Sequence[str]) -> List[str]:
        """Trim and lowercase words; drop short, non-alphabetic and duplicate ones."""
        normalized: List[str] = []
        seen: Set[str] = set()
        for word in words:
            clean = word.strip().lower()
            if len(clean) < MIN_WORD_LENGTH or not clean.isalpha() or clean in seen:
                continue
            normalized.append(clean)
            seen.add(clean)
        return normalized

    @staticmethod
    def _index_vocabulary(words: Sequence[str]) -> Dict[int, List[str]]:
        """Group words by length, keeping their input order within each group."""
        index: Dict[int, List[str]] = {}
        for word in words:
            index.setdefault(len(word), []).append(word)
        return index

    @staticmethod
    def _compute_bounds(grid: Dict[Coordinate, str]) -> Tuple[int, int, int, int]:
        """Return (x_min, y_min, x_max, y_max) of the occupied cells.

        Raises:
            ValueError: if the grid is empty. This used to surface as the
                opaque error raised by min() on an empty sequence.
        """
        if not grid:
            raise ValueError("Impossibile riempire una griglia vuota: nessuna cella occupata.")
        xs = [x for x, _ in grid]
        ys = [y for _, y in grid]
        return min(xs), min(ys), max(xs), max(ys)

    @staticmethod
    def _area(bounds: Tuple[int, int, int, int]) -> int:
        """Number of cells in the inclusive bounding box."""
        x_min, y_min, x_max, y_max = bounds
        return (x_max - x_min + 1) * (y_max - y_min + 1)

    def fill(self) -> CrosswordState:
        """Add words until the empty-cell target is met or no slot is usable.

        Each iteration collects the slots, tries the first
        MAX_SLOT_CANDIDATES of them in priority order and applies the first
        candidate found. Returns the filler's own (mutated) state.
        """
        self.started_at = time.perf_counter()
        self.last_spinner_update = self.started_at
        no_progress_steps = 0

        while self.empty_cells_count() > self.target_empty_cells and no_progress_steps < MAX_NO_PROGRESS_STEPS:
            self.nodes_visited += 1
            slots = self._collect_slots()
            self._tick_spinner(slots_count=len(slots))
            if not slots:
                break

            progress = False
            for slot in slots[:MAX_SLOT_CANDIDATES]:
                candidate = self._best_candidate_for_slot(slot)
                if candidate is None:
                    continue
                self._apply_candidate(candidate)
                progress = True
                no_progress_steps = 0
                break
            if not progress:
                no_progress_steps += 1

        self._clear_spinner()
        return self.state

    def empty_cells_count(self) -> int:
        """Count the cells of the bounding box that hold no letter."""
        x_min, y_min, x_max, y_max = self.bounds
        return sum(
            1
            for y in range(y_min, y_max + 1)
            for x in range(x_min, x_max + 1)
            if (x, y) not in self.state.grid
        )

    def coverage_ratio(self) -> float:
        """Share of the bounding box that holds a letter (0.0 to 1.0)."""
        return 1.0 - (self.empty_cells_count() / self.total_cells)

    def _collect_slots(self) -> List[FillSlot]:
        """Return every usable slot starting on an empty cell, best priority first."""
        slots: List[FillSlot] = []
        x_min, y_min, x_max, y_max = self.bounds
        for y in range(y_min, y_max + 1):
            for x in range(x_min, x_max + 1):
                if (x, y) in self.state.grid:
                    continue
                for direction in (HORIZONTAL, VERTICAL):
                    slots.extend(self._slots_from_start(x, y, direction))

        # Keep one slot per (start, direction, length), preferring higher priority.
        unique: Dict[Tuple[int, int, str, int], FillSlot] = {}
        for slot in slots:
            key = (slot.x, slot.y, slot.direction, slot.length)
            current = unique.get(key)
            if current is None or self._slot_priority(slot) > self._slot_priority(current):
                unique[key] = slot

        collected = list(unique.values())
        collected.sort(key=self._slot_priority, reverse=True)
        return collected

    def _slots_from_start(self, x: int, y: int, direction: str) -> Iterable[FillSlot]:
        """Build the slots that start at (x, y) and run in *direction*.

        Nothing is returned when the preceding cell holds a letter. Otherwise
        one slot is produced for each length, from the longest that fits in
        the bounds down to MIN_WORD_LENGTH, provided that the cell right after
        it is free, it has at least one empty cell, and at least one unused
        word matches its pattern.
        """
        dx, dy = (1, 0) if direction == HORIZONTAL else (0, 1)
        grid = self.state.grid

        prev_cell = (x - dx, y - dy)
        if self._inside_bounds(prev_cell) and prev_cell in grid:
            return []

        # Cells available from the start to the edge of the bounding box.
        _, _, x_max, y_max = self.bounds
        if not self._inside_bounds((x, y)):
            max_length = 0
        elif dx:
            max_length = x_max - x + 1
        else:
            max_length = y_max - y + 1

        # Read the whole run once; every shorter slot is a prefix of it.
        run = [grid.get((x + dx * offset, y + dy * offset)) for offset in range(max_length)]
        # empty_before[n] = number of empty cells among the first n cells of the run.
        empty_before = [0]
        for letter in run:
            empty_before.append(empty_before[-1] + (1 if letter is None else 0))

        slots: List[FillSlot] = []
        for length in range(max_length, MIN_WORD_LENGTH - 1, -1):
            end_cell = (x + dx * length, y + dy * length)
            if self._inside_bounds(end_cell) and end_cell in grid:
                continue

            empty_cells = empty_before[length]
            if empty_cells == 0:
                continue

            pattern = "".join("." if letter is None else letter for letter in run[:length])
            candidate_count = self._count_candidates(pattern)
            if candidate_count == 0:
                continue

            slots.append(
                FillSlot(
                    x=x,
                    y=y,
                    direction=direction,
                    length=length,
                    pattern=pattern,
                    fixed_letters=length - empty_cells,
                    empty_cells=empty_cells,
                    candidate_count=candidate_count,
                )
            )
        return slots

    def _slot_priority(self, slot: FillSlot) -> Tuple[int, int, int, int, int]:
        """Sort key: more fixed letters, fewer candidates, longer, fewer empty cells, horizontal."""
        return (
            slot.fixed_letters,
            -slot.candidate_count,
            slot.length,
            -slot.empty_cells,
            1 if slot.direction == HORIZONTAL else 0,
        )

    def _count_candidates(self, pattern: str) -> int:
        """Count unused words of the pattern's length that match it."""
        return sum(
            1
            for word in self.words_by_length.get(len(pattern), [])
            if word not in self.used_words and self._matches_pattern(word, pattern)
        )

    @staticmethod
    def _matches_pattern(word: str, pattern: str) -> bool:
        """True when every non-"." pattern character equals the word's character."""
        return all(p == "." or p == w for w, p in zip(word, pattern))

    def _best_candidate_for_slot(self, slot: FillSlot) -> Optional[FillCandidate]:
        """Return the highest-scoring valid word for *slot*, or None.

        Ties keep the word that comes first in the vocabulary, which is what
        the previous stable descending sort selected.
        """
        # Both counts depend only on the slot and the grid, not on the word.
        new_letters = sum(1 for cell in slot.cells if cell not in self.state.grid)
        reused_letters = slot.fixed_letters

        best: Optional[FillCandidate] = None
        for word in self.words_by_length.get(slot.length, []):
            if word in self.used_words or not self._matches_pattern(word, slot.pattern):
                continue
            if not self._placement_is_valid(slot, word):
                continue

            local_score = (
                reused_letters,
                new_letters,
                self._word_quality(word),
                len(set(word)),
            )
            # Strict comparison: the first word reaching the top score wins.
            if best is None or local_score > best.local_score:
                best = FillCandidate(
                    word=word,
                    slot=slot,
                    new_letters=new_letters,
                    reused_letters=reused_letters,
                    local_score=local_score,
                )
        return best

    def _word_quality(self, word: str) -> int:
        """Return the metadata "quality" of *word* as an int, or 0 if unavailable."""
        metadata = self.vocabulary_metadata.get(word)
        if not metadata:
            return 0
        try:
            return int(metadata.get("quality", 0))
        except (TypeError, ValueError):
            return 0

    def _placement_is_valid(self, slot: FillSlot, word: str) -> bool:
        """Check that *word* fits *slot* without touching a letter at either end.

        NOTE(review): cells beside the slot (perpendicular neighbours) are not
        inspected here - confirm whether adjacency is meant to be allowed.
        """
        dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1)
        before = (slot.x - dx, slot.y - dy)
        after = (slot.x + dx * slot.length, slot.y + dy * slot.length)
        if self._inside_bounds(before) and before in self.state.grid:
            return False
        if self._inside_bounds(after) and after in self.state.grid:
            return False

        intersects_existing = False
        for offset, cell in enumerate(slot.cells):
            current = self.state.grid.get(cell)
            letter = word[offset]
            if current is not None and current != letter:
                return False
            if current == letter:
                intersects_existing = True
        return intersects_existing or slot.fixed_letters == 0

    def _apply_candidate(self, candidate: FillCandidate) -> None:
        """Write the candidate into the grid, record it and log it on stderr."""
        slot = candidate.slot
        dx, dy = (1, 0) if slot.direction == HORIZONTAL else (0, 1)
        intersections = 0
        for offset, letter in enumerate(candidate.word):
            cell = (slot.x + dx * offset, slot.y + dy * offset)
            if cell in self.state.grid:
                intersections += 1
            self.state.grid[cell] = letter

        placement = Placement(
            word=candidate.word,
            x=slot.x,
            y=slot.y,
            direction=slot.direction,
            intersections=intersections,
        )
        self.state.placements.append(placement)
        self.state.intersections += intersections
        self.used_words.add(candidate.word)
        self.added_words.append(placement)
        self.last_word = candidate.word
        print(
            f"\n[fill] inserita '{candidate.word}' "
            f"in {slot.direction} da ({slot.x}, {slot.y}), "
            f"nuove={candidate.new_letters}, intersezioni={intersections}, "
            f"copertura={self.coverage_ratio() * 100:.1f}%",
            file=sys.stderr,
            flush=True,
        )

    def _inside_bounds(self, cell: Coordinate) -> bool:
        """True when *cell* lies within the inclusive bounding box."""
        x_min, y_min, x_max, y_max = self.bounds
        x, y = cell
        return x_min <= x <= x_max and y_min <= y <= y_max

    def _tick_spinner(self, *, slots_count: int) -> None:
        """Refresh the one-line stderr status, at most once every 0.08 seconds."""
        now = time.perf_counter()
        if now - self.last_spinner_update < 0.08:
            return
        frame = self.spinner_frames[self.spinner_index]
        elapsed = now - self.started_at
        message = (
            f"\r{frame} fill... "
            f"slot={slots_count} "
            f"vuote={self.empty_cells_count()}/{self.total_cells} "
            f"target={self.target_empty_cells} "
            f"aggiunte={len(self.added_words)} "
            f"ultima={self.last_word} "
            f"t={elapsed:0.1f}s"
        )
        print(message, end="", file=sys.stderr, flush=True)
        self.spinner_index = (self.spinner_index + 1) % len(self.spinner_frames)
        self.last_spinner_update = now

    @staticmethod
    def _clear_spinner() -> None:
        """Blank out the status line on stderr."""
        print("\r" + " " * 140 + "\r", end="", file=sys.stderr, flush=True)
def load_vocabulary(path: Path = VOCAB_PATH) -> List[str]:
    """Read a UTF-8 vocabulary file and return its lines.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if path.exists():
        content = path.read_text(encoding="utf-8")
        return content.splitlines()
    raise FileNotFoundError(f"Vocabolario non trovato: {path}")
def load_vocabulary_metadata(path: Path = METADATA_VOCAB_PATH) -> Dict[str, Dict[str, object]]:
    """Load the metadata JSON, or return an empty dict when the file is missing."""
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        return json.loads(raw_text)
    return {}
def summarize_fill(initial_state: CrosswordState, final_state: CrosswordState) -> str:
    """Build the multi-line text report comparing the grid before and after filling."""
    initial_width = initial_state.width()
    initial_height = initial_state.height()
    initial_diff = initial_state.shape_difference()
    final_width = final_state.width()
    final_height = final_state.height()
    final_diff = final_state.shape_difference()

    report_lines = [
        "Riempimento completato",
        f"Parole iniziali: {initial_state.placed_words}",
        f"Parole finali: {final_state.placed_words}",
        f"Intersezioni finali: {final_state.intersections}",
        f"Dimensioni iniziali: {initial_width}x{initial_height} (diff={initial_diff})",
        f"Dimensioni finali: {final_width}x{final_height} (diff={final_diff})",
        f"Celle vuote residue: {sum(1 for _ in iter_empty_cells(final_state))}",
    ]
    return "\n".join(report_lines)
def iter_empty_cells(state: CrosswordState) -> Iterable[Coordinate]:
    """Yield the (x, y) cells of the state's bounding box that hold no letter.

    Cells are produced row by row, left to right within each row.
    """
    left, top, right, bottom = state.bounds()
    for row in range(top, bottom + 1):
        yield from (
            (column, row)
            for column in range(left, right + 1)
            if (column, row) not in state.grid
        )
def main() -> None:
    """Generate the initial grid, fill it with the default vocabulary and print the result."""
    vocabulary = load_vocabulary()
    vocabulary_metadata = load_vocabulary_metadata()

    generator = CrosswordGenerator(WORDS, diffxy=DIFFXY)
    initial_state = generator.solve()

    filler = CrosswordFiller(
        initial_state,
        vocabulary,
        vocabulary_metadata=vocabulary_metadata,
    )
    final_state = filler.fill()

    print(summarize_fill(initial_state, final_state))
    print()
    print(render_grid(final_state.grid, final_state.placements))

    if not filler.added_words:
        return
    print()
    print("Parole aggiunte dal filler:")
    for index, placement in enumerate(filler.added_words, start=1):
        if placement.direction == HORIZONTAL:
            direction = "orizzontale"
        else:
            direction = "verticale"
        print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}")


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import locale
import sys import sys
import time import time
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
@@ -35,6 +36,9 @@ WORDS = [
HORIZONTAL = "H" HORIZONTAL = "H"
VERTICAL = "V" VERTICAL = "V"
DIFFXY = 7
EMPTY_CELL_RENDER = ""
EMPTY_CELL_FALLBACK = "#"
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -78,9 +82,28 @@ class CrosswordState:
x_min, y_min, x_max, y_max = self.bounds() x_min, y_min, x_max, y_max = self.bounds()
return (x_max - x_min + 1) * (y_max - y_min + 1) return (x_max - x_min + 1) * (y_max - y_min + 1)
def score(self) -> Tuple[int, int, int]: def width(self) -> int:
# Maximize placed words first, then intersections, then prefer compact grids. x_min, _, x_max, _ = self.bounds()
return (self.placed_words, self.intersections, -self.area()) return x_max - x_min + 1
def height(self) -> int:
_, y_min, _, y_max = self.bounds()
return y_max - y_min + 1
def shape_difference(self) -> int:
return abs(self.width() - self.height())
def score(self, diffxy: int) -> Tuple[int, int, int, int, int]:
# Prefer valid shapes first; within those, maximize placed words and intersections,
# then prefer squarer and more compact grids.
is_shape_valid = 1 if self.shape_difference() <= diffxy else 0
return (
is_shape_valid,
self.placed_words,
self.intersections,
-self.shape_difference(),
-self.area(),
)
class CrosswordGenerator: class CrosswordGenerator:
@@ -90,6 +113,7 @@ class CrosswordGenerator:
*, *,
max_candidates_per_word: int = 12, max_candidates_per_word: int = 12,
time_limit_seconds: float = 8.0, time_limit_seconds: float = 8.0,
diffxy: int = DIFFXY,
) -> None: ) -> None:
normalized = [self._normalize(word) for word in words] normalized = [self._normalize(word) for word in words]
unique_words = list(dict.fromkeys(word for word in normalized if len(word) >= 2)) unique_words = list(dict.fromkeys(word for word in normalized if len(word) >= 2))
@@ -97,6 +121,7 @@ class CrosswordGenerator:
self.best_state = CrosswordState(grid={}, placements=[], intersections=0) self.best_state = CrosswordState(grid={}, placements=[], intersections=0)
self.max_candidates_per_word = max_candidates_per_word self.max_candidates_per_word = max_candidates_per_word
self.time_limit_seconds = time_limit_seconds self.time_limit_seconds = time_limit_seconds
self.diffxy = diffxy
self.started_at = 0.0 self.started_at = 0.0
self.visited: Dict[Tuple[frozenset, Tuple[str, ...]], Tuple[int, int, int]] = {} self.visited: Dict[Tuple[frozenset, Tuple[str, ...]], Tuple[int, int, int]] = {}
self.nodes_visited = 0 self.nodes_visited = 0
@@ -151,7 +176,7 @@ class CrosswordGenerator:
if time.perf_counter() - self.started_at > self.time_limit_seconds: if time.perf_counter() - self.started_at > self.time_limit_seconds:
return return
if state.score() > self.best_state.score(): if state.score(self.diffxy) > self.best_state.score(self.diffxy):
self.best_state = state.copy() self.best_state = state.copy()
if not remaining_words: if not remaining_words:
@@ -167,9 +192,10 @@ class CrosswordGenerator:
signature = (frozenset(state.grid.items()), tuple(sorted(remaining_words))) signature = (frozenset(state.grid.items()), tuple(sorted(remaining_words)))
best_seen = self.visited.get(signature) best_seen = self.visited.get(signature)
if best_seen is not None and best_seen >= state.score(): current_score = state.score(self.diffxy)
if best_seen is not None and best_seen >= current_score:
return return
self.visited[signature] = state.score() self.visited[signature] = current_score
next_word = self._select_next_word(state, remaining_words) next_word = self._select_next_word(state, remaining_words)
candidates = self._generate_candidates(state, next_word) candidates = self._generate_candidates(state, next_word)
@@ -206,8 +232,8 @@ class CrosswordGenerator:
message = ( message = (
f"\r{frame} elaborazione... " f"\r{frame} elaborazione... "
f"nodi={self.nodes_visited} " f"nodi={self.nodes_visited} "
f"migliore={self.best_state.placed_words} parole, {self.best_state.intersections} incroci " f"migliore={self.best_state.placed_words} parole, {self.best_state.intersections} incroci, diff={self.best_state.shape_difference()} "
f"attuale={state.placed_words} parole " f"attuale={state.placed_words} parole, diff={state.shape_difference()} "
f"t={elapsed:0.1f}s" f"t={elapsed:0.1f}s"
) )
print(message, end="", file=sys.stderr, flush=True) print(message, end="", file=sys.stderr, flush=True)
@@ -338,6 +364,9 @@ def render_grid(grid: Grid, placements: Sequence[Placement]) -> str:
if not grid: if not grid:
return "(griglia vuota)" return "(griglia vuota)"
encoding = (getattr(sys.stdout, "encoding", None) or locale.getpreferredencoding(False) or "").lower()
empty_cell = EMPTY_CELL_RENDER if "utf" in encoding else EMPTY_CELL_FALLBACK
x_min = min(x for x, _ in grid) x_min = min(x for x, _ in grid)
x_max = max(x for x, _ in grid) x_max = max(x for x, _ in grid)
y_min = min(y for _, y in grid) y_min = min(y for _, y in grid)
@@ -350,7 +379,7 @@ def render_grid(grid: Grid, placements: Sequence[Placement]) -> str:
for y in range(y_min, y_max + 1): for y in range(y_min, y_max + 1):
row = [f"{y:>3} "] row = [f"{y:>3} "]
for x in range(x_min, x_max + 1): for x in range(x_min, x_max + 1):
row.append(grid.get((x, y), ".").upper().rjust(2)) row.append(grid.get((x, y), empty_cell).upper().rjust(2))
lines.append(" ".join(row)) lines.append(" ".join(row))
lines.append("") lines.append("")
@@ -371,6 +400,8 @@ def main() -> None:
print("Miglior soluzione trovata") print("Miglior soluzione trovata")
print(f"Parole inserite: {solution.placed_words}/{len(generator.words)}") print(f"Parole inserite: {solution.placed_words}/{len(generator.words)}")
print(f"Intersezioni totali: {solution.intersections}") print(f"Intersezioni totali: {solution.intersections}")
print(f"Dimensioni: {solution.width()} colonne x {solution.height()} righe")
print(f"Differenza righe/colonne: {solution.shape_difference()} (vincolo DIFFXY={generator.diffxy})")
print(f"Area occupata: {solution.area()}") print(f"Area occupata: {solution.area()}")
print() print()
print(render_grid(solution.grid, solution.placements)) print(render_grid(solution.grid, solution.placements))

Binary file not shown.

131
main.py Normal file
View File

@@ -0,0 +1,131 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import List
from build_vocabulary import (
FILTERED_OUTPUT_PATH,
METADATA_OUTPUT_PATH,
OUTPUT_PATH,
build_vocabulary,
)
from crossword_filler import CrosswordFiller, load_vocabulary, load_vocabulary_metadata
from crossword_generator import CrosswordGenerator, WORDS, render_grid
def parse_args() -> argparse.Namespace:
    """Parse the command line of the unified crossword CLI.

    Options are registered from two small tables: boolean switches first, then
    options that take a value (flag, type, default, help).
    """
    cli = argparse.ArgumentParser(description="Generatore e filler di cruciverba.")

    switches = (
        (
            "--build-vocabulary",
            "Rigenera il vocabolario esteso, filtrato e i metadati prima dell'esecuzione.",
        ),
        (
            "--skip-fill",
            "Genera solo la griglia iniziale senza eseguire il filler.",
        ),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    valued_options = (
        ("--vocabulary", Path, None, "Percorso opzionale a un vocabolario personalizzato."),
        (
            "--target-empty-ratio",
            float,
            1 / 6,
            "Rapporto target di celle vuote residue dopo il filler.",
        ),
        (
            "--time-limit",
            float,
            8.0,
            "Tempo massimo in secondi per la fase di generazione iniziale.",
        ),
        (
            "--max-candidates",
            int,
            12,
            "Numero massimo di candidati esplorati per parola nella generazione iniziale.",
        ),
        (
            "--diffxy",
            int,
            7,
            "Differenza massima preferita tra larghezza e altezza della griglia.",
        ),
    )
    for flag, converter, fallback, description in valued_options:
        cli.add_argument(flag, type=converter, default=fallback, help=description)

    return cli.parse_args()
def ensure_vocabulary(args: argparse.Namespace) -> None:
    """Rebuild the vocabulary files when requested or when any of them is missing.

    Nothing happens if --build-vocabulary was not given and both the filtered
    vocabulary and the metadata file already exist.
    """
    if (
        not args.build_vocabulary
        and FILTERED_OUTPUT_PATH.exists()
        and METADATA_OUTPUT_PATH.exists()
    ):
        return

    totals = build_vocabulary()
    summary = (
        "Vocabolario rigenerato",
        f"- esteso: {OUTPUT_PATH}",
        f"- filtrato: {FILTERED_OUTPUT_PATH}",
        f"- metadati: {METADATA_OUTPUT_PATH}",
        f"- parole estese: {totals['extended_words']}",
        f"- parole filtrate: {totals['filtered_words']}",
    )
    for line in summary:
        print(line)
def load_selected_vocabulary(path: Path | None) -> List[str]:
    """Return the lines of the custom vocabulary at *path*, or the default vocabulary if None."""
    if path is not None:
        custom_text = path.read_text(encoding="utf-8")
        return custom_text.splitlines()
    return load_vocabulary()
def main() -> None:
    """Run the CLI: ensure the vocabulary, generate the grid, then optionally fill it."""
    args = parse_args()
    ensure_vocabulary(args)

    def dimensions(state) -> str:
        # Shared "width x height (diff=...)" line used for both grids.
        return f"Dimensioni: {state.width()} x {state.height()} (diff={state.shape_difference()})"

    generator = CrosswordGenerator(
        WORDS,
        max_candidates_per_word=args.max_candidates,
        time_limit_seconds=args.time_limit,
        diffxy=args.diffxy,
    )
    initial_state = generator.solve()

    print("Griglia iniziale")
    print(f"Parole inserite: {initial_state.placed_words}/{len(generator.words)}")
    print(f"Intersezioni: {initial_state.intersections}")
    print(dimensions(initial_state))
    print()
    print(render_grid(initial_state.grid, initial_state.placements))

    if args.skip_fill:
        return

    vocabulary = load_selected_vocabulary(args.vocabulary)
    metadata = load_vocabulary_metadata()
    filler = CrosswordFiller(
        initial_state,
        vocabulary,
        vocabulary_metadata=metadata,
        target_empty_ratio=args.target_empty_ratio,
    )
    final_state = filler.fill()

    print()
    print("Griglia riempita")
    print(f"Parole totali: {final_state.placed_words}")
    print(f"Intersezioni totali: {final_state.intersections}")
    print(dimensions(final_state))
    print()
    print(render_grid(final_state.grid, final_state.placements))

    if not filler.added_words:
        return
    print()
    print("Parole aggiunte dal filler:")
    for index, placement in enumerate(filler.added_words, start=1):
        if placement.direction == "H":
            direction = "orizzontale"
        else:
            direction = "verticale"
        print(f"{index:>2}. {placement.word} ({placement.x}, {placement.y}) {direction}")


if __name__ == "__main__":
    main()

203
package/LICENSE Normal file
View File

@@ -0,0 +1,203 @@
Copyright 2019 Ludan Stoecklé
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

17
package/README.md Normal file
View File

@@ -0,0 +1,17 @@
<!--
Copyright 2019 Ludan Stoecklé
SPDX-License-Identifier: CC-BY-4.0
-->
# Italian Word Dict
List of Italian words.
It is based on [morph-it](https://docs.sslmit.unibo.it/doku.php?id=resources:morph-it) which provides an extensive morphological resource for the Italian language.
You can use `italian-words` to use this resource properly.
## dependencies and licences
[morph-it](https://docs.sslmit.unibo.it/doku.php?id=resources:morph-it) provides an extensive morphological resource for the Italian language. It is dual-licensed free software and can be redistributed and/or modified under the terms of the Creative Commons Attribution ShareAlike 2.0 License and the GNU Lesser General Public License.
The derived file `words.json` remains under the same licence.

85
package/dist/CC-BY-SA-2.0.txt vendored Normal file
View File

@@ -0,0 +1,85 @@
Creative Commons Attribution-ShareAlike 2.0
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE.
License
THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED.
BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
1. Definitions
a. "Collective Work" means a work, such as a periodical issue, anthology or encyclopedia, in which the Work in its entirety in unmodified form, along with a number of other contributions, constituting separate and independent works in themselves, are assembled into a collective whole. A work that constitutes a Collective Work will not be considered a Derivative Work (as defined below) for the purposes of this License.
b. "Derivative Work" means a work based upon the Work or upon the Work and other pre-existing works, such as a translation, musical arrangement, dramatization, fictionalization, motion picture version, sound recording, art reproduction, abridgment, condensation, or any other form in which the Work may be recast, transformed, or adapted, except that a work that constitutes a Collective Work will not be considered a Derivative Work for the purpose of this License. For the avoidance of doubt, where the Work is a musical composition or sound recording, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered a Derivative Work for the purpose of this License.
c. "Licensor" means the individual or entity that offers the Work under the terms of this License.
d. "Original Author" means the individual or entity who created the Work.
e. "Work" means the copyrightable work of authorship offered under the terms of this License.
f. "You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation.
g. "License Elements" means the following high-level license attributes as selected by Licensor and indicated in the title of this License: Attribution, ShareAlike.
2. Fair Use Rights. Nothing in this license is intended to reduce, limit, or restrict any rights arising from fair use, first sale or other limitations on the exclusive rights of the copyright owner under copyright law or other applicable laws.
3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below:
a. to reproduce the Work, to incorporate the Work into one or more Collective Works, and to reproduce the Work as incorporated in the Collective Works;
b. to create and reproduce Derivative Works;
c. to distribute copies or phonorecords of, display publicly, perform publicly, and perform publicly by means of a digital audio transmission the Work including as incorporated in Collective Works;
d. to distribute copies or phonorecords of, display publicly, perform publicly, and perform publicly by means of a digital audio transmission Derivative Works.
e. For the avoidance of doubt, where the work is a musical composition:
i. Performance Royalties Under Blanket Licenses. Licensor waives the exclusive right to collect, whether individually or via a performance rights society (e.g. ASCAP, BMI, SESAC), royalties for the public performance or public digital performance (e.g. webcast) of the Work.
ii. Mechanical Rights and Statutory Royalties. Licensor waives the exclusive right to collect, whether individually or via a music rights society or designated agent (e.g. Harry Fox Agency), royalties for any phonorecord You create from the Work ("cover version") and distribute, subject to the compulsory license created by 17 USC Section 115 of the US Copyright Act (or the equivalent in other jurisdictions).
f. Webcasting Rights and Statutory Royalties. For the avoidance of doubt, where the Work is a sound recording, Licensor waives the exclusive right to collect, whether individually or via a performance-rights society (e.g. SoundExchange), royalties for the public digital performance (e.g. webcast) of the Work, subject to the compulsory license created by 17 USC Section 114 of the US Copyright Act (or the equivalent in other jurisdictions).
The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. All rights not expressly granted by Licensor are hereby reserved.
4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions:
a. You may distribute, publicly display, publicly perform, or publicly digitally perform the Work only under the terms of this License, and You must include a copy of, or the Uniform Resource Identifier for, this License with every copy or phonorecord of the Work You distribute, publicly display, publicly perform, or publicly digitally perform. You may not offer or impose any terms on the Work that alter or restrict the terms of this License or the recipients' exercise of the rights granted hereunder. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties. You may not distribute, publicly display, publicly perform, or publicly digitally perform the Work with any technological measures that control access or use of the Work in a manner inconsistent with the terms of this License Agreement. The above applies to the Work as incorporated in a Collective Work, but this does not require the Collective Work apart from the Work itself to be made subject to the terms of this License. If You create a Collective Work, upon notice from any Licensor You must, to the extent practicable, remove from the Collective Work any reference to such Licensor or the Original Author, as requested. If You create a Derivative Work, upon notice from any Licensor You must, to the extent practicable, remove from the Derivative Work any reference to such Licensor or the Original Author, as requested.
b. You may distribute, publicly display, publicly perform, or publicly digitally perform a Derivative Work only under the terms of this License, a later version of this License with the same License Elements as this License, or a Creative Commons iCommons license that contains the same License Elements as this License (e.g. Attribution-ShareAlike 2.0 Japan). You must include a copy of, or the Uniform Resource Identifier for, this License or other license specified in the previous sentence with every copy or phonorecord of each Derivative Work You distribute, publicly display, publicly perform, or publicly digitally perform. You may not offer or impose any terms on the Derivative Works that alter or restrict the terms of this License or the recipients' exercise of the rights granted hereunder, and You must keep intact all notices that refer to this License and to the disclaimer of warranties. You may not distribute, publicly display, publicly perform, or publicly digitally perform the Derivative Work with any technological measures that control access or use of the Work in a manner inconsistent with the terms of this License Agreement. The above applies to the Derivative Work as incorporated in a Collective Work, but this does not require the Collective Work apart from the Derivative Work itself to be made subject to the terms of this License.
c. If you distribute, publicly display, publicly perform, or publicly digitally perform the Work or any Derivative Works or Collective Works, You must keep intact all copyright notices for the Work and give the Original Author credit reasonable to the medium or means You are utilizing by conveying the name (or pseudonym if applicable) of the Original Author if supplied; the title of the Work if supplied; to the extent reasonably practicable, the Uniform Resource Identifier, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and in the case of a Derivative Work, a credit identifying the use of the Work in the Derivative Work (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). Such credit may be implemented in any reasonable manner; provided, however, that in the case of a Derivative Work or Collective Work, at a minimum such credit will appear where any other comparable authorship credit appears and in a manner at least as prominent as such other comparable authorship credit.
5. Representations, Warranties and Disclaimer
UNLESS OTHERWISE AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE MATERIALS, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU.
6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. Termination
a. This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Derivative Works or Collective Works from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License.
b. Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above.
8. Miscellaneous
a. Each time You distribute or publicly digitally perform the Work or a Collective Work, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License.
b. Each time You distribute or publicly digitally perform a Derivative Work, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License.
c. If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
d. No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent.
e. This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You.
Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor.
Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, neither party will use the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time.
Creative Commons may be contacted at http://creativecommons.org/.

14
package/dist/index.d.ts vendored Normal file
View File

@@ -0,0 +1,14 @@
/**
* @license
* Copyright 2021 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Gender marker carried by a word entry: either 'M' or 'F'.
 * NOTE(review): presumably masculine / feminine, mirroring the morph-it F/M gender feature — confirm.
 */
export type Genders = 'M' | 'F';
/**
 * Record stored for a single word.
 * NOTE(review): the meaning of the one-letter fields is not stated in this file.
 * G is typed as a gender; S and P are presumably the singular and plural forms —
 * confirm against the code that generates words.json.
 */
export interface WordInfo {
    // Gender marker, or null when none is recorded.
    G: Genders | null;
    // Optional: may be omitted entirely, or present as null.
    S?: string | null;
    // Always present, but may be null.
    P: string | null;
}
/**
 * Dictionary mapping a string key to its WordInfo record.
 * NOTE(review): presumably keyed by the word itself — confirm against words.json.
 */
export interface WordsInfo {
    [key: string]: WordInfo;
}

8
package/dist/index.js vendored Normal file
View File

@@ -0,0 +1,8 @@
"use strict";
/**
* @license
* Copyright 2021 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=index.js.map

1
package/dist/index.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;GAIG"}

415
package/dist/readme-morph-it.txt vendored Normal file
View File

@@ -0,0 +1,415 @@
===================================================================
Morph-it!
A free morphological lexicon for the Italian Language
===================================================================
version 0.4.8
February 23 2009
*******************************************************************
THIS README IS NOT REALLY UP TO DATE
A NEW VERSION OF THIS
README FILE WILL BE
RELEASED (HOPEFULLY) SOON
(BUT I WOULDN'T COUNT ON THAT...)
*******************************************************************
Copyright (c) 2004-2009
Marco Baroni (marco.baroni@unitn.it)
Eros Zanchetta (eros@sslmit.unibo.it)
http://sslmit.unibo.it/morphit
Morph-it! is a free (as in free speech and in free beer) morphological
resource for the Italian language.
Morph-it! is a lexicon of inflected forms with their lemma and
morphological features. For example:
gattini gattino NOUN-M:p
andarono andare VER:ind+past+3+p
fastidiosetto fastidioso ADJ:dim+m+s
As of version 0.4.7 the list contains 504,906 entries and 34,968
lemmas.
Morph-it! can be used as a data source for an Italian lemmatizer /
morphological analyzer / morphological generator.
As example applications, on the Morph-it! site you can download the
lexicon compiled for the SFST [1] and Finite State Utilities [2]
packages.
The data for Morph-it! were prepared by Marco Baroni and Eros
Zanchetta using a mixture of corpus-based methods,
regular-expression-based rules and manual checking. We are currently
writing a paper that describes the procedure we used to build the
resource.
Morph-it! is still under development and there may still be gaps,
unlikely forms, etc. We will be very grateful if you let us know
about missing forms, problems, and ideas/resources that can help
us expand or clean the list (sslmitdevonline@sslmit.unibo.it).
Notice in particular that, since we extracted data from an Italian
newspaper corpus (the la Repubblica corpus, also accessible from our
site), we have many gaps in basic, every-day vocabulary.
Also, the current version does not distinguish between coordinative
and subordinative conjunctions. We plan to do this in the near
future. More in general, we are not fully satisfied with our current
features for function words, and we plan to revise them.
A more ambitious plan we would like to pursue is the identification
of derivational structure and derivationally related lemmas. Then, we
will add full semantic representations. Then, we will take over the
world and reign supreme for the next 100 years.
The remainder of this document contains a commented list of the
morphological features used in the lexicon, licensing information and
acknowledgments.
FEATURES
========
We distinguish between derivational features, that pertain to the
lemma, and inflectional features, that pertain to the wordform.
Derivational and inflectional features are separated by a colon.
The derivational features are in upper case and they are
dash-delimited. The inflectional features are in lower case and they
are plus-sign-delimited.
For example, we represent gender as a derivational feature of nouns
(we take "cameriere" and "cameriera" to belong to different lemmas),
whereas we treat number as an inflectional feature of nouns. Thus,
gender and number are represented as in the following examples:
cameriere cameriera NOUN-F:p
cameriera cameriera NOUN-F:s
camerieri cameriere NOUN-M:p
cameriere cameriere NOUN-M:s
For adjectives, gender is considered an inflectional feature. Thus,
gender is represented differently in adjectives and nouns:
azzurre azzurra NOUN-F:p
azzurra azzurra NOUN-F:s
azzurri azzurro NOUN-M:p
azzurro azzurro NOUN-M:s
azzurra azzurro ADJ:pos+f+s
azzurri azzurro ADJ:pos+m+p
azzurro azzurro ADJ:pos+m+s
azzurre azzurro ADJ:pos+f+p
Changes that are purely orthographical/phonological but do not affect
morphology/syntax/meaning are not reflected in the features. For
example, the following variants of "cento" share the same lemma and
the same features:
cent' cento DET-NUM-CARD
cento cento DET-NUM-CARD
We now present the full list of features we used, organized by major
syntactic categories.
ABL
Abbreviated locutions, such as "a.C.", "ecc." and "i.e."
ADJ
Adjectives, with the following inflectional features:
pos/comp/sup
That is: positive, comparative, superlative. Although these are not
true inflectional features, given their high productivity we decided
to represent them as properties of inflected forms.
f/m
That is: feminine, masculine.
s/p
That is: singular, plural.
ADV
Adverbs.
ART
Articles, with gender as a derivational feature (F/M) and number as an
inflectional feature (s/p).
ARTPRE
Preposition+article compounds ("col", "della", "nei"...), with gender
as a derivational feature (F/M) and number as an inflectional feature
(s/p).
ASP
Aspectuals ("stare" in "stare per"). Same inflectional features as VER
(see below).
AUX
Auxiliaries ("essere", "avere", "venire"). Same inflectional features
as VER (see below).
CAU
Causatives ("fare" in "far sapere"). Same inflectional features as VER
(see below).
CE
Clitic "ce" as in "ce l'ho fatta".
CI
Clitic "ci" as in "ci prova".
CON
Conjunctions.
DET-DEMO
Demonstrative determiners (such as "questa" in "questa sera"), with
inflectional gender (f/m) and number (s/p) features.
DET-INDEF
Indefinite determiners (such as "molti" in "molti amici") with
inflectional gender (f/m) and number (s/p) features.
DET-NUM-CARD
Cardinal number determiners (e.g., "cinque" in "cinque
amici"). Pure-digit numbers are not included (i.e., the list includes
"100mila" but not "100000" nor "100,000", "100.000", etc.)
DET-POSS
Possessive determiners (e.g., "mio", "suo"), with inflectional gender
(f/m) and number (s/p) features.
DET-WH
Wh determiners (e.g., quale in "quale amico"), with inflectional
gender (f/m) and number (s/p) features.
INT
Interjections.
MOD
Modal verbs (e.g. "dover" in "dover ricostruire"). Same inflectional
features as VER (see below).
NE
Clitic "ne" (as in: "ne hanno molte").
NOUN
Nouns, with gender as a derivational feature (F/M) and number as an
inflectional feature (s/p).
PON
Non-sentential punctuation marks (e.g. , " $).
PRE
Prepositions.
PRO-DEMO
Demonstrative pronouns (e.g. "questa" in "voglio questa"), with both
gender and number as derivational features (F/M, S/P).
PRO-INDEF
Indefinite pronouns (e.g., "molti" in "vengono molti"), with both
gender and number as derivational features (F/M, S/P).
PRO-NUM
Numeral pronouns (e.g., "cinque" in "cinque sono
sopravvissuti"). Pure-digit numbers are not included (e.g., the list
includes "100mila" but not 100000 nor 100,000, 100.000, etc.)
PRO-PERS
Personal pronouns, such as "lui" and "loro". Clitic
pronouns (such as pronominal "lo" and "si") are marked by the
derivational feature CLI. Person, gender and number are also encoded
as derivational features (1/2/3, F/M, S/P).
PRO-POSS
Possessive pronouns (such as "loro" in "non era uno dei loro"), with
gender and number encoded as derivational features (F/M, S/P).
PRO-WH
Wh-pronouns, such as "quale" in "quale e' venuto?"
SENT
End of sentence marker (! . ... : ?).
SI
Clitic "si" as in "di cui si discute".
TALE
"Tale" in constructions such as "una fortuna tale che...", "la tal
cosa", "tali amici", etc. Gender (f/m) and number (s/p) as
inflectional features.
VER
Verbs, with the following inflectional features:
cond/ger/impr/ind/inf/part/sub
Conditional, gerundive, imperative, indicative, infinitive,
participle, subjunctive.
pre/past/impf/fut
Present, past, imperfective, future.
1/2/3
Person.
s/p
Number.
f/m
Gender (only relevant for participles).
cela/cele/celi/celo/cene/ci/gli/gliela/gliele/glieli/glielo/gliene/la/
le/li/lo/mela/mele/meli/melo/mene/mi/ne/sela/sele/seli/selo/sene/si/
tela/tele/teli/telo/tene/ti/vela/vele/veli/velo/vene/vi
Clitics attached to the verb.
WH
Wh elements ("come", "qualora", "quando"...)
WH-CHE
"Che" as a wh element (e.g., "l'uomo che hai visto", "hai detto che").
LICENSING INFORMATION
======================
This program is dual-licensed free software; you can redistribute it
and/or modify it under the terms of the Creative Commons
Attribution ShareAlike 2.0 License and the GNU Lesser General Public
License.
***********************************************
* Creative Commons Attribution ShareAlike 2.0 *
***********************************************
Morph-it! is licensed under the Creative Commons Attribution
ShareAlike 2.0 License.
You are free:
- to copy, distribute and display the resource;
- to make derivative works;
- to make commercial use of the resource;
under the following conditions:
- you must give the original authors credit;
- if you alter, transform, or build upon this work, you may distribute
the resulting work only under a license identical to this one;
- for any reuse or distribution, you must make clear to others the
license terms of this work;
- any of these conditions can be waived if you get permission from the
copyright holders.
Your fair use and other rights are in no way affected by the above.
You can find a link to the full license from the Morph-it! website.
Copyright (C) 2004-2007 Marco Baroni and Eros Zanchetta.
*************************************
* GNU Lesser General Public License *
*************************************
Morph-it! A free morphological lexicon for the Italian Language
Copyright (C) 2004-2007 Marco Baroni and Eros Zanchetta
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
ACKNOWLEDGMENTS
===============
The main data source for the Morph-it! lexicon was the "la Repubblica"
corpus. Thus, we would like to thank the colleagues who developed this
resource with us: Lorenzo Piccioni, Guy Aston, Silvia Bernardini,
Federica Comastri, Alessandra Volpi, Marco Mazzoleni.
We would like to thank the developers of the tools we used to tag,
lemmatize and index the Repubblica corpus: the (Italian) TreeTagger
(Helmut Schmid, Achim Stein), the ACOPOST taggers (Ingo Schroeder) and
the IMS Corpus WorkBench (Oli Christ, Arne Fitschen and Stefan Evert).
Thanks to Helmut Schmid also for converting the Morph-it! lexicon into
a SFST transducer.
We would like to thank Aldo Calpini, who developed the Perl module
Lingua::IT::Conjugate.
We are also very grateful to Jan Daciuk for creating his finite-state
utilities and for helping us learn to use them.
Finally, a big thanks to the members of the FoLUG, SannioLUG and
Scuola (software libero nella scuola) mailing lists, for advice about
licensing and dissemination.
...and kudos to Lorenzo for creating and maintaining the SSLMITDev
site!
FOOTNOTES
=========
[1] http://www.ims.uni-stuttgart.de/projekte/gramotron/SOFTWARE/SFST.html
[2] http://juggernaut.eti.pg.gda.pl/~jandac/fsa.html

1
package/dist/words.json vendored Normal file

File diff suppressed because one or more lines are too long

21
package/gulpfile.js Normal file
View File

@@ -0,0 +1,21 @@
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
const fs = require('fs');
const { processItalianWords } = require('./dist/create/createList');
const { series } = require('gulp');
/**
 * Gulp task: build the JSON word list from the Morph-it! lexicon.
 *
 * Hands the lexicon path, the output path and the gulp completion callback
 * to `processItalianWords`; this task never calls `cb` itself.
 * NOTE(review): presumably `processItalianWords` invokes `cb` once the
 * output file has been written - confirm in dist/create/createList.
 *
 * @param {Function} cb gulp completion callback, forwarded unchanged.
 */
function createWords(cb) {
  const lexiconPath = 'resources/morph-it_048.txt';
  const wordsJsonPath = 'dist/words.json';
  processItalianWords(lexiconPath, wordsJsonPath, cb);
}
/**
 * Gulp task: copy the licence text and the Morph-it! readme from
 * `./resources` into `./dist`, then signal completion.
 *
 * The copies are synchronous, so `cb` is only reached after both files
 * have been written; a failed copy throws before `cb` is called.
 *
 * @param {Function} cb gulp completion callback, called with no arguments.
 */
function copyLicences(cb) {
  const licenceFiles = ['CC-BY-SA-2.0.txt', 'readme-morph-it.txt'];
  for (const fileName of licenceFiles) {
    fs.copyFileSync(`./resources/${fileName}`, `./dist/${fileName}`);
  }
  cb();
}
// `gulp build`: generate dist/words.json first, then copy the licence
// files next to it (gulp's `series` runs the tasks in the order given).
const build = series(createWords, copyLicences);
exports.build = build;

23
package/package.json Normal file
View File

@@ -0,0 +1,23 @@
{
"name": "italian-words-dict",
"version": "3.4.0",
"description": "Italian words dictionary, based on the morph-it linguistic resource",
"main": "dist/index.js",
"scripts": {
"clean": "rm -rf dist",
"test": "nyc --reporter=lcov --reporter=text mocha",
"build": "tsc && gulp build"
},
"repository": {
"type": "git",
"url": "https://github.com/RosaeNLG/rosaenlg.git"
},
"keywords": [
"words",
"Italian",
"morph-it"
],
"author": "Ludan Stoecklé <ludan.stoeckle@rosaenlg.org>",
"license": "Apache-2.0",
"gitHead": "745dc50c54690936fba332ca465308c607053e46"
}

21
package/test/test.js Normal file
View File

@@ -0,0 +1,21 @@
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
const assert = require('assert');
const italianWords = require('../dist/words.json');
// Smoke tests for the generated dist/words.json dictionary.
describe('italian-words-dict', function () {
  // The dictionary must load and hold more than 100 entries.
  it('should contain something', function () {
    assert.ok(italianWords != null);
    const entryCount = Object.keys(italianWords).length;
    assert.ok(entryCount > 100);
  });

  // Spot-check one known entry.
  // NOTE(review): 'G' and 'P' look like gender and plural form - confirm
  // against the code that generates words.json.
  it('pizza should be ok', function () {
    const entry = italianWords.pizza;
    assert.ok(entry != null);
    assert.strictEqual(entry.G, 'F');
    assert.strictEqual(entry.P, 'pizze');
  });
});

558
vocaboli_it.txt Normal file
View File

@@ -0,0 +1,558 @@
adesso
adige
adone
agave
agile
ago
aiuto
alba
albero
alga
alito
alloro
aloe
alpaca
alta
amaca
amare
ambra
ameno
amico
amore
anatra
anello
angelo
anice
anima
anno
antenna
aprire
aratro
arco
arena
argento
aria
arpa
arredo
arrivo
arte
asilo
asino
aspro
asta
atomo
attesa
auguri
aula
aurora
autore
avena
avere
avviso
azione
azzurro
bacio
bagaglio
balcone
banco
barca
barone
base
basso
bastone
becco
bello
bene
biondo
biscia
blocco
borsa
bosco
breve
brina
bronzo
bruco
buio
burro
cacao
cadere
calamaio
caldo
calice
camera
camino
campana
canale
candela
capace
capello
capire
capra
carbone
carta
casale
cassone
castoro
cavallo
cedere
celeste
cena
centro
cerchio
certezza
cervo
chiaro
chitarra
cielo
cifra
cigno
cima
cintura
circolo
cittadino
classe
clima
collina
colore
cometa
comune
conca
condurre
confine
coniglio
conto
corda
corona
cortile
cosa
costa
creare
crescere
crinale
croce
cuore
cura
dado
danza
dare
debole
decoro
denso
dente
deserto
destino
detto
dialogo
difesa
digitale
dipingere
diritto
divano
docile
dogana
dolce
domanda
dono
dormire
dorso
drago
ebano
eco
edera
educare
effetto
elica
elogio
elmo
energia
enorme
entrare
epoca
equatore
erba
erede
eroe
errore
esame
esilio
esistere
esito
eterno
etica
fare
favola
febbre
felice
fermare
ferro
festa
fiaba
fiducia
figura
filo
finale
fiume
fiore
firmare
flauto
foglia
fonte
forza
fosso
frase
freccia
freno
frutto
fuga
fumo
fuoco
futuro
gabbiano
galassia
gamba
gatto
gelato
gemma
geniale
gesto
giallo
giardino
girare
giudice
giorno
giovane
giubba
giugno
globo
goccia
gomito
grado
grammo
grande
grano
gravare
greto
guadagno
guanto
guida
guscio
idea
idolo
illeso
impero
impronta
incanto
incontro
indicare
indole
inerzia
infinito
inizio
inno
insalata
insieme
intesa
invito
isola
istante
labbro
lago
lana
largo
lastra
latte
laurea
lavare
legame
legenda
leggere
legno
lente
lezione
libellula
limite
linea
liquido
liscia
litigare
livello
locale
lodo
lontano
lotta
lucente
luce
luna
lupo
macchia
madre
maestro
magnete
magro
maiolica
mandorla
maniglia
mano
mare
margine
martello
maschera
massa
materia
medaglia
melodia
memoria
menta
merito
metallo
metodo
mezzo
miraggio
misura
modello
moderno
momento
mondo
montone
morbido
mordere
mosaico
motore
muovere
nascere
nastro
nave
nebbia
neutro
nocciola
nome
notare
notizia
nuvola
oblio
odore
offerta
ombra
onda
onesto
opera
opinione
ordine
oriente
origine
oro
orso
ortica
ospite
ovale
ovest
padre
palazzo
palude
pane
parete
parlare
partita
passero
patto
paura
pedana
pellicola
pensare
perla
persona
pesare
pianeta
pianta
pietra
pigro
pilota
piuma
piuttosto
plastica
poesia
polline
ponte
popolo
porta
pozzo
pranzo
pregio
premio
presa
primato
principe
prisma
produrre
profilo
profumo
progetto
promessa
pronto
prova
prudente
quaderno
quercia
questione
quota
radice
ragione
ramo
rapido
rasoio
reale
regola
respiro
restare
rete
ricamo
ricerca
riccio
ricordo
ridere
riflesso
riga
rigore
rimanere
rimedio
riparo
ripetere
riposo
ritmo
ritorno
riva
roccia
rompere
rosa
rotazione
rotta
rubino
ruga
rumore
ruota
salire
salone
saltare
salute
sapere
sasso
sedia
segnale
segreto
selva
seme
sentire
sereno
serpente
servire
sestante
settore
sfera
sfida
sguardo
silenzio
simbolo
sincero
slancio
smeraldo
soglia
solare
solido
soltanto
sonno
sopra
sorgere
sorriso
sospeso
sosta
spada
spazio
specchio
spessore
spiga
spirito
sponda
sportivo
sprone
stabile
stagione
stella
stelo
stendere
stile
stima
storia
strada
studiare
subito
suono
superare
tacere
talento
tappeto
tavolo
teatro
tecnica
telaio
tempo
tendere
tenere
tensione
terra
tetto
tigre
timore
titolo
tornare
torre
traccia
tradurre
trama
trasporto
trattare
treno
triangolo
trionfo
trovare
tulipano
turbine
udire
ulivo
umile
unione
urbano
usanza
uscire
utile
valore
variare
vasca
vecchio
vedetta
vela
veloce
vendere
vento
verita
vernice
versare
viaggio
vicenda
vicino
vigore
villaggio
viola
virgola
virtu
visione
vistoso
vita
vivere
vocazione
voce
volere
volpe
zaino
zefiro
zolla
zucchero

17311
vocaboli_it_esteso.txt Normal file

File diff suppressed because it is too large Load Diff

16357
vocaboli_it_filtrato.txt Normal file

File diff suppressed because it is too large Load Diff

125845
vocaboli_it_metadata.json Normal file

File diff suppressed because it is too large Load Diff