• Ven. Gen 23rd, 2026

Atlantis is real: Official discovery of Atlantis, language and migrations

Atlantis is the Sardo Corso Graben Horst underwater continental block submerged by the Meltwater Pulses and destroyed by a subduction zone, Capital is Sulcis

Spread the love

import re
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox, filedialog
from collections import Counter, defaultdict

import spacy
from spacy.lang.it.stop_words import STOP_WORDS as IT_STOP
from spacy.lang.en.stop_words import STOP_WORDS as EN_STOP

import matplotlib
matplotlib.use("TkAgg")  # select the Tk backend before pyplot is imported
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

from wordfreq import zipf_frequency

# NLTK WordNet (EN only)
from nltk.corpus import wordnet as wn

# -----------------------
# Language resources
# -----------------------
# Cache of loaded spaCy pipelines, keyed by language code ("it"/"en").
NLP_MODELS = {}
# Stopword sets per supported language.
STOPWORDS = {
    "it": IT_STOP,
    "en": EN_STOP,
}
# Derivational suffixes that usually mark abstract nouns (heuristics used by fa_abstraction).
ABSTRACT_SUFFIXES_IT = ("ità", "zione", "tudine", "enza", "mento", "ismo", "logia", "ica")
ABSTRACT_SUFFIXES_EN = ("ness", "ity", "ism", "tion", "ment", "ship", "hood", "acy")

def get_nlp(lang):
    """Return a cached spaCy pipeline for *lang* ("it" or "en"), loading it on first use.

    Raises:
        ValueError: if the language is not supported.
    """
    if lang not in NLP_MODELS:
        if lang == "it":
            NLP_MODELS[lang] = spacy.load("it_core_news_sm")
        elif lang == "en":
            NLP_MODELS[lang] = spacy.load("en_core_web_sm")
        else:
            raise ValueError("Lingua non supportata.")
    return NLP_MODELS[lang]

# -----------------------
# Concept extraction
# -----------------------
def normalize_space(text):
    """Collapse every run of whitespace to a single space and strip the ends."""
    return re.sub(r"\s+", " ", text.strip())

def strip_stopwords_inside(tokens, lang):
    """Drop stopwords of *lang* from a list of token strings (unknown lang -> no-op)."""
    stop = STOPWORDS.get(lang, set())
    return [t for t in tokens if t not in stop]

def noun_chunk_to_lemma(chunk):
    """Join the lowercase lemmas of a spaCy noun chunk, skipping space/punct tokens.

    Only relies on each token's ``is_space``, ``is_punct`` and ``lemma_``
    attributes, so any duck-typed token sequence works.
    """
    lemmas = []
    for t in chunk:
        if t.is_space or t.is_punct:
            continue
        lemmas.append(t.lemma_.lower())
    return " ".join(lemmas)

def extract_concepts(doc, lang):
    """Extract candidate concepts from a spaCy *doc* and count their mentions.

    Candidates are multi-word noun chunks (internal stopwords removed) plus
    single NOUN/PROPN lemmas. Returns a Counter mapping concept -> frequency.
    """
    candidates = []

    # Multi-word noun chunks
    for ch in doc.noun_chunks:
        lemma_chunk = noun_chunk_to_lemma(ch)
        # Remove internal stopwords for multiword normalization
        tokens = [w for w in lemma_chunk.split() if w.isalpha()]
        tokens = strip_stopwords_inside(tokens, lang)
        if tokens:
            candidates.append(" ".join(tokens))

    # Single nouns and proper nouns
    for tok in doc:
        if tok.pos_ in ("NOUN", "PROPN") and not tok.is_stop and tok.is_alpha:
            candidates.append(tok.lemma_.lower())

    # Clean: trim and collapse inner whitespace, drop empties
    cleaned = []
    for c in candidates:
        c = re.sub(r"\s+", " ", c.strip())
        if c:
            cleaned.append(c)

    # Multi-word terms and their single-word heads are both kept:
    # frequency counts will naturally reflect salience.
    return Counter(cleaned)

# -----------------------
# Complexity factors
# -----------------------
def normalize(x, lo, hi):
    """Linearly map *x* from [lo, hi] to [0, 1], clamping out-of-range values.

    Returns 0.0 when the interval is empty or inverted (hi <= lo).
    """
    if hi <= lo:
        return 0.0
    v = (x - lo) / (hi - lo)
    return max(0.0, min(1.0, v))

def fd_semantic_depth(concept, lang):
    """
    Fd(c): semantic depth proxy in [0, 1].

    EN: max WordNet hypernym depth of the head lemma (approximated as the
    last token), normalized over the typical noun depth range 0..20; falls
    back to a rarity proxy when the head is not found in WordNet.
    IT: rarity proxy via Zipf frequency (rarer word -> higher score).
    """
    parts = concept.split()
    head = parts[-1] if parts else concept  # heuristic: head as last token
    if lang == "en":
        synsets = wn.synsets(head, pos=wn.NOUN)
        if synsets:
            depths = []
            for s in synsets:
                try:
                    depths.append(s.max_depth())
                except Exception:
                    pass
            if depths:
                # WordNet noun depth typical range ~0..20
                return normalize(max(depths), 0, 20)
        # Fallback to rarity proxy
        z = zipf_frequency(head, "en")
        return normalize(7 - z, 0, 7)  # higher when rarer
    else:
        # IT: rarity proxy
        z = zipf_frequency(head, "it")
        return normalize(7 - z, 0, 7)

def fa_abstraction(concept, lang):
    """
    Fa(c): abstraction factor in [0, 1].

    EN: membership of the head lemma in WordNet's 'abstraction' hypernym
    branch, or an abstract derivational suffix; multiword terms get a boost.
    IT: suffix heuristics for abstraction; likely proper names (single
    titlecase token) are penalized.
    """
    tokens = concept.split()
    head = tokens[-1] if tokens else concept

    # Multiword boost (abstract notions are often multiword/technical)
    multiword_bonus = normalize(len(tokens), 1, 4) * 0.3  # up to +0.3

    if lang == "en":
        # WordNet abstraction lineage
        abstract_score = 0.0
        synsets = wn.synsets(head, pos=wn.NOUN)
        if synsets:
            for s in synsets:
                try:
                    for path in s.hypernym_paths():
                        if any(ss.name().startswith("abstraction.n.") for ss in path):
                            abstract_score = max(abstract_score, 0.7)  # strong hint of abstraction
                except Exception:
                    pass
        # Suffix heuristic
        if head.endswith(ABSTRACT_SUFFIXES_EN):
            abstract_score = max(abstract_score, 0.6)
        return min(1.0, abstract_score + multiword_bonus)
    else:
        # IT: suffix heuristic
        abstract_score = 0.0
        if head.endswith(ABSTRACT_SUFFIXES_IT):
            abstract_score = max(abstract_score, 0.6)
        # Penalize likely proper names (single token, titlecase) by reducing abstraction
        if len(tokens) == 1 and head.istitle():
            abstract_score = max(abstract_score - 0.2, 0.0)
        return min(1.0, abstract_score + multiword_bonus)

def concept_weight(concept, lang, alpha=0.5, beta=0.5):
    """Combined concept weight w(c) = alpha*Fd(c) + beta*Fa(c), clamped to [0, 1].

    Returns:
        (w, fd, fa) — the clamped weight and the two raw factors.
    """
    fd = fd_semantic_depth(concept, lang)
    fa = fa_abstraction(concept, lang)
    w = alpha * fd + beta * fa
    return max(0.0, min(1.0, w)), fd, fa

# -----------------------
# Metrics
# -----------------------
def compute_metrics(text, lang):
    """Compute all conceptometry metrics for *text* in language *lang*.

    Returns a dict with: token count N (punct/space excluded), unique concept
    count |C|, total concept mentions, DCg (raw conceptual density), DCp
    (weighted conceptual density), IRC (conceptual redundancy index), EI
    (informational efficiency), and the per-concept records sorted by weight
    then frequency, descending.
    """
    nlp = get_nlp(lang)
    doc = nlp(text)

    # Total tokens excluding punctuation and whitespace
    N = sum(1 for t in doc if not t.is_space and not t.is_punct)

    concept_counts = extract_concepts(doc, lang)
    C_unique = len(concept_counts)
    total_mentions = sum(concept_counts.values()) if concept_counts else 0

    # DCg: unique concepts per token
    DCg = (C_unique / N) if N > 0 else 0.0

    # Per-concept weights
    concept_data = []
    total_weight = 0.0
    for c, freq in concept_counts.items():
        w, fd, fa = concept_weight(c, lang)
        total_weight += w
        concept_data.append({
            "concept": c,
            "freq": freq,
            "w": w,
            "fd": fd,
            "fa": fa,
        })

    # DCp: total concept weight per token
    DCp = (total_weight / N) if N > 0 else 0.0

    # IRC: 1 - |C| / total_mentions (share of repeated mentions)
    IRC = 0.0
    if total_mentions > 0:
        IRC = 1.0 - (C_unique / total_mentions)
        IRC = max(0.0, min(1.0, IRC))

    # EI: weighted density discounted by redundancy
    EI = DCp * (1.0 - IRC)

    # Sort for display
    concept_data.sort(key=lambda x: (x["w"], x["freq"]), reverse=True)

    return {
        "N": N,
        "C_unique": C_unique,
        "total_mentions": total_mentions,
        "DCg": DCg,
        "DCp": DCp,
        "IRC": IRC,
        "EI": EI,
        "concepts": concept_data,
    }

# -----------------------
# GUI
# -----------------------
class ConceptometryApp:
    """Tkinter front-end: text input, metrics panel, concept table and bar chart."""

    def __init__(self, root):
        self.root = root
        root.title("Concettometria — Analisi")
        # NOTE: Tk geometry requires the ASCII 'x' separator ("WxH").
        root.geometry("1100x750")

        # Menu bar with a Help menu
        menubar = tk.Menu(root)
        root.config(menu=menubar)

        help_menu = tk.Menu(menubar, tearoff=0)
        menubar.add_cascade(label="Aiuto", menu=help_menu)
        help_menu.add_command(label="Informazioni", command=self.show_about)

        self.lang_var = tk.StringVar(value="it")

        # Controls frame
        top = ttk.Frame(root, padding=10)
        top.pack(side=tk.TOP, fill=tk.X)

        ttk.Label(top, text="Lingua:").pack(side=tk.LEFT)
        self.lang_combo = ttk.Combobox(top, textvariable=self.lang_var, values=["it", "en"], width=5, state="readonly")
        self.lang_combo.pack(side=tk.LEFT, padx=5)

        ttk.Button(top, text="Analizza", command=self.run_analysis).pack(side=tk.LEFT, padx=5)
        ttk.Button(top, text="Carica file…", command=self.load_file).pack(side=tk.LEFT, padx=5)
        ttk.Button(top, text="Esporta CSV", command=self.export_csv).pack(side=tk.LEFT, padx=5)

        # Text input
        self.text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=12, font=("Segoe UI", 11))
        self.text_area.pack(fill=tk.BOTH, expand=False, padx=10, pady=5)

        # Metrics frame
        self.metrics_frame = ttk.LabelFrame(root, text="Metriche", padding=10)
        self.metrics_frame.pack(fill=tk.X, padx=10, pady=5)

        # One StringVar per metric so labels refresh automatically
        self.metrics_vars = {
            "N": tk.StringVar(value="-"),
            "C_unique": tk.StringVar(value="-"),
            "total_mentions": tk.StringVar(value="-"),
            "DCg": tk.StringVar(value="-"),
            "DCp": tk.StringVar(value="-"),
            "IRC": tk.StringVar(value="-"),
            "EI": tk.StringVar(value="-"),
        }

        grid = ttk.Frame(self.metrics_frame)
        grid.pack(fill=tk.X)
        row = 0
        for label, key in [("Token (N)", "N"),
                           ("Concetti unici (|C|)", "C_unique"),
                           ("Menzioni concetti", "total_mentions"),
                           ("DCg", "DCg"),
                           ("DCp", "DCp"),
                           ("IRC", "IRC"),
                           ("EI", "EI")]:
            ttk.Label(grid, text=label + ":").grid(row=row, column=0, sticky="w", padx=5, pady=2)
            ttk.Label(grid, textvariable=self.metrics_vars[key]).grid(row=row, column=1, sticky="w", padx=5, pady=2)
            row += 1

        # Table of concepts
        self.table_frame = ttk.LabelFrame(root, text="Concetti (Top)", padding=10)
        self.table_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        cols = ("concept", "freq", "w", "fd", "fa")
        self.tree = ttk.Treeview(self.table_frame, columns=cols, show="headings", height=10)
        headings = {
            "concept": "Concetto",
            "freq": "Frequenza",
            "w": "Peso w(c)",
            "fd": "Fd",
            "fa": "Fa",
        }
        for c in cols:
            self.tree.heading(c, text=headings[c])
            self.tree.column(c, anchor="w", width=160 if c == "concept" else 100)

        self.tree.pack(fill=tk.BOTH, expand=True)

        # Chart area (matplotlib embedded in Tk)
        self.chart_frame = ttk.LabelFrame(root, text="Top concetti per peso", padding=10)
        self.chart_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        self.figure = plt.Figure(figsize=(7, 3), dpi=100)
        self.ax = self.figure.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.figure, master=self.chart_frame)
        self.canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

        # Last computed results (dict from compute_metrics), used by export_csv
        self.last_results = None

    def run_analysis(self):
        """Analyze the current text and refresh metrics, table and chart."""
        text = normalize_space(self.text_area.get("1.0", tk.END))
        if not text:
            messagebox.showwarning("Attenzione", "Inserisci del testo da analizzare.")
            return
        lang = self.lang_var.get()
        try:
            results = compute_metrics(text, lang)
        except Exception as e:
            messagebox.showerror("Errore", f"Analisi fallita:\n{e}")
            return

        self.last_results = results
        self.update_metrics(results)
        self.update_table(results)
        self.update_chart(results)

    def update_metrics(self, res):
        """Push metric values from a compute_metrics result into the labels."""
        self.metrics_vars["N"].set(res["N"])
        self.metrics_vars["C_unique"].set(res["C_unique"])
        self.metrics_vars["total_mentions"].set(res["total_mentions"])
        self.metrics_vars["DCg"].set(f"{res['DCg']:.4f}")
        self.metrics_vars["DCp"].set(f"{res['DCp']:.4f}")
        self.metrics_vars["IRC"].set(f"{res['IRC']:.4f}")
        self.metrics_vars["EI"].set(f"{res['EI']:.4f}")

    def update_table(self, res, top_k=30):
        """Repopulate the concept table with the top_k weighted concepts."""
        for row in self.tree.get_children():
            self.tree.delete(row)
        for item in res["concepts"][:top_k]:
            self.tree.insert("", "end", values=(
                item["concept"],
                item["freq"],
                f"{item['w']:.3f}",
                f"{item['fd']:.3f}",
                f"{item['fa']:.3f}",
            ))

    def update_chart(self, res, top_k=10):
        """Redraw the horizontal bar chart of the top_k concepts by weight."""
        self.ax.clear()
        data = res["concepts"][:top_k]
        if not data:
            self.canvas.draw()
            return
        labels = [d["concept"] for d in data]
        weights = [d["w"] for d in data]
        bars = self.ax.barh(range(len(labels)), weights, color="#3b82f6")
        self.ax.set_yticks(range(len(labels)))
        self.ax.set_yticklabels(labels)
        self.ax.invert_yaxis()  # heaviest concept on top
        self.ax.set_xlabel("Peso w(c)")
        self.ax.set_xlim(0, 1)
        for i, b in enumerate(bars):
            self.ax.text(b.get_width() + 0.01, b.get_y() + b.get_height() / 2,
                         f"{weights[i]:.2f}", va="center")
        self.figure.tight_layout()
        self.canvas.draw()

    def load_file(self):
        """Load a UTF-8 text file into the input area, replacing its content."""
        fp = filedialog.askopenfilename(filetypes=[("Testo", "*.txt"), ("Tutti i file", "*.*")])
        if not fp:
            return
        try:
            with open(fp, "r", encoding="utf-8") as f:
                content = f.read()
            self.text_area.delete("1.0", tk.END)
            self.text_area.insert(tk.END, content)
        except Exception as e:
            messagebox.showerror("Errore", f"Impossibile aprire il file:\n{e}")

    def export_csv(self):
        """Export the last analysis (metrics + per-concept table) to a ';'-separated CSV."""
        if not self.last_results:
            messagebox.showwarning("Attenzione", "Esegui prima un'analisi.")
            return
        fp = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV", "*.csv")])
        if not fp:
            return
        try:
            import csv
            with open(fp, "w", encoding="utf-8", newline="") as f:
                writer = csv.writer(f, delimiter=";")
                writer.writerow(["metric", "value"])
                for k in ["N", "C_unique", "total_mentions", "DCg", "DCp", "IRC", "EI"]:
                    writer.writerow([k, self.last_results[k]])
                writer.writerow([])
                writer.writerow(["concept", "freq", "w", "fd", "fa"])
                for item in self.last_results["concepts"]:
                    writer.writerow([item["concept"], item["freq"],
                                     f"{item['w']:.6f}", f"{item['fd']:.6f}", f"{item['fa']:.6f}"])
            messagebox.showinfo("OK", "Esportazione completata.")
        except Exception as e:
            messagebox.showerror("Errore", f"Esportazione fallita:\n{e}")

    def show_about(self):
        """Show the 'About' dialog."""
        about_text = """Concettometro

Autore: Luigi Usai
Luogo: Quartucciu (CA) Italy
Data: 11 Agosto 2025

Uno strumento per l'analisi della complessità concettuale del testo."""
        messagebox.showinfo("Informazioni", about_text)

def main():
    """Create the Tk root window, attach the app and enter the event loop."""
    root = tk.Tk()
    app = ConceptometryApp(root)
    root.mainloop()

if __name__ == "__main__":
    main()

 

 

Published August 11, 2025 | Version v3
Preprint  Open

L’invenzione della Concettometria

Description

Abstract (Italiano)

L’analisi quantitativa dei testi si è tradizionalmente concentrata su metriche lessicali, come la densità lessicale, o su approcci semantici esplorativi. Tuttavia, manca un metodo sistematico per quantificare la “ricchezza concettuale” di un testo in relazione alla sua lunghezza, tenendo conto della complessità intrinseca delle idee espresse. Questo paper introduce la Concettometria, una nuova disciplina scientifica finalizzata alla misurazione della densità, distribuzione e complessità dei concetti all’interno di un testo. Proponiamo un framework teorico e una metodologia computazionale che si basa sull’estrazione di concetti tramite Natural Language Processing (NLP), seguita da una valutazione della loro complessità attraverso un sistema di pesi basato su fattori di profondità semantica (

F_d

) e astrazione (

F_a

). Vengono definite e formalizzate diverse metriche chiave: la Densità Concettuale Grezza (DCg), la Densità Concettuale Ponderata (DCp), l’Indice di Ridondanza Concettuale (IRC) e l’Efficienza Informativa (EI). Le potenziali applicazioni spaziano dall’analisi dell’efficienza comunicativa nella letteratura scientifica, allo studio della complessità cognitiva dei testi didattici, fino alla valutazione oggettiva della qualità dei contenuti generati da Intelligenza Artificiale. Questo lavoro pone le basi per un nuovo paradigma nell’analisi quantitativa del linguaggio e dell’informazione.

 

Abstract (English)

Quantitative text analysis has traditionally focused on lexical metrics, such as lexical density, or on exploratory semantic approaches. However, a systematic method for quantifying the “conceptual richness” of a text in relation to its length, while accounting for the intrinsic complexity of the expressed ideas, is currently lacking. This paper introduces Conceptometry, a new scientific discipline for the systematic measurement of the density, distribution, and complexity of concepts within a text. We propose a theoretical framework and a computational methodology based on concept extraction via Natural Language Processing (NLP), followed by a complexity assessment through a weighting system based on semantic depth (

F_d

) and abstraction factors (

F_a

). Several key metrics are defined and formalized: Raw Conceptual Density (DCg), Weighted Conceptual Density (DCp), the Conceptual Redundancy Index (IRC), and Informational Efficiency (EI). Potential applications range from analyzing communicative efficiency in scientific literature and the cognitive complexity of educational materials, to the objective quality assessment of AI-generated content. This work lays the foundation for a new paradigm in the quantitative analysis of language and information.

 

Files

1 Conceptometry.pdf
Files (463.6 kB)
Name Size

md5:ae845121386bd4aeadb89e825426864c 

188.0 kB

md5:7d5e573299b04aeb28da15e18618a2a0 

260.6 kB

md5:45e8d4f5e50ab324bd3c75652b267bc0 

15.0 kB