2026
Lenci, Alessandro
GloVe Pre-trained Spaces Book Chapter
In: Nesi, Hilary; Milin, Petar (Ed.): International Encyclopedia of Language and Linguistics, Elsevier, 2026.
@inbook{Lenci2026,
title = {GloVe Pre-trained Spaces},
author = {Alessandro Lenci},
editor = {Hilary Nesi and Petar Milin},
url = {https://linkinghub.elsevier.com/retrieve/pii/B9780323955041011406},
doi = {10.1016/B978-0-323-95504-1.01140-6},
year = {2026},
date = {2026-01-01},
booktitle = {International Encyclopedia of Language and Linguistics},
publisher = {Elsevier},
keywords = {},
pubstate = {published},
tppubtype = {inbook}
}
2025
Bondielli, Alessandro; Miliani, Martina; Paglione, Luca; Auriemma, Serena; Passaro, Lucia C.; Lenci, Alessandro
LLMs Struggle on Explicit Causality in Italian Conference
Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025), CEUR Workshop Proceedings, Cagliari, Italy, 2025, ISBN: 979-12-243-0587-3.
@conference{bondielli-etal-2025-llms,
title = {LLMs Struggle on Explicit Causality in Italian},
author = {Alessandro Bondielli and Martina Miliani and Luca Paglione and Serena Auriemma and Lucia C. Passaro and Alessandro Lenci},
editor = {Cristina Bosco and Elisabetta Jezek and Marco Polignano and Manuela Sanguinetti},
url = {https://aclanthology.org/2025.clicit-1.10/},
isbn = {979-12-243-0587-3},
year = {2025},
date = {2025-09-01},
urldate = {2025-09-01},
booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
pages = {83–94},
publisher = {CEUR Workshop Proceedings},
address = {Cagliari, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Proietti, Mattia; Passaro, Lucia C.; Lenci, Alessandro
Leveraging LLMs to Build a Semi-synthetic Dataset for Legal Information Retrieval: A Case Study on the Italian Civil Code and GPT4-O Conference
Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025), CEUR Workshop Proceedings, Cagliari, Italy, 2025, ISBN: 979-12-243-0587-3.
@conference{proietti-etal-2025-leveraging,
title = {Leveraging LLMs to Build a Semi-synthetic Dataset for Legal Information Retrieval: A Case Study on the Italian Civil Code and GPT4-O},
author = {Mattia Proietti and Lucia C. Passaro and Alessandro Lenci},
editor = {Cristina Bosco and Elisabetta Jezek and Marco Polignano and Manuela Sanguinetti},
url = {https://aclanthology.org/2025.clicit-1.87/},
isbn = {979-12-243-0587-3},
year = {2025},
date = {2025-09-01},
urldate = {2025-09-01},
booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
pages = {933–941},
publisher = {CEUR Workshop Proceedings},
address = {Cagliari, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Lombardi, Agnese; Lenci, Alessandro
Doing Things with Words: Rethinking Theory of Mind Simulation in Large Language Models Conference
Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025), CEUR Workshop Proceedings, Cagliari, Italy, 2025, ISBN: 979-12-243-0587-3.
@conference{lombardi-lenci-2025-things,
title = {Doing Things with Words: Rethinking Theory of Mind Simulation in Large Language Models},
author = {Agnese Lombardi and Alessandro Lenci},
editor = {Cristina Bosco and Elisabetta Jezek and Marco Polignano and Manuela Sanguinetti},
url = {https://aclanthology.org/2025.clicit-1.59/},
isbn = {979-12-243-0587-3},
year = {2025},
date = {2025-09-01},
urldate = {2025-09-01},
booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
pages = {613–624},
publisher = {CEUR Workshop Proceedings},
address = {Cagliari, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Miliani, Martina; Auriemma, Serena; Bondielli, Alessandro; Chersoni, Emmanuele; Passaro, Lucia; Sucameli, Irene; Lenci, Alessandro
ExpliCa: Evaluating Explicit Causal Reasoning in Large Language Models Conference
Findings of the Association for Computational Linguistics: ACL 2025, Association for Computational Linguistics, Vienna, Austria, 2025, ISBN: 979-8-89176-256-5.
@conference{miliani-etal-2025-explica,
title = {ExpliCa: Evaluating Explicit Causal Reasoning in Large Language Models},
author = {Martina Miliani and Serena Auriemma and Alessandro Bondielli and Emmanuele Chersoni and Lucia Passaro and Irene Sucameli and Alessandro Lenci},
editor = {Wanxiang Che and Joyce Nabende and Ekaterina Shutova and Mohammad Taher Pilehvar},
url = {https://aclanthology.org/2025.findings-acl.891/},
doi = {10.18653/v1/2025.findings-acl.891},
isbn = {979-8-89176-256-5},
year = {2025},
date = {2025-07-01},
urldate = {2025-07-01},
booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
pages = {17335–17355},
publisher = {Association for Computational Linguistics},
address = {Vienna, Austria},
abstract = {Large Language Models (LLMs) are increasingly used in tasks requiring interpretive and inferential accuracy. In this paper, we introduce ExpliCa, a new dataset for evaluating LLMs in explicit causal reasoning. ExpliCa uniquely integrates both causal and temporal relations presented in different linguistic orders and explicitly expressed by linguistic connectives. The dataset is enriched with crowdsourced human acceptability ratings. We tested LLMs on ExpliCa through prompting and perplexity-based metrics. We assessed seven commercial and open-source LLMs, revealing that even top models struggle to reach 0.80 accuracy. Interestingly, models tend to confound temporal relations with causal ones, and their performance is also strongly influenced by the linguistic order of the events. Finally, perplexity-based scores and prompting performance are differently affected by model size.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Tosi, Filippo; Arzilli, Guglielmo; Baglivo, Francesco; Fondelli, Serena; Renda, Alessandro; Bondielli, Alessandro; Dell’Oglio, Pietro; Acampora, Vittoria; Marcelloni, Francesco; Rizzo, Caterina
Un approccio basato su NLP per la sorveglianza delle infezioni del sito chirurgico utilizzando le lettere di dimissione ospedaliera Conference
Atti 58° Congresso Nazionale SItI, 2025.
@conference{ 11568_1329807,
title = {Un approccio basato su NLP per la sorveglianza delle infezioni del sito chirurgico utilizzando le lettere di dimissione ospedaliera},
author = {Filippo Tosi and Guglielmo Arzilli and Francesco Baglivo and Serena Fondelli and Alessandro Renda and Alessandro Bondielli and Pietro Dell’Oglio and Vittoria Acampora and Francesco Marcelloni and Caterina Rizzo},
url = {https://siti2025.it/Download/EasyCms/SITI_ott_2025_provvisorio_26307.pdf},
year = {2025},
date = {2025-01-01},
booktitle = {Atti 58° Congresso Nazionale SItI},
pages = {438–439},
abstract = {Background
Le infezioni del sito chirurgico (ISC) rappresentano una percentuale significativa delle infezioni associate all’assistenza sanitaria e costituiscono un onere importante in ambito ospedaliero. La sorveglianza tradizionale delle ISC, basata sulla revisione manuale delle cartelle cliniche a testo libero, richiede molto tempo ed è scarsamente modulabile. Questo studio si propone di sviluppare e valutare un sistema di sorveglianza semi-automatizzato basato sull’intelligenza artificiale (AI) per il rilevamento delle ISC utilizzando le cartelle cliniche in lingua italiana di un grande ospedale universitario.
Metodi
È stata condotta un’analisi retrospettiva sulle lettere di dimissione ospedaliera non strutturate e anonime dell’Ospedale di Pisa (Italia). Il testo clinico è stato elaborato e vettorizzato utilizzando TF-IDF, Word2Vec e BERT (incluso il pre-training e il fine tuning). I classificatori comprendevano regressione logistica, Decision Trees, random forest e XGBoost. Le prestazioni finali sono state valutate utilizzando la convalida incrociata stratificata a 10 volte, il punteggio F1 e l’AUC. Dato lo sbilanciamento 1:100 tra record ISC e non ISC, sono state testate tecniche di oversampling e undersampling. Inoltre, sono stati valutati due modelli linguistici di grandi dimensioni (LLM) - OpenBioLLM e Minerva - utilizzando prompting a zero e a pochi colpi.
Risultati
Le migliori prestazioni sono state ottenute da un modello BERT ulteriormente pre-addestrato sul corpus specifico del dominio e messo a punto per la classificazione (BERT-FT), con XGBoost come classificatore: F1-score=0,79},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Mala, Chandana Sree; Di Maio, Christian; Proietti, Mattia; Gezici, Gizem; Giannotti, Fosca; Melacci, Stefano; Lenci, Alessandro; Gori, Marco
Towards Building a Trustworthy RAG-Based Chatbot for the Italian Public Administration Journal Article
In: Frontiers in Artificial Intelligence and Applications, vol. 408, pp. 196–204, 2025.
@article{Mala2025196,
title = {Towards Building a Trustworthy RAG-Based Chatbot for the Italian Public Administration},
author = {Chandana Sree Mala and Christian Di Maio and Mattia Proietti and Gizem Gezici and Fosca Giannotti and Stefano Melacci and Alessandro Lenci and Marco Gori},
editor = {Pedreschi D. and Milano M. and Tiddi I. and Russell S. and Boldrini C. and Pappalardo L. and Passerini A. and Wang S.},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105020935873&doi=10.3233%2fFAIA250637&partnerID=40&md5=590a91bfd34f9af45a6c2e4b8990b204},
doi = {10.3233/FAIA250637},
year = {2025},
date = {2025-01-01},
journal = {Frontiers in Artificial Intelligence and Applications},
volume = {408},
pages = {196–204},
publisher = {IOS Press BV},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Auriemma, Serena; Miliani, Martina; Madeddu, Mauro; Bondielli, Alessandro; Passaro, Lucia; Lenci, Alessandro
Prompting encoder models for zero-shot classification: a cross-domain study in Italian Journal Article
In: Language Resources and Evaluation, 2025.
@article{ 11568_1321427,
title = {Prompting encoder models for zero-shot classification: a cross-domain study in Italian},
author = {Serena Auriemma and Martina Miliani and Mauro Madeddu and Alessandro Bondielli and Lucia Passaro and Alessandro Lenci},
url = {https://link.springer.com/article/10.1007/s10579-025-09853-0},
doi = {10.1007/s10579-025-09853-0},
year = {2025},
date = {2025-01-01},
journal = {Language Resources and Evaluation},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Muscato, Benedetta; Passaro, Lucia; Gezici, Gizem; Giannotti, Fosca
Perspectives in Play: A Multi-Perspective Approach for More Inclusive NLP Systems Conference
Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, IJCAI-25, International Joint Conferences on Artificial Intelligence, 2025, ISBN: 978-1-956792-06-5.
@conference{ 11568_1322168,
title = {Perspectives in Play: A Multi-Perspective Approach for More Inclusive NLP Systems},
author = {Benedetta Muscato and Lucia Passaro and Gizem Gezici and Fosca Giannotti},
url = {https://doi.org/10.24963/ijcai.2025/1092},
doi = {10.24963/ijcai.2025/1092},
isbn = {978-1-956792-06-5},
year = {2025},
date = {2025-01-01},
booktitle = {Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence, IJCAI-25},
pages = {9827–9835},
publisher = {International Joint Conferences on Artificial Intelligence},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Lenci, Alessandro; Auriemma, Serena; Miliani, Martina
Linguistica computazionale. Natural Language Processing e Intelligenza Artificiale Book
Hoepli, Milano, 2025, ISBN: 978-88-360-1830-7, (Hoepli Academy).
@book{lenci_auriemma_miliani_2025,
title = {Linguistica computazionale. Natural Language Processing e Intelligenza Artificiale},
author = {Alessandro Lenci and Serena Auriemma and Martina Miliani},
url = {https://www.hoeplieditore.it/universita/articolo/linguistica-computazionale-alessandro-lenci/9788836018307/3144},
isbn = {978-88-360-1830-7},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-01},
publisher = {Hoepli},
address = {Milano},
abstract = {Il volume presenta un quadro generale dei principi teorici e delle metodologie pratiche della linguistica computazionale, e fornisce gli strumenti per la corretta comprensione e implementazione dei modelli di machine learning e di intelligenza artificiale per l’analisi del linguaggio naturale. Il testo si articola in tre parti: la prima è una panoramica delle nozioni fondamentali, la seconda si focalizza sui metodi di machine learning per il natural language processing (NLP), sia sui modelli più tradizionali, sia su quelli basati su reti neurali; la terza parte è dedicata ai modelli fondazionali, che rappresentano l’approccio metodologico più recente nell’ambito del NLP. La trattazione teorica è integrata da approfondimenti su temi applicativi delle tecnologie in contesti reali e da risorse online come tutorial per l’apprendimento del linguaggio di programmazione Python applicato al NLP. Al termine di ciascun capitolo sono previste domande sui principali argomenti trattati per l’autovalutazione delle conoscenze acquisite. La sequenza degli argomenti segue un percorso graduale di sviluppo dai concetti di base fino ai temi più avanzati; tuttavia, ogni capitolo è concepito come un’unità autonoma, offrendo così maggiore flessibilità nell’organizzazione dello studio.},
note = {Hoepli Academy},
keywords = {},
pubstate = {published},
tppubtype = {book}
}
Baccheschi, Corrado; Bondielli, Alessandro; Lenci, Alessandro; Micheli, Alessio; Passaro, Lucia; Podda, Marco; Tortorella, Domenico
Investigating Time-Scales in Deep Echo State Networks for Natural Language Processing Conference
Artificial Neural Networks and Machine Learning. ICANN 2025 International Workshops and Special Sessions, vol. 16072, Springer, 2025, ISBN: 9783032045522.
@conference{ 11568_1324255,
title = {Investigating Time-Scales in Deep Echo State Networks for Natural Language Processing},
author = {Corrado Baccheschi and Alessandro Bondielli and Alessandro Lenci and Alessio Micheli and Lucia Passaro and Marco Podda and Domenico Tortorella},
url = {https://link.springer.com/chapter/10.1007/978-3-032-04552-2_18},
doi = {10.1007/978-3-032-04552-2_18},
isbn = {9783032045522},
year = {2025},
date = {2025-01-01},
booktitle = {Artificial Neural Networks and Machine Learning. ICANN 2025 International Workshops and Special Sessions},
volume = {16072},
pages = {188–200},
publisher = {Springer},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Fidone, Giacomo; Passaro, Lucia; Guidotti, Riccardo
Evaluating Online Moderation Via LLM-Powered Counterfactual Simulations Conference
Proceedings of the 40th AAAI Conference on Artificial Intelligence (AAAI-26), In Press, 2025.
@conference{ 11568_1333475,
title = {Evaluating Online Moderation Via LLM-Powered Counterfactual Simulations},
author = {Giacomo Fidone and Lucia Passaro and Riccardo Guidotti},
year = {2025},
date = {2025-01-01},
booktitle = {Proceedings of the 40th AAAI Conference on Artificial Intelligence (AAAI-26)},
note = {In press},
abstract = {Online Social Networks (OSNs) widely adopt content moderation to mitigate the spread of abusive and toxic discourse. Nonetheless, the real effectiveness of moderation interventions remains unclear due to the high cost of data collection and limited experimental control. The latest developments in Natural Language Processing pave the way for a new evaluation approach. Large Language Models (LLMs) can be successfully leveraged to enhance Agent-Based Modeling and simulate human-like social behavior with unprecedented degree of believability. Yet, existing tools do not support simulation-based evaluation of moderation strategies. We fill this gap by designing a LLM-powered simulator of OSN conversations enabling a parallel, counterfactual simulation where toxic behavior is influenced by moderation interventions, keeping all else equal. We conduct extensive experiments, unveiling the psychological realism of OSN agents, the emergence of social contagion phenomena and the superior effectiveness of personalized moderation strategies.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Muscato, Benedetta; Bushipaka, Praveen; Gezici, Gizem; Passaro, Lucia; Giannotti, Fosca; Cucinotta, Tommaso
Embracing Diversity: A Multi-Perspective Approach with Soft Labels Conference
Proceedings of HHAI 2025: The 4th International Conference Series on Hybrid Human-Artificial Intelligence, IOS Press, Amsterdam, 2025, ISBN: 978-1-64368-611-0.
@conference{ 11568_1322167,
title = {Embracing Diversity: A Multi-Perspective Approach with Soft Labels},
author = {Benedetta Muscato and Praveen Bushipaka and Gizem Gezici and Lucia Passaro and Fosca Giannotti and Tommaso Cucinotta},
url = {https://ebooks.iospress.nl/doi/10.3233/FAIA250654},
doi = {10.3233/FAIA250654},
isbn = {978-1-64368-611-0},
year = {2025},
date = {2025-01-01},
booktitle = {Proceedings of HHAI 2025: The 4th International Conference Series on Hybrid Human-Artificial Intelligence},
pages = {370–384},
publisher = {IOS Press},
address = {Amsterdam},
abstract = {In subjective tasks like stance detection, diverse human perspectives are often simplified into a single ground truth through label aggregation i.e. majority voting, potentially marginalizing minority viewpoints. This paper presents a Multi-Perspective framework for stance detection that explicitly incorporates annotation diversity by using soft labels derived from both human and large language model (LLM) annotations. Building on a stance detection dataset focused on controversial topics, we augment it with document summaries and new LLM-generated labels.
We then compare two approaches: a baseline using aggregated hard labels, and a multi-perspective model trained on disaggregated soft labels that capture annotation distributions. Our findings show that multi-perspective models consistently outperform traditional baselines (higher F1-scores), with lower model confidence, reflecting task subjectivity. This work highlights the importance of modeling disagreement and promotes a shift toward more inclusive, perspective-aware NLP systems.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Taddei, Andrea; Lenci, Alessandro; D'Angelo, Caterina
Detecting Semantic Reuse in Ancient Greek Literature: A Computational Approach Conference
Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025), 2025.
@conference{ 11568_1331500,
title = {Detecting Semantic Reuse in Ancient Greek Literature: A Computational Approach},
author = {Andrea Taddei and Alessandro Lenci and Caterina D'Angelo},
editor = {Cristina Bosco and Elisabetta Jezek and Marco Polignano and Manuela Sanguinetti},
url = {https://clic2025.unica.it/wp-content/uploads/2025/09/33_main_long.pdf},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-01},
booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
pages = {1–10},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Rebillard, C.; Hurtado, J.; Krutsylo, A.; Passaro, L.; Lomonaco, V.
Continually learn to map visual concepts to language models in resource-constrained environments Journal Article
In: Neurocomputing, vol. 652, 2025.
@article{Rebillard2025,
title = {Continually learn to map visual concepts to language models in resource-constrained environments},
author = {C. Rebillard and J. Hurtado and A. Krutsylo and L. Passaro and V. Lomonaco},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105011582431&doi=10.1016%2fj.neucom.2025.131013&partnerID=40&md5=d9d8b55dad71120e084ef332e6948f8f},
doi = {10.1016/j.neucom.2025.131013},
year = {2025},
date = {2025-01-01},
journal = {Neurocomputing},
volume = {652},
abstract = {Continually learning from non-independent and identically distributed (non-i.i.d.) data poses a significant challenge in deep learning, particularly in resource-constrained environments. Visual models trained via supervised learning often suffer from overfitting, catastrophic forgetting, and biased representations when faced with sequential tasks. In contrast, pre-trained language models demonstrate greater robustness in managing task sequences due to their generalized knowledge representations, albeit at the cost of high computational resources. Leveraging this advantage, we propose a novel learning strategy, Continual Visual Mapping (CVM), which continuously maps visual representations into a fixed knowledge space derived from a language model. By anchoring learning to this fixed space, CVM enables training small, efficient visual models, making it particularly suited for scenarios where adapting large pre-trained visual models is computationally or data-prohibitive. Empirical evaluations across five benchmarks demonstrate that CVM consistently outperforms state-of-the-art continual learning methods, showcasing its potential to enhance generalization and mitigate challenges in resource-constrained continual learning settings. © 2025 The Authors},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bouchardet, Cecília Carvalho; Gonçalves, Kênia Cristina; Passaro, Lucia; Almeida, Jussara Maria; Marques-Neto, Humberto Torres
Call2Go: A Cross-Platform Strategy Of Kids Online Social Media Influencers Conference
Proceedings of the 31st Brazilian Symposium on Multimedia and the Web, Sociedade Brasileira de Computação (SBC), 2025.
@conference{ 11568_1331522,
title = {Call2Go: A Cross-Platform Strategy Of Kids Online Social Media Influencers},
author = {Cecília Carvalho Bouchardet and Kênia Cristina Gonçalves and Lucia Passaro and Jussara Maria Almeida and Humberto Torres Marques-Neto},
url = {https://sol.sbc.org.br/index.php/webmedia/article/view/37953},
doi = {10.5753/webmedia.2025.16108},
year = {2025},
date = {2025-01-01},
booktitle = {Proceedings of the 31st Brazilian Symposium on Multimedia and the Web},
pages = {112–120},
publisher = {Sociedade Brasileira de Computação (SBC)},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Suozzi, A.; Capone, L.; Lebani, G. E.; Lenci, A.
BAMBI: Developing Baby Language Models for Italian Journal Article
In: Lingue e Linguaggio, vol. 24, no. 1, pp. 83–102, 2025.
@article{ 11568_1327954,
title = {BAMBI: Developing Baby Language Models for Italian},
author = {A. Suozzi and L. Capone and G. E. Lebani and A. Lenci},
url = {https://rivisteweb.it/doi/10.1418/117444},
doi = {10.1418/117444},
year = {2025},
date = {2025-01-01},
journal = {Lingue e Linguaggio},
volume = {24},
number = {1},
pages = {83–102},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Capone, Luca; Suozzi, Alice; Lebani, Gianluca E.; Lenci, Alessandro
BAMBI Goes to School: Evaluating Italian BabyLMs with Invalsi-ITA Conference
Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025), Università di Cagliari, 2025.
@conference{ 11568_1327955.2,
title = {BAMBI Goes to School: Evaluating Italian BabyLMs with Invalsi-ITA},
author = {Luca Capone and Alice Suozzi and Gianluca E. Lebani and Alessandro Lenci},
url = {https://clic2025.unica.it/wp-content/uploads/2025/09/15_main_long.pdf},
year = {2025},
date = {2025-01-01},
booktitle = {Proceedings of the Eleventh Italian Conference on Computational Linguistics (CLiC-it 2025)},
pages = {1–12},
publisher = {Università di Cagliari},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Arzilli, G.; Baglivo, F.; De Angelis, L.; Casigliani, V.; Renda, A.; Bondielli, A.; Dell'Oglio, P.; Acampora, V.; Marcelloni, F.; Rizzo, C.
An NLP-Based Approach for Surgical Site Infection Surveillance Using Hospital Discharge Letters Journal Article
In: European Journal of Public Health, vol. 35, no. Supplement_4, 2025.
@article{ 11568_1334567,
title = {An NLP-Based Approach for Surgical Site Infection Surveillance Using Hospital Discharge Letters},
author = {G Arzilli and F Baglivo and L De Angelis and V Casigliani and A Renda and A Bondielli and P Dell'Oglio and V Acampora and F Marcelloni and C Rizzo},
url = {https://academic.oup.com/eurpub/article/35/Supplement_4/ckaf161.1037/8303229},
doi = {10.1093/eurpub/ckaf161.1037},
year = {2025},
date = {2025-01-01},
journal = {European Journal of Public Health},
volume = {35},
number = {Supplement_4},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Testa, Davide; Bonetta, Giovanni; Bernardi, Raffaella; Bondielli, Alessandro; Lenci, Alessandro; Miaschi, Alessio; Passaro, Lucia; Magnini, Bernardo
All-in-one: Understanding and Generation in Multimodal Reasoning with the MAIA Benchmark Conference
Findings of the Association for Computational Linguistics: EMNLP 2025, Association for Computational Linguistics, 2025, ISBN: 979-8-89176-335-7.
@conference{ 11568_1331520,
title = {All-in-one: Understanding and Generation in Multimodal Reasoning with the MAIA Benchmark},
author = {Davide Testa and Giovanni Bonetta and Raffaella Bernardi and Alessandro Bondielli and Alessandro Lenci and Alessio Miaschi and Lucia Passaro and Bernardo Magnini},
url = {https://aclanthology.org/2025.findings-emnlp.1091/},
doi = {10.18653/v1/2025.findings-emnlp.1091},
isbn = {979-8-89176-335-7},
year = {2025},
date = {2025-01-01},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
pages = {20030–20050},
publisher = {Association for Computational Linguistics},
abstract = {We introduce MAIA (Multimodal AI Assessment), a native-Italian benchmark designed for fine-grained investigation of the reasoning abilities of visual language models on videos. MAIA differs from other available video benchmarks for its design, its reasoning categories, the metric it uses, and the language and culture of the videos. MAIA evaluates Vision Language Models (VLMs) on two aligned tasks: a visual statement verification task, and an open-ended visual question-answering task, both on the same set of video-related questions. It considers twelve reasoning categories that aim to disentangle language and vision relations by highlighting the role of the visual input. Thanks to its carefully taught design, it evaluates VLMs' consistency and visually grounded natural language comprehension and generation simultaneously through an aggregated metric revealing low results that highlight models' fragility. Last but not least, the video collection has been carefully selected to reflect the Italian culture, and the language data are produced by native-speakers.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
2024
Capone, Luca; Auriemma, Serena; Miliani, Martina; Bondielli, Alessandro; Lenci, Alessandro
Lost in Disambiguation: How Instruction-Tuned LLMs Master Lexical Ambiguity Conference
Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024), CEUR Workshop Proceedings, Pisa, Italy, 2024, ISBN: 979-12-210-7060-6.
@conference{capone-etal-2024-lost,
title = {Lost in Disambiguation: How Instruction-Tuned LLMs Master Lexical Ambiguity},
author = {Luca Capone and Serena Auriemma and Martina Miliani and Alessandro Bondielli and Alessandro Lenci},
editor = {Felice Dell'Orletta and Alessandro Lenci and Simonetta Montemagni and Rachele Sprugnoli},
url = {https://aclanthology.org/2024.clicit-1.19/},
isbn = {979-12-210-7060-6},
year = {2024},
date = {2024-12-01},
booktitle = {Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024)},
pages = {148–156},
publisher = {CEUR Workshop Proceedings},
address = {Pisa, Italy},
abstract = {This paper investigates how decoder-only instruction-tuned LLMs handle lexical ambiguity. Two distinct methodologies are employed: Eliciting rating scores from the model via prompting and analysing the cosine similarity between pairs of polysemous words in context. Ratings and embeddings are obtained by providing pairs of sentences from Haber and Poesio (2021) to the model. These ratings and cosine similarity scores are compared with each other and with the human similarity judgments in the dataset. Surprisingly, the model scores show only a moderate correlation with the subjects’ similarity judgments and no correlation with the target word embedding similarities. A vector space anisotropy inspection has also been performed, as a potential source of the experimental results. The analysis reveals that the embedding spaces of two out of the three analyzed models exhibit poor anisotropy, while the third model shows relatively moderate anisotropy compared to previous findings for models with similar architecture (Ethayarajh 2019). These findings offer new insights into the relationship between generation quality and vector representations in decoder-only LLMs.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cascione, A.; Cerulli, A.; Manerba, M. M.; Passaro, L. C.
Women's Professions and Targeted Misogyny Online Conference
CEUR Workshop Proceedings, vol. 3878, 2024.
@conference{Cascione2024,
title = {Women's Professions and Targeted Misogyny Online},
author = {A. Cascione and A. Cerulli and M. M. Manerba and L. C. Passaro},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214429769&partnerID=40&md5=f2d3707476fb121b4200010236a6b194},
year = {2024},
date = {2024-01-01},
journal = {CEUR Workshop Proceedings},
volume = {3878},
abstract = {With the increasing popularity of social media platforms, the dissemination of misogynistic content has become more prevalent and challenging to address. In this paper, we investigate the phenomenon of online misogyny on Twitter through the lens of hurtfulness, qualifying its different manifestation in English tweets considering the profession of the targets of misogynistic attacks. By leveraging manual annotation and a BERTweet model trained for fine-grained misogyny identification, we find that specific types of misogynistic speech are more intensely directed towards particular professions. For example, derailing discourse predominantly targets authors and cultural figures, while dominance-oriented speech and sexual harassment are mainly directed at politicians and athletes. Additionally, we use the HurtLex lexicon and ItEM to assign hurtfulness scores to tweets based on different hate speech categories. Our analysis reveals that these scores align with the profession-based distribution of misogynistic speech, highlighting the targeted nature of such attacks. © 2024 CEUR-WS. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Gili, J.; Patti, V.; Passaro, L.; Caselli, T.
VeryfIT - Benchmark of Fact-Checked Claims for Italian: A CALAMITA Challenge Conference
CEUR Workshop Proceedings, vol. 3878, 2024.
@conference{Gili2024,
title = {VeryfIT - Benchmark of Fact-Checked Claims for Italian: A CALAMITA Challenge},
author = {J. Gili and V. Patti and L. Passaro and T. Caselli},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85214372761&partnerID=40&md5=089c1b0a0891e71e54e849d4a06e77c5},
year = {2024},
date = {2024-01-01},
journal = {CEUR Workshop Proceedings},
volume = {3878},
abstract = {Achieving factual accuracy is a known pending issue for language models. Their design centered around the interactive component of user interaction and the extensive use of “spontaneous” training data, has made them highly adept at conversational tasks but not fully reliable in terms of factual correctness. VeryfIT addresses this issue by evaluating the in-memory factual knowledge of language models on data written by professional fact-checkers, posing it as a true or false question. Topics of the statements vary but most are in specific domains related to the Italian government, policies, and social issues. The task presents several challenges: extracting statements from segments of speeches, determining appropriate contextual relevance both temporally and factually, and ultimately verifying the accuracy of the statements. © 2024 CEUR-WS. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Marinelli, Alberto Roberto; Carta, Antonio; Passaro, Lucia
Updating knowledge in Large Language Models: an Empirical Evaluation Conference
Conference Proceedings: 2024 IEEE International Conference on Evolving and Adaptive Intelligent Systems (EAIS), 2024.
@conference{ 11568_1254427.2,
title = {Updating knowledge in Large Language Models: an Empirical Evaluation},
author = {Alberto Roberto Marinelli and Antonio Carta and Lucia Passaro},
url = {https://ieeexplore.ieee.org/document/10570019},
doi = {10.1109/eais58494.2024.10570019},
year = {2024},
date = {2024-01-01},
booktitle = {Conference Proceedings: 2024 IEEE International Conference on Evolving and Adaptive Intelligent Systems (EAIS)},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Marinelli, A. R.; Carta, A.; Passaro, L. C.
Updating knowledge in Large Language Models: An Empirical Evaluation Conference
IEEE Conference on Evolving and Adaptive Intelligent Systems, 2024.
@conference{Marinelli2024,
title = {Updating knowledge in Large Language Models: An Empirical Evaluation},
author = {A. R. Marinelli and A. Carta and L. C. Passaro},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85199261839&doi=10.1109%2fEAIS58494.2024.10570019&partnerID=40&md5=2751592a1b7dff48c02cfea01ae84475},
doi = {10.1109/EAIS58494.2024.10570019},
year = {2024},
date = {2024-01-01},
journal = {IEEE Conference on Evolving and Adaptive Intelligent Systems},
abstract = {Natural Language Processing (NLP) has witnessed a paradigm shift with Large Language Models (LLMs), yet the static knowledge from pre-training can lead to knowledge obsolescence. This study focuses on the dynamic relationship between LLMs and evolving knowledge, using GPT-2 as a case study. Leveraging an existing framework, we update models with monthly Wikipedia dumps and Wikidata probes, addressing the stability-plasticity trade-off. We introduce a novel synthetic data generation method for experimental control and present SMARTREVIEW, a state-of-the-art continual learning method. This work advances understanding and methodologies in tackling knowledge obsolescence in evolving language models. © 2024 IEEE.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Aru, Giacomo; Emmolo, Nicola; Marzeddu, Simone; Piras, Andrea; Raffi, Jacopo; Passaro, Lucia C.
RoBEXedda: Sexism Detection in Tweets Conference
CEUR Workshop Proceedings, vol. 3740, CEUR-WS, 2024.
@conference{ 11568_1272686,
title = {RoBEXedda: Sexism Detection in Tweets},
author = {Giacomo Aru and Nicola Emmolo and Simone Marzeddu and Andrea Piras and Jacopo Raffi and Lucia C. Passaro},
url = {https://ceur-ws.org/Vol-3740/paper-88.pdf},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proceedings},
volume = {3740},
pages = {942–957},
publisher = {CEUR-WS},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cerini, L.; Bondielli, A.; Lenci, A.
Representing Abstract Concepts with Images: An Investigation with Large Language Models Conference
Workshop on Cognitive Aspects of the Lexicon, CogALex 2024 at LREC-COLING 2024 - Workshop Proceedings, European Language Resources Association (ELRA), 2024, ISBN: 978-2-493814-45-6.
@conference{ 11568_1256367,
title = {Representing Abstract Concepts with Images: An Investigation with Large Language Models},
author = {L. Cerini and A. Bondielli and A. Lenci},
url = {https://aclanthology.org/2024.cogalex-1.12},
isbn = {978-2-493814-45-6},
year = {2024},
date = {2024-01-01},
booktitle = {Workshop on Cognitive Aspects of the Lexicon, CogALex 2024 at LREC-COLING 2024 - Workshop Proceedings},
pages = {107–113},
publisher = {European Language Resources Association (ELRA)},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Proietti, Mattia; Lebani, Gianluca E.; Lenci, Alessandro
On the proto-role properties inferred by transformer language models Journal Article
In: Lingue e Linguaggio, vol. 1, no. 1, pp. 111–140, 2024.
@article{ 11568_1287831,
title = {On the proto-role properties inferred by transformer language models},
author = {Mattia Proietti and Gianluca E. Lebani and Alessandro Lenci},
doi = {10.1418/113930},
year = {2024},
date = {2024-01-01},
journal = {Lingue e Linguaggio},
volume = {1},
number = {1},
pages = {111–140},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rambelli, Giulia; Chersoni, Emmanuele; Testa, Davide; Blache, Philippe; Lenci, Alessandro
Neural Generative Models and the Parallel Architecture of Language: A Critical Review and Outlook Journal Article
In: Topics in Cognitive Science, no. 4, 2024.
@article{ 11568_1287828,
title = {Neural Generative Models and the Parallel Architecture of Language: A Critical Review and Outlook},
author = {Giulia Rambelli and Emmanuele Chersoni and Davide Testa and Philippe Blache and Alessandro Lenci},
doi = {10.1111/tops.12733},
year = {2024},
date = {2024-01-01},
journal = {Topics in Cognitive Science},
number = {4},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Muscato, Benedetta; Bushipaka, Praveen; Gezici, Gizem; Passaro, Lucia; Giannotti, Fosca
Multi-Perspective Stance Detection Conference
CEUR Workshop Proceedings, vol. 3825, CEUR-WS, 2024.
@conference{ 11568_1280367,
title = {Multi-Perspective Stance Detection},
author = {Benedetta Muscato and Praveen Bushipaka and Gizem Gezici and Lucia Passaro and Fosca Giannotti},
year = {2024},
date = {2024-01-01},
booktitle = {CEUR Workshop Proceedings},
volume = {3825},
pages = {208–214},
publisher = {CEUR-WS},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Kauf, Carina; Chersoni, Emmanuele; Lenci, Alessandro; Fedorenko, Evelina; Ivanova, Anna A.
Log Probabilities Are a Reliable Estimate of Semantic Plausibility in Base and Instruction-Tuned Language Models Conference
Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP, 2024, ISBN: 979-8-89176-170-4.
@conference{ 11568_1287830,
title = {Log Probabilities Are a Reliable Estimate of Semantic Plausibility in Base and Instruction-Tuned Language Models},
author = {Carina Kauf and Emmanuele Chersoni and Alessandro Lenci and Evelina Fedorenko and Anna A. Ivanova},
url = {https://aclanthology.org/2024.blackboxnlp-1.18},
doi = {10.18653/v1/2024.blackboxnlp-1.18},
isbn = {979-8-89176-170-4},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 7th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
pages = {263–277},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cascione, A.; Cerulli, A.; Manerba, M. M.; Passaro, L. C.
Investigating the Hurtfulness of Misogynistic Tweets Across Professions Conference
Proceedings of the Discovery Science Late Breaking Contributions 2024 (DS-LB 2024) co-located with 27th International Conference Discovery Science 2024 (DS 2024), vol. 3928, 2024.
@conference{ 11568_1327848,
title = {Investigating the Hurtfulness of Misogynistic Tweets Across Professions},
author = {A. Cascione and A. Cerulli and M. M. Manerba and L. C. Passaro},
url = {https://ceur-ws.org/Vol-3928/paper_174.pdf},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the Discovery Science Late Breaking Contributions 2024 (DS-LB 2024) co-located with 27th International Conference Discovery Science 2024 (DS 2024)},
volume = {3928},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Tamponi, Lucia; Bondielli, Alessandro
Exploring Latin epigraphy with Distributional Semantic Models: A pilot study Journal Article
In: Studi e Saggi Linguistici, vol. 62, no. 2, pp. 31–56, 2024.
@article{ 11568_1287408,
title = {Exploring Latin epigraphy with Distributional Semantic Models: A pilot study},
author = {Lucia Tamponi and Alessandro Bondielli},
year = {2024},
date = {2024-01-01},
journal = {Studi e Saggi Linguistici},
volume = {62},
number = {2},
pages = {31–56},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Cassese, Maria; Bondielli, Alessandro; Lenci, Alessandro
Evaluation of event plausibility recognition in Large (Vision)-Language Models Journal Article
In: IJCOL, vol. 10, no. 2, 2024.
@article{ 11568_1322153,
title = {Evaluation of event plausibility recognition in Large (Vision)-Language Models},
author = {Maria Cassese and Alessandro Bondielli and Alessandro Lenci},
url = {https://journals.openedition.org/ijcol/1422},
doi = {10.17454/IJCOL102.02},
year = {2024},
date = {2024-01-01},
journal = {IJCOL},
volume = {10},
number = {2},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ferri, P.; Lomonaco, V.; Passaro, L. C.; Félix-De Castro, A.; Sánchez-Cuesta, P.; Sáez, C.; García-Gómez, J. M.
Deep continual learning for medical call incidents text classification under the presence of dataset shifts Journal Article
In: Computers in Biology and Medicine, vol. 175, 2024.
@article{Ferri2024,
title = {Deep continual learning for medical call incidents text classification under the presence of dataset shifts},
author = {P. Ferri and V. Lomonaco and L. C. Passaro and A. Félix-De Castro and P. Sánchez-Cuesta and C. Sáez and J. M. García-Gómez},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85192148870&doi=10.1016%2fj.compbiomed.2024.108548&partnerID=40&md5=dfc4ec4fb6df9d4011c89a8643c881ea},
doi = {10.1016/j.compbiomed.2024.108548},
year = {2024},
date = {2024-01-01},
journal = {Computers in Biology and Medicine},
volume = {175},
abstract = {The aim of this work is to develop and evaluate a deep classifier that can effectively prioritize Emergency Medical Call Incidents (EMCI) according to their life-threatening level under the presence of dataset shifts. We utilized a dataset consisting of 1 982 746 independent EMCI instances obtained from the Health Services Department of the Region of Valencia (Spain), with a time span from 2009 to 2019 (excluding 2013). The dataset includes free text dispatcher observations recorded during the call, as well as a binary variable indicating whether the event was life-threatening. To evaluate the presence of dataset shifts, we examined prior probability shifts, covariate shifts, and concept shifts. Subsequently, we designed and implemented four deep Continual Learning (CL) strategies—cumulative learning, continual fine-tuning, experience replay, and synaptic intelligence—alongside three deep CL baselines—joint training, static approach, and single fine-tuning—based on DistilBERT models. Our results demonstrated evidence of prior probability shifts, covariate shifts, and concept shifts in the data. Applying CL techniques had a statistically significant (α=0.05) positive impact on both backward and forward knowledge transfer, as measured by the F1-score, compared to non-continual approaches. We can argue that the utilization of CL techniques in the context of EMCI is effective in adapting deep learning classifiers to changes in data distributions, thereby maintaining the stability of model performance over time. To our knowledge, this study represents the first exploration of a CL approach using real EMCI data. © 2024 The Author(s)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
De Cao, Mattia; De Cao, Nicola; Colonna, Angelo; Lenci, Alessandro
Deep Learning Meets Egyptology: a Hieroglyphic Transformer for Translating Ancient Egyptian Conference
Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024), Association for Computational Linguistics, 2024, ISBN: 979-8-89176-144-5.
@conference{ 11568_1276282,
title = {Deep Learning Meets Egyptology: a Hieroglyphic Transformer for Translating Ancient Egyptian},
author = {Mattia De Cao and Nicola De Cao and Angelo Colonna and Alessandro Lenci},
url = {https://aclanthology.org/2024.ml4al-1.9},
doi = {10.18653/v1/2024.ml4al-1.9},
isbn = {979-8-89176-144-5},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)},
pages = {71–86},
publisher = {Association for Computational Linguistics},
abstract = {This work explores the potential of Transformer models focusing on the translation of ancient Egyptian hieroglyphs. We present a novel Hieroglyphic Transformer model, built upon the powerful M2M-100 multilingual translation framework and trained on a dataset we customised from the Thesaurus Linguae Aegyptiae database. Our experiments demonstrate promising results, with the model achieving significant accuracy in translating hieroglyphics into both German and English. This work holds significant implications for Egyptology, potentially accelerating the translation process and unlocking new research approaches.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Bondielli, A.; Dell'Oglio, P.; Lenci, A.; Marcelloni, F.; Passaro, L.
Dataset for multimodal fake news detection and verification tasks Journal Article
In: Data in Brief, vol. 54, 2024.
@article{ 11568_1241070.2,
title = {Dataset for multimodal fake news detection and verification tasks},
author = {A. Bondielli and P. Dell'Oglio and A. Lenci and F. Marcelloni and L. Passaro},
url = {https://www.sciencedirect.com/science/article/pii/S2352340924004098},
doi = {10.1016/j.dib.2024.110440},
year = {2024},
date = {2024-01-01},
journal = {Data in Brief},
volume = {54},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Cossu, A.; Carta, A.; Passaro, L.; Lomonaco, V.; Tuytelaars, T.; Bacciu, D.
Continual pre-training mitigates forgetting in language and vision Journal Article
In: Neural Networks, vol. 179, 2024.
@article{Cossu2024,
title = {Continual pre-training mitigates forgetting in language and vision},
author = {A. Cossu and A. Carta and L. Passaro and V. Lomonaco and T. Tuytelaars and D. Bacciu},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85197809738&doi=10.1016%2fj.neunet.2024.106492&partnerID=40&md5=9da883314248cc3696a82b3c6c43bd64},
doi = {10.1016/j.neunet.2024.106492},
year = {2024},
date = {2024-01-01},
journal = {Neural Networks},
volume = {179},
abstract = {Pre-trained models are commonly used in Continual Learning to initialize the model before training on the stream of non-stationary data. However, pre-training is rarely applied during Continual Learning. We investigate the characteristics of the Continual Pre-Training scenario, where a model is continually pre-trained on a stream of incoming data and only later fine-tuned to different downstream tasks. We introduce an evaluation protocol for Continual Pre-Training which monitors forgetting against a Forgetting Control dataset not present in the continual stream. We disentangle the impact on forgetting of 3 main factors: the input modality (NLP, Vision), the architecture type (Transformer, ResNet) and the pre-training protocol (supervised, self-supervised). Moreover, we propose a Sample-Efficient Pre-training method (SEP) that speeds up the pre-training phase. We show that the pre-training protocol is the most important factor accounting for forgetting. Surprisingly, we discovered that self-supervised continual pre-training in both NLP and Vision is sufficient to mitigate forgetting without the use of any Continual Learning strategy. Other factors, like model depth, input modality and architecture type are not as crucial. © 2024 The Author(s)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Capone, L.; Bondielli, A.; Lenci, A.
ConcreteGPT: A Baby GPT-2 Based on Lexical Concreteness and Curriculum Learning Conference
The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning, 2024.
@conference{ 11568_1327944,
title = {ConcreteGPT: A Baby GPT-2 Based on Lexical Concreteness and Curriculum Learning},
author = {L. Capone and A. Bondielli and A. Lenci},
url = {https://aclanthology.org/2024.conll-babylm.16/},
year = {2024},
date = {2024-01-01},
booktitle = {The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning},
pages = {189–196},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Pranav, A; Cong, Yan; Chersoni, Emmanuele; Hsu, Yu-Yin; Lenci, Alessandro
Comparing Static and Contextual Distributional Semantic Models on Intrinsic Tasks: An Evaluation on Mandarin Chinese Datasets Conference
Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 2024, ISBN: 978-2-493814-10-4.
@conference{ 11568_1287833,
title = {Comparing Static and Contextual Distributional Semantic Models on Intrinsic Tasks: An Evaluation on Mandarin Chinese Datasets},
author = {A Pranav and Yan Cong and Emmanuele Chersoni and Yu-Yin Hsu and Alessandro Lenci},
url = {https://aclanthology.org/2024.lrec-main.320},
isbn = {978-2-493814-10-4},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
pages = {3610–3627},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Fioravanti, I.; Siyanova-Chanturia, A.; Lenci, A.
Collocation in the Mind: Investigating Collocational Priming in Second Language Speakers of Italian Journal Article
In: Language Learning, no. 1, 2024.
@article{ 11568_1287827,
title = {Collocation in the Mind: Investigating Collocational Priming in Second Language Speakers of Italian},
author = {I. Fioravanti and A. Siyanova-Chanturia and A. Lenci},
doi = {10.1111/lang.12663},
year = {2024},
date = {2024-01-01},
journal = {Language Learning},
number = {1},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Cruz, J. P. O.; Passaro, L. C.; Almeida, J. M.; Marques-Neto, H. T.
Characterizing fashion influencers’ behavior on Instagram Journal Article
In: Social Network Analysis and Mining, vol. 14, no. 1, 2024.
@article{Cruz2024,
title = {Characterizing fashion influencers’ behavior on Instagram},
author = {J. P. O. Cruz and L. C. Passaro and J. M. Almeida and H. T. Marques-Neto},
url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85200221718&doi=10.1007%2fs13278-024-01313-x&partnerID=40&md5=e41bf05abdcf7bb890049b3d3ded9f44},
doi = {10.1007/s13278-024-01313-x},
year = {2024},
date = {2024-01-01},
journal = {Social Network Analysis and Mining},
volume = {14},
number = {1},
abstract = {In recent years, the growth of social media platforms has led to an increase in the number of influencers who work in/for the fashion industry - these individuals have become attractive marketing partners for fashion brands. As a result, understanding how to measure the performance of fashion influencers (and what kinds of behavior lead to better performance) is paramount. This paper presents a methodology for characterizing fashion influencers’ behavior on Instagram. Using a preexisting dataset, we analyze Instagram fashion influencers exploring behavioral patterns, engagement metrics, and content dynamics. Our analyses uncover insights about influencer behavior, gender-based performance, and correlations between engagement metrics and posts’ metadata (such as caption sentiment, length, and hashtag usage). Additionally, our research highlights the impact of early engagement and post comments on overall engagement rates, highlighting the role of community interaction. Though focused on Instagram influencers, our proposed methodology could be adapted to diverse datasets and social media platforms. In sum, we present here a preliminary study for understanding and decoding the complex dynamics of fashion influencer culture on social media, offering actionable insights for marketers, brands, aspiring influencers, scholars, and regular social media users. © The Author(s), under exclusive licence to Springer-Verlag GmbH Austria, part of Springer Nature 2024.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Auriemma, Serena; Madeddu, Mauro; Miliani, Martina; Bondielli, Alessandro; Lenci, Alessandro; Passaro, Lucia
Challenging Specialized Transformers on Zero-shot Classification Conference
Proceedings of the 9th Italian Conference on Computational Linguistics, Lexis Compagnia Editoriale in Torino srl, Torino, 2024, ISBN: 9791255000846.
@conference{ 11568_1327828,
title = {Challenging Specialized Transformers on Zero-shot Classification},
author = {Serena Auriemma and Mauro Madeddu and Martina Miliani and Alessandro Bondielli and Alessandro Lenci and Lucia Passaro},
url = {https://www.aaccademia.it/ita/titolo?ref=1685},
isbn = {9791255000846},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 9th Italian Conference on Computational Linguistics},
pages = {54–63},
publisher = {Lexis Compagnia Editoriale in Torino srl},
address = {Torino},
abstract = {This paper investigates the feasibility of employing basic prompting systems for domain-specific language models. The study focuses on bureaucratic language and uses the recently introduced BureauBERTo model for experimentation. The experiments reveal that while further pre-trained models exhibit reduced robustness concerning general knowledge, they display greater adaptability in modeling domain-specific tasks, even under a zero-shot paradigm. This demonstrates the potential of leveraging simple prompting systems in specialized contexts, providing valuable insights both for research and industry.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Capone, Luca; Suozzi, Alice; Lebani, Gianluca; Lenci, Alessandro
BaBIEs: A Benchmark for the Linguistic Evaluation of Italian Baby Language Models Conference
Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), CEUR Workshop Proceedings, 2024.
@conference{ 11568_1327950.2,
title = {BaBIEs: A Benchmark for the Linguistic Evaluation of Italian Baby Language Models},
author = {Luca Capone and Alice Suozzi and Gianluca Lebani and Alessandro Lenci},
url = {https://aclanthology.org/2024.clicit-1.20/},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
pages = {157–170},
publisher = {CEUR Workshop Proceedings},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Lenci, Alessandro; Vestrucci, Andrea
Artificial Intelligence and Language Book Chapter
In: Nesi, Hilary; Milin, Petar (Ed.): International Encyclopedia of Language and Linguistics, Elsevier, 2024.
@inbook{Lenci2024,
title = {Artificial Intelligence and Language},
author = {Alessandro Lenci and Andrea Vestrucci},
editor = {Hilary Nesi and Petar Milin},
doi = {10.1016/b978-0-323-95504-1.00241-6},
year = {2024},
date = {2024-01-01},
booktitle = {International Encyclopedia of Language and Linguistics},
publisher = {Elsevier},
keywords = {},
pubstate = {published},
tppubtype = {inbook}
}
2023
Miliani, Martina; Alva-Manchego, Fernando; Lenci, Alessandro
Simplifying Administrative Texts for Italian L2 Readers with Controllable Transformers Models: A Data-driven Approach Conference
Proceedings of the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023), CEUR Workshop Proceedings, Venice, Italy, 2023, ISBN: 979-12-550-0084-6.
@conference{miliani-etal-2023-simplifying,
title = {Simplifying Administrative Texts for Italian L2 Readers with Controllable Transformers Models: A Data-driven Approach},
author = {Martina Miliani and Fernando Alva-Manchego and Alessandro Lenci},
editor = {Federico Boschetti and Gianluca E. Lebani and Bernardo Magnini and Nicole Novielli},
url = {https://aclanthology.org/2023.clicit-1.37/},
isbn = {979-12-550-0084-6},
year = {2023},
date = {2023-11-01},
booktitle = {Proceedings of the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023)},
pages = {303–315},
publisher = {CEUR Workshop Proceedings},
address = {Venice, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Auriemma, Serena; Madeddu, Mauro; Miliani, Martina; Bondielli, Alessandro; Lenci, Alessandro; Passaro, Lucia
Challenging Specialized Transformers on Zero-shot Classification Conference
Proceedings of the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023), CEUR Workshop Proceedings, Venice, Italy, 2023, ISBN: 979-12-550-0084-6.
@conference{auriemma-etal-2023-challenging,
title = {Challenging Specialized Transformers on Zero-shot Classification},
author = {Serena Auriemma and Mauro Madeddu and Martina Miliani and Alessandro Bondielli and Alessandro Lenci and Lucia Passaro},
editor = {Federico Boschetti and Gianluca E. Lebani and Bernardo Magnini and Nicole Novielli},
url = {https://aclanthology.org/2023.clicit-1.8/},
isbn = {979-12-550-0084-6},
year = {2023},
date = {2023-11-01},
booktitle = {Proceedings of the Ninth Italian Conference on Computational Linguistics (CLiC-it 2023)},
pages = {53–62},
publisher = {CEUR Workshop Proceedings},
address = {Venice, Italy},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cassotti, Pierluigi; Siciliani, Lucia; Passaro, Lucia C.; Gatto, Maristella; Basile, Pierpaolo
WiC-ITA at EVALITA2023: Overview of the EVALITA2023 Word-in-Context for ITAlian Task Conference
Proceedings of the Eighth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian Final Workshop (EVALITA 2023), Lexis Compagnia Editoriale in Torino srl, Torino, 2023, ISBN: 9791255000693.
@conference{ 11568_1327816,
title = {WiC-ITA at EVALITA2023: Overview of the EVALITA2023 Word-in-Context for ITAlian Task},
author = {Pierluigi Cassotti and Lucia Siciliani and Lucia C. Passaro and Maristella Gatto and Pierpaolo Basile},
url = {https://www.aaccademia.it/customized/downloadfile.php?tipo=opera_completa&formato=pdf&id=1654},
isbn = {9791255000693},
year = {2023},
date = {2023-01-01},
booktitle = {Proceedings of the Eighth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian Final Workshop (EVALITA 2023)},
pages = {316–324},
publisher = {Lexis Compagnia Editoriale in Torino srl},
address = {Torino},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Cassotti, Pierluigi; Siciliani, Lucia; Passaro, Lucia C.; Gatto, Maristella; Basile, Pierpaolo
WiC-ITA at EVALITA2023: Overview of the EVALITA2023 Word-in-Context for ITAlian Task Conference
EVALITA 2023 Eighth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian, vol. 3473, 2023.
@conference{ 11568_1205488,
title = {WiC-ITA at EVALITA2023: Overview of the EVALITA2023 Word-in-Context for ITAlian Task},
author = {Pierluigi Cassotti and Lucia Siciliani and Lucia C. Passaro and Maristella Gatto and Pierpaolo Basile},
url = {https://ceur-ws.org/Vol-3473/paper46.pdf},
year = {2023},
date = {2023-01-01},
booktitle = {EVALITA 2023 Eighth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian},
volume = {3473},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Testa, Davide; Chersoni, Emmanuele; Lenci, Alessandro
We Understand Elliptical Sentences, and Language Models should Too: A New Dataset for Studying Ellipsis and its Interaction with Thematic Fit Conference
Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2023, ISBN: 978-1-959429-72-2.
@conference{ 11568_1287867,
title = {We Understand Elliptical Sentences, and Language Models should Too: A New Dataset for Studying Ellipsis and its Interaction with Thematic Fit},
author = {Davide Testa and Emmanuele Chersoni and Alessandro Lenci},
url = {https://aclanthology.org/2023.acl-long.188},
doi = {10.18653/v1/2023.acl-long.188},
isbn = {978-1-959429-72-2},
year = {2023},
date = {2023-01-01},
booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {3340–3353},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
