This article presents a new bilingual dataset in written French and French Sign Language (LSF), called STK LSF. This corpus is currently being produced as part of the ANR SignToKids project. The aim of this corpus is to provide digital educational tools for deaf children, thereby facilitating the joint learning of LSF and written French. More broadly, it is intended to support future studies on the automatic processing of signed languages. To define this corpus, we focused on several grammatical phenomena typical of LSF, as well as on tales usually studied by hearing children in the second cycle in France. The corpus data represent approximately 1 hour of recording, carried out with a motion capture system (MoCap) offering a spatial precision of less than 1 mm and a temporal precision of 240 Hz. This high level of precision guarantees the quality of the data collected, which will be used both to build pedagogical scenarios in French and LSF, including signing avatar videos, and for automatic translation of text into LSF.
@inproceedings{reverdy:24031:sign-lang:lrec,
  author    = {Reverdy, Cl{\'e}ment and Gibet, Sylvie and Le Naour, Thibaut},
  title     = {{STK} {LSF}: A Motion Capture Dataset in {LSF} for {SignToKids}},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  year      = {2024},
  month     = may,
  day       = {25},
  pages     = {264--271},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24031.pdf}
}