@comment{Workshop paper at LREC 2026: sign extraction from weakly aligned LSF and LSM corpora.}
@inproceedings{delagarza:26039:sign-lang:lrec,
  author    = {de la Garza, Lorena and Halbout, Julie and Lascar, Julie and Mart{\'i}nez-Guevara, Niels and Curiel, Arturo and Gouiff{\`e}s, Mich{\`e}le and Braffort, Annelies},
  title     = {Extracting Signs from Weakly Aligned Sign Language Corpora: A Study on {LSF} and {LSM}},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {174--183},
  publisher = {{ELRA Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  month     = may,
  day       = {16},
  isbn      = {978-2-493814-82-1},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26039.html},
  language  = {english},
  abstract  = {This paper presents a framework for the automatic annotation of sign language data across different recording conditions, including original and interpreted content. The proposed approach integrates weak alignment, sign segmentation, and multiple instance learning with a contrastive loss. The resulting annotations are subsequently refined and filtered to enhance their reliability. Our method was applied to two historically related sign languages, French Sign Language (LSF) and Mexican Sign Language (LSM). This led to the creation of two signaries, comprising approximately 2k categories in LSF (25k occurrences) and 41 categories in LSM (1k occurrences). Both resources provide valuable support for future research in artificial intelligence and linguistics, particularly for comparative analyses between the two languages. A seminal analysis is presented as part of this paper.}
}

@comment{Workshop paper at LREC 2026: spotting signs from named entities in the Matignon-LSF corpus. Percent signs in the abstract are written as a braced backslash-percent so that LaTeX sees balanced groups.}
@inproceedings{halbout:26028:sign-lang:lrec,
  author    = {Halbout, Julie and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le and Fabre, Diandra and Lascar, Julie},
  title     = {Learning to Spot Signs from Named Entities. A study on {French} {Sign} {Language}},
  pages     = {203--211},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {{ELRA Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  day       = {16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26028.html},
  abstract  = {French Sign Language (LSF) is a low-resourced language, with few available corpora, most of which being only partially annotated. Previous work on other sign languages has explored automatic sign annotation using subtitles as weak supervision, existing signaries, or mouthing cues. This paper focuses on the corpus Matignon-LSF, by first leveraging lexical token spotting then by studying Named Entities (locations, companies, persons). Accounting for the Named entities enables the automatic detection of 30{\%} to 100{\%} more signs per class and improves the spotting of rare signs. In addition, this work provides insights into the signing of named entities and contributes resources for improving LSF-to-French translation models.}
}

@comment{Workshop paper at LREC-COLING 2024 introducing the Matignon-LSF corpus of interpreted French Sign Language.}
@inproceedings{halbout:24024:sign-lang:lrec,
  author    = {Halbout, Julie and Fabre, Diandra and Ouakrim, Yanis and Lascar, Julie and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le and Beautemps, Denis},
  title     = {{Matignon-LSF}: a Large Corpus of Interpreted {French} {Sign} {Language}},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {95--101},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  year      = {2024},
  month     = may,
  day       = {25},
  isbn      = {978-2-493814-30-2},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24024.html},
  language  = {english},
  abstract  = {In this paper we present Matignon-LSF, the first dataset of interpreted French Sign Language (LSF) and one of the largest LSF dataset available for research to date. This is a dataset of live interpreted LSF during public speeches by the French government. The dataset comprises 39 hours of LSF videos with French language audio and corresponding subtitles. In addition to this data, we offer pre-computed video features (I3D). We provide a detailed analysis of the proposed dataset as well as some experimental results to demonstrate the interest of this novel dataset.}
}

@comment{Workshop paper at LREC-COLING 2024: annotating lexical units in subtitled LSF videos without a pre-existing dictionary.}
@inproceedings{lascar:24012:sign-lang:lrec,
  author    = {Lascar, Julie and Gouiff{\`e}s, Mich{\`e}le and Braffort, Annelies and Danet, Claire},
  title     = {Annotation of {LSF} subtitled videos without a pre-existing dictionary},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {204--212},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  year      = {2024},
  month     = may,
  day       = {25},
  isbn      = {978-2-493814-30-2},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24012.html},
  language  = {english},
  abstract  = {This paper proposes a method for the automatic annotation of lexical units in LSF videos, using a subtitled corpus without annotation. This method based on machine learning and involving linguists for added precision and reliability, comprises several stages. The first consists of building a bilingual lexicon (including potential variants of a given lexical unit) in a weakly supervised manner. The resulting lexicon is then refined and cleaned by LSF experts. This data serves next to train a supervised classifier for automatic annotation of lexical units on the Mediapi-RGB corpus. Our Pytorch implementation is publicly available.}
}

@comment{Workshop paper at LREC 2020 on continuous sign language recognition. The abstract holds two paragraphs separated by an explicit \par line.}
@inproceedings{belissen:20028:sign-lang:lrec,
  author    = {Belissen, Valentin and Gouiff{\`e}s, Mich{\`e}le and Braffort, Annelies},
  title     = {Improving and Extending Continuous Sign Language Recognition: Taking Iconicity and Spatial Language into Account},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  pages     = {7--12},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = {2020},
  month     = may,
  day       = {16},
  isbn      = {979-10-95546-54-2},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20028.html},
  language  = {english},
  abstract  = {In a lot of recent research, attention has been drawn to recognizing sequences of lexical signs in continuous Sign Language corpora, often artificial. However, as SLs are structured through the use of space and iconicity, focusing on lexicon only prevents the field of Continuous Sign Language Recognition (CSLR) from extending to Sign Language Understanding and Translation.
\par
In this article, we propose a new formulation of the CSLR problem and discuss the possibility of recognizing higher-level linguistic structures in SL videos, like classifier constructions. These structures show much more variability than lexical signs, and are fundamentally different than them in the sense that form and meaning can not be disentangled. Building on the recently published French Sign Language corpus Dicta-Sign-LSF-v2, we discuss the performance and relevance of a simple recurrent neural network trained to recognize illustrative structures.}
}

@comment{Main-conference paper at LREC 2020 presenting the Dicta-Sign-LSF-v2 corpus; the url field points to the ACL Anthology record. All editor names use the comma form, and case-protected title words are braced as whole words.}
@inproceedings{belissen-etal-2020-dicta:lrec,
  author    = {Belissen, Valentin and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le},
  title     = {{Dicta-Sign-LSF-v2}: Remake of a Continuous {French} {Sign} {Language} Dialogue Corpus and a First Baseline for Automatic Sign Language Processing},
  pages     = {6040--6048},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {11--16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {english},
  url       = {https://aclanthology.org/2020.lrec-1.740},
  abstract  = {While the research in automatic Sign Language Processing (SLP) is growing, it has been almost exclusively focused on recognizing lexical signs, whether isolated or within continuous SL production. However, Sign Languages include many other gestural units like iconic structures, which need to be recognized in order to go towards a true SL understanding. In this paper, we propose a newer version of the publicly available SL corpus Dicta-Sign, limited to its French Sign Language part. Involving 16 different signers, this dialogue corpus was produced with very few constraints on the style and content. It includes lexical and non-lexical annotations over 11 hours of video recording, with 35000 manual units. With the aim of stimulating research in SL understanding, we also provide a baseline for the recognition of lexical signs and non-lexical structures on this corpus. A very compact modeling of a signer is built and a Convolutional-Recurrent Neural Network is trained and tested on Dicta-Sign-LSF-v2, with state-of-the-art results, including the ability to detect iconicity in SL production.}
}

@comment{Main-conference paper at LREC 2020 presenting the MEDIAPI-SKEL 2D-skeleton database; the url field points to the ACL Anthology record. All editor names use the comma form, and case-protected title words are braced as whole words.}
@inproceedings{bull-etal-2020-mediapi:lrec,
  author    = {Bull, Hannah and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le},
  title     = {{MEDIAPI-SKEL} - A {2D}-Skeleton Video Database of {French} {Sign} {Language} With Aligned {French} Subtitles},
  pages     = {6063--6068},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {11--16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {english},
  url       = {https://aclanthology.org/2020.lrec-1.743},
  abstract  = {This paper presents MEDIAPI-SKEL, a 2D-skeleton database of French Sign Language videos aligned with French subtitles. The corpus contains 27 hours of video of body, face and hand keypoints, aligned to subtitles with a vocabulary size of 17k tokens. In contrast to existing sign language corpora such as videos produced under laboratory conditions or translations of TV programs into sign language, this database is constructed using original sign language content largely produced by deaf journalists at the media company M{\'e}dia-Pi. Moreover, the videos are accurately synchronized with French subtitles. We propose three challenges appropriate for this corpus that are related to processing units of signs in context: automatic alignment of text and video, semantic segmentation of sign language, and production of video-text embeddings for cross-modal retrieval. These challenges deviate from the classic task of identifying a limited number of lexical signs in a video stream.}
}

