% Fabre et al. (2026): text-side augmentation for sign language translation (sign-lang workshop paper 26016).
@inproceedings{fabre:26016:sign-lang:lrec,
  author    = {Fabre, Diandra and Lascar, Julie and Halbout, Julie and Vartampetian, Markarit},
  title     = {Leveraging Text-side Augmentation For Sign Language Translation},
  pages     = {129--139},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  day       = {16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26016.html},
  abstract  = {Sign language translation faces significant challenges due to the scarcity of annotated data and the inherent complexity of sign languages. This paper presents a method to improve sign-to-text translation models by augmenting data on the text side. We conduct experiments using two state-of-the-art models on two publicly available datasets: PHOENIX-2014T for German Sign Language and Mediapi-RGB for French Sign Language. Our main contributions are: (1) augmenting the training sets of both datasets on the text side using a generative model, (2) evaluating the impact of paraphrasing on BLEU and BLEURT scores, and (3) analyzing the impact of paraphrasing on translation outputs. We observed a significant improvement in translation for both languages. This suggests that adding variability to the training dataset through paraphrasing can lead to better generalization of the models. These results are comparable to state-of-the-art methods that use more complex approaches, such as Visual-Language fine-tuning, to improve translation.}
}

% de la Garza et al. (2026): sign extraction from weakly aligned LSF and LSM corpora (sign-lang workshop paper 26039).
% Fields are grouped as: work, containing proceedings, publication details, identifiers, abstract.
@inproceedings{delagarza:26039:sign-lang:lrec,
  author    = {de la Garza, Lorena and Halbout, Julie and Lascar, Julie and Martinez, Niels and Curiel, Arturo and Gouiff{\`e}s, Mich{\`e}le and Braffort, Annelies},
  title     = {Extracting Signs from Weakly Aligned Sign Language Corpora: A Study on {LSF} and {LSM}},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  month     = may,
  day       = {16},
  pages     = {174--183},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26039.html},
  abstract  = {This paper presents a framework for the automatic annotation of sign language data across different recording conditions, including original and interpreted content. The proposed approach integrates weak alignment, sign segmentation, and multiple instance learning with a contrastive loss. The resulting annotations are subsequently refined and filtered to enhance their reliability. Our method was applied to two historically related sign languages, French Sign Language (LSF) and Mexican Sign Language (LSM). This led to the creation of two signaries, comprising approximately 2k categories in LSF (25k occurrences) and 41 categories in LSM (1k occurrences). Both resources provide valuable support for future research in artificial intelligence and linguistics, particularly for comparative analyses between the two languages. A seminal analysis is presented as part of this paper.}
}

% Halbout et al. (2026): spotting signs from named entities in the Matignon-LSF corpus (sign-lang workshop paper 26028).
% The "A" that opens the second sentence of the title is braced so sentence-casing styles keep it capitalised.
% Percent signs in the abstract use the plain LaTeX escape, leaving the braces balanced when the field is typeset.
@inproceedings{halbout:26028:sign-lang:lrec,
  author    = {Halbout, Julie and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le and Fabre, Diandra and Lascar, Julie},
  title     = {Learning to Spot Signs from Named Entities. {A} study on {French} {Sign} {Language}.},
  pages     = {203--211},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  day       = {16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26028.html},
  abstract  = {French Sign Language (LSF) is a low-resourced language, with few available corpora, most of which being only partially annotated. Previous work on other sign languages has explored automatic sign annotation using subtitles as weak supervision, existing signaries, or mouthing cues. This paper focuses on the corpus Matignon-LSF, by first leveraging lexical token spotting then by studying Named Entities (locations, companies, persons). Accounting for the Named entities enables the automatic detection of 30\% to 100\% more signs per class and improves the spotting of rare signs. In addition, this work provides insights into the signing of named entities and contributes resources for improving LSF-to-French translation models.}
}

% Halbout et al. (2024): the Matignon-LSF corpus of interpreted French Sign Language (sign-lang workshop paper 24024).
@inproceedings{halbout:24024:sign-lang:lrec,
  author    = {Halbout, Julie and Fabre, Diandra and Ouakrim, Yanis and Lascar, Julie and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le and Beautemps, Denis},
  title     = {{Matignon-LSF}: a Large Corpus of Interpreted {French} {Sign} {Language}},
  pages     = {95--101},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  day       = {25},
  month     = may,
  year      = {2024},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24024.html},
  abstract  = {In this paper we present Matignon-LSF, the first dataset of interpreted French Sign Language (LSF) and one of the largest LSF datasets available for research to date. This is a dataset of live interpreted LSF during public speeches by the French government. The dataset comprises 39 hours of LSF videos with French language audio and corresponding subtitles. In addition to this data, we offer pre-computed video features (I3D). We provide a detailed analysis of the proposed dataset as well as some experimental results to demonstrate the interest of this novel dataset.}
}

% Lascar et al. (2024): annotating lexical units in subtitled LSF videos without a dictionary (sign-lang workshop paper 24012).
@inproceedings{lascar:24012:sign-lang:lrec,
  author    = {Lascar, Julie and Gouiff{\`e}s, Mich{\`e}le and Braffort, Annelies and Danet, Claire},
  title     = {Annotation of {LSF} subtitled videos without a pre-existing dictionary},
  pages     = {204--212},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  day       = {25},
  month     = may,
  year      = {2024},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24012.html},
  abstract  = {This paper proposes a method for the automatic annotation of lexical units in LSF videos, using a subtitled corpus without annotation. This method, based on machine learning and involving linguists for added precision and reliability, comprises several stages. The first consists of building a bilingual lexicon (including potential variants of a given lexical unit) in a weakly supervised manner. The resulting lexicon is then refined and cleaned by LSF experts. This data serves next to train a supervised classifier for automatic annotation of lexical units on the Mediapi-RGB corpus. Our PyTorch implementation is publicly available.}
}

