Sign language understanding (SLU), which comprises sign language recognition (SLR) and sign language translation (SLT), aims to convert sign language videos into glosses, which transcribe sign language word by word by means of another written language, and to generate the corresponding spoken-language sentences. SLU is a challenging undertaking because it demands both fine-grained video understanding and sequence generation. In addition, the lack of supervised training data further hinders the advancement of SLU. To narrow the modality gap between vision and language and to mitigate the data scarcity problem, we propose a Simple and Effective Data Augmentation (SEDA) framework for end-to-end SLU. In particular, SEDA consists of two key components: data augmentation on both the sign and text sides, and multi-task learning with task-specific fine-tuning. Experimental results on RWTH-PHOENIX-Weather 2014T demonstrate that our proposed SEDA framework significantly and consistently outperforms the baseline model, achieving a WER of 19.91, a BLEU score of 25.19, and a ROUGE score of 51.72, and delivering competitive scores in both SLR and SLT.
% Workshop paper in the LREC-COLING 2024 sign-lang proceedings. Fields are grouped as: work, container, publication details, date, identifiers.
@inproceedings{tan:24006:sign-lang:lrec,
  author    = {Tan, Sihan and Miyazaki, Taro and Itoyama, Katsutoshi and Nakadai, Kazuhiro},
  title     = {{SEDA}: Simple and Effective Data Augmentation for Sign Language Understanding},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {48--53},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  year      = {2024},
  month     = may,
  day       = {25},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24006.pdf},
}