Unsupervised representation learning offers a promising way of utilising large unannotated sign language resources found on the Internet. In this paper, a VQ-VAE model is trained to learn a codebook of motion primitives from sign language data. For training, we use isolated signs and sentences from a sign language dictionary. Three models are trained: one on isolated signs, one on sentences, and one mixed model. We test these models by comparing how well they are able to reconstruct held-out data from the dictionary, as well as an in-the-wild dataset based on sign language videos from YouTube. These data are characterized by less formal and more expressive signing than the dictionary items. Results show that the isolated sign model yields considerably higher reconstruction loss for the YouTube dataset, while the sentence model performs the best on this data. Further, an analysis of codebook usage reveals that the set of codes used by isolated signs and sentences differ significantly. In order to further understand the different character of the datasets, we carry out an analysis of the velocity profiles, which reveals that signing data in-the-wild has much higher average velocity than dictionary signs and phrases. We believe these differences also explain the large differences in reconstruction loss observed.
@inproceedings{malmberg:24047:sign-lang:lrec,
  author    = {Malmberg, Fredrik and Klezovich, Anna and Mesch, Johanna and Beskow, Jonas},
  title     = {Exploring Latent Sign Language Representations with Isolated Signs, Sentences and In-the-Wild Data},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {378--383},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  year      = {2024},
  month     = may,
  day       = {25},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24047.pdf},
}