% Workshop paper (12th Workshop on the Representation and Processing of Sign
% Languages, held with LREC 2026): K-Means vs. RQ-VAE quantizers evaluated on
% the STS Mocap v1 motion capture dataset.
% Fields run from bibliographic core (who / what / where) down to identifiers
% and the abstract; field order has no effect on the rendered reference.
@inproceedings{klezovich:26051:sign-lang:lrec,
  author    = {Klezovich, Anna and Mesch, Johanna and Henter, Gustav Eje and Beskow, Jonas},
  title     = {Comparison of Low Bitrate Quantizers for Encoding {Swedish} {Sign} {Language}},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  pages     = {256--261},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  month     = may,
  day       = {16},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26051.html},
  abstract  = {This paper investigates the bitrate--distortion trade-off of different discrete representations for Swedish Sign Language (STS) using the STS Mocap v1 motion capture dataset. We compare the K-Means algorithm with the Residual Vector Quantized Variational Autoencoder (RQ-VAE) to determine how efficiently each method preserves salient motion information at low bitrates. The results show that RQ-VAE consistently achieves lower reconstruction error than K-Means at matching bitrates, particularly for body motion, and better preserves the signing space volume. We further demonstrate that quantized representations can serve as conditioning for a flow-matching generative model, producing plausible but still imperfect sign sequences at low bitrates. These findings highlight the advantages of vector quantized models for efficient sign language motion encoding.}
}

% Main-conference LREC 2026 paper presenting the STS Mocap v1 corpus
% (4.1 hours of Swedish Sign Language motion capture).
% Title note: the subtitle begins after a question mark. Sentence-casing
% bibliography styles only keep the capital that follows a colon, so the
% leading "A" of the subtitle is brace-protected to stop it being lowercased.
@inproceedings{klezovich-etal-2026-enough:lrec,
  author    = {Klezovich, Anna and Mesch, Johanna and Henter, Gustav Eje and Beskow, Jonas},
  title     = {How Much Data Is Enough Data? {A} New Motion Capture Corpus for Probabilistic Sign Language Generation},
  pages     = {9549--9558},
  editor    = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  day       = {11--16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-49-4},
  language  = {english},
  url       = {https://lrec.elra.info/lrec2026-main-750},
  doi       = {10.63317/5pmyrs7f9o33},
  abstract  = {We present a new 4.1 hours long high-quality motion capture sign language dataset for Swedish Sign Language --- STS Mocap v1. The dataset consists of high quality multimodal data: body tracked with markers, fingers tracked with Manus Quantum Metagloves, face tracked with iPhone LiveLink app in MetaHuman Animator mode, and corresponding textual sentence translation to spoken Swedish. With the help of this dataset, we show that four hours of motion capture data is enough for generative modeling of sign language conditioned on 2D pose. In comparison, training the same flow-matching model on only 30 minutes of this data, which is a common size for sign language motion capture datasets, shows a significant degradation in the quality of the synthesized data.}
}

