@inproceedings{roh:24052:sign-lang:lrec,
  author    = {Roh, Kyunggeun and Lee, Huije and Hwang, Eui Jun and Cho, Sukmin and Park, Jong C.},
  title     = {Preprocessing {Mediapipe} Keypoints with Keypoint Reconstruction and Anchors for Isolated Sign Language Recognition},
  pages     = {400--411},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  day       = {25},
  month     = may,
  year      = {2024},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24052.html},
  abstract  = {Isolated Sign Language Recognition (ISLR) aims to classify signs into the corresponding gloss, but it remains challenging due to rapid movements and minute changes of hands. Pose-based approaches, recently gaining attention due to their robustness against the environment, are crucial against such challenging movements and changes due to the difficulty of capturing small joint movements from the noisy keypoints. In this work, we emphasize the importance of preprocessing keypoints to alleviate the risk of such errors. We employ normalization using anchor points to accurately track the relative motion of skeletal joints, focusing on hand movements. Additionally, we implement bilinear interpolation to reconstruct keypoints, particularly to retrieve missing information for hands that were not detected. Preprocessing methods proposed in this work show a 6.05{\%} improvement in accuracy and achieved 83.26{\%} accuracy with data augmentation on the WLASL dataset, which is the highest among pose-based approaches. The proposed methods show strengths in cases with signs having importance in the hand shape, especially when some frames have undetected hands.}
}

@inproceedings{kim-etal-2022-layering:lrec,
  author    = {Kim, Jung-Ho and Hwang, Eui Jun and Cho, Sukmin and Lee, Du Hui and Park, Jong C.},
  title     = {Sign Language Production With Avatar Layering: A Critical Use Case over Rare Words},
  pages     = {1519--1528},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  booktitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille, France},
  day       = {20--25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-72-6},
  language  = {english},
  url       = {https://aclanthology.org/2022.lrec-1.163},
  internal-note = {NOTE(review): day looks like the full LREC 2022 conference range, not this paper's presentation day -- confirm against the programme},
  abstract  = {Sign language production (SLP) is the process of generating sign language videos from spoken language expressions. Since sign languages are highly under-resourced, existing vision-based SLP approaches suffer from out-of-vocabulary (OOV) and test-time generalization problems and thus generate low-quality translations. To address these problems, we introduce an avatar-based SLP system composed of a sign language translation (SLT) model and an avatar animation generation module. Our Transformer-based SLT model utilizes two additional strategies to resolve these problems: named entity transformation to reduce OOV tokens and context vector generation using a pretrained language model (e.g., BERT) to reliably train the decoder. Our system is validated on a new Korean-Korean Sign Language (KSL) dataset of weather forecasts and emergency announcements. Our SLT model achieves an 8.77 higher BLEU-4 score and a 4.57 higher ROUGE-L score over those of our baseline model. In a user evaluation, 93.48{\%} of named entities were successfully identified by participants, demonstrating marked improvement on OOV issues.}
}

