Workshop paper from the sign-lang.uni-hamburg.de bibliography export.
Note: this entry uses that bibliography's extension fields (maintitle = parent
conference, day, language), which standard classic-BibTeX styles silently
ignore but biblatex-aware sign-language styles consume; they are kept on
purpose. Fixed relative to the raw export: publisher was redundantly
double-braced, and the proper noun "Polish Sign Language" is now protected
as a single brace group instead of word-by-word.
@inproceedings{mostowski:26053:sign-lang:lrec,
  author    = {Mostowski, Piotr and Kuder, Anna and W{\'o}jcicka, Joanna},
  title     = {Assisting Corpus Annotation: Automatic {BIO-Tagging} of Clause-Like Units in {Polish Sign Language}. A Pilot Study on Corpus Data},
  pages     = {351--360},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  day       = {16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26053.html},
  abstract  = {The creation of large-scale sign language corpora is often bottlenecked by the labour-intensive process of multi-layered annotation that requires manual analysis. One of the annotation steps is the challenging and time-consuming task of segmenting continuous signing into clause-like-units (CLUs). In this paper, we propose an automated segmentation framework for Polish Sign Language (PJM) designed to support manual annotation. To detect sentence boundaries, we adapt the Multi-Stage Temporal Convolutional Network (MS-TCN) architecture, enhanced with a Channel Attention mechanism, to effectively fuse multimodal skeleton features (hands, body, and face) extracted via MediaPipe. We evaluate the model on a diverse subset of the PJM Corpus (40 video files, 25 signers), containing nearly 16,000 manually annotated clauses prior to the start of this study. The proposed method achieves a Segmental F1-score of 75.43{\%} at IoU = 0.10 and 57.52{\%} at IoU = 0.50, demonstrating a strong capability in localising sentence boundaries. Furthermore, ablation studies reveal that fusing manual kinematics with non-manual prosodic cues (face) yields a significant performance gain (+13.6 pp) over unimodal baselines, empirically confirming the linguistic necessity of incorporating both manual and non-manual articulators in the process of sentence delimitation. The solution offers a viable means for reducing CLU annotation time by automatically generating high-quality clause boundary proposals.}
}

