@inproceedings{imashev:24023:sign-lang:lrec,
  author    = {Imashev, Alfarabi and Kydyrbekova, Aigerim and Mukushev, Medet and Sandygulova, Anara and Islam, Shynggys and Israilov, Khassan and Makazhanov, Aibek and Yessenbayev, Zhandos},
  title     = {Retrospective of {Kazakh-Russian} {Sign} {Language} Corpus Formation},
  pages     = {189--201},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC-COLING} 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
  maintitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  day       = {25},
  month     = may,
  year      = {2024},
  isbn      = {978-2-493814-30-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/24023.html},
  abstract  = {Sign language (SL) is a mode of communication that, in most cases, relies on visual perception exclusively and utilizes visual-gestural modality. Sign languages are already universally acknowledged as complete and natural languages. The advent of machine learning techniques has expanded the range of potential applications, not only in industry but also in addressing societal needs. Previous research conducted before 2015 has already demonstrated encouraging outcomes in developing sign language recognition systems that are both quite accurate and resilient. Nevertheless, the effectiveness and utilization of algorithms are impacted not only by their accessibility but also, at times to a greater extent, by the presence of substantial quantities of pertinent data. At the commencement of the local sign language corpus collection in 2015, there was a notable deficit of local Kazakh-Russian sign language (K-RSL) data available for computer vision and machine-learning tasks. There were already corpora of another lexically close Russian Sign Language (RSL), but they were aimed at and tailored for research in linguistics. Therefore, we initiated the procedure by collecting pertinent data appropriate for machine-learning purposes. The subsets have been incorporated into the principal corpus and will be subject to future enhancements and refinements. This paper provides a concise overview of the collected components of the Kazakh-Russian Sign Language Corpus and the resulting outcomes derived from them within the last decade.}
}

@inproceedings{kuznetsova:22024:sign-lang:lrec,
  author    = {Kuznetsova, Anna and Imashev, Alfarabi and Mukushev, Medet and Sandygulova, Anara and Kimmelman, Vadim},
  title     = {Functional Data Analysis of Non-manual Marking of Questions in {Kazakh-Russian} {Sign} {Language}},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  pages     = {124--131},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22024.html},
  abstract  = {This paper is a continuation of Kuznetsova et al. (2021), which described non-manual markers of polar and wh-questions in comparison with statements in an NLP dataset of Kazakh-Russian Sign Language (KRSL) using Computer Vision. One of the limitations of the previous work was the distortion of the 3D face landmarks when the head was rotated. The proposed solution was to train a simple linear regression model to predict the distortion and then subtract it from the original output. We improve this technique with a multilayer perceptron. Another limitation that we intend to address in this paper is the discrete analysis of the continuous movement of non-manuals. In Kuznetsova et al. (2021) we averaged the value of the non-manual over its scope for statistical analysis. To preserve information on the shape of the movement, in this study we use a statistical tool that is often used in speech research, Functional Data Analysis, specifically Functional PCA.}
}

@inproceedings{mukushev:22031:sign-lang:lrec,
  author    = {Mukushev, Medet and Kydyrbekova, Aigerim and Kimmelman, Vadim and Sandygulova, Anara},
  title     = {Towards Large Vocabulary {Kazakh-Russian} {Sign} {Language} Dataset: {KRSL-OnlineSchool}},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  pages     = {154--158},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22031.html},
  abstract  = {This paper presents a new dataset for Kazakh-Russian Sign Language (KRSL) created for the purposes of Sign Language Processing. In 2020, Kazakhstan's schools were quickly switched to online mode due to the COVID-19 pandemic. Every working day, the El-arna TV channel was broadcasting video lessons for grades from 1 to 11 with sign language translation. This opportunity allowed us to record a corpus with a large vocabulary and spontaneous SL interpretation. To this end, this corpus contains video recordings of Kazakhstan's online school translated to Kazakh-Russian sign language by 7 interpreters. At the moment we collected and cleaned 890 hours of video material. A custom annotation tool was created to make the process of data annotation simple and easy-to-use by the Deaf community. To date, around 325 hours of videos have been annotated with glosses and 4,009 lessons out of 4,547 were transcribed with automatic speech-to-text software. The KRSL-OnlineSchool dataset will be made publicly available at https://krslproject.github.io/online-school/}
}

@inproceedings{mukushev:22030:sign-lang:lrec,
  author    = {Mukushev, Medet and Sabyrov, Arman and Sultanova, Madina and Kimmelman, Vadim and Sandygulova, Anara},
  title     = {Towards Semi-automatic Sign Language Annotation Tool: {SLAN-tool}},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  pages     = {159--164},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22030.html},
  abstract  = {This paper presents a semi-automatic annotation tool for sign languages namely SLAN-tool. The SLAN-tool provides a web-based service for the annotation of sign language videos. Researchers can use the SLAN-tool web service to annotate new and existing sign language datasets with different types of annotations, such as gloss, handshape configurations, and signing regions. This is allowed using a custom tier adding functionality. A unique feature of the tool is its automatic annotation functionality which uses several neural network models in order to recognize signing segments from videos and classify handshapes according to HamNoSys handshape inventory. Furthermore, SLAN-tool users can export annotations and import them into ELAN. The SLAN-tool is publicly available at https://slan-tool.com.}
}

@inproceedings{mukushev:20036:sign-lang:lrec,
  author    = {Mukushev, Medet and Imashev, Alfarabi and Kimmelman, Vadim and Sandygulova, Anara},
  title     = {Automatic Classification of Handshapes in {Russian} {Sign} {Language}},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  pages     = {165--170},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20036.html},
  abstract  = {Handshapes are one of the basic parameters of signs, and any phonological or phonetic analysis of a sign language must account for handshapes. Many sign languages have been carefully analysed by sign language linguists to create handshape inventories. This has theoretical implications, but also applied use, as it is important due to the need of generating corpora for sign languages that can be searched, filtered, sorted by different sign components (such as handshapes, orientation, location, movement, etc.). However, it is a very time-consuming process, thus only a handful of sign languages have such inventories. This work proposes a process of automatically generating such inventories for sign languages by applying automatic hand detection, cropping, and clustering techniques. We applied our proposed method to a commonly used resource: the Spreadthesign online dictionary (www.spreadthesign.com), in particular to Russian Sign Language (RSL). We then manually verified the data to be able to perform classification. Thus, the proposed pipeline can serve as an alternative approach to manual annotation, and can help linguists in answering numerous research questions in relation to handshape frequencies in sign languages.}
}

@inproceedings{mukushev-etal-2022-crowdsourcing:lrec,
  author    = {Mukushev, Medet and Ubingazhibov, Aidyn and Kydyrbekova, Aigerim and Imashev, Alfarabi and Kimmelman, Vadim and Sandygulova, Anara},
  title     = {Crowdsourcing {Kazakh-Russian} {Sign} {Language}: {FluentSigners-50}},
  booktitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  pages     = {2541--2547},
  day       = {20--25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-72-6},
  language  = {english},
  url       = {https://aclanthology.org/2022.lrec-1.271},
  abstract  = {This paper presents the methodology we used to crowdsource a data collection of a new large-scale signer independent dataset for Kazakh-Russian Sign Language (KRSL) created for Sign Language Processing. By involving the Deaf community throughout the research process, we firstly designed a research protocol and then performed an efficient crowdsourcing campaign that resulted in a new FluentSigners-50 dataset. The FluentSigners-50 dataset consists of 173 sentences performed by 50 KRSL signers for 43,250 video samples. Dataset contributors recorded videos in real-life settings on various backgrounds using various devices such as smartphones and web cameras. Therefore, each dataset contribution has a varying distance to the camera, camera angles and aspect ratio, video quality, and frame rates. Additionally, the proposed dataset contains a high degree of linguistic and inter-signer variability and thus is a better training set for recognizing a real-life signed speech. FluentSigners-50 is publicly available at https://krslproject.github.io/fluentsigners-50/}
}

@inproceedings{mukushev-etal-2020-evaluation:lrec,
  author    = {Mukushev, Medet and Sabyrov, Arman and Imashev, Alfarabi and Koishybay, Kenessary and Kimmelman, Vadim and Sandygulova, Anara},
  title     = {Evaluation of Manual and Non-manual Components for Sign Language Recognition},
  pages     = {6073--6078},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {11--16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {english},
  url       = {https://aclanthology.org/2020.lrec-1.745},
  abstract  = {The motivation behind this work lies in the need to differentiate between similar signs that differ in non-manual components present in any sign. To this end, we recorded full sentences signed by five native signers and extracted 5200 isolated sign samples of twenty frequently used signs in Kazakh-Russian Sign Language (K-RSL), which have similar manual components but differ in non-manual components (i.e. facial expressions, eyebrow height, mouth, and head orientation). We conducted a series of evaluations in order to investigate whether non-manual components would improve sign's recognition accuracy. Among standard machine learning approaches, Logistic Regression produced the best results, 78.2{\%} of accuracy for dataset with 20 signs and 77.9{\%} of accuracy for dataset with 2 classes (statement vs question). Dataset can be downloaded from the following website: https://krslproject.github.io/krsl20/}
}

