% Workshop paper introducing SignGPT and its Visual Language Toolkit (VLTK).
% Title: "Visual Language Toolkit" is the toolkit's proper name (the abstract
% introduces it with the acronym VLTK), so it is brace-protected alongside
% SignGPT to keep its capitals under styles that sentence-case titles.
% Fields such as day and maintitle are not part of classic BibTeX; styles that
% do not know a field silently ignore it.
@inproceedings{brown:26026:sign-lang:lrec,
  author    = {Brown, Matt and Ranum, Oline and Fish, Edward and Proctor, Heidi and Woll, Bencie and Bowden, Richard and Cormier, Kearsy},
  title     = {{SignGPT} and the {Visual Language Toolkit}},
  pages     = {51--60},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2026} 12th Workshop on the Representation and Processing of Sign Languages: Language in Motion},
  maintitle = {15th International Conference on Language Resources and Evaluation ({LREC} 2026)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Palma, Mallorca, Spain},
  day       = {16},
  month     = may,
  year      = {2026},
  isbn      = {978-2-493814-82-1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/26026.html},
  abstract  = {SignGPT's Visual Language Toolkit (VLTK) aims to remove fundamental barriers to large scale sign language modelling by developing data-driven, linguistically grounded methods for continuous sign language recognition. We first identify fundamental issues around the ecological validity of potential data sources (e.g. broadcast media with interpreted signing or captions, scraping of social media). We contrast these with the currently highly resource-intensive development of curated sign language corpora based on linguistic principles. The VLTK addresses this scarcity of high quality sign language data by providing semi-automated glossing and other recognition tools, driving large scale corpus expansion without sacrificing linguistic principles. Unlike prior systems that rely on sparse glossing, the project integrates dense temporal annotation, non-manual and non-lexical feature tracking, and transformer-based architectures to capture the multimodal and spatial structure of signing. By aligning machine vision innovation with linguistic insights and community-embedded evaluation, SignGPT establishes a foundation for robust and extensible sign language models.}
}

% Workshop paper comparing human and automatic (VIA-SLA) segmentation of signs.
% Fields are grouped as: the work itself, the containing proceedings, the
% imprint, the date, then identifiers and the abstract. Field order carries no
% meaning in BibTeX; numeric-only values are written bare.
@inproceedings{woll:22007:sign-lang:lrec,
  author    = {Woll, Bencie and Fox, Neil and Cormier, Kearsy},
  title     = {Segmentation of Signs for Research Purposes: Comparing Humans and Machines},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  pages     = {198--201},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = 2022,
  month     = jun,
  day       = 25,
  isbn      = {979-10-95546-86-3},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22007.html},
  language  = {english},
  abstract  = {Sign languages such as British Sign Language (BSL) are visual languages which lack standard writing systems. Annotation of sign language data, especially for the purposes of machine readability, is therefore extremely slow. Tools to help automate and thus speed up the annotation process are very much needed. Here we test the development of one such tool (VIA-SLA), which uses temporal convolutional networks (Renz et al., 2021a, b) for the purpose of segmenting continuous signing in any sign language, and is designed to integrate smoothly with ELAN, the widely used annotation software for analysis of videos of sign language. We compare automatic segmentation by machine with segmentation done by a human, both in terms of time needed and accuracy of segmentation, using samples taken from the BSL Corpus (Schembri et al., 2014). A small sample of four short video files is tested (mean duration 25 seconds). We find that mean accuracy in terms of number and location of segmentations is relatively high, at around 78{\%}. This preliminary test suggests that VIA-SLA promises to be very useful for sign linguists.},
}

% Workshop paper on the joint gloss-annotation standards from the Digging into
% Signs project (BSL Corpus and Corpus NGT).
% Title: "Digging into Signs" is the project's name (see abstract), so it is
% brace-protected to keep "Signs" capitalised under sentence-casing styles.
% Abstract: the project's year range uses the LaTeX en-dash form (--).
% Address: the caron in the town name is written as a BibTeX special character
% (a brace group starting with a control sequence) so it sorts as one letter.
@inproceedings{cormier:16015:sign-lang:lrec,
  author    = {Cormier, Kearsy and Crasborn, Onno and Bank, Richard},
  title     = {{Digging into Signs}: Emerging Annotation Standards for Sign Language Corpora},
  pages     = {35--40},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16015.html},
  abstract  = {This paper describes the creation of annotation standards for glossing sign language corpora as part of the Digging into Signs project (2014--2015). This project was based on the annotation of two major sign language corpora, the BSL Corpus (British Sign Language) and the Corpus NGT (Sign Language of the Netherlands). The focus of the gloss annotations in these data sets was in line with the starting point of most sign language corpora: to make general corpus annotation maximally useful regardless of the particular research focus. Therefore, the joint annotation guidelines that were the output of the project focus on basic annotation of hand activity, aiming to ensure that annotations can be made in a consistent way irrespective of the particular sign language. The annotation standard provides annotators with the means to create consistent annotations for various types of signs that in turn will facilitate cross-linguistic research. At the same time, the standard includes alternative strategies for some types of signs. In this paper we outline the key features of the joint annotation conventions arising from this project, describe the arguments around providing alternative strategies in a standard, as well as discuss reliability measures and improvement to annotation tools.}
}

% Workshop paper on lemmatised annotation of the BSL Corpus and the development
% of the BSL lexical database into the online dictionary BSL SignBank.
% Title: stored in Title Case throughout ("Annotation" now matches the parallel
% "Development" in the same subtitle). "BSL Corpus" is a proper name (see
% abstract) and is protected as a unit, so sentence-casing styles do not render
% it as "BSL corpus".
@inproceedings{cormier:12033:sign-lang:lrec,
  author    = {Cormier, Kearsy and Fenlon, Jordan and Johnston, Trevor and Rentelis, Ramas and Schembri, Adam and Rowley, Katherine and Adam, Robert and Woll, Bencie},
  title     = {From Corpus to Lexical Database to Online Dictionary: Issues in Annotation of the {BSL Corpus} and the Development of {BSL} {SignBank}},
  pages     = {7--12},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12033.html},
  abstract  = {One requirement of a sign language corpus is that it should be machine-readable, but only a systematic approach to annotation that involves lemmatisation of the sign language glosses can make this possible at the present time. Such lemmatisation involves grouping morphological and phonological variants together into a single lemma, so that all related variants of a sign can be identified and analysed as a single sign. This lemmatisation process is made more straightforward by the existence of a comprehensive lexical database, as in the case for Australian Sign Language (Auslan). When annotation of data collected as part of the British Sign Language (BSL) Corpus Project began, no such lexical database for BSL existed. Therefore, a lemmatised BSL lexical database was created concurrently during annotation of the BSL Corpus data. As part of ongoing work by the Deafness Cognition {\&} Language Research Centre, this lexical database is being developed into an online BSL dictionary, BSL SignBank. This paper describes the adaptation of the Auslan lexical database into a BSL lexical database, and the current development of this lexical database into BSL SignBank.}
}

