@inproceedings{crasborn:16023:sign-lang:lrec,
  author    = {Crasborn, Onno and Bank, Richard and Zwitserlood, Inge and van der Kooij, Els and Sch{\"u}ller, Anique and Ormel, Ellen and Nauta, Ellen Yassine and van Zuilen, Merel and van Winsum, Frouke and Ros, Johan},
  title     = {Linking Lexical and Corpus Data for Sign Languages: {NGT} {Signbank} and the {Corpus} {NGT}},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  pages     = {41--46},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16023.html},
  abstract  = {How can lexical resources for sign languages be integrated with corpus annotations? We answer this question by discussing an increasingly frequent scenario for sign language resources, where the lexical data are stored in an online lexical database that may also serve as a sign language dictionary, while the annotation data are offline files in the ELAN Annotation Format (EAF). There is by now broad consensus on the need for ID-glosses in corpus annotation, which in turn requires having at least a list of ID-glosses with a description of the phonological form and meaning of the signs. There is less of a consensus on standards for glossing, on practices of sign lemmatisation, and on the types of information that need to be stored in the lexical database. This paper contributes to the establishment of standards for sign language resources by discussing how two data resources for Sign Language of the Netherlands (NGT) are currently being integrated, using the ELAN annotation software for corpus annotation and an adaptation of the Auslan Signbank software as a lexical database. We discuss some of the present relations between two large NGT data sets, and outline some future developments that are foreseen.},
}

@inproceedings{dreuw:10001:sign-lang:lrec,
  author    = {Dreuw, Philippe and Forster, Jens and Gweth, Yannick and Stein, Daniel and Ney, Hermann and Mart{\'i}nez Ruiz, Gregorio and Verges Llahi, Jaume and Crasborn, Onno and Ormel, Ellen and Du, Wei and Hoyoux, Thomas and Piater, Justus and Moya Lazaro, Jos{\'e} Miguel and Wheatley, Mark},
  title     = {{SignSpeak} -- Understanding, Recognition, and Translation of Sign Languages},
  pages     = {65--72},
  editor    = {Dreuw, Philippe and Efthimiou, Eleni and Hanke, Thomas and Johnston, Trevor and Mart{\'i}nez Ruiz, Gregorio and Schembri, Adam},
  booktitle = {Proceedings of the {LREC2010} 4th Workshop on the Representation and Processing of Sign Languages: Corpora and Sign Language Technologies},
  maintitle = {7th International Conference on Language Resources and Evaluation ({LREC} 2010)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Valletta, Malta},
  day       = {22--23},
  month     = may,
  year      = {2010},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/10001.html},
  abstract  = {The SignSpeak project will be the first step to approach sign language recognition and translation at a scientific level already reached in similar research fields such as automatic speech recognition or statistical machine translation of spoken languages. Deaf communities revolve around sign languages as they are their natural means of communication. Although deaf, hard of hearing and hearing signers can communicate without problems amongst themselves, there is a serious challenge for the deaf community in trying to integrate into educational, social and work environments. The overall goal of SignSpeak is to develop a new vision-based technology for recognizing and translating continuous sign language to text. New knowledge about the nature of sign language structure from the perspective of machine recognition of continuous sign language will allow a subsequent breakthrough in the development of a new vision-based technology for continuous sign language recognition and translation. Existing and new publicly available corpora will be used to evaluate the research progress throughout the whole project.}
}

@inproceedings{forster:10038:sign-lang:lrec,
  author    = {Forster, Jens and Stein, Daniel and Ormel, Ellen and Crasborn, Onno and Ney, Hermann},
  title     = {Best Practice for Sign Language Data Collections Regarding the Needs of Data-Driven Recognition and Translation},
  editor    = {Dreuw, Philippe and Efthimiou, Eleni and Hanke, Thomas and Johnston, Trevor and Mart{\'i}nez Ruiz, Gregorio and Schembri, Adam},
  booktitle = {Proceedings of the {LREC2010} 4th Workshop on the Representation and Processing of Sign Languages: Corpora and Sign Language Technologies},
  maintitle = {7th International Conference on Language Resources and Evaluation ({LREC} 2010)},
  pages     = {92--97},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Valletta, Malta},
  day       = {22--23},
  month     = may,
  year      = {2010},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/10038.html},
  abstract  = {We propose best practices for gloss annotation of sign languages taking into account the needs of data-driven approaches to recognition and translation of natural languages. Furthermore, we provide reference numbers for several technical aspects for the creation of new sign language data collections. Most available sign language data collections are of limited use to data-driven approaches, because they focus on rare sign language phenomena, or lack machine readable annotation schemes. Using a natural language processing point of view, we briefly discuss several sign language data collection, propose best practices for gloss annotation stemming from experience gained using two large scale sign language data collections, and derive reference numbers for several technical aspects from standard benchmark data collections for speech recognition and translation.},
}

@inproceedings{ormel:10036:sign-lang:lrec,
  author    = {Ormel, Ellen and Crasborn, Onno and van der Kooij, Els and van Dijken, Lianne and Nauta, Ellen Yassine and Forster, Jens and Stein, Daniel},
  title     = {Glossing a Multi-Purpose Sign Language Corpus},
  pages     = {186--191},
  editor    = {Dreuw, Philippe and Efthimiou, Eleni and Hanke, Thomas and Johnston, Trevor and Mart{\'i}nez Ruiz, Gregorio and Schembri, Adam},
  booktitle = {Proceedings of the {LREC2010} 4th Workshop on the Representation and Processing of Sign Languages: Corpora and Sign Language Technologies},
  maintitle = {7th International Conference on Language Resources and Evaluation ({LREC} 2010)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Valletta, Malta},
  day       = {22--23},
  month     = may,
  year      = {2010},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/10036.html},
  abstract  = {This paper describes the strategies that have been developed for creating consistent gloss annotations in the latest update to the Corpus NGT. Although the project aims to embrace the plea for ID-glosses in Johnston (2008), there is no reference lexicon that could be used in the creation of the annotations. An idiosyncratic strategy was developed that involved the creation of a temporary `glossing lexicon', which includes conventions for distinguishing regional and other variants, true and apparent homonymy, and other difficulties that are specifically related to the glossing of two-handed simultaneous constructions on different tiers.}
}

