% Bigeard et al. 2022 -- paper in the LREC 2022 10th Workshop on the Representation and Processing of Sign Languages (pp. 9--15).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{bigeard:22036:sign-lang:lrec,
  author    = {Bigeard, Sam and Schulder, Marc and Kopf, Maria and Hanke, Thomas and Vasilaki, Kiki and Vacalopoulou, Anna and Goulas, Theodoros and Dimou, Athanasia-Lida and Fotinea, Stavroula-Evita and Efthimiou, Eleni},
  title     = {Introducing Sign Languages to a Multilingual Wordnet: Bootstrapping Corpora and Lexical Resources of {Greek} {Sign} {Language} and {German} {Sign} {Language}},
  pages     = {9--15},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22036.pdf},
  abstract  = {Wordnets have been a popular lexical resource type for many years. Their sense-based representation of lexical items and numerous relation structures have been used for a variety of computational and linguistic applications. The inclusion of different wordnets into multilingual wordnet networks has further extended their use into the realm of cross-lingual research. Wordnets have been released for many spoken languages. Research has also been carried out into the creation of wordnets for several sign languages, but none have yet resulted in publicly available datasets. This article presents our own efforts towards an inclusion of sign languages in a multilingual wordnet, starting with Greek Sign Language (GSL) and German Sign Language (DGS). Based on differences in available language resources between GSL and DGS, we trial two workflows with different coverage priorities. We also explore how synergies between both workflows can be leveraged and how future work on additional sign languages could profit from building on existing sign language wordnet data. The results of our work are made publicly available.}
}

% Isard and Konrad 2022 -- paper in the LREC 2022 10th Workshop on the Representation and Processing of Sign Languages (pp. 73--79).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{isard:22034:sign-lang:lrec,
  author    = {Isard, Amy and Konrad, Reiner},
  title     = {{MY} {DGS} -- {ANNIS}: {ANNIS} and the {Public} {DGS} {Corpus}},
  pages     = {73--79},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22034.pdf},
  abstract  = {In 2018 the DGS-Korpus project published the first full release of the Public DGS Corpus. The data have already been published in two different ways to fulfil the needs of different user groups, and we have now published the third portal MY DGS -- ANNIS using the ANNIS browser-based corpus software. ANNIS is a corpus query tool for visualization and querying of multi-layer corpus data. It has its own query language, AQL, and is accessed from a web browser without requiring a login. It allows more complex queries and visualizations than those provided by the existing research portal. We introduce ANNIS and its query language AQL, describe the structure of MY DGS -- ANNIS, and give some example queries. The use cases with queries over multiple annotation tiers and metadata illustrate the research potential of this powerful tool and show how students and researchers can explore the Public DGS Corpus.}
}

% Jahn, Khan and Herrmann 2022 -- paper in the LREC 2022 10th Workshop on the Representation and Processing of Sign Languages (pp. 80--87).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{jahn:22035:sign-lang:lrec,
  author    = {Jahn, Elena and Khan, Calvin and Herrmann, Annika},
  title     = {Outreach and Science Communication in the {DGS-Korpus} Project: Accessibility of Data and the Benefit of Interactive Exchange between Communities},
  pages     = {80--87},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22035.pdf},
  abstract  = {In this paper, we tackle the issues of science communication and dissemination within a sign language corpus project with a focus on spreading accessible information and involving the D/deaf community on various levels. We will discuss successful examples, challenges, and limitations to public relations in such a project and particularly elaborate on use cases. The focus group is presented as a best-practice example of what we think is a necessary perspective: taking external knowledge seriously and letting community experts interact with and provide feedback on a par with academic personnel. Showing both social media and on-site events, we present some exemplary approaches from our team involved in public relations.}
}

% Kuder 2022 -- paper in the LREC 2022 10th Workshop on the Representation and Processing of Sign Languages (pp. 110--117).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{kuder:22020:sign-lang:lrec,
  author    = {Kuder, Anna},
  title     = {Making Sign Language Corpora Comparable: A Study of Palm-Up and Throw-Away in {Polish} {Sign} {Language}, {German} {Sign} {Language}, and {Russian} {Sign} {Language}},
  pages     = {110--117},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22020.pdf},
  abstract  = {This paper is primarily devoted to describing the preparation phase of a large-scale comparative study based on naturalistic linguistic data drawn from multiple sign language corpora. To provide an example, I am using my current project on manual gestural elements in Polish Sign Language, German Sign Language, and Russian Sign Language. The paper starts with a description of the reasons behind undertaking this project. Then, I describe the scope of my study, which is focused on two manual elements present in all three mentioned sign languages: palm-up and throw-away; and the three corpora which are my data sources. This is followed by a presentation of the steps taken in the initial stages of the project in order to make the data comparable. Those steps are: choosing the adequate data samples from all three corpora, gathering all data within the chosen software, and creating an annotation schema that builds on the annotations already present in all three corpora. Even though the project is still underway, and the annotation process is ongoing, preliminary discussions about the nature of the analysed manual activities are presented based on the initial annotations for the sake of evaluating the created annotation schema. I conclude the paper with some remarks about the performance of the employed methodology.}
}

% Walsh, Saunders and Bowden 2022 -- paper in the 7th International Workshop on Sign Language Translation and Avatar Technology at LREC 2022 (pp. 117--124).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{walsh:70007:sltat:lrec,
  author    = {Walsh, Harry and Saunders, Ben and Bowden, Richard},
  title     = {Changing the Representation: Examining Language Representation for Neural Sign Language Production},
  pages     = {117--124},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and McDonald, John C. and Shterionov, Dimitar and Wolfe, Rosalee},
  booktitle = {Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and Perspectives},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {24},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-82-5},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2022/workshops/sltat/pdf/2022.sltat-1.18},
  abstract  = {Neural Sign Language Production (SLP) aims to automatically translate from spoken language sentences to sign language videos. Historically the SLP task has been broken into two steps; firstly, translating from a spoken language sentence to a gloss sequence and secondly, producing a sign language video given a sequence of glosses. In this paper we apply Natural Language Processing techniques to the first step of the SLP pipeline. We use language models such as BERT and Word2Vec to create better sentence level embeddings, and apply several tokenization techniques, demonstrating how these improve performance on the low resource translation task of Text to Gloss. We introduce Text to HamNoSys (T2H) translation, and show the advantages of using a phonetic representation for sign language translation rather than a sign level gloss representation. Furthermore, we use HamNoSys to extract the hand shape of a sign and use this as additional supervision during training, further increasing the performance on T2H. Assembling best practice, we achieve a BLEU-4 score of 26.99 on the MineDGS dataset and 25.09 on PHOENIX14T, two new state-of-the-art baselines.}
}

% Schulder and Hanke 2022 -- paper in the main proceedings of the 13th International Conference on Language Resources and Evaluation, LREC 2022 (pp. 164--173).
% Editor names are all given in the unambiguous `Last, First` form.
% NOTE(review): `day` is not a classic BibTeX field; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{schulder-hanke-2022-fair:lrec,
  author    = {Schulder, Marc and Hanke, Thomas},
  title     = {How to be {FAIR} when you {CARE}: The {DGS} {Corpus} as a Case Study of Open Science Resources for Minority Languages},
  pages     = {164--173},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  booktitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {20--25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-72-6},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.18},
  abstract  = {The publication of resources for minority languages requires a balance between making data open and accessible and respecting the rights and needs of its language community. The FAIR principles were introduced as a guide to good open data practices and they have since been complemented by the CARE principles for indigenous data governance. This article describes how the DGS Corpus implemented these principles and how the two sets of principles affected each other. The DGS Corpus is a large collection of recordings of members of the deaf community in Germany communicating in their primary language, German Sign Language (DGS); it was created both as a resource for linguistic research and as a record of the life experiences of deaf people in Germany. The corpus was designed with CARE in mind to respect and empower the language community and FAIR data publishing was used to enhance its usefulness as a scientific resource.}
}

% Declerck 2022 -- paper in the main proceedings of the 13th International Conference on Language Resources and Evaluation, LREC 2022 (pp. 3977--3983).
% Editor names are all given in the unambiguous `Last, First` form.
% NOTE(review): `day` is not a classic BibTeX field; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{declerck-2022-ontology:lrec,
  author    = {Declerck, Thierry},
  title     = {Towards a new Ontology for Sign Languages},
  pages     = {3977--3983},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  booktitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {20--25},
  month     = jun,
  year      = {2022},
  isbn      = {979-10-95546-72-6},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.423},
  abstract  = {We present the current status of a new ontology for representing constitutive elements of Sign Languages (SL). This development emerged from investigations on how to represent multimodal lexical data in the OntoLex-Lemon framework, with the goal to publish such data in the Linguistic Linked Open Data (LLOD) cloud. While studying the literature and various sites dealing with sign languages, we saw the need to harmonise all the data categories (or features) defined and used in those sources, and to organise them in an ontology to which lexical descriptions in OntoLex-Lemon could be linked. We make the code of the first version of this ontology available, so that it can be further developed collaboratively by both the Linked Data and the SL communities.}
}

% Brumm and Grigat 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 27--32).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{brumm:20020:sign-lang:lrec,
  author    = {Brumm, Maren and Grigat, Rolf-Rainer},
  title     = {Optimised Preprocessing for Automatic Mouth Gesture Classification},
  pages     = {27--32},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20020.pdf},
  abstract  = {Mouth gestures are facial expressions in sign language that do not refer to lip patterns of a spoken language. Research on this topic has been limited so far. The aim of this work is to automatically classify mouth gestures from video material by training a neural network. This could render time-consuming manual annotation unnecessary and help advance the field of automatic sign language translation. However, it is a challenging task due to the little data available as training material and the similarity of different mouth gesture classes. In this paper we focus on the preprocessing of the data, such as finding the area of the face important for mouth gesture recognition. Furthermore we analyse the duration of mouth gestures and determine the optimal length of video clips for classification. Our experiments show that this can improve the classification results significantly and helps to reach a near human accuracy.}
}

% Hanke, Schulder, Konrad and Jahn 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 75--82).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{hanke:20016:sign-lang:lrec,
  author    = {Hanke, Thomas and Schulder, Marc and Konrad, Reiner and Jahn, Elena},
  title     = {Extending the {Public} {DGS} {Corpus} in Size and Depth},
  pages     = {75--82},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20016.pdf},
  abstract  = {In 2018 the DGS-Korpus project published the first full release of the Public DGS Corpus. This event marked a change of focus for the project. While before most attention had been on increasing the size of the corpus, now an increase in its depth became the priority. New data formats were added, corpus annotation conventions were released and OpenPose pose information was published for all transcripts. The community and research portal websites of the corpus also received upgrades, including persistent identifiers, archival copies of previous releases and improvements to their usability on mobile devices. The research portal was enhanced even further, improving its transcript web viewer, adding a KWIC concordance view, introducing cross-references to other linguistic resources of DGS and making its entire interface available in German in addition to English. This article provides an overview of these changes, chronicling the evolution of the Public DGS Corpus from its first release in 2018, through its second release in 2019 until its third release in 2020.}
}

% Hanke et al. 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 83--88).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{hanke:20030:sign-lang:lrec,
  author    = {Hanke, Thomas and Jahn, Elena and W{\"a}hl, Sabrina and B{\"o}se, Oliver and K{\"o}nig, Lutz},
  title     = {{SignHunter} -- A Sign Elicitation Tool Suitable for Deaf Events},
  pages     = {83--88},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20030.pdf},
  abstract  = {This paper presents SignHunter, a tool for collecting isolated signs, and discusses application possibilities. SignHunter is successfully used within the DGS-Korpus project to collect name signs for places and cities. The data adds to the content of a German Sign Language (DGS) -- German dictionary which is currently being developed, as well as a freely accessible subset of the DGS Corpus, the Public DGS Corpus. We discuss reasons to complement a natural language corpus by eliciting concepts without context and present an application example of SignHunter.}
}

% Isard 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 95--100).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{isard:20037:sign-lang:lrec,
  author    = {Isard, Amy},
  title     = {Approaches to the Anonymisation of Sign Language Corpora},
  pages     = {95--100},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20037.pdf},
  abstract  = {In this paper we survey the state of the art for the anonymisation of sign language corpora. We begin by exploring the motivations behind anonymisation and the close connection with the issue of ethics and informed consent for corpus participants. We detail how the names which should be anonymised can be identified. We then describe the processes which can be used to anonymise both the video and the annotations belonging to a corpus, and the variety of ways in which these can be carried out. We provide examples for all of these processes from three sign language corpora in which anonymisation of the data has been performed.}
}

% Langer and Schulder 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 127--134).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{langer:20017:sign-lang:lrec,
  author    = {Langer, Gabriele and Schulder, Marc},
  title     = {Collocations in Sign Language Lexicography: Towards Semantic Abstractions for Word Sense Discrimination},
  pages     = {127--134},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20017.pdf},
  abstract  = {In general monolingual lexicography a corpus-based approach to word sense discrimination (WSD) is the current standard. Automatically generated lexical profiles such as Word Sketches provide an overview on typical uses in the form of collocate lists grouped by their part of speech categories and their syntactic dependency relations to the base item. Collocates are sorted by their typicality according to frequency-based rankings. With the advancement of sign language (SL) corpora, SL lexicography can finally be based on actual language use as reflected in corpus data. In order to use such data effectively and gain new insights on sign usage, automatically generated collocation profiles need to be developed under the special conditions and circumstances of the SL data available. One of these conditions is that many of the prerequisites for the automatic syntactic parsing of corpora are not yet available for SL. In this article we describe a collocation summary generated from DGS Corpus data which is used for WSD as well as in entry-writing. The summary works based on the glosses used for lemmatisation. In addition, we explore how other resources can be utilised to add an additional layer of semantic grouping to the collocation analysis. For this experimental approach we use glosses, concepts, and wordnet supersenses.}
}

% Mueller et al. 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 157--164).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{muller:20025:sign-lang:lrec,
  author    = {M{\"u}ller, Anke and Hanke, Thomas and Konrad, Reiner and Langer, Gabriele and W{\"a}hl, Sabrina},
  title     = {From Dictionary to Corpus and Back Again -- Linking Heterogeneous Language Resources for {DGS}},
  pages     = {157--164},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20025.pdf},
  abstract  = {The Public DGS Corpus is published in two different formats, that is subtitled videos for lay persons and lemmatized and annotated transcripts and videos for experts. In addition, a draft version with the first set of preliminary entries of the DGS dictionary (DW-DGS) to be completed in 2023 is now online. The Public DGS Corpus and the DW-DGS are conceived of as stand-alone products, but are nevertheless closely interconnected to offer additional and complementary informative functions. In this paper we focus on linking the published products in order to provide users access to corpus and corpus-based dictionary in various, interrelated ways. We discuss which links are thought to be useful and what challenges the linking of the products poses. In addition we address the inclusion of links to other, older lexical resources (LSP dictionaries).}
}

% Skobov and Lepage 2020 -- paper in the LREC 2020 9th Workshop on the Representation and Processing of Sign Languages (pp. 209--216).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{skobov:20001:sign-lang:lrec,
  author    = {Skobov, Victor and Lepage, Yves},
  title     = {{Video-to-HamNoSys} Automated Annotation System},
  pages     = {209--216},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  day       = {16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20001.pdf},
  abstract  = {The Hamburg Notation System (HamNoSys) was developed for movement annotation of any sign language (SL) and can be used to produce signing animations for a virtual avatar with the JASigning platform. This provides the potential to use HamNoSys, i.e., strings of characters, as a representation of an SL corpus instead of video material. Processing strings of characters instead of images can significantly contribute to sign language research. However, the complexity of HamNoSys makes it difficult to annotate without a lot of time and effort. Therefore annotation has to be automatized. This work proposes a conceptually new approach to this problem. It includes a new tree representation of the HamNoSys grammar that serves as a basis for the generation of grammatical training data and classification of complex movements using machine learning. Our automatic annotation system relies on HamNoSys grammar structure and can potentially be used on already existing SL corpora. It is retrainable for specific settings such as camera angles, speed, and gestures. Our approach is conceptually different from other SL recognition solutions and offers a developed methodology for future research.}
}

% Jahn et al. 2018 -- paper in the LREC 2018 8th Workshop on the Representation and Processing of Sign Languages (pp. 83--90).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{jahn:18018:sign-lang:lrec,
  author    = {Jahn, Elena and Konrad, Reiner and Langer, Gabriele and Wagner, Sven and Hanke, Thomas},
  title     = {Publishing {DGS} {Corpus} Data: Different Formats for Different Needs},
  pages     = {83--90},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  day       = {12},
  month     = may,
  year      = {2018},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18018.pdf},
  abstract  = {In 2010--2012, the DGS-Korpus project collected a large corpus of German Sign Language (DGS). Now, a substantial subset of the data is published, namely the Public DGS Corpus. We describe the considerations and decisions taken regarding what part of the data is to be made public, the necessary quality assurance measures to the data preparation as well as the formats of the published data. The corpus is published in three different ways in order to fulfil the needs of a variety of different users. First of all, the data is made available to the language community whose members allowed us to share their recorded language. In addition, we hope that a large number of non-scientific users with various backgrounds will find the data useful. Last but not least, we aim to make the data attractive for users with a scientific background and provide the possibility to conduct studies based on it, irrespective of whether they are familiar with DGS or not.}
}

% Langer, Mueller and Waehl 2018 -- paper in the LREC 2018 8th Workshop on the Representation and Processing of Sign Languages (pp. 107--114).
% NOTE(review): `day` and `maintitle` are not classic BibTeX fields; presumably consumed by biblatex or a custom exporter -- confirm the target toolchain.
@inproceedings{langer:18026:sign-lang:lrec,
  author    = {Langer, Gabriele and M{\"u}ller, Anke and W{\"a}hl, Sabrina},
  title     = {Queries and Views in {iLex} to Support Corpus-based Lexicographic Work on {German} {Sign} {Language} ({DGS})},
  pages     = {107--114},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  day       = {12},
  month     = may,
  year      = {2018},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18026.pdf},
  abstract  = {In the DGS-Korpus project the corpus is being used as the basis for lexicographic descriptions of signs in dictionary entries. In this process the lexicographers start from the data and type entry structures as found in the annotation database. While preparing a dictionary entry much of the work consists of manually going through a number of single tokens viewing the original data and available annotations. Findings are then categorised and summarised. However, a number of decisions and descriptions are also supported by pre-defined searches and views on the data. Supported areas include lexicographic lemmatisation (lemma sign establishment), selection of citation forms and variants, grammatical behaviour of signs, collocational patterns of use, regional distribution patterns and distribution of lexical or formational variants over different age groups. While we are still in the process of exploring the possibilities of a sign language corpus for lexicography, searches and views that have proven useful for our work are exemplified in this paper with regard to dictionary entries.}
}

% Workshop paper (LREC 2018, 8th sign-lang workshop): how data from the DGS-Feedback online survey tool supplements DGS-Korpus corpus data when preparing dictionary entries.
@inproceedings{wahl:18025:sign-lang:lrec,
  author    = {W{\"a}hl, Sabrina and Langer, Gabriele and M{\"u}ller, Anke},
  title     = {Hand in Hand -- Using Data from an Online Survey System to Support Lexicographic Work},
  pages     = {199--206},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  day       = {12},
  month     = may,
  year      = {2018},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18025.pdf},
  abstract  = {In the DGS-Korpus project the lexicographic descriptions of signs are based on the available data of the DGS-Korpus, a reference corpus of German Sign Language (DGS). As this corpus is limited in size, number of informants recorded and topics included it is in some cases helpful to obtain additional information from the larger sign language community via an online voting system. This is done using the DGS-Feedback System, a tool especially designed for online surveys conducted using a sign language. With this tool further information on e.g. sign forms and meanings and their use and regional distribution has been elicited. Data from the DGS-Feedback is used in several ways during the lexicographic process of preparing dictionary entries to supplement data from the corpus. In the following the consideration of the data from the DGS-Feedback in relation to the corpus data in decision-making, analysis and lexicographic description is explained and discussed by way of examples.}
}

% Workshop paper (LREC 2016, 7th sign-lang workshop): visualisation services in iLex that make a combined corpus and lexical database visually accessible. No isbn field is recorded for this entry.
@inproceedings{hanke:16024:sign-lang:lrec,
  author    = {Hanke, Thomas},
  title     = {Towards a Visual Sign Language Corpus Linguistics},
  pages     = {89--92},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16024.pdf},
  abstract  = {Visualisations have a long tradition in linguistics, as in many fields dealing with complex structure. New forms of representations have been introduced to Visual Linguistics in the recent past, e.g. to help the researcher find the needle in a haystack, i.e. corpus. Here we present visualisation services available in iLex making a combined corpus and lexical database visually accessible. While many approaches suggested for textual languages transfer to sign language data as well, others explore sign-specific structure, such as multi-dimensional concordances not being restricted to sequentiality. Experimental combinations of animated visualisation and image processing might support the researcher to compensate for incomplete high-quality (=manual) annotation. In the long run, we see the potential that visualisation and data manipulation go hand in hand, allowing future user interfaces that are less text-heavy than today's sign language annotation environments.}
}

% Workshop paper (LREC 2016, 7th sign-lang workshop): kinds of corpus tokens ("non-tokens") that should not be counted as evidence of regular sign use, with implications for annotation.
@inproceedings{langer:16013:sign-lang:lrec,
  author    = {Langer, Gabriele and Hanke, Thomas and Konrad, Reiner and K{\"o}nig, Susanne},
  title     = {``Non-tokens'': When Tokens Should not Count as Evidence of Sign Use},
  pages     = {137--142},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16013.pdf},
  abstract  = {Lemmatised corpora consist of tokens as instantiations of signs (types). Tokens usually count as evidences of the signs' use. Frequency of tokens is an important criterion for the lexical status of a sign. In combination with metadata on the signers' sociolinguistic backgrounds such as age, gender, and origin these tokens can also be analysed for regional and sociolinguistic variation. However, corpora may also contain instances of sign use that do not reflect the sign use of the person uttering them. This is particularly true for metalinguistic discussions of signs, malformed signing and slips of the hand as well as other phenomena such as copying/repeating signs of the interlocutors or from stimulus material. In our presentation we list and discuss different kinds of sign use (tokens) that should either not be counted as proof of a sign type at all or at least not as evidence of regular sign use by that particular person. Examples of these ``non-tokens'' are either taken from the DGS Corpus or from uploaded video answers of the DGS Feedback. We also discuss some implications on how to annotate these cases.}
}

% Main-conference paper (LREC 2016; booktitle is the conference itself, no workshop maintitle): anonymizing DGS Korpus data with named entity recognition via the WebLicht infrastructure. The url points to the ACL Anthology.
@inproceedings{bleicken-etal-2016-using:lrec,
  author    = {Bleicken, Julian and Hanke, Thomas and Salden, Uta and Wagner, Sven},
  title     = {Using a Language Technology Infrastructure for {German} in order to Anonymize {German} {Sign} {Language} Corpus Data},
  pages     = {3303--3306},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {23--28},
  month     = may,
  year      = {2016},
  isbn      = {978-2-9517408-9-1},
  language  = {english},
  url       = {https://aclanthology.org/L16-1526},
  abstract  = {For publishing sign language corpus data on the web, anonymization is crucial even if it is impossible to hide the visual appearance of the signers: In a small community, even vague references to third persons may be enough to identify those persons. In the case of the DGS Korpus (German Sign Language corpus) project, we want to publish data as a contribution to the cultural heritage of the sign language community while annotation of the data is still ongoing. This poses the question how well anonymization can be achieved given that no full linguistic analysis of the data is available. Basically, we combine analysis of all data that we have, including named entity recognition on translations into German. For this, we use the WebLicht language technology infrastructure. We report on the reliability of these methods in this special context and also illustrate how the anonymization of the video data is technically achieved in order to minimally disturb the viewer.}
}

% Workshop paper (LREC 2014, 6th sign-lang workshop): approaches to coding mouthings and mouth gestures and how iLex supports them. The abstract is a single paragraph.
@inproceedings{hanke:14029:sign-lang:lrec,
  author    = {Hanke, Thomas},
  title     = {Annotation of mouth activities with {iLex}},
  pages     = {67--70},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2014} 6th Workshop on the Representation and Processing of Sign Languages: Beyond the Manual Channel},
  maintitle = {9th International Conference on Language Resources and Evaluation ({LREC} 2014)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Reykjavik, Iceland},
  day       = {31},
  month     = may,
  year      = {2014},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/14029.pdf},
  abstract  = {Recordings from the DGS-Korpus project with 330 informants confirm that at least for German Sign Language (DGS) you hardly find longer stretches of signing not accompanied by any mouth activity. Independent of whether you consider mouth activity while signing as part of the sign language proper or as a parallel system interacting with sign language to jointly transport meaning, mouth activity is part of the linguistic system used by signers and should be treated as such by any corpus approach. In a purely bottom-up approach an annotation practice used for mouth activities would try to describe the phenomena and leave it to a second step to classify (e.g. between mouthing and mouth gestures) and relate (e.g. to spoken language words). For practical reasons, however, the first step is often skipped, and separate coding systems are applied to what is categorised either as mouthing derived from spoken language or mouth gesture where there is no obvious connection between the meaning expressed and any spoken language words expressing that same meaning. This happens not only for time (=budget) reasons, but also because it is difficult for coders to describe mouth visemes precisely if the sign/mouth combo already suggests what is to be seen on the mouth. While there are established coding procedures to avoid influence as far as possible (like only showing the signer's face, provided video quality is good enough), they make the approach very time-consuming, even if not counting quality assurance measures like inter-transcriber agreement. Some projects undertaken at the IDGS in Hamburg therefore leave it with a spoken-language-driven approach: The mouth activity is classified as either mouth gesture or mouthing, and in the latter case the German word is noted down that a competent DGS signer ``reads'' from the lips, i.e. that word from the set of words to be expected with the co-temporal sign in its context that matches the observation. Standard orthography is used unless there is a substantial deviation. For mouth gestures, holistic labels are used. These two extremes span a whole spectrum of coding approaches that can be used for mouth activities. iLex, the Hamburg sign language annotation workbench, tries to support the whole range of solutions as good as possible. The poster w/ demo shows a variety of approaches actually in use or on the horizon and what iLex has to offer for each of those, from more time-series like systems to those evaluating co-occurrence and semantic relatedness, from novice-friendly decision trees to expert-only modes. Inter-transcriber agreement data on the examples given clearly show that a thorough analysis of data quality has to go beyond such measures.}
}

% Workshop paper (LREC 2012, 5th sign-lang workshop): problems and possible solutions for handling multi-sign lexemes and other multi-unit structures in annotation and lexicon building, focussing on iLex.
@inproceedings{hanke:12029:sign-lang:lrec,
  author    = {Hanke, Thomas and K{\"o}nig, Susanne and Konrad, Reiner and Langer, Gabriele},
  title     = {Towards tagging of multi-sign lexemes and other multi-unit structures},
  pages     = {67--68},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12029.pdf},
  abstract  = {With the building of larger sign language corpora tagging, handling and analysing large amounts of data reach a new level of complexity. Efficiency and interpersonal consistency in tagging are relevant issues as well as procedures and structures to identify and tag relevant linguistic units and structures beyond and above the manual sign level. We present and discuss problems and possible solution approaches (focussing on the working environment of iLex) of how to deal with multi-unit structures and more specifically multi-sign lexemes in annotation and lexicon building.}
}

% Workshop paper (LREC 2012, 5th sign-lang workshop): two approaches to segmenting continuous signing and the sensitivity of the gap-based approach to higher video frame rates.
% NOTE: the abstract embeds a LaTeX itemize environment, so consumers of this field need LaTeX-aware rendering.
@inproceedings{hanke:12028:sign-lang:lrec,
  author    = {Hanke, Thomas and Matthes, Silke and Regen, Anja and Worseck, Satu},
  title     = {Where Does a Sign Start and End? Segmentation of Continuous Signing},
  pages     = {69--74},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12028.pdf},
  abstract  = {There are two basic approaches how to segment continuous signing into individual signs:\begin{itemize}\item A sign starts where the preceding one ends (i.e. fluent signing means there are no gaps between signs)\item Transitional movements between signs do not count as part of either sign. Therefore, usually there are gaps between two signs during which the articulators move from the end of one sign to the beginning of the next.\end{itemize}Both approaches have their pros and cons. However, in the context of the DGS Corpus and the Dicta-Sign project the second approach offers advantages for the subsequent processing. Here we investigate how sensitive this approach is with respect to higher video frame rates.}
}

% Workshop paper (LREC 2012, 5th sign-lang workshop): the iLex type hierarchy and the qualifiers used to model sign inflection, modification and phonological variation. Abstract paragraphs are separated by \par.
@inproceedings{konrad:12023:sign-lang:lrec,
  author    = {Konrad, Reiner and Hanke, Thomas and K{\"o}nig, Susanne and Langer, Gabriele and Matthes, Silke and Nishio, Rie and Regen, Anja},
  title     = {From form to function. A database approach to handle lexicon building and spotting token forms in sign languages},
  pages     = {87--94},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12023.pdf},
  abstract  = {Using a database with type entries that are linked to token tags in transcripts has the advantage that consistency in lemmatising is not depending on ID-glosses. In iLex types are organised in different levels. The type hierarchy allows for analysing form, iconic value, and conventionalised meanings of a sign (sub-types). Tokens can be linked either to types or sub-types.
\par
We expanded this structure for modelling sign inflection and modification as well as phonological variation. Differences between token and type form are grouped by features, called qualifiers, and specified by feature values (vocabularies). Built-in qualifiers allow for spotting the form difference when lemmatising. This facilitates lemma revision and helps to get a clear picture of how inflection, modification, or phonological variation is distributed among lexical signs. This is also a strong indicator for further POS tagging. In the long term this approach will extend the lexical database from citation-form closer to full-form.
\par
The paper will explain the type hierarchy and introduce the qualifiers used up-to-date. Further on the handling and how the data are displayed will be illustrated. As we report work in progress in the context of the DGS corpus project, the modelling is far from complete.}
}

% Workshop paper (LREC 2012, 5th sign-lang workshop): procedures for generating distributional maps of regional sign variants, tested on color-sign data elicited by the DGS Corpus Project.
@inproceedings{langer:12017:sign-lang:lrec,
  author    = {Langer, Gabriele},
  title     = {A colorful first glance at data on regional variation extracted from the {DGS-Corpus}: With a focus on procedures},
  pages     = {101--108},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12017.pdf},
  abstract  = {In this work in progress procedures for analyzing and displaying distributional patterns of sign variants have been developed and tested on data for color signs elicited by the DGS Corpus Project. The data for this preliminary study were elicited as isolated signs and have been made accessible through spot annotations in iLex. The annotations had not been lemma revised but nevertheless revealed some interesting insights. Several color signs exhibited a high degree of variation. The distributional maps showed that a number of signs were mainly used in certain regions and thus provided evidence on dialectal differences within DGS. The relevant information necessary to generate distributional maps have been directly extracted via SQL-statements from the corpus and fed into R. The approach is data driven. The distributional maps show either the distribution of one sign form (variant) or of several different variants in relation to each other. Analyses of regional distribution as displayed by the distributional maps may support the annotation and lemma revision process and are a valuable basis for a lexicographical description of signs and their use as needed for compiling dictionary entries. A refined procedure to take multiple regional influences on informants into account for analysis is proposed.}
}

% Workshop paper (LREC 2010, 4th sign-lang workshop): the relocatable seven-camera studio setup used for the DGS Corpus and Dicta-Sign recordings, and the Session Director software. Abstract paragraphs are separated by \par.
@inproceedings{hanke:10047:sign-lang:lrec,
  author    = {Hanke, Thomas and K{\"o}nig, Lutz and Wagner, Sven and Matthes, Silke},
  title     = {{DGS} {Corpus} {\&} {Dicta-Sign}: The {Hamburg} Studio Setup},
  pages     = {106--109},
  editor    = {Dreuw, Philippe and Efthimiou, Eleni and Hanke, Thomas and Johnston, Trevor and Mart{\'i}nez Ruiz, Gregorio and Schembri, Adam},
  booktitle = {Proceedings of the {LREC2010} 4th Workshop on the Representation and Processing of Sign Languages: Corpora and Sign Language Technologies},
  maintitle = {7th International Conference on Language Resources and Evaluation ({LREC} 2010)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Valletta, Malta},
  day       = {22--23},
  month     = may,
  year      = {2010},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/10047.pdf},
  abstract  = {Not taking into account budget restrictions, the setup of a sign language studio always is a balancing act between high quality recordings on the one hand not to make the transcription process even more complicated than it is anyway and possibly to enable automatic processing of the recordings, and on the other hand an environment where the informants still feel comfortable enough so that the recording situation does not have too much impact on the signing. In the case of the DGS Corpus project, an additional constraint is that the studio is to be relocated twelve times over the course of two years as it was decided to make the recordings in the regions instead of inviting participants to one central place to avoid dialectal mixing. One of the implications of this approach is that the studio is operated by non-specialist deaf fieldworkers with limited time available for training.
\par
Basically all tasks in the project involve two informants interacting in different ways with each other. A moderator (the fieldworker from the region) introduces the tasks and observes the conversation, but only interferes with the conversation if absolutely necessary.
\par
The camera setup we finally ended up with consists of seven cameras altogether, three on each informant and one for the whole scene including the moderator. Two HD cameras on the informant provide frontal and birds-eye views while a stereo camera mounted on top of the frontal-view camera provides footage that helps automatic processing. The seventh camera is an HD camera as well.
\par
In contrary to setups in earlier projects, we invite the two informants to sit down directly facing each other, with the frontal-view camera positioned above (and behind) the head of the other informant. Pre-tests revealed that with a distance of approximately three meters, the distortion introduced by the elevated position of the camera does not negatively affect the transcription from video. Instead, this setting provides a front view of the informant similar to the addressee's, allowing to identify body shifts as well as direction of eye gaze more easily. At the same time, this constellation avoids informants targeting their signing back and forth between the addressee and the camera.
\par
Elicitation material and instructions are presented to the informants on screens located on the floor between them. A custom software, ``Session Director'' allows the moderator to present slides to the informants by the click of a button, and to keep track of the time elapsed for each individual task as well as the whole session. Using pre-recorded instructions and elicitation materials not only reduces the burdens on the moderator, but also makes sure that all informants get exactly the same input.
\par
Session Director keeps a log of all actions started by the moderator, allowing us to exactly reconstruct what task has been worked on when. This log is easily converted into tagging in our transcription environment, iLex. This not only allows automatic segmentation of tasks and pauses, but also introduces links from the transcript to the task and vice versa.
\par
Task descriptions for Session Director are kept as XML files, making it easy to use this freely available tool for other projects as well.}
}

% Workshop paper (LREC 2010, 4th sign-lang workshop): the elicitation tasks and stimuli of the DGS Corpus Project and how they were developed and piloted.
@inproceedings{nishio:10026:sign-lang:lrec,
  author    = {Nishio, Rie and Hong, Sung-Eun and K{\"o}nig, Susanne and Konrad, Reiner and Langer, Gabriele and Hanke, Thomas and Rathmann, Christian},
  title     = {Elicitation methods in the {DGS} ({German} {Sign} {Language}) Corpus Project},
  pages     = {178--185},
  editor    = {Dreuw, Philippe and Efthimiou, Eleni and Hanke, Thomas and Johnston, Trevor and Mart{\'i}nez Ruiz, Gregorio and Schembri, Adam},
  booktitle = {Proceedings of the {LREC2010} 4th Workshop on the Representation and Processing of Sign Languages: Corpora and Sign Language Technologies},
  maintitle = {7th International Conference on Language Resources and Evaluation ({LREC} 2010)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Valletta, Malta},
  day       = {22--23},
  month     = may,
  year      = {2010},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/10026.pdf},
  abstract  = {The DGS Corpus Project is a long-term project with two major aims: (i) to establish an extensive corpus of DGS and (ii) to develop a comprehensive dictionary of DGS-German based on the analysis of the corpus data. During the first three years the main focus is on data collection. Before setting up the corpus design we conducted a survey to get an overview on the existing elicitation materials. The design of our data collection contains a variety of different stimuli and tasks with the special attention to free conversation, dialogues and monologues. To this effect, a range of possible discourse modes were considered: narration and renarration, discussion, report and description. The stimuli include pictures, picture stories, non-verbal film clips (e.g. cartoons and realistic film clips) and signed movies. In order to minimize the influence of the surrounding spoken/written language, written German is not used if possible. Introduction and explanation of each task is provided in DGS in form of movie clips. All tasks were tested in a pilot phase to examine their feasibility and reliability. Some of the tasks tested needed to go through several rounds of modifications while others did not work at all and thus were excluded from the data collection. In this paper, we not only present the tasks for elicitation and stimuli, but also describe their development process. We also discuss reasons why some stimuli were adopted from other projects while others had to be developed specifically for the purpose of our project.}
}

% Workshop paper (LREC 2008, 3rd sign-lang workshop): poster abstract introducing the 15-year DGS Corpus project and its corpus-based electronic dictionary. Abstract paragraphs are separated by \par and the abstract ends with its own inline reference list.
@inproceedings{prillwitz:08018:sign-lang:lrec,
  author    = {Prillwitz, Siegmund and Hanke, Thomas and K{\"o}nig, Susanne and Konrad, Reiner and Langer, Gabriele and Schwarz, Arvid},
  title     = {{DGS} {Corpus} Project -- Development of a Corpus Based Electronic Dictionary {German} {Sign} {Language} / {German}},
  pages     = {159--164},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Hanke, Thomas and Thoutenhoofd, Ernst D. and Zwitserlood, Inge},
  booktitle = {Proceedings of the {LREC2008} 3rd Workshop on the Representation and Processing of Sign Languages: Construction and Exploitation of Sign Language Corpora},
  maintitle = {6th International Conference on Language Resources and Evaluation ({LREC} 2008)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marrakech, Morocco},
  day       = {1},
  month     = jun,
  year      = {2008},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/08018.pdf},
  abstract  = {The poster introduces a 15-year project accepted for funding by the Hamburg Academy of Sciences. The proposed project aims to combine the collection of a large corpus with the development and production of a comprehensive, corpus based electronic dictionary of German Sign Language (DGS).
\par
To this aim, a corpus of approximately 350--400 hours from 250--300 informants will be collected in a variety of elicitation settings. This is, in size and scope, comparable to large spoken language corpora. The design allows the use of the corpus for various tasks. These are, amongst others: (i) the validation by corpus data of a basic vocabulary compiled from different published sources; (ii) research on DGS grammar based on detailed transcription data; (iii) identification of different meanings and collocations of a sign by appropriate contexts. Furthermore, the design anticipates a comparative sociolinguistic study comparable in kind and quality to Lucas et al. (2001) and Schembri/Johnston (2004). The corpus thus provides a starting point for research deep into the structure and lexicon of German Sign Language as well as into the visual-gestural mode of sign languages in general. Parts of the annotated corpus, i.e. transcription files with English translations, will be made available online to the international linguistic community.
\par
The corpus data will undergo two stages of transcription. First, a basic transcription serves to segment utterances and to identify lexical items and thus provides a first access to the data. Second, approximately 50 {\%} of the transcriptions will be transcribed again in more detail. This serves the purpose of clarifying grammatical questions for the dictionary grammar as well as dealing with lexicological and lexicographic issues. The annotation of the corpus will be closely intertwined with the requirements of lexical analysis. A high quality of transcription will be achieved through continuous verification by native signers. A relational database (iLex, cf. Hanke/Storz) supports this process, especially the consistency of type-token matching.
\par
Lexical analysis and lexicographic decisions concerning for example lexical status, language change, and lemma selection will be continuously validated by a deaf focus group and a general voting web interface which will be open for all interested members of the deaf community.
\par
The dictionary will be entirely based on the corpus with respect to the list of lemmas to be included but decidedly exceed a conglomeration of corpus references. Rather, we will systematically abstract from the references to obtain a generalized description of lexical items. Examples of sign uses will be taken directly from the corpus.
\par
For cross-linguistic research and comparability of results across projects, we consider it essential to push standardisation or at least compatibility of annotation and transcription conventions. To reach this, we have arranged cooperations with some other national corpus projects and look forward to cooperate with more projects currently in preparation.
\par
References
\par
Lucas, Ceil / Bayley, Robert / Valli, Clayton (2001): Sociolinguistic Variation in American Sign Language. Washington, DC: Gallaudet Univ. Press.
\par
Schembri, Adam / Johnston, Trevor (2004): Sociolinguistic variation in Auslan (Australian Sign Language). A research project in progress. In: Deaf Worlds 20 (1), 78--90.}
}