% LREC 2022 sign language workshop paper (id 22035): outreach and science communication in the DGS-Korpus project.
@inproceedings{jahn:22035:sign-lang:lrec,
  author    = {Jahn, Elena and Khan, Calvin and Herrmann, Annika},
  title     = {Outreach and Science Communication in the {DGS-Korpus} Project: Accessibility of Data and the Benefit of Interactive Exchange between Communities},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Schulder, Marc},
  booktitle = {Proceedings of the {LREC2022} 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources},
  maintitle = {13th International Conference on Language Resources and Evaluation ({LREC} 2022)},
  pages     = {80--87},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = {2022},
  month     = jun,
  day       = {25},
  isbn      = {979-10-95546-86-3},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/22035.pdf},
  abstract  = {In this paper, we tackle the issues of science communication and dissemination within a sign language corpus project with a focus on spreading accessible information and involving the D/deaf community on various levels. We will discuss successful examples, challenges, and limitations to public relations in such a project and particularly elaborate on use cases. The focus group is presented as a best-practice example of what we think is a necessary perspective: taking external knowledge seriously and let community experts interact with and provide feedback on a par with academic personnel. Showing both social media and on-site events, we present some exemplary approaches from our team involved in public relations.}
}

% LREC 2020 sign language workshop paper (id 20030): SignHunter, a tool for collecting isolated signs.
@inproceedings{hanke:20030:sign-lang:lrec,
  author    = {Hanke, Thomas and Jahn, Elena and W{\"a}hl, Sabrina and B{\"o}se, Oliver and K{\"o}nig, Lutz},
  title     = {{SignHunter} -- A Sign Elicitation Tool Suitable for Deaf Events},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  pages     = {83--88},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = {2020},
  month     = may,
  day       = {16},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20030.pdf},
  abstract  = {This paper presents SignHunter, a tool for collecting isolated signs, and discusses application possibilities. SignHunter is successfully used within the DGS-Korpus project to collect name signs for places and cities. The data adds to the content of a German Sign Language (DGS) -- German dictionary which is currently being developed, as well as a freely accessible subset of the DGS Corpus, the Public DGS Corpus. We discuss reasons to complement a natural language corpus by eliciting concepts without context and present an application example of SignHunter.}
}

% LREC 2020 sign language workshop paper (id 20017): collocation summaries for word sense discrimination in sign language lexicography.
@inproceedings{langer:20017:sign-lang:lrec,
  author    = {Langer, Gabriele and Schulder, Marc},
  title     = {Collocations in Sign Language Lexicography: Towards Semantic Abstractions for Word Sense Discrimination},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  pages     = {127--134},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = {2020},
  month     = may,
  day       = {16},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20017.pdf},
  abstract  = {In general monolingual lexicography a corpus-based approach to word sense discrimination (WSD) is the current standard. Automatically generated lexical profiles such as Word Sketches provide an overview on typical uses in the form of collocate lists grouped by their part of speech categories and their syntactic dependency relations to the base item. Collocates are sorted by their typicality according to frequency-based rankings. With the advancement of sign language (SL) corpora, SL lexicography can finally be based on actual language use as reflected in corpus data. In order to use such data effectively and gain new insights on sign usage, automatically generated collocation profiles need to be developed under the special conditions and circumstances of the SL data available. One of these conditions is that many of the prerequisites for the automatic syntactic parsing of corpora are not yet available for SL. In this article we describe a collocation summary generated from DGS Corpus data which is used for WSD as well as in entry-writing. The summary works based on the glosses used for lemmatisation. In addition, we explore how other resources can be utilised to add an additional layer of semantic grouping to the collocation analysis. For this experimental approach we use glosses, concepts, and wordnet supersenses.}
}

% LREC 2020 sign language workshop paper (id 20025): linking the Public DGS Corpus, the DW-DGS dictionary and older lexical resources.
@inproceedings{muller:20025:sign-lang:lrec,
  author    = {M{\"u}ller, Anke and Hanke, Thomas and Konrad, Reiner and Langer, Gabriele and W{\"a}hl, Sabrina},
  title     = {From Dictionary to Corpus and Back Again -- Linking Heterogeneous Language Resources for {DGS}},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2020} 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives},
  maintitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  pages     = {157--164},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marseille, France},
  year      = {2020},
  month     = may,
  day       = {16},
  isbn      = {979-10-95546-54-2},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/20025.pdf},
  abstract  = {The Public DGS Corpus is published in two different formats, that is subtitled videos for lay persons and lemmatized and annotated transcripts and videos for experts. In addition, a draft version with the first set of preliminary entries of the DGS dictionary (DW-DGS) to be completed in 2023 is now online. The Public DGS Corpus and the DW-DGS are conceived of as stand-alone products, but are nevertheless closely interconnected to offer additional and complementary informative functions. In this paper we focus on linking the published products in order to provide users access to corpus and corpus-based dictionary in various, interrelated ways. We discuss which links are thought to be useful and what challenges the linking of the products poses. In addition we address the inclusion of links to other, older lexical resources (LSP dictionaries).}
}

% LREC 2018 sign language workshop paper (id 18018): publication formats of the Public DGS Corpus.
@inproceedings{jahn:18018:sign-lang:lrec,
  author    = {Jahn, Elena and Konrad, Reiner and Langer, Gabriele and Wagner, Sven and Hanke, Thomas},
  title     = {Publishing {DGS} {Corpus} Data: Different Formats for Different Needs},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  pages     = {83--90},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {12},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18018.pdf},
  abstract  = {In 2010--2012, the DGS-Korpus project collected a large corpus of German Sign Language (DGS). Now, a substantial subset of the data is published, namely the Public DGS Corpus. We describe the considerations and decisions taken regarding what part of the data is to be made public, the necessary quality assurance measures to the data preparation as well as the formats of the published data. The corpus is published in three different ways in order to fulfil the needs of a variety of different users. First of all, the data is made available to the language community whose members allowed us to share their recorded language. In addition, we hope that a large number of non-scientific users with various backgrounds will find the data useful. Last but not least, we aim to make the data attractive for users with a scientific background and provide the possibility to conduct studies based on it, irrespective of whether they are familiar with DGS or not.}
}

% LREC 2018 sign language workshop paper (id 18026): iLex queries and views supporting corpus-based lexicographic work on DGS.
@inproceedings{langer:18026:sign-lang:lrec,
  author    = {Langer, Gabriele and M{\"u}ller, Anke and W{\"a}hl, Sabrina},
  title     = {Queries and Views in {iLex} to Support Corpus-based Lexicographic Work on {German} {Sign} {Language} ({DGS})},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  pages     = {107--114},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {12},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18026.pdf},
  abstract  = {In the DGS-Korpus project the corpus is being used as the basis for lexicographic descriptions of signs in dictionary entries. In this process the lexicographers start from the data and type entry structures as found in the annotation database. While preparing a dictionary entry much of the work consists of manually going through a number of single tokens viewing the original data and available annotations. Findings are then categorised and summarised. However, a number of decisions and descriptions are also supported by pre-defined searches and views on the data. Supported areas include lexicographic lemmatisation (lemma sign establishment), selection of citation forms and variants, grammatical behaviour of signs, collocational patterns of use, regional distribution patterns and distribution of lexical or formational variants over different age groups. While we are still in the process of exploring the possibilities of a sign language corpus for lexicography, searches and views that have proven useful for our work are exemplified in this paper with regard to dictionary entries.}
}

% LREC 2018 sign language workshop paper (id 18025): using DGS-Feedback online survey data in lexicographic work.
@inproceedings{wahl:18025:sign-lang:lrec,
  author    = {W{\"a}hl, Sabrina and Langer, Gabriele and M{\"u}ller, Anke},
  title     = {Hand in Hand -- Using Data from an Online Survey System to Support Lexicographic Work},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  pages     = {199--206},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {12},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18025.pdf},
  abstract  = {In the DGS-Korpus project the lexicographic descriptions of signs are based on the available data of the DGS-Korpus, a reference corpus of German Sign Language (DGS). As this corpus is limited in size, number of informants recorded and topics included it is in some cases helpful to obtain additional information from the larger sign language community via an online voting system. This is done using the DGS-Feedback System, a tool especially designed for online surveys conducted using a sign language. With this tool further information on e.g. sign forms and meanings and their use and regional distribution has been elicited. Data from the DGS-Feedback is used in several ways during the lexicographic process of preparing dictionary entries to supplement data from the corpus. In the following the consideration of the data from the DGS-Feedback in relation to the corpus data in decision-making, analysis and lexicographic description is explained and discussed by way of examples.}
}

% LREC 2016 sign language workshop paper (id 16013): corpus tokens that should not count as evidence of sign use. No isbn field in this entry.
@inproceedings{langer:16013:sign-lang:lrec,
  author    = {Langer, Gabriele and Hanke, Thomas and Konrad, Reiner and K{\"o}nig, Susanne},
  title     = {``Non-tokens'': When Tokens Should not Count as Evidence of Sign Use},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  pages     = {137--142},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  year      = {2016},
  month     = may,
  day       = {28},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16013.pdf},
  abstract  = {Lemmatised corpora consist of tokens as instantiations of signs (types). Tokens usually count as evidences of the signs' use. Frequency of tokens is an important criterion for the lexical status of a sign. In combination with metadata on the signers' sociolinguistic backgrounds such as age, gender, and origin these tokens can also be analysed for regional and sociolinguistic variation. However, corpora may also contain instances of sign use that do not reflect the sign use of the person uttering them. This is particularly true for metalinguistic discussions of signs, malformed signing and slips of the hand as well as other phenomena such as copying/repeating signs of the interlocutors or from stimulus material. In our presentation we list and discuss different kinds of sign use (tokens) that should either not be counted as proof of a sign type at all or at least not as evidence of regular sign use by that particular person. Examples of these ``non-tokens'' are either taken from the DGS Corpus or from uploaded video answers of the DGS Feedback. We also discuss some implications on how to annotate these cases.}
}

% LREC 2012 sign language workshop paper (id 12029): tagging multi-sign lexemes and other multi-unit structures. No isbn field in this entry.
@inproceedings{hanke:12029:sign-lang:lrec,
  author    = {Hanke, Thomas and K{\"o}nig, Susanne and Konrad, Reiner and Langer, Gabriele},
  title     = {Towards tagging of multi-sign lexemes and other multi-unit structures},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  pages     = {67--68},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  day       = {27},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12029.pdf},
  abstract  = {With the building of larger sign language corpora tagging, handling and analysing large amounts of data reach a new level of complexity. Efficiency and interpersonal consistency in tagging are relevant issues as well as procedures and structures to identify and tag relevant linguistic units and structures beyond and above the manual sign level. We present and discuss problems and possible solution approaches (focussing on the working environment of iLex) of how to deal with multi-unit structures and more specifically multi-sign lexemes in annotation and lexicon building.}
}

% LREC 2012 sign language workshop paper (id 12023): type hierarchy and qualifiers in the iLex lexical database.
% The abstract spans several paragraphs separated by \par on lines of their own.
@inproceedings{konrad:12023:sign-lang:lrec,
  author    = {Konrad, Reiner and Hanke, Thomas and K{\"o}nig, Susanne and Langer, Gabriele and Matthes, Silke and Nishio, Rie and Regen, Anja},
  title     = {From form to function. A database approach to handle lexicon building and spotting token forms in sign languages},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  pages     = {87--94},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  day       = {27},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12023.pdf},
  abstract  = {Using a database with type entries that are linked to token tags in transcripts has the advantage that consistency in lemmatising is not depending on ID-glosses. In iLex types are organised in different levels. The type hierarchy allows for analysing form, iconic value, and conventionalised meanings of a sign (sub-types). Tokens can be linked either to types or sub-types.
\par
We expanded this structure for modelling sign inflection and modification as well as phonological variation. Differences between token and type form are grouped by features, called qualifiers, and specified by feature values (vocabularies). Built-in qualifiers allow for spotting the form difference when lemmatising. This facilitates lemma revision and helps to get a clear picture of how inflection, modification, or phonological variation is distributed among lexical signs. This is also a strong indicator for further POS tagging. In the long term this approach will extend the lexical database from citation-form closer to full-form.
\par
The paper will explain the type hierarchy and introduce the qualifiers used up-to-date. Further on the handling and how the data are displayed will be illustrated. As we report work in progress in the context of the DGS corpus project, the modelling is far from complete.}
}

% LREC 2012 sign language workshop paper (id 12017): distributional maps of regional variation in DGS color signs. No isbn field in this entry.
@inproceedings{langer:12017:sign-lang:lrec,
  author    = {Langer, Gabriele},
  title     = {A colorful first glance at data on regional variation extracted from the {DGS-Corpus}: With a focus on procedures},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  pages     = {101--108},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  day       = {27},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12017.pdf},
  abstract  = {In this work in progress procedures for analyzing and displaying distributional patterns of sign variants have been developed and tested on data for color signs elicited by the DGS Corpus Project. The data for this preliminary study were elicited as isolated signs and have been made accessible through spot annotations in iLex. The annotations had not been lemma revised but nevertheless revealed some interesting insights. Several color signs exhibited a high degree of variation. The distributional maps showed that a number of signs were mainly used in certain regions and thus provided evidence on dialectal differences within DGS. The relevant information necessary to generate distributional maps have been directly extracted via SQL-statements from the corpus and fed into R. The approach is data driven. The distributional maps show either the distribution of one sign form (variant) or of several different variants in relation to each other. Analyses of regional distribution as displayed by the distributional maps may support the annotation and lemma revision process and are a valuable basis for a lexicographical description of signs and their use as needed for compiling dictionary entries. A refined procedure to take multiple regional influences on informants into account for analysis is proposed.}
}

% LREC 2008 sign language workshop entry (id 08018): poster introducing the DGS Corpus Project and its corpus-based dictionary.
% The abstract spans several paragraphs separated by \par on lines of their own and ends with its own reference list.
@inproceedings{prillwitz:08018:sign-lang:lrec,
  author    = {Prillwitz, Siegmund and Hanke, Thomas and K{\"o}nig, Susanne and Konrad, Reiner and Langer, Gabriele and Schwarz, Arvid},
  title     = {{DGS} {Corpus} Project -- Development of a Corpus Based Electronic Dictionary {German} {Sign} {Language} / {German}},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Hanke, Thomas and Thoutenhoofd, Ernst D. and Zwitserlood, Inge},
  booktitle = {Proceedings of the {LREC2008} 3rd Workshop on the Representation and Processing of Sign Languages: Construction and Exploitation of Sign Language Corpora},
  maintitle = {6th International Conference on Language Resources and Evaluation ({LREC} 2008)},
  pages     = {159--164},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marrakech, Morocco},
  year      = {2008},
  month     = jun,
  day       = {1},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/08018.pdf},
  abstract  = {The poster introduces a 15-year project accepted for funding by the Hamburg Academy of Sciences. The proposed project aims to combine the collection of a large corpus with the development and production of a comprehensive, corpus based electronic dictionary of German Sign Language (DGS).
\par
To this aim, a corpus of approximately 350--400 hours from 250--300 informants will be collected in a variety of elicitation settings. This is, in size and scope, comparable to large spoken language corpora. The design allows the use of the corpus for various tasks. These are, amongst others: (i) the validation by corpus data of a basic vocabulary compiled from different published sources; (ii) research on DGS grammar based on detailed transcription data; (iii) identification of different meanings and collocations of a sign by appropriate contexts. Furthermore, the design anticipates a comparative sociolinguistic study comparable in kind and quality to Lucas et al. (2001) and Schembri/Johnston (2004). The corpus thus provides a starting point for research deep into the structure and lexicon of German Sign Language as well as into the visual-gestural mode of sign languages in general. Parts of the annotated corpus, i.e. transcription files with English translations, will be made available online to the international linguistic community.
\par
The corpus data will undergo two stages of transcription. First, a basic transcription serves to segment utterances and to identify lexical items and thus provides a first access to the data. Second, approximately 50 {\%} of the transcriptions will be transcribed again in more detail. This serves the purpose of clarifying grammatical questions for the dictionary grammar as well as dealing with lexicological and lexicographic issues. The annotation of the corpus will be closely intertwined with the requirements of lexical analysis. A high quality of transcription will be achieved through continuous verification by native signers. A relational database (iLex, cf. Hanke/Storz) supports this process, especially the consistency of type-token matching.
\par
Lexical analysis and lexicographic decisions concerning for example lexical status, language change, and lemma selection will be continuously validated by a deaf focus group and a general voting web interface which will be open for all interested members of the deaf community.
\par
The dictionary will be entirely based on the corpus with respect to the list of lemmas to be included but decidedly exceed a conglomeration of corpus references. Rather, we will systematically abstract from the references to obtain a generalized description of lexical items. Examples of sign uses will be taken directly from the corpus.
\par
For cross-linguistic research and comparability of results across projects, we consider it essential to push standardisation or at least compatibility of annotation and transcription conventions. To reach this, we have arranged cooperations with some other national corpus projects and look forward to cooperate with more projects currently in preparation.
\par
References
\par
Lucas, Ceil / Bayley, Robert / Valli, Clayton (2001): Sociolinguistic Variation in American Sign Language. Washington, DC: Gallaudet Univ. Press.
\par
Schembri, Adam / Johnston, Trevor (2004): Sociolinguistic variation in Auslan (Australian Sign Language). A research project in progress. In: Deaf Worlds 20 (1), 78--90.}
}