Lemmatised corpora consist of tokens as instantiations of signs (types). Tokens usually count as evidence of the signs’ use. Frequency of tokens is an important criterion for the lexical status of a sign. In combination with metadata on the signers’ sociolinguistic backgrounds such as age, gender, and origin, these tokens can also be analysed for regional and sociolinguistic variation. However, corpora may also contain instances of sign use that do not reflect the sign use of the person uttering them. This is particularly true for metalinguistic discussions of signs, malformed signing, and slips of the hand, as well as other phenomena such as copying/repeating signs of the interlocutors or from stimulus material. In our presentation, we list and discuss different kinds of sign use (tokens) that should either not be counted as proof of a sign type at all or at least not as evidence of regular sign use by that particular person. Examples of these “non-tokens” are either taken from the DGS Corpus or from uploaded video answers of the DGS Feedback. We also discuss some implications for how to annotate these cases.
% Workshop paper in the LREC 2016 sign-language workshop proceedings.
% The quoted term at the start of the title is brace-protected so that
% sentence-casing bibliography styles do not lowercase "Non-tokens".
@inproceedings{langer:16013:sign-lang:lrec,
  author    = {Langer, Gabriele and Hanke, Thomas and Konrad, Reiner and K{\"o}nig, Susanne},
  title     = {{``Non-tokens''}: When Tokens Should not Count as Evidence of Sign Use},
  pages     = {137--142},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16013.pdf}
}