This paper introduces the RWTH-PHOENIX-Weather corpus, a video-based, large vocabulary corpus of German Sign Language suitable for statistical sign language recognition and translation. In contrast to most available sign language data collections, the RWTH-PHOENIX-Weather corpus has not been recorded for linguistic research but for use in statistical pattern recognition. The corpus contains weather forecasts recorded from German public TV which are manually annotated using glosses distinguishing sign variants, and time boundaries have been marked on the sentence and the gloss level. Further, the spoken German weather forecast has been transcribed in a semi-automatic fashion using a state-of-the-art automatic speech recognition system. Moreover, an additional translation of the glosses into spoken German has been created to capture allowable translation variability. In addition to the corpus, experimental baseline results for hand and head tracking, statistical sign language recognition and translation are presented.
@comment{Conference paper (LREC 2012) introducing the RWTH-PHOENIX-Weather sign language corpus. Fields are grouped as: who/what, containing proceedings, publication details, date, identifiers.}
@inproceedings{forster-etal-2012-rwth:lrec,
  author    = {Forster, Jens and Schmidt, Christoph and Hoyoux, Thomas and Koller, Oscar and Zelle, Uwe and Piater, Justus and Ney, Hermann},
  title     = {{RWTH}-{PHOENIX}-{Weather}: A Large Vocabulary Sign Language Recognition and Translation Corpus},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u g}an, Mehmet U{\u g}ur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  pages     = {3785--3789},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  day       = {21--27},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/844_Paper.pdf},
}