Here we present the POLYTROPON parallel corpus for the language pair Greek Sign Language (GSL) – Greek, which was created and annotated with the aim of serving as a golden corpus available to the community of SL technologies for experimentation with various approaches to SL processing, focusing on machine learning for SL recognition, machine translation (MT) and information retrieval. The corpus comprises 3653 clauses in three repetitions each, captured in front view by means of one HD camera and one Kinect camera. Corpus annotation has made it possible to extract initial feature sets with the aim of reaching a level of abstraction for GSL close to the one currently available for Greek language representations, exploiting the inherent characteristics of the language with a view to running initial deep learning experiments on GSL data, where both words and signs may be represented as vectors of characteristics that allow dependency tree structure representations of input text and signed clauses, such as those created with the Tree Editor TrEd 2.0.
@inproceedings{efthimiou:18043:sign-lang:lrec,
  author    = {Efthimiou, Eleni and Vasilaki, Kyriaki and Fotinea, Stavroula-Evita and Vacalopoulou, Anna and Goulas, Theodoros and Dimou, Athanasia-Lida},
  title     = {The {POLYTROPON} Parallel Corpus},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  pages     = {39--44},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {12},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18043.pdf},
}