In this article, we present the first academic comparable corpus involving written French and French Sign Language. After explaining our initial motivation to build a parallel set of such data, especially in the context of our work on Sign Language modelling and our prospect of machine translation into Sign Language, we present the main problems posed when mixing language channels and modalities (oral, written, signed), discussing the translation-vs-interpretation narrative in particular. We describe the process followed to guarantee feature coverage and exploitable results despite a serious cost limitation, the data being collected from professional translations. We conclude with a few uses and prospects of the corpus.
@comment{Workshop paper: booktitle holds the workshop proceedings title (name: theme subtitle), maintitle the host conference.}
@inproceedings{filhol:70003:bucc:lrec,
  author    = {Filhol, Michael and Tannier, Xavier},
  title     = {Construction of a {French--LSF} corpus},
  pages     = {2--5},
  editor    = {Zweigenbaum, Pierre and Sharoff, Serge and Rapp, Reinhard and Aker, Ahmet and Vogel, Stephan},
  booktitle = {Proceedings of the 7th Workshop on Building and Using Comparable Corpora: Building Resources for Machine Translation Research},
  maintitle = {9th International Conference on Language Resources and Evaluation ({LREC} 2014)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Reykjavik, Iceland},
  day       = {27},
  month     = may,
  year      = {2014},
  language  = {english},
  url       = {https://comparable.limsi.fr/bucc2014/3.pdf},
}