This paper presents MEDIAPI-SKEL, a 2D-skeleton database of French Sign Language videos aligned with French subtitles. The corpus contains 27 hours of video with body, face and hand keypoints, aligned to subtitles with a vocabulary size of 17k tokens. In contrast to existing sign language corpora, such as videos produced under laboratory conditions or translations of TV programs into sign language, this database is constructed from original sign language content largely produced by deaf journalists at the media company Média-Pi. Moreover, the videos are accurately synchronized with French subtitles. We propose three challenges appropriate for this corpus that are related to processing units of signs in context: automatic alignment of text and video, semantic segmentation of sign language, and production of video-text embeddings for cross-modal retrieval. These challenges deviate from the classic task of identifying a limited number of lexical signs in a video stream.
@inproceedings{bull-etal-2020-mediapi:lrec,
  author    = {Bull, Hannah and Braffort, Annelies and Gouiff{\`e}s, Mich{\`e}le},
  title     = {{MEDIAPI-SKEL} - A {2D}-Skeleton Video Database of {French} {Sign} {Language} With Aligned {French} Subtitles},
  pages     = {6063--6068},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille, France},
  day       = {11--16},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  language  = {english},
  url       = {https://aclanthology.org/2020.lrec-1.743},
}