We consider a non-intrusive computer-vision method for measuring the motion of a person performing natural signing in video recordings. The quality and usefulness of the method are compared to a traditional marker-based motion capture set-up. The accuracy of descriptors extracted from video footage is assessed qualitatively in the context of sign language analysis by examining whether the shapes of the curves produced by the different means resemble one another in sequences where the shape could be a source of valuable linguistic information. Then, a quantitative comparison is performed, first by correlating the computer-vision-based descriptors with the variables gathered with the motion capture equipment. Finally, multivariate linear and non-linear regression methods are applied for predicting the motion capture variables based on combinations of computer vision descriptors. The results show that even the simple computer vision method evaluated in this paper can produce promisingly good results for assisting researchers working on sign language analysis.
% Conference paper in the LREC 2012 proceedings. Fields are grouped as: authorship, venue, date, identifiers.
@inproceedings{karppa-etal-2012-comparing:lrec,
  author    = {Karppa, Matti and Jantunen, Tommi and Viitaniemi, Ville and Laaksonen, Jorma and Burger, Birgitta and De Weerdt, Danny},
  title     = {Comparing computer vision analysis of signed language video with motion capture recordings},
  booktitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u g}an, Mehmet U{\u g}ur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  pages     = {2421--2425},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
  day       = {21--27},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/321_Paper.pdf},
}