@inproceedings{jantunen:16006:sign-lang:lrec,
  author    = {Jantunen, Tommi and Pippuri, Outi and Wainio, Tuija and Puupponen, Anna and Laaksonen, Jorma},
  title     = {Annotated video corpus on {FinSL} with {Kinect} and computer-vision data},
  pages     = {93--100},
  editor    = {Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2016} 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  maintitle = {10th International Conference on Language Resources and Evaluation ({LREC} 2016)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Portoro{\v z}, Slovenia},
  day       = {28},
  month     = may,
  year      = {2016},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/16006.html},
  abstract  = {This paper presents an annotated video corpus of Finnish Sign Language (FinSL) to which Kinect and computer-vision data have been appended. The video material consists of signed retellings of the stories \emph{Snowman} and \emph{Frog, where are you?}, elicited from 12 native FinSL signers in a dialogue setting. The recordings were carried out with 6 cameras directed toward the signers from different angles, and 6 signers were also recorded with a Kinect motion- and depth-sensing input device. All the material has been annotated in ELAN for signs, translations, grammar, and prosody. To further facilitate research into FinSL prosody, computer-vision data describing the head movements and the aperture changes of the eyes and mouth of all the signers have been added to the corpus. The total duration of the material is 45 minutes, and the part of it permitted by the research consents is available for research purposes via the LAT online service of the Language Bank of Finland. The paper briefly demonstrates the linguistic use of the corpus.}
}

@inproceedings{luzardo:14021:sign-lang:lrec,
  author    = {Luzardo, Marcos and Viitaniemi, Ville and Karppa, Matti and Laaksonen, Jorma and Jantunen, Tommi},
  title     = {Estimating head pose and state of facial elements for sign language video},
  pages     = {105--112},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2014} 6th Workshop on the Representation and Processing of Sign Languages: Beyond the Manual Channel},
  maintitle = {9th International Conference on Language Resources and Evaluation ({LREC} 2014)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Reykjavik, Iceland},
  day       = {31},
  month     = may,
  year      = {2014},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/14021.html},
  abstract  = {In this work we present methods for the automatic estimation of non-manual gestures in sign language videos. More specifically, we study the estimation of three head pose angles (yaw, pitch, roll) and the state of facial elements (eyebrow position, eye openness, and mouth state). This kind of estimation facilitates automatic annotation of sign language videos and promotes more prolific production of annotated sign language corpora. The proposed estimation methods are incorporated in our publicly available SLMotion software package for sign language video processing and analysis. Our method implements a model-based approach: for head pose we employ facial landmarks and skin masks as features, and estimate yaw and pitch angles by regression and roll using a geometric measure; for the state of facial elements we use geometric information about the facial elements as features, and estimate quantized states using a classification algorithm. We evaluate the results of our proposed methods in quantitative and qualitative experiments.}
}

@inproceedings{koskela:08002:sign-lang:lrec,
  author    = {Koskela, Markus and Laaksonen, Jorma and Jantunen, Tommi and Takkinen, Ritva and Rain{\`o}, P{\"a}ivi and Raike, Antti},
  title     = {Content-Based Video Analysis and Access for {Finnish} {Sign} {Language} -- A Multidisciplinary Research Project},
  pages     = {101--104},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Hanke, Thomas and Thoutenhoofd, Ernst D. and Zwitserlood, Inge},
  booktitle = {Proceedings of the {LREC2008} 3rd Workshop on the Representation and Processing of Sign Languages: Construction and Exploitation of Sign Language Corpora},
  maintitle = {6th International Conference on Language Resources and Evaluation ({LREC} 2008)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marrakech, Morocco},
  day       = {1},
  month     = jun,
  year      = {2008},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/08002.html},
  abstract  = {In this research project, computer vision techniques for the recognition and analysis of gestures and facial expressions from video will be developed and applied to the processing of sign language. This is a collaborative project between four partners: Helsinki University of Technology, University of Art and Design, University of Jyv{\"a}skyl{\"a}, and the Finnish Association of the Deaf. It has several objectives, four of which are presented in more detail in this poster.
\par
The first objective is to develop novel methods for content-based processing and analysis of sign language video recorded with a single camera. The PicSOM retrieval system framework, developed at the Helsinki University of Technology for content-based analysis of multimedia data, will be adapted to continuous signing to facilitate automatic and semi-automatic analysis of sign language videos.
\par
The second objective of the project is to develop a computer system which can both (i) automatically indicate meaningful signs and other gesture-like sequences in a video signal containing natural sign language data, and (ii) disregard parts of the signal which do not count as such sequences. In other words, the goal is to develop an automated mechanism which can identify sign and gesture boundaries and indicate, from the video, the sequences that correspond to signs and gestures. The system is not expected to be able to tell the meanings of these sequences.
\par
Automatic segmentation of recorded continuous signing is an important first step in the automatic processing of sign language videos and in online applications. Our hypothesis is that the temporal boundaries of different sign gestures can be detected, and signs and non-signs (intersign transitions, other movements) classified, using a combination of a hand motion detector, still-image multimodal analysis, facial expression analysis, and other non-manual signal recognition. The PicSOM system inherently supports such fusion of different features.
\par
The third objective is linked to generating an example-based corpus for FinSL. Increasing amounts of recorded video data of the language exist, but there are almost no means of utilizing them efficiently, owing to missing indexing and the lack of methods for content-based access. The studied methods could facilitate a leap forward in establishing the corpus.
\par
The fourth objective is a feasibility study on the implementation of mobile video access to sign language dictionaries and corpora. Currently, an existing dictionary can be searched by giving a rough description of the location, motion, and handshape of a sign. The automatic content-based analysis methods could be applied to online mobile phone videos, thus enabling sign language access to dictionaries and corpora.}
}

@inproceedings{karppa-etal-2014-slmotion:lrec,
  author    = {Karppa, Matti and Viitaniemi, Ville and Luzardo, Marcos and Laaksonen, Jorma and Jantunen, Tommi},
  title     = {{SLMotion} -- An extensible sign language oriented video analysis tool},
  pages     = {1886--1891},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {9th International Conference on Language Resources and Evaluation ({LREC} 2014)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Reykjavik, Iceland},
  day       = {26--31},
  month     = may,
  year      = {2014},
  isbn      = {978-2-9517408-8-4},
  language  = {english},
  url       = {https://aclanthology.org/L14-1208},
  abstract  = {We present a software toolkit called SLMotion which provides a framework for automatic and semiautomatic analysis, feature extraction and annotation of individual sign language videos, and which can easily be adapted to batch processing of entire sign language corpora. The program follows a modular design, and exposes a Numpy-compatible Python application programming interface that makes it easy and convenient to extend its functionality through scripting. The program includes support for exporting the annotations in ELAN format. The program is released as free software, and is available for GNU/Linux and MacOS platforms.}
}

@inproceedings{viitaniemi-etal-2014-pot:lrec,
  author    = {Viitaniemi, Ville and Jantunen, Tommi and Savolainen, Leena and Karppa, Matti and Laaksonen, Jorma},
  title     = {{S-pot} -- A benchmark in spotting signs within continuous signing},
  pages     = {1892--1897},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {9th International Conference on Language Resources and Evaluation ({LREC} 2014)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Reykjavik, Iceland},
  day       = {26--31},
  month     = may,
  year      = {2014},
  isbn      = {978-2-9517408-8-4},
  language  = {english},
  url       = {https://aclanthology.org/L14-1377},
  abstract  = {In this paper we present S-pot, a benchmark setting for evaluating the performance of automatic sign spotting in continuous sign language videos. The benchmark includes 5539 video files of Finnish Sign Language, ground-truth sign spotting results, a tool for assessing spottings against the ground truth, and a repository for storing information on the results. In addition, we will make our sign detection system, and the results obtained with it, publicly available as a baseline for comparison and further development.}
}

@inproceedings{karppa-etal-2012-comparing:lrec,
  author    = {Karppa, Matti and Jantunen, Tommi and Viitaniemi, Ville and Laaksonen, Jorma and Burger, Birgitta and De Weerdt, Danny},
  title     = {Comparing computer vision analysis of signed language video with motion capture recordings},
  pages     = {2421--2425},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u g}an, Mehmet U{\u g}ur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  booktitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {21--27},
  month     = may,
  year      = {2012},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
  url       = {https://aclanthology.org/L12-1152},
  abstract  = {We consider a non-intrusive computer-vision method for measuring the motion of a person performing natural signing in video recordings. The quality and usefulness of the method are compared to those of a traditional marker-based motion capture set-up. The accuracy of descriptors extracted from video footage is assessed qualitatively in the context of sign language analysis by examining whether the shapes of the curves produced by the different means resemble one another in sequences where the shape could be a source of valuable linguistic information. Quantitative comparison is then performed, first by correlating the computer-vision-based descriptors with the variables gathered with the motion capture equipment. Finally, multivariate linear and non-linear regression methods are applied to predict the motion capture variables from combinations of computer vision descriptors. The results show that even the simple computer vision method evaluated in this paper can produce promisingly good results for assisting researchers working on sign language analysis.}
}

