% Workshop paper 18021 (pp. 163--166) in the LREC 2018 sign-language workshop proceedings.
% The abstract is kept as one logical line; it embeds the paper's own reference list.
@inproceedings{oviedo:18021:sign-lang:lrec,
  author    = {Oviedo, Alejandro and Kaul, Thomas and Klinner, Leonid and Griebel, Reiner},
  title     = {The {Cologne} {Corpus} of {German} {Sign} {Language} as {L2} ({C/CSL2}): Current Development Stand},
  pages     = {163--166},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  day       = {12},
  month     = may,
  year      = {2018},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18021.pdf},
  abstract  = {Since 2016 (Kaul et al., 2016) a German Sign Language (DGS) learner corpus (Granger et al., 2015) it has been building up at the University of Cologne. Primary data consist of around 60 hours of signed discourse in more than 1,250 individual files produced by 350 DGS hearing learners (312 female / 38 male) whose mother tongue is German. Data has been collected from A1 to C1 CEFR (Council of Europe, 2001) proficiency levels. A similar number of monologues and dialogues is included. Monologues (average duration 2.5 minutes) are mostly induced by an illustration or a video. Dialogues have an average duration of 8 minutes. Dialogues corresponding to the levels A1 to B2 are performed between the informant and a Deaf teacher. At advanced level (C1) dialogues show an interaction between two students. Metadata related to the videos includes age and gender of the informants as well as the proficiency level and semester of data collection. A part of the data corresponds to a longitudinal learner corpus (Granger et al., 2015). This is the case of a group of students who visited DGS-courses of different proficiency levels between mid-2015 and the end of 2017 and were filmed at different times along that period. The corpus is a work in progress. Our primary data are constantly being extended, since each semester new videos are added to the corpus (the tests presented by the students in the DGS courses as well as a number of videos produced and analyzed by the students in linguistics courses). Only around 6{\%} of the videos have received so far transcription: German glosses, translation into German and some linguistic tags have been included in ELAN (Crasborn {\&} Sloetjes, 2008) files. Lemmatisation (Johnston, 2010) has been oriented using a lexical database of around 8,000 signs previously produced by our university to serve as teaching material. Current transcriptions also include a series of annotation lines with controlled vocabulary for word-classes, disfluencies (Oviedo et al., in press) and deviations from the DGS standard at phonetic-phonological, morphological and syntactic levels. The biggest challenge faced so far in the development of our corpus is the reluctance of students to authorize the use of the corpus outside our research group. We are only authorized to transcribe the videos and use the transcriptions as a data source. However, a small group of students have up to now authorized us to show their videos and/or video-pictures to external audiences. One strategy that has proved to be useful in obtaining data that can be shared is that of linking students to the tasks of transcription and linguistic analysis. During the 2016/2017 winter semester we held a seminar with masters students to train them in the transcription of their own signed recordings. At the end of the course, the majority of the participants gave us permission to use their videos in public demonstrations. References Kaul, Th.; Oviedo, A.; Griebel, R.; Klinner, L.; Pr{\"u}fer, T. {\&} Krumpen, M. (2016). C/CSL2, The Cologne Corpus of Sign Language as a Second Language. Poster presented at TaLC 12, University of Giessen, held on 21th July 2016. Granger, S.; Guilquin, G. {\&} Meunier, F. (Eds.) (2015). The Cambridge Handbook of Learner Corpus Research. Cambridge: Cambridge University Press. Council of Europe. 2001. Common European framework of reference for languages: Learning, teaching, assessment. Cambridge: Press Syndicate of the University of Cambridge. Crasborn, O. {\&} Sloetjes, H.. 2008. Enhanced ELAN functionality for sign language corpora. In Onno Crasborn, Thomas Hanke, Eleni Efthimiou, Inge Zwitserlood {\&} Ernst Thoutenhoofd (Eds.). Proceedings of LREC 2008, Sixth International Conference on Language Resources and Evaluation. Paris: ELDA, pp. 39-43. Johnston, T. 2010. From archive to corpus: Transcription and annotation in the creation of signed language corpora. International Journal of Corpus Linguistics, 15(1), pp. 106-131. Oviedo, A.; Kaul, Th.; Urbann, K.; Griebel, R. {\&} Klinner, L. (in press): Eine Ann{\"a}herung zu den Pausen als Fl{\"u}ssigkeitsfaktoren in Deutscher Geb{\"a}rdensprache als L1. Das Zeichen 32(108). --to appear in March 2018.}
}

% Workshop paper 18004 (pp. 167--170) in the LREC 2018 sign-language workshop proceedings.
% {Data} is braced in the title because it follows a period, where sentence-casing styles would lowercase it.
@inproceedings{oviedo:18004:sign-lang:lrec,
  author    = {Oviedo, Alejandro and Ram{\'i}rez Valerio, Christian},
  title     = {The {LESCO} Corpus. {Data} for the Description of {Costa} {Rican} {Sign} {Language}},
  pages     = {167--170},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  day       = {12},
  month     = may,
  year      = {2018},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18004.pdf},
  abstract  = {LESCO is the most widely used sign language among Deaf people in Costa Rica (Woodward 1992). There are no precise figures available on the number of LESCO users, who live mostly in urban areas of the Central Valley of the country. Between 2010 and 2013 the Costa Rican government funded a project for a first linguistic description of LESCO, as a step towards recognition of the rights of Deaf people. The study of LESCO was based on the Corpus LESCO, a group of transcribed videos collected from Deaf signers from the main cities of the country along 2011. The project was carried on by a group consisted of five Deaf native LESCO-users and a hearing person with a good command of this sign language. A series of interviews were done over several months throughout the country allowing a pre-selection of 102 potential informants (all of them attesting a relative early LESCO acquisition, frequent use of LESCO, high degree of hearing-impairment, etc.). These people were video-recorded and so nearly 200 video-files (over 2000 minutes of footage) were obtained. Films included induced stories, life stories, free dialogues and interviews. For an initial description of the language, a selection of 44 files was transcribed on the basis of ELAN. Variants of each sign were identified and assigned to a particular lexeme. This process allowed the definition of more than 1,500 lemmas (Johnston 2010) from a total of around 14,000 lexical occurrences. The Corpus LESCO underpinned the construction of a basic dictionary (1,100 entries) and the drafting of a basic descriptive grammar of this sign language. Both dictionary and grammar are available online since the beginning of 2014 (www.cenarec-lesco.org). These works are the second corpus-based descriptions of a signed language in Spanish speaking Latin America. A previous experienced was carried of in Colombia between 2000 and 2005 (Oviedo 2001, CyC 2005). The initial project did not include the extension of the corpus. Both the Corpus LESCO and the rest of videos collected during the project are archived by the institution that administered the project in Costa Rica. The poster offers details about the process of building up the corpus and about its main characteristics. References CyC (Instituto Caro y Cuervo) (2005). Diccionario B{\'a}sico de la Lengua de Se{\~n}as Colombiana. Bogot{\'a}: INSOR-Instituto Caro y Cuervo. Johnston, T. (2010). From archive to corpus: Transcription and annotation in the creation of signed language corpora. International Journal of Corpus Linguistics, 15(1). pp. 106-131. Oviedo, A. (2001). Apuntes para una gram{\'a}tica de la Lengua de Se{\~n}as Colombiana. Cali: Universidad del Valle/INSOR. Woodward, J. (1991). Sign language varieties in Costa Rica. Sign Language Studies (20), pp. 329-334.}
}