% TVB-HKSL-News corpus paper, LREC-COLING 2024 main conference (see the url field).
% The language name "Hong Kong Sign Language" and the acronym "TV" are
% brace-protected in the title so that sentence-casing bibliography styles
% do not lowercase them; this matches the other HKSL entries in this file.
@inproceedings{niu-etal-2024-hksl:lrec,
  author    = {Niu, Zhe and Zuo, Ronglai and Mak, Brian and Wei, Fangyun},
  title     = {A {Hong} {Kong} {Sign} {Language} Corpus Collected from Sign-interpreted {TV} News},
  pages     = {636--646},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  publisher = {{ELRA Language Resources Association (ELRA) and the International Committee on Computational Linguistics (ICCL)}},
  address   = {Torino, Italy},
  day       = {20--25},
  month     = may,
  year      = {2024},
  isbn      = {978-2-493814-10-4},
  language  = {english},
  url       = {https://aclanthology.org/2024.lrec-main.55},
  abstract  = {This paper introduces TVB-HKSL-News, a new Hong Kong sign language (HKSL) dataset collected from a TV news program over a period of 7 months. The dataset is collected to enrich resources for HKSL and support research in large-vocabulary continuous sign language recognition (SLR) and translation (SLT). It consists of 16.07 hours of sign videos of two signers with a vocabulary of 6,515 glosses (for SLR) and 2,850 Chinese characters or 18K Chinese words (for SLT). One signer has 11.66 hours of sign videos and the other has 4.41 hours. One objective in building the dataset is to support the investigation of how well large-vocabulary continuous sign language recognition/translation can be done for a single signer given a (relatively) large amount of his/her training data, which could potentially lead to the development of new modeling methods. Besides, most parts of the data collection pipeline are automated with little human intervention; we believe that our collection method can be scaled up to collect more sign language data easily for SLT in the future for any sign languages if such sign-interpreted videos are available. We also run a SOTA SLR/SLT model on the dataset and get a baseline SLR word error rate of 34.08{\%} and a baseline SLT BLEU-4 score of 23.58 for benchmarking future research on the dataset.}
}

% Workshop paper describing the Hong Kong Sign Language Browser
% (LREC 2018, 8th Workshop on the Representation and Processing of Sign Languages).
% Fields are grouped as: what (author/title), where (proceedings, editors,
% publisher), when (year/month/day), then locators and identifiers.
@inproceedings{sze:18037:sign-lang:lrec,
  author    = {Sze, Felix and Lau, Kloris and Yu, Kevin},
  title     = {The {Hong} {Kong} {Sign} {Language} Browser},
  booktitle = {Proceedings of the {LREC2018} 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community},
  maintitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Bono, Mayumi and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Hochgesang, Julie A. and Kristoffersen, Jette and Mesch, Johanna and Osugi, Yutaka},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {12},
  pages     = {185--188},
  isbn      = {979-10-95546-01-6},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/18037.html},
  abstract  = {This paper describes the design of the Hong Kong Sign Language Browser which was established for providing accessible online resources on the lexical variations of HKSL in order to support the promotion of sign language and other sign-related services in the local community. With continuous funding support from the government since 2012, local Deaf organizations and Deaf signers of diverse backgrounds are invited to contribute their sign language knowledge in the data collection and evaluation process. Each year Deaf informants proficient in HKSL are invited to CSLDS to provide signing data to a pre-defined list of lexical targets. Their signing data are analyzed and variants are identified. These video data are then placed in an online platform for local Deaf organizations for rating and comments, and they can contribute data as well if there are additional variants not yet covered in the initial round of data collection. Once finalized, the lexical variants are placed in the Hong Kong Sign Language Browser for free public access. For each lexical target, each variant is indicated by a different color. Variants that are more commonly used and seen by Deaf organizations are listed first whereas the least common variants are listed last.}
}

% LREC 2018 main-conference paper on online sign language dictionaries and
% lexicostatistics (no abstract recorded for this entry).
% Fields are grouped as: what (author/title), where (proceedings, editors,
% publisher), when (year/month/day), then locators and identifiers.
@inproceedings{yu-etal-2018-sign:lrec,
  author    = {Yu, Shi and Geraci, Carlo and Abner, Natasha},
  title     = {Sign Languages and the Online World Online Dictionaries {\&} Lexicostatistics},
  booktitle = {11th International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo,  H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  day       = {7--12},
  pages     = {4235--4240},
  isbn      = {979-10-95546-00-9},
  language  = {english},
  url       = {https://aclanthology.org/L18-1668}
}

% Workshop paper on Deaf-led sign language documentation in the Asia-Pacific region
% (LREC 2012, 5th Workshop on the Representation and Processing of Sign Languages).
% "Deaf-centred" is brace-protected in the title: the capital D is used
% deliberately (as throughout the abstract) and must survive sentence-casing styles.
@inproceedings{sze:12022:sign-lang:lrec,
  author    = {Sze, Felix and Woodward, James and Tang, Gladys and Lee, Jafi and Cheng, Ka-Yiu and Mak, Joe},
  title     = {Sign Language Documentation in the {Asia-Pacific} Region: A {Deaf-centred} approach},
  pages     = {155--158},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and Kristoffersen, Jette and Mesch, Johanna},
  booktitle = {Proceedings of the {LREC2012} 5th Workshop on the Representation and Processing of Sign Languages: Interactions between Corpus and Lexicon},
  maintitle = {8th International Conference on Language Resources and Evaluation ({LREC} 2012)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Istanbul, Turkey},
  day       = {27},
  month     = may,
  year      = {2012},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/12022.html},
  abstract  = {In this paper, we would like to share our experience in training up Deaf individuals from the Asian-Pacific countries to compile sign language dictionaries and conduct sign language research through the `Asia-Pacific Sign Linguistics Research and Training Program' at the Chinese University of Hong Kong. The program, fully funded by the Nippon Foundation, is a multi-country, multi-phase project which aims at nurturing Deaf people to become sign language researchers through a series of credit-bearing training programs at the diploma and higher diploma levels. The training covers three major areas: Sign Linguistics, Sign Language Teaching and English Literacy. One important part of the training involves the production of sample dictionaries of the Deaf trainees' own sign languages. To confirm the dictionary entries, the Deaf trainees conduct surveys in the Deaf communities in their home countries from time to time and as a result a substantial amount of lexical variants have been collected. An online database, called the Asian SignBank, is now being developed to house these lexical data and facilitate further research. Apart from basic search functions, the SignBank also incorporates detailed phonetic features of individual signs and a materials-generating function which allows quicker production of dictionaries in the future.}
}

% Workshop paper introducing the Hong Kong Sign Language Child Language Corpus
% and its ELAN-to-CLAN transcription workflow
% (LREC 2008, 3rd Workshop on the Representation and Processing of Sign Languages).
% Fields are grouped as: what (author/title), where (proceedings, editors,
% publisher), when (year/month/day), then locators and identifiers.
@inproceedings{fung:08034:sign-lang:lrec,
  author    = {Fung, Cat H-M and Lam, Scholastica and Mak, Joe and Tang, Gladys},
  title     = {Establishment of a corpus of {Hong} {Kong} {Sign} {Language} acquisition data: from {ELAN} to {CLAN}},
  booktitle = {Proceedings of the {LREC2008} 3rd Workshop on the Representation and Processing of Sign Languages: Construction and Exploitation of Sign Language Corpora},
  maintitle = {6th International Conference on Language Resources and Evaluation ({LREC} 2008)},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Hanke, Thomas and Thoutenhoofd, Ernst D. and Zwitserlood, Inge},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marrakech, Morocco},
  year      = {2008},
  month     = jun,
  day       = {1},
  pages     = {17--21},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/08034.html},
  abstract  = {This paper introduces the Hong Kong Sign Language Child Language Corpus currently developed by the Centre for Sign Linguistics and Deaf Studies, the Chinese University of Hong Kong. When completed, the corpus will include both longitudinal and cross-sectional data of deaf children acquiring Hong Kong Sign Language. Our research team has decided to establish a meaning-based transcription system compatible with both the ELAN and CLAN programs in order to facilitate future linguistic analysis. The ELAN program, which allows multiple-tier data entries and synchronization of video data with glosses, is an ideal tool for transcribing and viewing sign language data. The CLAN program, on the other hand, has a wide range of well-developed functions such as auto-tagging and the `kwal' function for data search and they are extremely useful for conducting quantitative analyses. With add-on programs developed by our research team and additional functions in CLAN developed by the CHILDES research team, the transcribed data are transferable from the ELAN format to CLAN format, thus allowing researchers to optimize the use of both programs in conducting different types of linguistic analysis on the acquisition data.}
}

% Workshop paper on the transcription system for Hong Kong Sign Language
% acquisition data, focusing on how simultaneous constructions are glossed
% (LREC 2008, 3rd Workshop on the Representation and Processing of Sign Languages).
% The abstract spans several paragraphs: breaks are encoded as explicit \par
% commands on their own lines, so keep them in place when editing the text.
% NOTE(review): the long hyphen run ending in ">" in Example (2) is stored as
% plain text; it may typeset as dash ligatures (and ">" may need math mode) if a
% style ever prints the abstract -- confirm before rendering this field.
@inproceedings{fung:08001:sign-lang:lrec,
  author    = {Fung, Cat H-M and Sze, Felix and Lam, Scholastica and Tang, Gladys},
  title     = {Simultaneity vs. sequentiality: developing a transcription system of {Hong} {Kong} {Sign} {Language} acquisition data},
  pages     = {22--27},
  editor    = {Crasborn, Onno and Efthimiou, Eleni and Hanke, Thomas and Thoutenhoofd, Ernst D. and Zwitserlood, Inge},
  booktitle = {Proceedings of the {LREC2008} 3rd Workshop on the Representation and Processing of Sign Languages: Construction and Exploitation of Sign Language Corpora},
  maintitle = {6th International Conference on Language Resources and Evaluation ({LREC} 2008)},
  publisher = {{European Language Resources Association (ELRA)}},
  address   = {Marrakech, Morocco},
  day       = {1},
  month     = jun,
  year      = {2008},
  language  = {english},
  url       = {https://www.sign-lang.uni-hamburg.de/lrec/pub/08001.html},
  abstract  = {It is a well-known fact that sign languages are characterized with a wide range of simultaneous constructions, e.g. complex polymorphemic constructions, maintenance of list buoys in space while another hand continues signing, overlaying of various types of non-manuals with manual signing, etc. In transcribing these simultaneous constructions, decisions have to be made as to whether they should be given a single gloss or be glossed separately in two different tiers. This presentation discusses the transcription system of Hong Kong Sign Language acquisition data, with particular focus on how simultaneous constructions are analyzed and glossed, and the difficulties we encountered in the transcription process.
\par
We are currently developing a Hong Kong Sign Language acquisition Corpus (Tang et al.) with transcriptions done with ELAN. One major advantage of ELAN is that it allows us to represent different pieces of linguistic information simultaneously on separate tiers. However, it is not always easy to decide whether two different signs produced by two hands should be glossed as a single sign or be teased apart and glossed separately on two different tiers. For example, in a typical classifier predicate such as `a cup on a table' in example one below, the signs can either be glossed as a single entry `CL-cup-on-table', or marked separately by `CL-cup' and `CL-flat surface' on two different tiers:
\par
Example (1): `a cup on a table' Left hand: CL-cup
\par
Right hand: CL-flat-surface
\par
The advantage of having a single gloss is that it reflects the native intuition that the two classifiers form a single syntactic unit. Yet it fails to reflect the morphological complexity of the construction, leading to a potential underestimation of the morphological development of the deaf child.
\par
On the other hand, having two separate glosses can clearly show that two classifiers are involved in the construction, reflecting its morphological complexities to some extent. From a theoretical point of view, however, once this method is adopted, the glosses are being used as `analyzable units' to represent separate handshape morphemes. A question that arises logically is, why do we want to represent handshape morphemes separately in the transcription, but not morphemes of other phonological parameters, such as movements and locations?
\par
Another equally thorny issue is how to gloss classifiers or signs (i.e. list buoy) that are held in space. In example (2), the signer expresses two propositions: `A man stands here' and `a woman shot him with a gun':
\par
Example (2):
\par
Left hand: MAN CL-stand --------------------------------------------> Right hand: FEMALE SHOOT-WITH-A-GUN
\par
In terms of articulation, the classifier for `MAN' is held in space while the second clause is signed. Syntactically, the classifier for MAN becomes the internal argument of the transitive verb SHOOT-WITH-A-GUN in the second clause. In the literature, if a sign is held in space, a broken line is usually used to represent the duration of which the sign is held. If the same method is used in the transcription, however, the fact that the classifier is the internal argument of the second clause cannot be captured. This may potentially lead to an under-estimation of the deaf child's syntactic complexity, if statistics are based on figures generated by the search functions of ELAN. In this presentation, an attempt will be made to provide solutions to the above issues.}
}

