|
Trevor Cohn and Mirella Lapata. 2007. Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora. In Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics, 728-735. Prague. Current phrase-based SMT systems perform poorly when using small training sets. This is a consequence of unreliable translation estimates and low coverage over source and target phrases. This paper presents a method which alleviates this problem by exploiting multiple translations of the same source phrase. Central to our approach is triangulation, the process of translating from a source to a target language via an intermediate third language. This allows the use of a much wider range of parallel corpora for training, and can be combined with a standard phrase table using conventional smoothing methods. Experimental results demonstrate Bleu improvements for triangulated models over a standard phrase-based system.
@InProceedings{Cohn:Lapata:07a,
  author   = {Cohn, Trevor and Lapata, Mirella},
  title    = {Machine Translation by Triangulation: Making
              Effective Use of Multi-Parallel Corpora},
  crossref = {ACL:07},
  pages    = {728--735}
}
@Proceedings{ACL:07,
  title     = {Proceedings of the 45th Annual Meeting of the
               Association for Computational Linguistics},
  booktitle = {Proceedings of the 45th Annual Meeting of the
               Association for Computational Linguistics},
  year      = 2007,
  address   = {Prague}
}
|