Trevor Cohn and Mirella Lapata. 2007. Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora. In Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics, 728-735. Prague. Current phrase-based SMT systems perform poorly when using small training sets. This is a consequence of unreliable translation estimates and low coverage over source and target phrases. This paper presents a method which alleviates this problem by exploiting multiple translations of the same source phrase. Central to our approach is triangulation, the process of translating from a source to a target language via an intermediate third language. This allows the use of a much wider range of parallel corpora for training, and can be combined with a standard phrase table using conventional smoothing methods. Experimental results demonstrate Bleu improvements for triangulated models over a standard phrase-based system.
% Conference paper. The fields not given here (booktitle, year, address)
% are inherited from the proceedings entry named by the crossref field.
@InProceedings{Cohn:Lapata:07a,
  author   = {Cohn, Trevor and Lapata, Mirella},
  title    = {Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora},
  crossref = {ACL:07},
  pages    = {728--735}
}

% Parent proceedings entry. Its key must match the crossref value above, and
% under classic BibTeX it must appear after every entry that cross-references it.
% booktitle repeats title because classic BibTeX copies fields verbatim and
% does not map the parent's title onto the child's booktitle.
@Proceedings{ACL:07,
  title     = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics},
  booktitle = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics},
  year      = 2007,
  address   = {Prague}
}