Sporleder, Caroline and Mirella Lapata. 2004. Automatic Paragraph Identification: A Study across Languages and Domains. In Proceedings of the Conference on Empirical Methods in Natural Language Processing, 72-79. Barcelona.

In this paper we investigate whether paragraphs can be identified automatically in different languages and domains. We propose a machine learning approach which exploits textual and discourse cues and we assess how well humans perform on this task. Our best models achieve an accuracy that is significantly higher than the best baseline and, for most data sets, comes to within 6% of human performance.



% Conference paper. The fields not given here (booktitle, editor, address)
% are inherited from the EMNLP:04 proceedings entry through `crossref`.
@inproceedings{Sporleder:Lapata:04,
  author   = {Sporleder, Caroline and Lapata, Mirella},
  title    = {Automatic Paragraph Identification: A Study across Languages and Domains},
  pages    = {72--79},
  year     = {2004},
  crossref = {EMNLP:04},
}

% Parent proceedings entry shared by papers that `crossref` it.
% Classic BibTeX requires this entry to appear AFTER every entry that
% cross-references it, and copies fields verbatim (title is not mapped to
% booktitle), which is why the same text is given in both fields.
@proceedings{EMNLP:04,
  editor    = {Lin, Dekang and Wu, Dekai},
  title     = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  year      = {2004},
  address   = {Barcelona},
}