% Conference paper from an LNCS proceedings volume (repository export, cleaned up).
% The export had packed the abstract, publisher city, proceedings title and
% series name into a single note field; they now live in their proper fields
% so that styles do not print the abstract in the bibliography.
% TODO: the required author field is missing from the export -- add it from the
%       paper itself; it is deliberately not guessed here.
% NOTE(review): month jan comes from the export, while the booktitle dates the
%       workshop to November 2004 -- confirm which month is intended.
% NOTE(review): the meaning of "501" in the original note is not stated
%       (possibly the page count of the volume); it is kept verbatim in the
%       non-printing internal-note field together with the MIME type.
@inproceedings{oai:mie-u.repo.nii.ac.jp:00010047,
 title         = {The Impact of {OCR} Accuracy on Automatic Text Classification},
 booktitle     = {Content Computing: Advanced Workshop on Content Computing, {AWCC} 2004, {ZhenJiang}, {JiangSu}, {China}, November 15--17, 2004, Proceedings},
 series        = {Lecture Notes in Computer Science},
 volume        = {3309},
 pages         = {403--409},
 publisher     = {Springer},
 address       = {Berlin; New York},
 year          = {2004},
 month         = jan,
 abstract      = {Current general digitization approach of paper media is converting them into the digital images by a scanner, and then reading them by an OCR to generate ASCII text for full-text retrieval. However, it is impossible to recognize all characters with 100\% accuracy by the present OCR technology. Therefore, it is important to know the impact of OCR accuracy on automatic text classification to reveal its technical feasibility. In this research we perform automatic text classification experiments for English newswire articles to study on the relationships between the accuracies of OCR and the text classification employing the statistical classification techniques.},
 internal-note = {Residual repository metadata from the original note field: application/pdf, 501}
}