WEKO3
アイテム
{"_buckets": {"deposit": "18fd3887-f1d8-4701-9996-9fa57c13e741"}, "_deposit": {"created_by": 13, "id": "10047", "owners": [13], "pid": {"revision_id": 0, "type": "depid", "value": "10047"}, "status": "published"}, "_oai": {"id": "oai:mie-u.repo.nii.ac.jp:00010047", "sets": ["677"]}, "author_link": ["24861", "24862", "24863", "24864", "24865"], "item_1706510172288": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "Zu, Guowei", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "24861", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Murata, Mayo", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "24862", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Ohyama, Wataru", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "24863", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Wakabayashi, Tetsushi", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "24864", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Kimura, Fumitaka", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "24865", "nameIdentifierScheme": "WEKO"}]}]}, "item_3_biblio_info_6": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2004-01-01", "bibliographicIssueDateType": "Issued"}, "bibliographicPageEnd": "409", "bibliographicPageStart": "403", "bibliographicVolumeNumber": "3309"}]}, "item_3_description_14": {"attribute_name": "フォーマット", "attribute_value_mlt": [{"subitem_description": "application/pdf", "subitem_description_type": "Other"}]}, "item_3_description_5": {"attribute_name": "内容記述", "attribute_value_mlt": [{"subitem_description": "Current general digitization approach of paper media is converting them into the digital images by a scanner, and then reading them by an OCR to generate ASCII text for full-text 
retrieval. However, it is impossible to recognize all characters with 100% accuracy by the present OCR technology. Therefore, it is important to know the impact of OCR accuracy on automatic text classification to reveal its technical feasibility. In this research we perform automatic text classification experiments for English newswire articles to study on the relationships between the accuracies of OCR and the text classification employing the statistical classification techniques.", "subitem_description_type": "Other"}, {"subitem_description": "Berlin ; New York", "subitem_description_type": "Other"}, {"subitem_description": "501", "subitem_description_type": "Other"}, {"subitem_description": "Content computing : Advanced Workshop on Content Computing, AWCC 2004, ZhenJiang, JiangSu, China, November 15-17, 2004 : proceedings", "subitem_description_type": "Other"}, {"subitem_description": "Lecture Notes in Computer Science", "subitem_description_type": "Other"}]}, "item_3_publisher_30": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "Springer"}]}, "item_3_relation_11": {"attribute_name": "DOI", "attribute_value_mlt": [{"subitem_relation_type": "isVersionOf", "subitem_relation_type_id": {"subitem_relation_type_id_text": "10.1007/978-3-540-30483-8_49", "subitem_relation_type_select": "DOI"}}]}, "item_3_relation_37": {"attribute_name": "関係URI", "attribute_value_mlt": [{"subitem_relation_name": [{"subitem_relation_name_text": "http://www.springerlink.com/content/0d4p3l9pdl8rh4c6/?p=e2c2c566b6a5413c84137501350ca497&pi=56"}]}]}, "item_3_relation_8": {"attribute_name": "ISBN", "attribute_value_mlt": [{"subitem_relation_type_id": {"subitem_relation_type_id_text": "9783540238980", "subitem_relation_type_select": "ISBN"}}]}, "item_3_source_id_7": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "0302-9743", "subitem_source_identifier_type": "PISSN"}]}, "item_3_subject_16": {"attribute_name": "日本十進分類法", 
"attribute_value_mlt": [{"subitem_subject": "007", "subitem_subject_scheme": "NDC"}]}, "item_3_text_63": {"attribute_name": "ノート", "attribute_value_mlt": [{"subitem_text_value": "出版者版電子ジャーナルあり"}]}, "item_3_text_65": {"attribute_name": "資源タイプ(三重大)", "attribute_value_mlt": [{"subitem_text_value": "Conference Paper / 会議発表論文"}]}, "item_3_version_type_15": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_ab4af688f83e57aa", "subitem_version_type": "AM"}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2017-02-20"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "40A12190.pdf", "filesize": [{"value": "293.5 kB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_note", "mimetype": "application/pdf", "size": 293500.0, "url": {"label": "40A12190.pdf", "url": "https://mie-u.repo.nii.ac.jp/record/10047/files/40A12190.pdf"}, "version_id": "5831e2b2-5c43-4e0f-93a1-ba601f5345bb"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "conference paper", "resourceuri": "http://purl.org/coar/resource_type/c_5794"}]}, "item_title": "The impact of OCR accuracy on automatic text classification", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "The impact of OCR accuracy on automatic text classification", "subitem_title_language": "en"}]}, "item_type_id": "3", "owner": "13", "path": ["677"], "permalink_uri": "http://hdl.handle.net/10076/11096", "pubdate": {"attribute_name": "PubDate", "attribute_value": "2010-05-24"}, "publish_date": "2010-05-24", "publish_status": "0", "recid": "10047", "relation": {}, "relation_version_is_last": true, 
"title": ["The impact of OCR accuracy on automatic text classification"], "weko_shared_id": -1}
The impact of OCR accuracy on automatic text classification
http://hdl.handle.net/10076/11096
http://hdl.handle.net/10076/11096 e40450c1-f7a5-491b-8bea-f10a6b43043f
名前 / ファイル | ライセンス | アクション |
---|---|---|
40A12190.pdf (293.5 kB)
|
|
Item type | 会議発表論文 / Conference Paper(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2010-05-24 | |||||
タイトル | ||||||
言語 | en | |||||
タイトル | The impact of OCR accuracy on automatic text classification | |||||
言語 | ||||||
言語 | eng | |||||
資源タイプ | ||||||
資源タイプ識別子 | http://purl.org/coar/resource_type/c_5794 | |||||
資源タイプ | conference paper | |||||
著者 |
Zu, Guowei
× Zu, Guowei× Murata, Mayo× Ohyama, Wataru× Wakabayashi, Tetsushi× Kimura, Fumitaka |
|||||
内容記述 | ||||||
内容記述タイプ | Other | |||||
内容記述 | Current general digitization approach of paper media is converting them into the digital images by a scanner, and then reading them by an OCR to generate ASCII text for full-text retrieval. However, it is impossible to recognize all characters with 100% accuracy by the present OCR technology. Therefore, it is important to know the impact of OCR accuracy on automatic text classification to reveal its technical feasibility. In this research we perform automatic text classification experiments for English newswire articles to study on the relationships between the accuracies of OCR and the text classification employing the statistical classification techniques. | |||||
内容記述 | ||||||
内容記述タイプ | Other | |||||
内容記述 | Berlin ; New York | |||||
内容記述 | ||||||
内容記述タイプ | Other | |||||
内容記述 | 501 | |||||
内容記述 | ||||||
内容記述タイプ | Other | |||||
内容記述 | Content computing : Advanced Workshop on Content Computing, AWCC 2004, ZhenJiang, JiangSu, China, November 15-17, 2004 : proceedings | |||||
内容記述 | ||||||
内容記述タイプ | Other | |||||
内容記述 | Lecture Notes in Computer Science | |||||
書誌情報 |
巻 3309, p. 403-409, 発行日 2004-01-01 |
|||||
ISSN | ||||||
収録物識別子タイプ | PISSN | |||||
収録物識別子 | 0302-9743 | |||||
ISBN | ||||||
識別子タイプ | ISBN | |||||
関連識別子 | 9783540238980 | |||||
DOI | ||||||
関連タイプ | isVersionOf | |||||
識別子タイプ | DOI | |||||
関連識別子 | 10.1007/978-3-540-30483-8_49 | |||||
フォーマット | ||||||
内容記述タイプ | Other | |||||
内容記述 | application/pdf | |||||
著者版フラグ | ||||||
出版タイプ | AM | |||||
出版タイプResource | http://purl.org/coar/version/c_ab4af688f83e57aa | |||||
日本十進分類法 | ||||||
主題Scheme | NDC | |||||
主題 | 007 | |||||
出版者 | ||||||
出版者 | Springer | |||||
関係URI | ||||||
関連名称 | http://www.springerlink.com/content/0d4p3l9pdl8rh4c6/?p=e2c2c566b6a5413c84137501350ca497&pi=56 | |||||
ノート | ||||||
出版者版電子ジャーナルあり | ||||||
資源タイプ(三重大) | ||||||
Conference Paper / 会議発表論文 |