{"created":"2023-06-19T11:40:38.106841+00:00","id":9081,"links":{},"metadata":{"_buckets":{"deposit":"f127e8f8-1bf2-44df-8e65-4cb79bcba46c"},"_deposit":{"created_by":15,"id":"9081","owners":[15],"pid":{"revision_id":0,"type":"depid","value":"9081"},"status":"published"},"_oai":{"id":"oai:mie-u.repo.nii.ac.jp:00009081","sets":["366:640:641:647"]},"author_link":["22700","22699"],"item_7_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2011-01-01","bibliographicIssueDateType":"Issued"}}]},"item_7_contributor_61":{"attribute_name":"修士論文指導教員","attribute_value_mlt":[{"contributorNames":[{"contributorName":"Kimura, Fumitaka","lang":"en"}],"nameIdentifiers":[{"nameIdentifier":"22700","nameIdentifierScheme":"WEKO"}]}]},"item_7_description_14":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_7_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"Automatic text classification (ATC) is the task to automatically assign one or more appropriate categories for a document according to its content or topic. Traditionally, text classification is carried out by human experts as it requires a certain level of vocabulary recognition and knowledge processing. With the rapid explosion of texts in digital form and growth of online information, text classification has become an important research area owing to the need to automatically handle and organize text collections. The applications of this technology are manifold, including automatic indexing for information retrieval systems, document organization, text filtering, spam filtering, and even hierarchical categorization of web pages. 
Many standard machine learning techniques have been applied to automated text classification problems, and K Nearest Neighbor system (kNN) and Support Vector Machines (SVM) have been reported as the top performing methods for English text classification. Unfortunately, perfect precision cannot be reached in Chinese text classification and the inherent errors caused by word segmentation always remain as a problem. The purpose of this research is to evaluate the effectiveness of feature extraction, feature transformation and dimension reduction techniques, and to improve the accuracy of Chinese text classification using various techniques. In this paper, we perform Chinese text classification using N-gram (uni-gram, bi-gram and mixed uni-gram/bi-gram) frequency feature instead of word frequency feature to represent documents and propose the use of mixed uni-gram/bi-gram after feature transformation. We further propose a serial approach based on feature transformation and dimension reduction techniques to improve the performance. Then we compare the results of three different types of SVM kernel functions. Experimental results show that our proposed approach is efficient and effective for improving the performance of Chinese text classification. Furthermore, we propose a novel feature selection method based on part-of-speech analysis. According to the components of Chinese texts, we utilize the words’ part-of-speech (POS) attributes to filter lots of meaningless features. 
The results show that suitable combination of part-of-speech can lead to better classification performance.","subitem_description_type":"Abstract"}]},"item_7_description_5":{"attribute_name":"内容記述","attribute_value_mlt":[{"subitem_description":"三重大学大学院工学研究科博士前期課程情報工学専攻","subitem_description_type":"Other"},{"subitem_description":"4, 28","subitem_description_type":"Other"}]},"item_7_publisher_30":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"三重大学"}]},"item_7_text_65":{"attribute_name":"資源タイプ(三重大)","attribute_value_mlt":[{"subitem_text_value":"Master's Thesis / 修士論文"}]},"item_7_version_type_15":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"LUO, XI","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"22699","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-02-20"}],"displaytype":"detail","filename":"2010M251.pdf","filesize":[{"value":"1.6 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"2010M251.pdf","url":"https://mie-u.repo.nii.ac.jp/record/9081/files/2010M251.pdf"},"version_id":"4cc5a48e-ad7a-4f3c-ba3a-64b5153dd1bc"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"thesis","resourceuri":"http://purl.org/coar/resource_type/c_46ec"}]},"item_title":"A Study on Automatic Chinese Text Classification","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"A Study on Automatic Chinese Text 
Classification","subitem_title_language":"en"}]},"item_type_id":"7","owner":"15","path":["647"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2013-06-11"},"publish_date":"2013-06-11","publish_status":"0","recid":"9081","relation_version_is_last":true,"title":["A Study on Automatic Chinese Text Classification"],"weko_creator_id":"15","weko_shared_id":-1},"updated":"2023-09-11T01:30:25.061119+00:00"}