{"created":"2023-06-19T11:38:41.184479+00:00","id":6445,"links":{},"metadata":{"_buckets":{"deposit":"258001cd-7fb0-4279-897b-230a228a6437"},"_deposit":{"created_by":13,"id":"6445","owners":[13],"pid":{"revision_id":0,"type":"depid","value":"6445"},"status":"published"},"_oai":{"id":"oai:mie-u.repo.nii.ac.jp:00006445","sets":["515:516:517:518"]},"author_link":["15037","15038"],"item_4_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2011-10-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicPageEnd":"74","bibliographicPageStart":"71","bibliographic_titles":[{"bibliographic_title":"Proceedings of the Second International Workshop on Regional Innovation Studies : (IWRIS2010)"}]}]},"item_4_description_14":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_4_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"The reduction on the trial frequency is\nimportant for reinforcement learning under an actual\nenvironment.\nWe propose the Q-learning method that selects proper\nactions of robot in unknown environment by using the Self-\nInstruction based on the experience in known environment.\nConcretely, it has two Q-tables, one is smaller, based on a\npartial space of the environment, the other is larger, based on\nthe whole space of the environment. At each learning step, Qvalues\nof these Q-tables are updated at the same time, but an\naction is selected by using Q-table that has smaller entropy of\nQ-values at the situation. We think that the smaller Q-table is\nused for the knowledge storing as self-instructing. The larger is\nused for the experiment storing.\nWe experimented the proposed method with using an actual\nmobile robot. In the experimental environment, exist a mobile\nrobot, two goals and one of a red, a green, a yellow and a blue\nobject. The robot has a task to carry a colored object into the\ncorresponding goal. In this experiment, the Q-table for the\nwhole has a state for the view of the object and the goals with\nthe colors, the Q-table for the partial has the state without\ncolor information. 
We verified that the proposed method is\nmore effective than the ordinaries in an actual environment.","subitem_description_type":"Abstract"}]},"item_4_publisher_30":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Graduate School of Regional Innovation Studies, Mie University"}]},"item_4_text_65":{"attribute_name":"資源タイプ(三重大)","attribute_value_mlt":[{"subitem_text_value":"Departmental Bulletin Paper / 紀要論文"}]},"item_4_version_type_15":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shibata, Nobuo","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Matsui, Hirokazu","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-02-18"}],"displaytype":"detail","filename":"60C15239.pdf","filesize":[{"value":"398.8 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"60C15239.pdf","url":"https://mie-u.repo.nii.ac.jp/record/6445/files/60C15239.pdf"},"version_id":"b701d604-ff42-4a8e-b67f-bc2e9a8b9e37"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Reinforcement learning","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Reinforcement Learning with dual tables for a partial and a whole space","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Reinforcement Learning with dual tables for a partial and a whole space","subitem_title_language":"en"}]},"item_type_id":"4","owner":"13","path":["518"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2011-11-08"},"publish_date":"2011-11-08","publish_status":"0","recid":"6445","relation_version_is_last":true,"title":["Reinforcement Learning with dual tables for a partial and a whole space"],"weko_creator_id":"13","weko_shared_id":-1},"updated":"2023-09-19T06:24:17.965622+00:00"}
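For illustration, the following is a minimal Python sketch of the dual Q-table scheme the abstract describes: two Q-tables (one over the partial, color-free state space, one over the whole state space) are updated in parallel at every step, and the action is chosen from whichever table has the smaller entropy over its Q-values in the current state. This is not the authors' implementation; the action set, the softmax used to turn Q-values into a distribution, the epsilon-greedy exploration, the hyperparameters, and the `transition_fn`/`reward_fn` interface are all assumptions made for the sketch.

```python
# Sketch of dual Q-table Q-learning with entropy-based table selection,
# reconstructed from the abstract. All concrete details below are assumptions.
import math
import random
from collections import defaultdict

ACTIONS = ["forward", "turn_left", "turn_right", "grasp", "release"]  # assumed action set
ALPHA, GAMMA, EPSILON = 0.1, 0.9, 0.1  # assumed hyperparameters

q_whole = defaultdict(lambda: [0.0] * len(ACTIONS))    # state includes object/goal colors
q_partial = defaultdict(lambda: [0.0] * len(ACTIONS))  # state without color information

def entropy(q_values, temp=1.0):
    """Entropy of the softmax distribution over Q-values; low entropy
    means this table clearly prefers some actions in this state."""
    exps = [math.exp(q / temp) for q in q_values]
    z = sum(exps)
    probs = [e / z for e in exps]
    return -sum(p * math.log(p) for p in probs if p > 0)

def select_action(s_whole, s_partial):
    """Act greedily w.r.t. whichever table is less uncertain here."""
    if random.random() < EPSILON:                      # assumed exploration scheme
        return random.randrange(len(ACTIONS))
    q_w, q_p = q_whole[s_whole], q_partial[s_partial]
    q = q_p if entropy(q_p) < entropy(q_w) else q_w    # smaller-entropy table decides
    return max(range(len(ACTIONS)), key=lambda a: q[a])

def update(table, s, a, reward, s_next):
    """Standard one-step Q-learning update."""
    best_next = max(table[s_next])
    table[s][a] += ALPHA * (reward + GAMMA * best_next - table[s][a])

def learning_step(s_whole, s_partial, reward_fn, transition_fn):
    """One step: act once, then update both tables at the same time."""
    a = select_action(s_whole, s_partial)
    s_whole2, s_partial2 = transition_fn(s_whole, a)   # hypothetical environment interface
    r = reward_fn(s_whole2)
    update(q_whole, s_whole, a, r, s_whole2)
    update(q_partial, s_partial, a, r, s_partial2)
    return s_whole2, s_partial2
```

Under these assumptions, the partial-space table converges faster because it aggregates experience across colors, so early on it tends to have the lower entropy and drives action selection, while the whole-space table gradually accumulates the color-specific experience needed to route each object to its corresponding goal.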