{"created":"2023-06-19T11:38:41.011547+00:00","id":6441,"links":{},"metadata":{"_buckets":{"deposit":"bfd4cdc9-a671-4d0d-bfb4-5c1529f15d34"},"_deposit":{"created_by":13,"id":"6441","owners":[13],"pid":{"revision_id":0,"type":"depid","value":"6441"},"status":"published"},"_oai":{"id":"oai:mie-u.repo.nii.ac.jp:00006441","sets":["515:516:517:518"]},"author_link":["15018","15019","15020","15021"],"item_4_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2011-10-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicPageEnd":"58","bibliographicPageStart":"55","bibliographic_titles":[{"bibliographic_title":"Proceedings of the Second International Workshop on Regional Innovation Studies : (IWRIS2010)"}]}]},"item_4_description_14":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_4_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"This paper aims to accelerate processes\nof actor-critic method, which is one of major\nreinforcement learning algorithms, by a transfer\nlearning. In general, reinforcement learning is used\nto solve optimization problems. Learning agents\nacquire a policy to accomplish the target task autonomously.\nTo solve the problems, agents require\nlong learning processes for trial and error. Transfer\nlearning is one of effective methods to accelerate\nlearning processes of machine learning algorithms.\nIt accelerates learning processes by using\nprior knowledge from a policy for a source task. We\npropose an effective transfer learning algorithm for\nactor-critic method. Two basic issues for the transfer\nlearning are method to select an effective source\npolicy and method to reuse without negative transfer.\nIn this paper, we mainly discuss the latter.\nWe proposed\nthe reuse method which based on the selection\nmethod that uses the forbidden rule set. Forbidden\nrule set is the set of rules that cause immediate failure\nof tasks. It is used to foresee similarity between\na source policy and the target policy. Agents should\nnot transfer the inappropriate rules in the selected\npolicy. In actor-critic, a policy is constructed by two\nparameter sets: action preferences and state values.\nTo avoid inappropriate rules, agents reuse only reliable\naction preferences and state values that imply\npreferred actions. We perform simple experiments\nto show the effectiveness of the proposed method. In\nconclusion, the proposed method accelerates learning\nprocesses for the target tasks.","subitem_description_type":"Abstract"}]},"item_4_publisher_30":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Graduate School of Regional Innovation Studies, Mie University"}]},"item_4_text_65":{"attribute_name":"資源タイプ(三重大)","attribute_value_mlt":[{"subitem_text_value":"Departmental Bulletin Paper / 紀要論文"}]},"item_4_version_type_15":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"TAKANO, Toshiaki","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"TAKASE, Haruhiko","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"KAWANAKA, Hiroharu","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"TSURUOKA, Shinji","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-02-18"}],"displaytype":"detail","filename":"60C15235.pdf","filesize":[{"value":"106.1 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"60C15235.pdf","url":"https://mie-u.repo.nii.ac.jp/record/6441/files/60C15235.pdf"},"version_id":"b23be4b4-7e0a-4bc6-8a37-d1dcd7f7915c"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Reinforcement learning / actor-critic method / Transfer learning","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Accelerate Learning Processes by Avoiding Inappropriate Rules in Transfer Learning for Actor-Critic","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Accelerate Learning Processes by Avoiding Inappropriate Rules in Transfer Learning for Actor-Critic","subitem_title_language":"en"}]},"item_type_id":"4","owner":"13","path":["518"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2011-11-08"},"publish_date":"2011-11-08","publish_status":"0","recid":"6441","relation_version_is_last":true,"title":["Accelerate Learning Processes by Avoiding Inappropriate Rules in Transfer Learning for Actor-Critic"],"weko_creator_id":"13","weko_shared_id":-1},"updated":"2023-09-19T06:24:05.751015+00:00"}