{"id":4235,"date":"2020-12-13T20:42:24","date_gmt":"2020-12-13T11:42:24","guid":{"rendered":"https:\/\/now0930.pe.kr\/wordpress\/?p=4235"},"modified":"2020-12-14T10:38:17","modified_gmt":"2020-12-14T01:38:17","slug":"a3c-global-network-update","status":"publish","type":"post","link":"https:\/\/now0930.pe.kr\/wordpress\/a3c-global-network-update\/","title":{"rendered":"a3c global network update"},"content":{"rendered":"\n<p>\ud558&#8230;. \uc774\uac70 \ud55c\ub2e4\uace0 \uac70\uc758 \uba70\uce60\uc744 \ub0a0\ub838\ub2e4. A3C \uc131\ub2a5\uc774 \uc88b\ub2e4\uae38\ub798 \ub530\ub77c \ud574 \ubd24\ub294\ub370, \ub0b4\uac00 \uac00\uc9c4 \ucc45\uc740 tensorflow 1.x \ubc84\uc804 \uae30\uc900 \ucf54\ub4dc\uac00 \uc2e4\ub838\ub2e4. tensorflow 2.x\ub300\ub85c \uc5c5\ub370\uc774\ud2b8 \ub418\uba74\uc11c \uacfc\uac70 \uc5ec\ub7ec \ub2a5\ub825\uc790\ub4e4\uc774 \uad6c\ud604\ud55c fit \ubd80\ubd84 \ucf54\ub4dc\ub97c \uc0ac\uc6a9\ud560 \uc218 \uc5c6\uac8c \ub418\uc5c8\ub2e4. \ub300\uc138\ub294 gradienttape()\ub85c \ub124\ud2b8\uc6cd\uc744 \uc5c5\ub370\uc774\ud2b8 \ud558\ub294 \ubc29\ubc95\uc774\ub77c\uace0 \ud55c\ub2e4. a3c\uc5d0\uc11c\ub294 local \ubaa8\ub378\uc744 global \ubaa8\ub378\uacfc \ub611\uac19\uc774 \ub9cc\ub4e4\uace0, local model  \uacbd\ud5d8\uc73c\ub85c global network\ub97c \uc5c5\ub370\uc774\ud2b8 \ud55c\ub2e4. thread \uac1c\uc218\ub294 \uc784\uc758\ub85c \uc120\ud0dd\ud55c\ub2e4. a2c \ud655\uc7a5\ud3b8\uc774\ub77c thread \uc640 apply \ubd80\ubd84\uc744 \uc870\uae08 \uc218\uc815\ud558\uba74 \uc27d\uac8c \ub41c\ub2e4\uace0 \uc0dd\uac01\ud588\ub2e4. 
state\ub85c \ubaa8\ub378\uc744 \uc608\uce21\ud558\ub294 \ubd80\ubd84\uc744 \ud2c0\ub824\uc11c \uc544\ub798 \uacb0\uacfc\ub97c \uc5bb\uc5c8\ub2e4.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/12\/201212history.png\" alt=\"\" class=\"wp-image-4291\"\/><\/figure>\n\n\n\n<p>2,000\ubc88\ub300\uc5d0 \uc8fc\uc2dd\uc744 \uc0ac\uc11c 14,000\ubc88\ub300\uc5d0 \ud314\uace0 \uc2f6\ub2e4. \uc8fc\uc2dd \ucc28\ud2b8\ub97c \ubcf4\ub294 \ub4ef \ud558\ub2e4. \uc704 \uadf8\ub798\ud504\ub294 \uc5d0\ud53c\uc18c\ub4dc\uac00 \ub05d\ub0a0 \ub54c \uae4c\uc9c0 \uc0c8\ub85c\uc6b4 action\uc744 \uc5bb\uc5b4\uc57c \ud588\ub294\ub370, action\uc744 \ud55c\ubc88\ub9cc \uc5bb\uc5b4\uc11c \uadf8\ub807\ub2e4. \uc81c\ub300\ub85c \uc2e4\ud589\ud558\uba74 \ub2e4\uc74c \uadf8\ub798\ud504\uc640 \uac19\uc544\uc57c \ud55c\ub2e4. \uc810\uc218\ub294 \uc784\uc758\ub300\ub85c \ud588\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from env_reinforcev2 import CarrierStorage \nfrom env_reinforcev2 import Action\nimport random\nfrom collections import defaultdict\nimport numpy as np\nfrom termcolor import colored\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Input\nfrom keras.models  import Model\nfrom keras.optimizers import Adam\nimport copy\nfrom keras.models import model_from_json\nfrom collections import deque\nfrom keras import backend as K\nimport threading\nfrom queue import Queue\nimport time\nfrom tensorflow.python import keras\nimport matplotlib.pyplot as plt\n\neps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0\n\n#\uc5ec\uae30 
\ucc38\uc870.\n#https:\/\/github.com\/tensorflow\/models\/blob\/master\/research\/a3c_blogpost\/a3c_cartpole.py\n#actor critic \uc744 \ub530\ub85c \ub9cc\ub4e6.\n#https:\/\/github.com\/marload\/DeepRL-TensorFlow2\/blob\/master\/A3C\/A3C_Discrete.py\n\n#custom loss\ub97c \uad6c\ud558\uae30 \uc704\ud574 tensor\ub97c \uc989\uc2dc \ud655\uc778.\nimport tensorflow as tf\ntf.config.run_functions_eagerly(True)\n# \uba40\ud2f0\uc4f0\ub808\ub529\uc744 \uc704\ud55c \uae00\ub85c\ubc8c \ubcc0\uc218\n# \ud658\uacbd \uc0dd\uc131\nenv_name = \"smart_storage\"\n\n\n# \ube0c\ub808\uc774\ud06c\uc544\uc6c3\uc5d0\uc11c\uc758 A3CAgent \ud074\ub798\uc2a4(\uae00\ub85c\ubc8c\uc2e0\uacbd\ub9dd)\nclass A3CAgent:\n    def __init__(self):\n        # \uc0c1\ud0dc\ud06c\uae30\uc640 \ud589\ub3d9\ud06c\uae30\ub97c \uac16\uace0\uc634\n        self.state_size = 40\n        self.action_size = 7\n        self.value_size = 1\n        # A3C \ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\n        self.discount_factor = 0.9\n        #self.actor_lr = 2.5e-4\n        #self.critic_lr = 2.5e-4\n        # \uc4f0\ub808\ub4dc\uc758 \uac2f\uc218\n        self.threads = 12\n        self.DEFINE_NEW = False\n        self.RENDER = False\n\n        #global network \uc124\uc815\n        #self.a3c_global_model = ActorCriticModel(self.state_size, self.action_size)\n        #self.global_actor, self.global_critic = self.a3c_global_model.build_model()\n        self.global_model = self.build_actorCritic()\n    def build_actorCritic(self):\n        if(self.DEFINE_NEW == True):\n            input = Input(shape = (self.state_size,))\n            common = Dense(self.state_size*8, activation='relu', kernel_initializer='he_uniform')(input)\n            common2 = Dense(self.action_size*8, activation = 'relu',kernel_initializer='he_uniform')(common)\n            common3 = Dense(self.state_size*4, activation='relu', kernel_initializer='he_uniform')(common2)\n\n            action_prob = Dense(self.action_size, activation = 'softmax', 
kernel_initializer='he_uniform')(common3)\n            critic = Dense(1)(common3)\n            model = Model(inputs = input, outputs = [action_prob, critic])\n\n        else:\n            #\uc788\ub294 \ub370\uc774\ud130 \ub85c\ub529\n            json_actor = open(\".\/201208ActorA3c.json\", \"r\")\n            loaded_actor = json_actor.read()\n            json_actor.close()\n            model= model_from_json(loaded_actor)\n            print(\"\ubaa8\ub378 %s\ub97c \ub85c\ub529\"%json_actor)\n            weight_actor = \".\/201208weightCriticA3c.h5\"\n            model.load_weights(weight_actor)\n            print(\"\uc800\uc7a5\ub41c weights %s\ub97c \ub85c\ub529\"%weight_actor)\n        return model\n\n    def get_action(self, action_prob):\n        #[[\ud655\uc728 \ud615\uc2dd\uc73c\ub85c \ucd9c\ub825]]\n        # [0]\uc744 \ub123\uc5b4 \uc90c\n        #print(\"policy = \", policy)\n        return np.random.choice(self.action_size, 1, p=np.squeeze(action_prob))[0]\n\n    def train(self):\n        # \uc4f0\ub808\ub4dc \uc218\ub9cc\ud07c Agent \ud074\ub798\uc2a4 \uc0dd\uc131\n        agents = [Agent(self.action_size, self.state_size, self.global_model)\n                  for _ in range(self.threads)]\n\n        # \uac01 \uc4f0\ub808\ub4dc \uc2dc\uc791\n        for agent in agents:\n            time.sleep(2)\n            agent.start()\n\n        # 10\ubd84(600\ucd08)\uc5d0 \ud55c\ubc88\uc529 \ubaa8\ub378\uc744 \uc800\uc7a5\n        while True:\n            time.sleep(60 * 10)\n            model_json_actor = self.global_model.to_json()\n            with open(\".\/201208ActorA3c.json\", \"w\") as json_file:\n                json_file.write(model_json_actor)\n            self.global_model.save_weights(\".\/201208weightCriticA3c.h5\")\n \n\n\n# \uc561\ud130\ub7ec\ub108 \ud074\ub798\uc2a4(\uc4f0\ub808\ub4dc)\nclass Agent(threading.Thread):\n    def __init__(self, action_size, state_size, model):\n        threading.Thread.__init__(self)\n        self.action_size = 
action_size\n        self.state_size = state_size\n\n        # \uc9c0\uc815\ub41c \ud0c0\uc784\uc2a4\ud15d\ub3d9\uc548 \uc0d8\ud50c\uc744 \uc800\uc7a5\ud560 \ub9ac\uc2a4\ud2b8\n        self.states, self.actions, self.rewards = [], [], []\n\n        #init\ub85c \ub118\uc5b4\uc628 global model\uc744 \uc5f0\uacb0.\n        self.global_model = model\n\n        # \ub85c\uceec \ubaa8\ub378 \uc0dd\uc131\n        self.local_model = self.build_local_actorCritic()\n\n        #global\ub85c \uc5c5\ub370\uc774\ud2b8\n        self.update_local_from_global()\n\n        #A3C model class \uc548\uc5d0 \uc788\ub294 \uc815\ubcf4\ub97c \ubc16\uc73c\ub85c \ube7c\uc918\uc57c \ud558\ub294\ub370,\n        #\uadc0\ucc2e\uc544\uc11c \uadf8\ub0e5 \uc500.\n        self.discount_factor = 0.8\n        self.value_size = 1\n\n        #self.avg_p_max = 0\n        #self.avg_loss = 0\n\n        # \ubaa8\ub378 \uc5c5\ub370\uc774\ud2b8 \uc8fc\uae30\n        self.t_max = 20\n        self.t = 0\n\n    def build_local_actorCritic(self):\n        input = Input(shape = (self.state_size,))\n        common = Dense(self.state_size*8, activation='relu', kernel_initializer='he_uniform')(input)\n        common2 = Dense(self.action_size*8, activation = 'relu',kernel_initializer='he_uniform')(common)\n        common3 = Dense(self.state_size*4, activation='relu', kernel_initializer='he_uniform')(common2)\n\n        action_prob = Dense(self.action_size, activation = 'softmax', kernel_initializer='he_uniform')(common3)\n        critic = Dense(1)(common3)\n        model = Model(inputs = input, outputs = [action_prob, critic])\n        return model\n\n    def update_local_from_global(self):\n        self.local_model.set_weights(self.global_model.get_weights())\n\n    def run(self):\n        #\uba54\uc778 \ud568\uc218\n        env = CarrierStorage()\n        #agent = A3CAgent()\n        state = env.reset()\n\n        #state history\ub97c \uae30\ub85d\n        #historyState = []\n\n        scores, episodes, score_average 
= [], [], []\n        EPISODES = 1000000\n        #EPISODES = 100\n\n\n        global_step = 0\n        average = 0\n        huber_loss = tf.losses.Huber()\n        optimizer = Adam(learning_rate = 0.001)\n\n\n        #action, critic, reward\ub97c list\ub85c \uae30\ub85d.\n        actionprob_history, critic_history, reward_history = [], [], []\n        \n\n        total_loss_batch = []\n        success_counter = 0\n        success_counter_list = []\n        for e in range (EPISODES):\n            #print(\"episode check\", e)\n            done = False\n            score = 0\n            #\ubd88\uac00\ub2a5\ud55c \uacbd\uc6b0\uac00 \ub098\uc624\uba74 \ub2e4\uc2dc reset\n            #gradient tape\uc5d0\uc11c 0\ub97c \ub123\uc73c\uba74 \uc5d0\ub7ec.\n            while(True):\n                state = env.reset()\n                state = env.stateTo1hot(self.state_size)\n                status = env.isItEnd()\n                if(status == -1):\n                    break;\n            #print(\"reseted\")\n            #if(status == 0 or status == 1):\n            #    done = True\n            #    reward = 0\n                #print(\"zero rewards\")\n                #\uc5ec\uae30\uc5d0\uc11c apply.gradients\ub97c \uc801\uc6a9\ud55c\uba74 \uc548\ub428.\n\n            #with tf.GradientTape(persistent=True) as tape:\n            with tf.GradientTape() as tape:\n                while not done:\n                    action_prob, critic = self.local_model(state)\n                    if(agent.RENDER == True):\n                        env.render()\n                    global_step += 1\n                    #tape \uc544\ub798\ub85c \ubaa8\ub378\uc744 \uc785\ub825\ud574\uc57c input, output \uad00\uacc4\ub97c \uc54c \uc218 \uc788\uc74c.\n                    #actor, critic \ubaa8\ub450 \uc608\uce21.\n\n                    #action\uc740 action tf.Tensor(\n                    #[[0.16487105 0.0549401  0.12524831 0.1738248  0.31119537 0.07012787  0.0997925 ]], shape=(1, 7), 
dtype=float32)\n                    #critic\uc740 \n                    #critic tf.Tensor([[0.04798129]], shape=(1, 1), dtype=float32)\n                    #\uc73c\ub85c \ucd9c\ub825.\n                    #action_prob\ub85c action\uc744 \uad6c\ud568.\n                    action = agent.get_action(action_prob[0])\n                    #print(\"actionprob history\",actionprob_history)\n                    if(agent.RENDER == True):\n                        print(\"action is\", Action(action))\n                    next_state, reward, done, info = env.step(action)\n\n                    #history\uc5d0 \ucd94\uac00\n                    critic_history.append(critic[0,0])\n                    actionprob_history.append(tf.math.log(action_prob[0, action]))\n                    reward_history.append(reward)\n                    next_state = env.stateTo1hot(agent.state_size)\n                    #_, next_critic = agent.model(next_state)\n                    score += reward\n                    average = average + score\n                    state = copy.deepcopy(next_state)\n\n               #score\ub85c \uc131\uacf5, \uc2e4\ud328 \ud310\ub2e8.\n                #print(\"score\", score)\n                if(score > 0):\n                    success_counter = success_counter + 1\n\n                #rewards \ub97c discounted factor\ub85c \ub2e4\uc2dc \uacc4\uc0b0.\n                returns = []\n                discounted_sum = 0\n                for r in reward_history[::-1]:\n                    discounted_sum = r + agent.discount_factor* discounted_sum\n                    returns.insert(0, discounted_sum)\n\n                # Normalize\n                #returns\ub97c normailze\ud558\uba74 \n                #\ub9e4 \uc5d0\ud53c\uc18c\ub4dc\ub9c8\ub2e4 \ud55c \ud589\ub3d9\uc774 \ub2e4\ub978\ub370, \n                #\uac19\uc740 \uac12\uc73c\ub85c \ub9de\ucda4.\n                #\uc8fc\uc11d  \ucc98\ub9ac.\n\n                #reset \uacfc\uc815 \uc911 \uc644\ub8cc\uc778\ub370 
\ud559\uc2b5\ub8e8\ud2b8\ub85c \ub4e4\uc5b4\uac00\ub294 \uacbd\uc6b0\ub97c \ucc3e\uc544 \uc218\uc815.\n                #normailze \ub2e4\uc2dc \uc6d0\ubcf5.\n\n                #normalize\ub97c \uc0ac\uc6a9\ud558\uba74 \uc798 \uc548\ub418\ub294\uac83 \uac19\uc740. \ub2e4\uc2dc \uc0ad\uc81c \ud6c4 \ud559\uc2b5.\n\n                #state\ub97c \uc608\uce21\ud558\ub294 \ubd80\ubd84\uc744 \uc798\ubabb \ub123\uc5b4\uc11c \uc5ec\ud0dc\uae4c\uc9c0 \ub2e4 \uc0bd\uc9c8.\n                #action_prob, critic = self.local_model(state) \uc704\uce58 \ubc14\uafbc \ub4a4 \ub2e4\uc0ac normailze on\n                returns = np.array(returns)\n                returns = (returns - np.mean(returns)) \/ (np.std(returns) + eps)\n                returns = returns.tolist()\n                #print(\"critic history\", critic_history)\n                #print(\"action prob\", action_prob)\n                #print(\"return\", reward)\n\n                # Calculating loss values to update our network\n                history = zip(actionprob_history, critic_history, returns)\n                #print(\"history\", history)\n                actor_losses = []\n                critic_losses = []\n                for log_prob, value, ret in history:\n                    advantage = ret - value\n                    #advantage = reward  + (1.0 - done) * agent.discount_factor * next_critic - critic\n                    #[ [prob, prob, ... 
] ]\ud615\uc2dd\uc73c\ub85c \uc785\ub825\uc774 \ub4e4\uc5b4\uc62e\n                    actor_losses.append(-log_prob*advantage)\n                    #critic_losses.append(advantage**2)\n                    critic_losses.append(huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0)))\n                    #print(\"actor loss \", actor_losses)\n                    #print(\"critic loss \", critic_losses)\n                    #\ubaa8\ub378\uc774 \ud558\ub098\ub77c actor_loss + critic_loss \ub354\ud574\uc11c \ud55c\ubc88\uc5d0 train\n                    #print(\"grad\" , grads)\n                    #print(\"history\", len(actionprob_history))\n               \n                #print(\"actor_losses\", actor_losses)\n                total_loss = actor_losses + critic_losses\n                #print(\"total loss\", total_loss)\n                #loss\ub3c4 gradientTape \uc548\uc5d0 \ub4e4\uc5b4\uc788\uc5b4\uc57c \ud568.\n                #print(\"type total loss\", type(total_loss))\n                #print(\"total loss\", total_loss.numpy())\n                #10\uac1c\uc529 \ubaa8\uc544\uc11c \ud559\uc2b5\n                total_loss_batch.append(total_loss)\n                #print(\"total loss\", total_loss)\n                #print(\"total loss length\", len(total_loss))\n                #print(\"total loss batch \", total_loss_batch)\n                #print(\"total loss batch length\", len(total_loss_batch))\n\n                #print(\"==========================\")\n                #global model update\n                #print(\"length\", total_loss_batch)\n                #reinforce\ub294 2000\uac1c\uc529 \ubaa8\uc544\uc11c \ud559\uc2b5\ud558\ub294\uac8c \ud6a8\uacfc\uc801\uc778\ub4ef \ud558\ub098.\n                #a3c\ub294 100\uac1c\uc529 \uc870\uae08\uc529 \uc798\ub77c\uc11c \uc5c5\ub370\uc774\ud2b8\ub97c \ube68\ub9ac \ud558\ub294\uac8c \uc88b\uc544 \ubcf4\uc784.\n                #grads = tape.gradient(total_loss_batch, self.local_model.trainable_weights)\n           
     #grads = tape.gradient(total_loss, self.local_model.trainable_weights)\n                if(e%200 == 0 and e> 1):\n                    grads = tape.gradient(total_loss_batch, self.local_model.trainable_weights)\n                    optimizer.apply_gradients(zip(grads, self.global_model.trainable_weights))\n                    self.update_local_from_global()\n                    #print(\"hit!\")\n                    #print(\"total_loss_batch len is\", len(total_loss_batch))\n                    total_loss_batch.clear()\n\n                #history clear\n                actionprob_history.clear()\n                critic_history.clear()\n                reward_history.clear()\n\n\n            #if(len(actionprob_history) > 0 &amp; e%10 == 0):\n                #if(e%100 == 0 and len(total_loss_batch) > 0):\n                    #\uc704\uc5d0\uc11c done\uc774 \uc5c6\uc73c\uba74 \uc791\uc740 \uc774\ubca4\ud2b8\ub9cc \uacc4\uc0b0\ud568.\n                    #\uc644\uc804\ud558\uac8c \ub2e4 \ub05d\ub0ac\uc744 \uacbd\uc6b0\uc5d0\ub9cc \ud559\uc2b5\ud558\uae30 \uc704\ud574 done\uc744 \ucd94\uac00\n                    #print(\"actor losses\", len(actor_losses))\n                    #print(\"critic losses\", len(critic_losses))\n                    #print(\"check\", len(total_loss))\n                    #print(\"done\", done)\n                    #grads = tape.gradient(total_loss, self.local_model.trainable_weights)\n                #    grads = tape.gradient(total_loss_batch, self.local_model.trainable_weights)\n                    #print(\"grads\", grads)\n                #    optimizer.apply_gradients(zip(grads, self.global_model.trainable_weights))\n                    #print(\"actionprob history\", actionprob_history)\n                    #print(\"cirtic,\",critic_history)\n                    #print(\"rewards\", reward_history)\n                    #print(\"actor losses\", len(actor_losses))\n                    #print(\"critic losses\", len(critic_losses))\n           
         #print(\"total loss\", len(total_loss))\n\n                    #print(\"actionprob_history\", len(actionprob_history))\n                    #print(\"episodes\", e)\n\n                    #global network\uc73c\ub85c local network update\n                    #self.update_local_from_global()\n                    #print(\"hit!\")\n                    #print(\"total loss batch len\", len(total_loss_batch))\n                #    total_loss_batch = []\n                    #total_loss_batch.clear()\n\n            if(agent.RENDER == True):\n                print(\"episode:\", e, \"  score:\", score)\n            if(e%1000 == 0):\n                #print(\"history length is\", len(actionprob_history))\n                #print(\"total loss length is\", total_loss.numpy().size)\n                print(\"episode:\", e, \"  score:\", score, \"global_step\", global_step,\"average\", average, \n                        \"success_counter\", success_counter)\n                scores.append(score)\n                success_counter_list.append(success_counter)\n                score_average.append(average)\n                episodes.append(e)\n                #\ub9e4 1000\ud68c\ub9c8\ub2e4 average \ucd08\uae30\ud654.\n                average = 0\n                #model_json_actor = self.global_model.to_json()\n                #with open(\".\/201208ActorA3c.json\", \"w\") as json_file:\n                #    json_file.write(model_json_actor)\n                #self.global_model.save_weights(\".\/201208weightCriticA3c.h5\")\n                #plt.plot(episodes, score_average, 'b')\n                plt.plot(episodes, success_counter_list, 'b')\n\n                success_counter = 0\n                #plt.show()\n                plt.savefig(\".\/history.png\")\n            #\ube44\uc5b4\uc788\ub294 history\ub85c gradients\ub97c \uacc4\uc0b0\ud558\uc9c0 \uc54a\ub3c4\ub85d..\n            #print(\"episode\", e)\n\nif __name__ == '__main__':\n    #\uba54\uc778 \ud568\uc218\n    agent = 
A3CAgent()\n    agent.train()<\/pre>\n\n\n\n<p>\uc774\ub7f0\uc800\ub7f0 \ud14c\uc2a4\ud2b8\ub97c \ud558\ub2e4\ubcf4\ub2c8 \ucf54\ub4dc\uac00 \ub11d\ub9c8 \uc870\uac01\uc778\ub370, \ub2e4\uc2dc \uc218\uc815\ud558\uae34 \uadc0\ucc2e\ub2e4. \uc5ed\uc2dc \uc704\uc640 \uac19\uc774 \ud574\ub3c4, \ub4a4\ucabd\uc5d0 \uc788\ub294 \ub300\ucc28\ub97c \uc798 \ubf51\uc544\ub0b4\uc9c0 \ubabb\ud55c\ub2e4. \ud658\uacbd\uc744 \uc0c1\ub2f9\ud788 \uae4c\ub2e4\ub86d\uac8c \uc124\uc815\ud574\uc57c \ud55c\ub2e4. \uc911\uac04\uc5d0 return\uc744 normalize \ud558\uc5ec \ud559\uc2b5\ud558\ub294\ub370, normalize\ub97c \ud558\uc9c0 \ub9d0\uc544\uc57c \ud560 \ub4ef\ud558\ub2e4. \uac01 thread \ubcc4 \uac12\uc774 \ub2e4\ub978\ub370, \uc77c\uc815 \uae30\uc900\uc73c\ub85c \ub9de\ucd94\uba74 \uac01 \ud589\ub3d9\uc744 \uc81c\ub300\ub85c \ud559\uc2b5\uc2dc\ud0ac \uc218 \uc5c6\uc5b4 \ubcf4\uc778\ub2e4. \uc544\ub798 \uadf8\ub798\ud504\uac00 return\uc744 normalize\ub85c \ud55c \uacbd\uc6b0\uc778\ub370, \ud559\uc2b5\uc774 \uc798 \uc548\ub41c\ub2e4. 
\uc67c\ucabd \uc22b\uc790\ub294 \uc804\uccb4 1,000\ud68c \uc911 \uc131\uacf5 \ud69f\uc218\ub2e4.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/12\/history_normailze.png\" alt=\"\" class=\"wp-image-4294\"\/><figcaption>normalize return.<\/figcaption><\/figure>\n\n\n\n<p>normalize\ub97c \ud558\uc9c0 \uc54a\uc73c\uba74 \uc544\ub798 \uadf8\ub9bc\uacfc \uac19\ub2e4.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/12\/history.png\" alt=\"\" class=\"wp-image-4297\"\/><\/figure>\n\n\n\n<p>\ud558\ub3c4 \uc5ec\ub7ec \uc0ac\uc774\ud2b8\uc5d0\uc11c \uac00\uc838\ub2e4 \uc4f0\ub2e4 \ubcf4\ub2c8, \uc5b4\ub514\uc5d0\uc11c \ubb34\uc5c7\uc744 \ucc38\uc870 \ud588\ub294\uc9c0 \ubaa8\ub974\uaca0\ub2e4. \uc77c\ub2e8 \ub2e4 \uc801\uc5b4\uc57c\uaca0\ub2e4.<\/p>\n\n\n\n<p><a href=\"https:\/\/blog.tensorflow.org\/2018\/07\/deep-reinforcement-learning-keras-eager-execution.html\">https:\/\/blog.tensorflow.org\/2018\/07\/deep-reinforcement-learning-keras-eager-execution.html<\/a><\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-wp-embed is-provider-pyimagesearch wp-block-embed-pyimagesearch\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"QKcvFYLdEL\"><a href=\"https:\/\/pyimagesearch.com\/2020\/03\/23\/using-tensorflow-and-gradienttape-to-train-a-keras-model\/\">Using TensorFlow and GradientTape to train a Keras model<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;Using TensorFlow and GradientTape to train a Keras model&#8221; &#8212; PyImageSearch\" 
src=\"https:\/\/pyimagesearch.com\/2020\/03\/23\/using-tensorflow-and-gradienttape-to-train-a-keras-model\/embed\/#?secret=w8Ge7ZFbfU#?secret=QKcvFYLdEL\" data-secret=\"QKcvFYLdEL\" width=\"500\" height=\"282\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<p><a href=\"https:\/\/rlzoo.readthedocs.io\/en\/latest\/_modules\/rlzoo\/algorithms\/a3c\/a3c.html\">https:\/\/rlzoo.readthedocs.io\/en\/latest\/_modules\/rlzoo\/algorithms\/a3c\/a3c.html<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/stackoverflow.com\/questions\/60510441\/implementing-a3c-on-tensorflow-2\">https:\/\/stackoverflow.com\/questions\/60510441\/implementing-a3c-on-tensorflow-2<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/github.com\/tensorflow\/models\/blob\/master\/research\/a3c_blogpost\/a3c_cartpole.py\">https:\/\/github.com\/tensorflow\/models\/blob\/master\/research\/a3c_blogpost\/a3c_cartpole.py<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/github.com\/keras-team\/keras-io\/blob\/master\/examples\/rl\/actor_critic_cartpole.py\">https:\/\/github.com\/keras-team\/keras-io\/blob\/master\/examples\/rl\/actor_critic_cartpole.py<\/a><\/p>\n\n\n\n<p>youtube \uc26c\uc6b4 \uac15\uc758..\uc911\uac04\uc5d0 \ub9e4\uc9c1\uc744 \ubcfc \uc218 \uc788\uc74c.<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"RLCode\uc640 A3C \uc27d\uace0 \uae4a\uac8c \uc774\ud574\ud558\uae30\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/gINks-YCTBs?feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<p><a 
href=\"https:\/\/github.com\/marload\/DeepRL-TensorFlow2\/blob\/master\/A3C\/A3C_Discrete.py\">https:\/\/github.com\/marload\/DeepRL-TensorFlow2\/blob\/master\/A3C\/A3C_Discrete.py<\/a><\/p>\n\n\n\n<p>actor critic \uac04\ub2e8\ud55c \uc608\uc81c<\/p>\n\n\n\n<p><a href=\"https:\/\/keras.io\/examples\/rl\/actor_critic_cartpole\/\">https:\/\/keras.io\/examples\/rl\/actor_critic_cartpole\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\ud558&#8230;. \uc774\uac70 \ud55c\ub2e4\uace0 \uac70\uc758 \uba70\uce60\uc744 \ub0a0\ub838\ub2e4. A3C \uc131\ub2a5\uc774 \uc88b\ub2e4\uae38\ub798 \ub530\ub77c \ud574 \ubd24\ub294\ub370, \ub0b4\uac00 \uac00\uc9c4 \ucc45\uc740 tensorflow 1.x \ubc84\uc804 \uae30\uc900 \ucf54\ub4dc\uac00 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[33,12],"tags":[828,819,271,109],"class_list":["post-4235","post","type-post","status-publish","format-standard","hentry","category-tensorflow","category-12","tag-a3c","tag-keras","tag-reinforcementlearning","tag-tensorflow"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4235","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/comments?post=4235"}],"version-history":[{"count":9,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4235\/revisions"}],"predecessor-version":[{"id":4299,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4235\/revisions\/4299"}],"wp:attachment":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media?parent=4235"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/categories?post=4235"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/tags?post=4235"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}