{"id":4197,"date":"2020-10-27T20:46:38","date_gmt":"2020-10-27T11:46:38","guid":{"rendered":"https:\/\/now0930.pe.kr\/wordpress\/?p=4197"},"modified":"2020-11-25T21:55:03","modified_gmt":"2020-11-25T12:55:03","slug":"a2c-keras%eb%a1%9c-%ea%b5%ac%ed%98%84","status":"publish","type":"post","link":"https:\/\/now0930.pe.kr\/wordpress\/a2c-keras%eb%a1%9c-%ea%b5%ac%ed%98%84\/","title":{"rendered":"A2C keras\ub85c \uad6c\ud604(updated, \uc644\uc131)"},"content":{"rendered":"\n<p>a2c\ub97c keras\ub85c \uc0ac\uc6a9\ud558\ub824\uba74 loss function\uc744 \uc0c8\ub86d\uac8c \uc815\uc758\ud574\uc57c \ud55c\ub2e4. <s>\ubcf4\ud1b5 fit\uc73c\ub85c \ub118\uc5b4\uc624\ub294 \uc778\uc790\uac00 input, output \uac01 \ud55c \uac1c\uc529 \uc0ac\uc6a9\ud55c\ub2e4. input\uc774\ub098 output\uc73c\ub85c \ud30c\ub77c\ubbf8\ud130\ub97c \ub118\uae38 \ub54c advantage\ub97c \uac19\uc774 \ub118\uaca8\uc57c \ud55c\ub2e4.<\/s> tensorflow 1.x\uc5d0\uc11c\ub294 \uc774\uac8c \uaf3c\uc218\ub85c \ub418\uc5c8\ub294\ub370, 2.x\ub85c \uc62c\ub77c\uc624\uba74\uc11c \uc548\ub41c\ub2e4. \uc544\ub798 \ubcf4\uba74 actor loss\uac00 0\uc73c\ub85c \uace0\uc815\ub418\uc5b4 \uc788\ub2e4.<\/p>\n\n\n\n<p>\uc785\ub825 \ud30c\ub77c\ubbf8\ud130\ub97c \ub118\uae38 \ub54c \ub9ac\uc2a4\ud2b8\ub85c 2\uac1c\ub97c \ub118\uae38 \uc218 \uc788\ub2e4. input = [input, advantage] \ud615\uc2dd\uc73c\ub85c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub2e4. 
\uadf8\ub7ec\ub098 tensor\ub97c \uadf8\ub300\ub85c \ub118\uae38 \uacbd\uc6b0 \uac12\uc744 \uc54c \uc218 \uc5c6\uc5b4 \uc5d0\ub7ec\uac00 \ub09c\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/execute.py\", line 60, in quick_execute\n    inputs, attrs, num_outputs)\nTypeError: An op outside of the function building code is being passed\na \"Graph\" tensor. It is possible to have Graph tensors\nleak out of the function building context by including a\ntf.init_scope in your function building code.\nFor example, the following function will fail:\n  @tf.function\n  def has_init_scope():\n    my_constant = tf.constant(1.)\n    with tf.init_scope():\n      added = my_constant * 2\nThe graph tensor has name: input_2:0\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"agent_a2c.py\", line 153, in &lt;module>\n    agent.train_model(state, action, reward, next_state, done )\n  File \"agent_a2c.py\", line 110, in train_model\n    self.actor.fit(x=[state, advantageTmp], y=actions, epochs = 1, verbose =0)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/keras\/engine\/training.py\", line 108, in _method_wrapper\n    return method(self, *args, **kwargs)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/keras\/engine\/training.py\", line 1098, in fit\n    tmp_logs = train_function(iterator)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/def_function.py\", line 780, in __call__\n    result = self._call(*args, **kwds)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/def_function.py\", line 840, in _call\n    return 
self._stateless_fn(*args, **kwds)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/function.py\", line 2829, in __call__\n    return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/function.py\", line 1848, in _filtered_call\n    cancellation_manager=cancellation_manager)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/function.py\", line 1924, in _call_flat\n    ctx, args, cancellation_manager=cancellation_manager))\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/function.py\", line 550, in call\n    ctx=ctx)\n  File \"\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/eager\/execute.py\", line 74, in quick_execute\n    \"tensors, but found {}\".format(keras_symbolic_tensors))\ntensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [&lt;tf.Tensor 'input_2:0' shape=(None, 1) dtype=float32>]\n<\/pre>\n\n\n\n<p>\uc774\ub7f4 \uacbd\uc6b0 eager.execution\uc744 \ub123\uc5b4\uc8fc\uba74 \uc5d0\ub7ec\ub97c \uc5c6\uc568 \uc218 \uc788\ub2e4. 
tensorflow 2.x\ubd80\ud130 \ucd94\uac00\ub418\uc5c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from env_reinforce import CarrierStorage \nfrom env_reinforce import Action\nimport random\nfrom collections import defaultdict\nimport numpy as np\nfrom termcolor import colored\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Input\nfrom keras.models  import Model\nfrom keras.optimizers import Adam\nimport copy\nfrom keras.models import model_from_json\nfrom collections import deque\nfrom keras import backend as K\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\n\n\n#custom loss\ub97c \uad6c\ud558\uae30 \uc704\ud574 tensor\ub97c \uc989\uc2dc \ud655\uc778.\nimport tensorflow as tf\ntf.config.experimental_run_functions_eagerly(True)\n\n\nclass A2CAgent(object):\n\n    def __init__(self):\n\n        #\ub2e8\uc21c\ud558\uac8c \ud588\uc744 \uacbd\uc6b0\uc5d0\ub294 40\uc73c\ub85c \uc0ac\uc6a9.\n        self.state_size = 40 #float value \ud558\ub098 \uc0ac\uc6a9\n        self.action_size = 7\n        self.value_size = 1\n\n        self.discount_factor = 0.99\n        self.actor_lr = 0.001\n        self.critic_lr = 0.005\n\n        self.actor = self.build_actor()\n        self.critic = self.build_critic()\n\n\n\n    # actor: \uc0c1\ud0dc\ub97c \ubc1b\uc544 \uac01 \ud589\ub3d9\uc758 \ud655\ub960\uc744 \uacc4\uc0b0\n    def build_actor(self):\n        input = Input(shape = (self.state_size,))\n        delta = Input(shape = [1])\n\n        print(\"delta is \", delta)\n\n        dense1 = Dense(self.state_size*2, activation='relu', kernel_initializer='he_uniform')(input)\n        action = Dense(self.action_size, activation = 'softmax', kernel_initializer='he_uniform')(dense1)\n        actor = Model(inputs = [input, delta], 
outputs = action)\n\n        def actor_loss(y_true, y_prediction):\n            out = K.clip(y_prediction, 1e-8, 1-1e-8)\n            log_likily = y_true*K.log(out)\n\n            return K.sum(-log_likily * delta)\n\n        actor.summary()\n        #loss function\uc774 \ubb38\uc81c..\n        actor.compile(loss = actor_loss, optimizer = Adam(lr=self.actor_lr))\n        return actor\n\n    # critic: \uc0c1\ud0dc\ub97c \ubc1b\uc544\uc11c \uc0c1\ud0dc\uc758 \uac00\uce58\ub97c \uacc4\uc0b0\n    def build_critic(self):\n        critic = Sequential()\n        critic.add(Dense(self.state_size*2, input_dim=self.state_size, activation='relu', kernel_initializer='he_uniform'))\n        #critic.add(Dense(24, input_dim=self.state_size, activation='relu', kernel_initializer='he_uniform'))\n        critic.add(Dense(self.value_size, activation='linear', kernel_initializer='he_uniform'))\n\n\n        critic.compile(loss = 'mse', optimizer = Adam(lr=self.critic_lr))\n        print(\"critic summary\")\n        critic.summary()\n        return critic\n\n\n    # \uac01 \ud0c0\uc784\uc2a4\ud15d\ub9c8\ub2e4 \uc815\ucc45\uc2e0\uacbd\ub9dd\uacfc \uac00\uce58\uc2e0\uacbd\ub9dd\uc744 \uc5c5\ub370\uc774\ud2b8\n    def train_model(self, state, action, reward, next_state, done):\n\n        value = self.critic.predict(state)[0][0]\n        next_value = self.critic.predict(next_state)[0][0]\n\n        #action\uc744 one-hot \uc73c\ub85c \ub9cc\ub4e6.\n        actions = np.zeros([1, self.action_size])\n        actions[np.arange(1), action] = 1.0\n\n        #reshape\n        actions = np.reshape(actions, [1, self.action_size])\n\n        # \ubca8\ub9cc \uae30\ub300 \ubc29\uc815\uc2dd\uc744 \uc774\uc6a9\ud55c \uc5b4\ub4dc\ubca4\ud2f0\uc9c0\uc640 \uc5c5\ub370\uc774\ud2b8 \ud0c0\uae43\n        if done:\n            advantage = reward - value\n            target = reward\n        else:\n            advantage = (reward + self.discount_factor * next_value) - value\n            target = reward + 
self.discount_factor * next_value\n\n\n        #tensorflow 2.3, keras 2.4\uc5d0 \ub9de\ub3c4\ub85d \uc218\uc815.\n        #np.array\ub97c \ucd94\uac00\ud574\uc57c \ud568.\n        target = np.reshape(target, [1,self.value_size])\n        #print(\"target shape is\", target.shape)\n        #critic\uc744 prediction\uacfc target\uc73c\ub85c \uc5c5\ub370\uc774\ud2b8\n        self.critic.fit(state, target, epochs = 1, verbose = 0)\n\n        advantageTmp = np.reshape(advantage, [1,1])\n\n        self.actor.fit(x=[state, advantageTmp], y=actions, epochs = 1, verbose =0)\n\n    def get_action(self, state):\n        #[[\ud655\uc728 \ud615\uc2dd\uc73c\ub85c \ucd9c\ub825]]\n        # [0]\uc744 \ub123\uc5b4 \uc90c\n        policy = self.actor.predict(state)[0]\n        #print(\"policy = \", policy)\n        return np.random.choice(self.action_size, 1, p=policy)[0]\n         \n\nif __name__ == '__main__':\n\n    #\uba54\uc778 \ud568\uc218\n    env = CarrierStorage()\n    agent = A2CAgent()\n    state = env.reset()\n\n    #state history\ub97c \uae30\ub85d\n    #historyState = []\n\n    scores, episodes = [], []\n    EPISODES = 1000\n\n    global_step = 0\n\n    for e in range (EPISODES):\n        done = False\n        score = 0\n        state = env.reset()\n        state = env.stateTo1hot(agent.state_size)\n        status = env.isItEnd()\n        if(status == 0 or status == 1):\n            done = True\n            reward = 0\n\n        while not done:\n            #env.render()\n            global_step += 1\n            action = agent.get_action(state)\n            #print(\"action is\", Action(action))\n            next_state, reward, done, info = env.step(action)\n            next_state = env.stateTo1hot(agent.state_size)\n            agent.train_model(state, action, reward, next_state, done )\n            score += reward\n            state = copy.deepcopy(next_state)\n\n        if done:\n            print(\"episode:\", e, \"  score:\", score, \"global_step\", global_step)\n  
          scores.append(score)\n            episodes.append(e)\n        plt.plot(episodes, scores, 'b')\n        plt.show()\n        plt.savefig(\".\/history.png\")\n<\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/10\/history.png\" alt=\"\" class=\"wp-image-4204\"\/><\/figure>\n\n\n\n<p>\ubb50\uac00 \uc798 \uc548\ub9de\ub294\uc9c0, 1,000\ud68c \ud559\uc2b5\ud558\uba74 \ubcc4 \ud6a8\uacfc\uac00 \uc5c6\ub2e4. \uac01 100\ubc88\uc9f8 \ud3c9\uade0\uc744 \ubcf4\uba74 \ub2e4\uc74c\uacfc \uac19\ub2e4. \uacbd\ud5d8 \ub9ac\ud50c\ub808\uc774\ub97c \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uc740 \uac83\uacfc \uac19\uc740 \ud604\uc0c1\uc774\ub2e4. <s>A3C\ub85c \uace0\uace0!<\/s><\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/10\/history-1.png\" alt=\"\" class=\"wp-image-4211\"\/><\/figure>\n\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<iframe loading=\"lazy\" title=\"Actor Critic Methods Are Easy With Keras\" width=\"500\" height=\"281\" src=\"https:\/\/www.youtube.com\/embed\/2vJtbAha3To?feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed-wordpress wp-block-embed is-type-wp-embed is-provider-pyimagesearch\"><div class=\"wp-block-embed__wrapper\">\n<blockquote class=\"wp-embedded-content\" data-secret=\"lZbeBfwBVh\"><a href=\"https:\/\/pyimagesearch.com\/2019\/02\/04\/keras-multiple-inputs-and-mixed-data\/\">Keras: Multiple Inputs and Mixed 
Data<\/a><\/blockquote><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; visibility: hidden;\" title=\"&#8220;Keras: Multiple Inputs and Mixed Data&#8221; &#8212; PyImageSearch\" src=\"https:\/\/pyimagesearch.com\/2019\/02\/04\/keras-multiple-inputs-and-mixed-data\/embed\/#?secret=yGwjOw0BRx#?secret=lZbeBfwBVh\" data-secret=\"lZbeBfwBVh\" width=\"500\" height=\"282\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe>\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed\"><div class=\"wp-block-embed__wrapper\">\nhttps:\/\/stackoverflow.com\/questions\/57704771\/inputs-to-eager-execution-function-cannot-be-keras-symbolic-tensors\n<\/div><\/figure>\n\n\n\n<figure class=\"wp-block-embed\"><div class=\"wp-block-embed__wrapper\">\nhttps:\/\/stackoverflow.com\/questions\/45961428\/make-a-custom-loss-function-in-keras\n<\/div><\/figure>\n\n\n\n<p>\uc804\ud1b5\uc801\uc778 fake input\uc73c\ub85c loss function\uc5d0 \ud544\uc694\ud55c \ud30c\ub77c\ubbf8\ud130\ub97c \uc804\ub2ec\ud558\ub294 \ubc29\ubc95\uc740 tensorflow 2.0 \uc774\uc0c1\uc5d0\uc11c\ub294 \ub3d9\uc791\ud558\uc9c0 \uc54a\ub294 \ub4ef \ud558\ub2e4.<\/p>\n\n\n\n<figure class=\"wp-block-embed\"><div class=\"wp-block-embed__wrapper\">\nhttps:\/\/github.com\/tensorflow\/tensorflow\/issues\/32142\n<\/div><\/figure>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">[16:01:00]>cat testCustomLoss.py \nimport keras \nfrom keras.layers import Input, Embedding, LSTM, Dense\nfrom keras.models import Model\nimport numpy as np\nfrom keras import backend as K\nimport tensorflow as tf\ntf.config.run_functions_eagerly(True)\n#tf.experimental_run_tf_function\nmain_input = Input(shape=(10,), dtype='int32', name='main_input')\n 
\n#x = Embedding(output_dim=12, input_dim=100, input_length=100)(main_input)\n \n#lstm_out = LSTM(4)(x)\n \n#auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)\n \nauxiliary_input = Input(shape=[1], name='aux_input')\n\n \n#x = keras.layers.concatenate([lstm_out, auxiliary_input])\nx = Dense(4, activation='relu')(main_input)\nmain_output = Dense(1, activation='sigmoid', name='main_output')(x)\n\ndef actor_loss_threeParameter(y_true, y_prediction, auxiliary_input):\n    #aux_in = tf.keras.backend.cast(auxiliary_input, dtype='float64')\n    out = K.clip(y_prediction, 1e-8, 1-1e-8)\n    log_likily = y_true*K.log(out)\n    return K.sum(-log_likily * auxiliary_input)\n\ndef actor_loss(delta):\n    def actor_loss_fit(y_true, y_prediction):\n        return actor_loss_threeParameter(y_true, y_prediction, delta)\n    return actor_loss_fit\n\n\ndef test_loss(y_true, y_prediction):\n    return (y_true - y_prediction)\n\n\n \nmodel = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])\n#model.compile(optimizer='rmsprop', loss='binary_crossentropy',loss_weights=[1., 0.2])\nmodel.compile(optimizer='rmsprop', loss=actor_loss(delta = auxiliary_input))\n#model.compile(optimizer='rmsprop', loss=test_loss)\n\n\nmodel.summary()\n\nmain_in = np.arange(0,10)\nmain_in = np.reshape(main_in,[1,10])\ncopied_main_in = main_in.astype(np.float32)\n\nmain_out = 10\nmain_out = np.reshape(main_out, [1,1])\ncopied_out = main_out.astype(np.float32)\n\n\naux_in = 20\naux_in = np.reshape(aux_in, [1,1])\nprint(\"main in\", main_in.shape)\nprint(\"main out\", main_out.shape)\nprint(\"aux input\", aux_in.shape)\nmodel.fit(x= [copied_main_in, aux_in], y=copied_out, epochs = 10, verbose = 1)<\/pre>\n\n\n\n<p>\uc774\ub807\uac8c \ub193\uace0 \ud14c\uc2a4\ud2b8\ud574\ubcf4\uba74 loss\uac00 0\uc5d0\uc11c \uc904\uc5b4\ub4e4\uc9c0 \uc54a\ub294\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"shell\" data-enlighter-theme=\"\" 
data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">tf-docker \/home\/mnt\/myStorage\/test_gradientTape > python testCustomLoss.py \n2020-11-18 16:00:53.764598: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n2020-11-18 16:00:54.659858: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1\n2020-11-18 16:00:54.678468: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.678865: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1716] Found device 0 with properties: \npciBusID: 0000:26:00.0 name: GeForce GTX 1060 6GB computeCapability: 6.1\ncoreClock: 1.7085GHz coreCount: 10 deviceMemorySize: 5.93GiB deviceMemoryBandwidth: 178.99GiB\/s\n2020-11-18 16:00:54.678890: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n2020-11-18 16:00:54.680022: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10\n2020-11-18 16:00:54.681178: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10\n2020-11-18 16:00:54.681346: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10\n2020-11-18 16:00:54.682446: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10\n2020-11-18 16:00:54.683116: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10\n2020-11-18 16:00:54.685549: I 
tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7\n2020-11-18 16:00:54.685688: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.686097: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.686405: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1858] Adding visible gpu devices: 0\n2020-11-18 16:00:54.686688: I tensorflow\/core\/platform\/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA\nTo enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n2020-11-18 16:00:54.710941: I tensorflow\/core\/platform\/profile_utils\/cpu_utils.cc:104] CPU Frequency: 3399500000 Hz\n2020-11-18 16:00:54.711710: I tensorflow\/compiler\/xla\/service\/service.cc:168] XLA service 0x41b63d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n2020-11-18 16:00:54.711754: I tensorflow\/compiler\/xla\/service\/service.cc:176]   StreamExecutor device (0): Host, Default Version\n2020-11-18 16:00:54.986824: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.987296: I tensorflow\/compiler\/xla\/service\/service.cc:168] XLA service 0x41b8620 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices:\n2020-11-18 16:00:54.987352: I tensorflow\/compiler\/xla\/service\/service.cc:176]   StreamExecutor device (0): GeForce GTX 1060 6GB, Compute Capability 6.1\n2020-11-18 16:00:54.987771: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.988673: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1716] Found device 0 with properties: \npciBusID: 0000:26:00.0 name: GeForce GTX 1060 6GB computeCapability: 6.1\ncoreClock: 1.7085GHz coreCount: 10 deviceMemorySize: 5.93GiB deviceMemoryBandwidth: 178.99GiB\/s\n2020-11-18 16:00:54.988743: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n2020-11-18 16:00:54.988804: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10\n2020-11-18 16:00:54.988846: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10\n2020-11-18 16:00:54.988887: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10\n2020-11-18 16:00:54.988926: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10\n2020-11-18 16:00:54.988972: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10\n2020-11-18 16:00:54.989018: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7\n2020-11-18 16:00:54.989196: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.990189: I 
tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:54.991068: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1858] Adding visible gpu devices: 0\n2020-11-18 16:00:54.991140: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n2020-11-18 16:00:55.378048: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:\n2020-11-18 16:00:55.378099: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1263]      0 \n2020-11-18 16:00:55.378107: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1276] 0:   N \n2020-11-18 16:00:55.378324: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:55.378869: I tensorflow\/stream_executor\/cuda\/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n2020-11-18 16:00:55.379209: I tensorflow\/core\/common_runtime\/gpu\/gpu_device.cc:1402] Created TensorFlow device (\/job:localhost\/replica:0\/task:0\/device:GPU:0 with 4990 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:26:00.0, compute capability: 6.1)\nModel: \"functional_1\"\n__________________________________________________________________________________________________\nLayer (type)                    Output Shape         Param #     Connected to                     \n==================================================================================================\nmain_input (InputLayer)         [(None, 10)]         0                                            
\n__________________________________________________________________________________________________\ndense (Dense)                   (None, 4)            44          main_input[0][0]                 \n__________________________________________________________________________________________________\naux_input (InputLayer)          [(None, 1)]          0                                            \n__________________________________________________________________________________________________\nmain_output (Dense)             (None, 1)            5           dense[0][0]                      \n==================================================================================================\nTotal params: 49\nTrainable params: 49\nNon-trainable params: 0\n__________________________________________________________________________________________________\nmain in (1, 10)\nmain out (1, 1)\naux input (1, 1)\n\/usr\/local\/lib\/python3.6\/dist-packages\/tensorflow\/python\/data\/ops\/dataset_ops.py:3350: UserWarning: Even though the tf.config.experimental_run_functions_eagerly option is set, this option does not apply to tf.data functions. 
tf.data functions are still traced and executed as graphs.\n  \"Even though the tf.config.experimental_run_functions_eagerly \"\nEpoch 1\/10\n2020-11-18 16:00:55.511115: I tensorflow\/stream_executor\/platform\/default\/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10\n1\/1 [==============================] - 0s 428us\/step - loss: 0.0000e+00\nEpoch 2\/10\n1\/1 [==============================] - 0s 331us\/step - loss: 0.0000e+00\nEpoch 3\/10\n1\/1 [==============================] - 0s 333us\/step - loss: 0.0000e+00\nEpoch 4\/10\n1\/1 [==============================] - 0s 303us\/step - loss: 0.0000e+00\nEpoch 5\/10\n1\/1 [==============================] - 0s 302us\/step - loss: 0.0000e+00\nEpoch 6\/10\n1\/1 [==============================] - 0s 277us\/step - loss: 0.0000e+00\nEpoch 7\/10\n1\/1 [==============================] - 0s 348us\/step - loss: 0.0000e+00\nEpoch 8\/10\n1\/1 [==============================] - 0s 289us\/step - loss: 0.0000e+00\nEpoch 9\/10\n1\/1 [==============================] - 0s 275us\/step - loss: 0.0000e+00\nEpoch 10\/10\n1\/1 [==============================] - 0s 269us\/step - loss: 0.0000e+00\ntf-docker \/home\/mnt\/myStorage\/test_gradientTape > \n<\/pre>\n\n\n\n<p>\ud639\uc2dc\ub098 \ud574\uc11c loss function \uc548\uc5d0 \ubcc0\uc218 \ub300\uc2e0 \uc22b\uc790\ub97c \ub123\uc5b4\ubcf4\ub2c8 loss\uac00 \ubcc0\ud588\ub2e4. tensorflow 2.x\uc5d0\uc11c\ub294 fit\uc73c\ub85c \ud560 \uc218 \uc788\uc744\uae4c\ub77c\ub294 \ub9c8\uc74c\uc744 \uc811\uace0, 2.x\uc774 \uc9c0\uc6d0\ud558\ub294 gradient tape\ub85c \ud574\uc57c \ud560 \ub4ef \ud558\ub2e4. \ub2e4\ud589\ud788 \ub204\uac00 \uc774\ubbf8 \uad6c\ud604\ud588\ub2e4.<\/p>\n\n\n\n<p>20.11.21.  \uc0bd\uc9c8 \ub05d\uc5d0 \uc65c gradient tape\uc73c\ub85c \uc5c5\ub370\uc774\ud2b8 \ud560 \uc218 \uc5c6\ub294\uc9c0 \uc54c\uc558\ub2e4.  gradientTape()\uc744 \uc0ac\uc6a9\ud55c \ub4a4, model\ub85c \uc785\ub825\uc744 \uc9d1\uc5b4 \ub123\uc5b4\uc57c \ud55c\ub2e4. 
\ubd84\ub9ac\ub418\uc5b4 \uc788\uc73c\uba74 \uc598\uac00 \uc54c \uc218 \uc5c6\ub2e4 \uc5d0\ub7ec\ub09c\ub2e4. \ucf54\ub4dc\ub294 \ub11d\ub9c8 \uc870\uac01\uc774 \ub418\uac00\uace0 \uc788\ub2e4. \uc544\uc9c1\ub3c4 \ud55c\ubc88 \ud568\uc815\uc5d0 \ube60\uc9c0\uba74 \ube60\uc838 \ub098\uc62c \uc218 \uc5c6\ub2e4. \uc5ec\ub7ec \uc0d8\ud50c\uc744 \uc218\uc9d1\ud558\uc5ec \ud55c\ubc88\uc5d0 \ud559\uc2b5\uc2dc\ucf1c\uc57c \ud560 \ub4ef \ud558\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from env_reinforce import CarrierStorage \nfrom env_reinforce import Action\nimport random\nfrom collections import defaultdict\nimport numpy as np\nfrom termcolor import colored\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Input\nfrom keras.models  import Model\nfrom keras.optimizers import Adam\nimport copy\nfrom keras.models import model_from_json\nfrom collections import deque\nfrom keras import backend as K\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\n\n\n#custom loss\ub97c \uad6c\ud558\uae30 \uc704\ud574 tensor\ub97c \uc989\uc2dc \ud655\uc778.\nimport tensorflow as tf\ntf.config.run_functions_eagerly(True)\n\n\n#\uc5ec\uae30 \ucc38\uc870\n#https:\/\/github.com\/keras-team\/keras-io\/blob\/master\/examples\/rl\/actor_critic_cartpole.py\n\nclass A2CAgent(object):\n\n    def __init__(self):\n\n        #\ub2e8\uc21c\ud558\uac8c \ud588\uc744 \uacbd\uc6b0\uc5d0\ub294 40\uc73c\ub85c \uc0ac\uc6a9.\n        self.state_size = 40 #float value \ud558\ub098 \uc0ac\uc6a9\n        self.action_size = 7\n\n        self.discount_factor = 0.99\n        self.actor_lr = 0.001\n        self.critic_lr = 0.005\n\n        self.DEFINE_NEW = True\n        self.RENDER = True\n\n        #self.actor = self.build_actor()\n        #self.critic = 
self.build_critic()\n        self.model = self.build_actorCritic()\n\n    def build_actorCritic(self):\n        input = Input(shape = (self.state_size,))\n        common = Dense(self.state_size*2, activation='relu', kernel_initializer='he_uniform')(input)\n        action_prob = Dense(self.action_size, activation = 'softmax', kernel_initializer='he_uniform')(common)\n        critic = Dense(1)(common)\n        model = Model(inputs = input, outputs = [action_prob, critic])\n        return model\n\n\n\n    def get_action(self, action_prob):\n        #[[\ud655\uc728 \ud615\uc2dd\uc73c\ub85c \ucd9c\ub825]]\n        # [0]\uc744 \ub123\uc5b4 \uc90c\n        #print(\"policy = \", policy)\n        return np.random.choice(self.action_size, 1, p=np.squeeze(action_prob))[0]\n\nif __name__ == '__main__':\n\n    #\uba54\uc778 \ud568\uc218\n    env = CarrierStorage()\n    agent = A2CAgent()\n    state = env.reset()\n\n    #state history\ub97c \uae30\ub85d\n    #historyState = []\n\n    scores, episodes, score_average = [], [], []\n    EPISODES = 100000\n\n    global_step = 0\n    average = 0\n\n    for e in range (EPISODES):\n        done = False\n        score = 0\n        state = env.reset()\n        state = env.stateTo1hot(agent.state_size)\n        status = env.isItEnd()\n        if(status == 0 or status == 1):\n            done = True\n            reward = 0\n\n        while not done:\n            if(agent.RENDER == True):\n                env.render()\n            global_step += 1\n            with tf.GradientTape() as tape:\n                #tape \uc544\ub798\ub85c \ubaa8\ub378\uc744 \uc785\ub825\ud574\uc57c input, output \uad00\uacc4\ub97c \uc54c \uc218 \uc788\uc74c.\n                #actor, critic \ubaa8\ub450 \uc608\uce21.\n                action_prob, critic = agent.model(state)\n                #numpy state\ub97c tensor\ub85c \ubc14\uafb8\uace0, overide\ud55c call\ub85c \uc785\ub825.\n                #state = np.reshape(state, [1, agent.state_size])\n                
#state = tf.convert_to_tensor(state, dtype=tf.int8, dtype_hint=None, name=None)\n                #action_prob, critic = agent.model.call(state)\n                print(\"action prob\", action_prob)\n                #print(\"critic\", critic)\n                #action\uc740 action tf.Tensor(\n                #[[0.16487105 0.0549401  0.12524831 0.1738248  0.31119537 0.07012787  0.0997925 ]], shape=(1, 7), dtype=float32)\n                #critic\uc740 \n                #critic tf.Tensor([[0.04798129]], shape=(1, 1), dtype=float32)\n                #\uc73c\ub85c \ucd9c\ub825.\n                #action_prob\ub85c action\uc744 \uad6c\ud568.\n                action = agent.get_action(action_prob[0])\n                #print(\"Action is\", Action(action))\n                #\n                #print(\"critic\", critic)\n                #print(\"next critic\", next_critic)\n                if(agent.RENDER == True):\n                    print(\"action is\", Action(action))\n                next_state, reward, done, info = env.step(action)\n                next_state = env.stateTo1hot(agent.state_size)\n                _, next_critic = agent.model(next_state)\n                advantage = reward  + (1.0 - done) * agent.discount_factor * next_critic - critic\n                #[ [prob, prob, ... 
] ]\ud615\uc2dd\uc73c\ub85c \uc785\ub825\uc774 \ub4e4\uc5b4\uc62e\n                actor_loss = tf.math.log(action_prob[0, action]) * advantage\n                critic_loss =  advantage**2\n                print(\"actor loss \", actor_loss)\n                print(\"critic loss \", critic_loss)\n                #\ubaa8\ub378\uc774 \ud558\ub098\ub77c actor_loss + critic_loss \ub354\ud574\uc11c \ud55c\ubc88\uc5d0 train\n                total_loss = actor_loss + critic_loss\n                grads = tape.gradient(total_loss, agent.model.trainable_weights)\n                #print(\"grad\" , grads)\n                optimizer = Adam(learning_rate = 0.01)\n                optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))\n\n            score += reward\n            average = average + score\n            state = copy.deepcopy(next_state)\n\n        if done:\n            if(agent.RENDER == True):\n                print(\"episode:\", e, \"  score:\", score)\n\n            if(e%1000 == 0 and e>1):\n                print(\"episode:\", e, \"  score:\", score, \"global_step\", global_step,\"average\", average)\n                scores.append(score)\n                score_average.append(average)\n                episodes.append(e)\n\n                #\ub9e4 100\ud68c\ub9c8\ub2e4 average \ucd08\uae30\ud654.\n                average = 0\n                model_json_actor = agent.model.to_json()\n                model_json_critic = agent.model.to_json()\n                with open(\".\/201027ActorA2c.json\", \"w\") as json_file:\n                    json_file.write(model_json_actor)\n                with open(\".\/201027CriticA2c.json\", \"w\") as json_file:\n                    json_file.write(model_json_critic)\n\n                agent.model.save_weights(\".\/201027weightActorA2c.h5\")\n                agent.model.save_weights(\".\/201027weightCriticA2c.h5\")\n\n        plt.plot(episodes, score_average, 'b')\n        #plt.show()\n        
plt.savefig(\".\/history.png\")\n<\/pre>\n\n\n\n<p>tensorflow 2.x\uc774 fit\uc744 \uc9c0\uc6d0\ud558\uc9c0 \uc54a\uace0 gradientTape\ub85c \ud559\uc2b5\uc2dc\ucf1c\uc57c \ud558\uc5ec \uc880 \ubd88\ud3b8\ud558\ub2e4. \ub098\uc628\uc9c0 \uc624\ub798\ub418\uc5b4 \uc5ec\ub7ec \uaf3c\uc218\ub4e4\uc744 \uc368\uba39\uc744 \uc218 \uc5c6\ub2e4. \uc218\uc815\ud558\uace0 \uc218\uc815\ud558\uc5ec \uc544\ub798\uc640 \uac19\uc774 \ud588\ub2e4. \uc810\uc218\ub294 DQN\ubcf4\ub2e4 \uc798 \uc548\uc624\ub974\ub294 \ud3b8\uc774\ub2e4. 100\uac1c\uc529 \uc0d8\ud50c\uc744 \uc800\uc7a5\ud558\uc5ec \ud559\uc2b5\uc2dc\ucf30\ub294\ub370, \ucd1d\uc810\uc774 1000\uc810 \ub118\uae30\uae30 \ud798\ub4e4\ub2e4. \uadf8\ub798\ub3c4 \uc810\uc218\uac00 \uc624\ub974\ub77d \ub0b4\ub9ac\ub77d \ud558\ub294 \ud328\ud134\uc744 \ubcf4\uba74 \uc54c\uace0\ub9ac\uc998\uc740 \uc815\ud655\ud55c \ub4ef \ud558\ub2e4. gradientTape \uc548\uc5d0 \ub123\uc744 \ub54c loss \uad6c\ud558\ub294 \ubd80\ubd84\ub3c4 \uac19\uc740 \ud0ed\uc5d0 \uc788\uc5b4\uc57c \ud55c\ub2e4. 
\uc798 \ubab0\ub77c \ud55c\ucc38 \ud574\uba68\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from env_reinforce import CarrierStorage \nfrom env_reinforce import Action\nimport random\nfrom collections import defaultdict\nimport numpy as np\nfrom termcolor import colored\nfrom keras.models import Sequential\nfrom keras.layers import Dense, Input\nfrom keras.models  import Model\nfrom keras.optimizers import Adam\nimport copy\nfrom keras.models import model_from_json\nfrom collections import deque\nfrom keras import backend as K\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\neps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0\n\n#custom loss\ub97c \uad6c\ud558\uae30 \uc704\ud574 tensor\ub97c \uc989\uc2dc \ud655\uc778.\nimport tensorflow as tf\ntf.config.run_functions_eagerly(True)\n\n\n#\uc5ec\uae30 \ucc38\uc870\n#https:\/\/github.com\/keras-team\/keras-io\/blob\/master\/examples\/rl\/actor_critic_cartpole.py\n\nclass A2CAgent(object):\n\n    def __init__(self):\n\n        #\ub2e8\uc21c\ud558\uac8c \ud588\uc744 \uacbd\uc6b0\uc5d0\ub294 40\uc73c\ub85c \uc0ac\uc6a9.\n        self.state_size = 40 #float value \ud558\ub098 \uc0ac\uc6a9\n        self.action_size = 7\n\n        self.discount_factor = 0.8\n\n        self.DEFINE_NEW = False\n        self.RENDER = False\n\n        #self.actor = self.build_actor()\n        #self.critic = self.build_critic()\n        self.model = self.build_actorCritic()\n\n    def build_actorCritic(self):\n        if(self.DEFINE_NEW == True):\n            input = Input(shape = (self.state_size,))\n            common = Dense(self.state_size*24, activation='relu', kernel_initializer='he_uniform')(input)\n            common2 = Dense(self.action_size*12, activation = 
'relu',kernel_initializer='he_uniform')(common)\n            action_prob = Dense(self.action_size, activation = 'softmax', kernel_initializer='he_uniform')(common2)\n            critic = Dense(1)(common2)\n            model = Model(inputs = input, outputs = [action_prob, critic])\n\n        else:\n            #\uc788\ub294 \ub370\uc774\ud130 \ub85c\ub529\n            json_actor = open(\".\/201027ActorA2c.json\", \"r\")\n            loaded_actor = json_actor.read()\n            json_actor.close()\n            model= model_from_json(loaded_actor)\n            print(\"\ubaa8\ub378 %s\ub97c \ub85c\ub529\"%json_actor)\n            weight_actor = \".\/201027weightCriticA2c.h5\"\n            model.load_weights(weight_actor)\n            print(\"\uc800\uc7a5\ub41c weights %s\ub97c \ub85c\ub529\"%weight_actor)\n        return model\n\n    def get_action(self, action_prob):\n        #[[\ud655\uc728 \ud615\uc2dd\uc73c\ub85c \ucd9c\ub825]]\n        # [0]\uc744 \ub123\uc5b4 \uc90c\n        #print(\"policy = \", policy)\n        return np.random.choice(self.action_size, 1, p=np.squeeze(action_prob))[0]\n\nif __name__ == '__main__':\n\n    #\uba54\uc778 \ud568\uc218\n    env = CarrierStorage()\n    agent = A2CAgent()\n    state = env.reset()\n\n    #state history\ub97c \uae30\ub85d\n    #historyState = []\n\n    scores, episodes, score_average = [], [], []\n    EPISODES = 100000\n\n    global_step = 0\n    average = 0\n    huber_loss = tf.losses.Huber()\n    optimizer = Adam(learning_rate = 0.0001)\n\n\n    #action, critic, reward\ub97c list\ub85c \uae30\ub85d.\n    actionprob_history, critic_history, reward_history = [], [], []\n    \n\n    for e in range (EPISODES):\n        #print(\"episode check\", e)\n        done = False\n        score = 0\n        state = env.reset()\n        state = env.stateTo1hot(agent.state_size)\n        status = env.isItEnd()\n        #print(\"reseted\")\n        if(status == 0 or status == 1):\n            done = True\n            reward = 0\n       
     #print(\"zero rewards\")\n            #\uc5ec\uae30\uc5d0\uc11c apply.gradients\ub97c \uc801\uc6a9\ud55c\uba74 \uc548\ub428.\n        while not done:\n            if(agent.RENDER == True):\n                env.render()\n            global_step += 1\n            #tape \uc544\ub798\ub85c \ubaa8\ub378\uc744 \uc785\ub825\ud574\uc57c input, output \uad00\uacc4\ub97c \uc54c \uc218 \uc788\uc74c.\n            #actor, critic \ubaa8\ub450 \uc608\uce21.\n\n            #with tf.GradientTape(persistent=True) as tape:\n            with tf.GradientTape() as tape:\n                action_prob, critic = agent.model(state)\n\n                #action\uc740 action tf.Tensor(\n                #[[0.16487105 0.0549401  0.12524831 0.1738248  0.31119537 0.07012787  0.0997925 ]], shape=(1, 7), dtype=float32)\n                #critic\uc740 \n                #critic tf.Tensor([[0.04798129]], shape=(1, 1), dtype=float32)\n                #\uc73c\ub85c \ucd9c\ub825.\n                #action_prob\ub85c action\uc744 \uad6c\ud568.\n                action = agent.get_action(action_prob[0])\n                #print(\"actionprob history\",actionprob_history)\n                if(agent.RENDER == True):\n                    print(\"action is\", Action(action))\n                next_state, reward, done, info = env.step(action)\n\n                #history\uc5d0 \ucd94\uac00\n                critic_history.append(critic[0,0])\n                actionprob_history.append(tf.math.log(action_prob[0, action]))\n                reward_history.append(reward)\n                next_state = env.stateTo1hot(agent.state_size)\n                #_, next_critic = agent.model(next_state)\n                score += reward\n                average = average + score\n                state = copy.deepcopy(next_state)\n\n                #rewards \ub97c discounted factor\ub85c \ub2e4\uc2dc \uacc4\uc0b0.\n                returns = []\n                discounted_sum = 0\n                for r in reward_history[::-1]:\n          
          discounted_sum = r + agent.discount_factor* discounted_sum\n                    returns.insert(0, discounted_sum)\n\n                # Normalize\n                returns = np.array(returns)\n                returns = (returns - np.mean(returns)) \/ (np.std(returns) + eps)\n                returns = returns.tolist()\n\n                # Calculating loss values to update our network\n                history = zip(actionprob_history, critic_history, returns)\n                actor_losses = []\n                critic_losses = []\n                for log_prob, value, ret in history:\n                    advantage = ret - value\n                    #advantage = reward  + (1.0 - done) * agent.discount_factor * next_critic - critic\n                    #[ [prob, prob, ... ] ]\ud615\uc2dd\uc73c\ub85c \uc785\ub825\uc774 \ub4e4\uc5b4\uc62e\n                    actor_losses.append(-log_prob*advantage)\n                    #critic_losses.append(advantage**2)\n                    critic_losses.append(huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0)))\n                    #print(\"actor loss \", actor_losses)\n                    #print(\"critic loss \", critic_losses)\n                    #\ubaa8\ub378\uc774 \ud558\ub098\ub77c actor_loss + critic_loss \ub354\ud574\uc11c \ud55c\ubc88\uc5d0 train\n                    #print(\"grad\" , grads)\n                    #print(\"history\", len(actionprob_history))\n               \n                total_loss = actor_losses + critic_losses\n                #loss\ub3c4 gradientTape \uc548\uc5d0 \ub4e4\uc5b4\uc788\uc5b4\uc57c \ud568.\n            if(len(actionprob_history) > 0 ):\n                #print(\"actor losses\", len(actor_losses))\n                #print(\"critic losses\", len(critic_losses))\n                #print(\"check\", len(total_loss))\n                grads = tape.gradient(total_loss, agent.model.trainable_weights)\n                #print(\"grads\", grads)\n                
optimizer.apply_gradients(zip(grads, agent.model.trainable_weights))\n                #print(\"actionprob history\", actionprob_history)\n                #print(\"cirtic,\",critic_history)\n                #print(\"rewards\", reward_history)\n                #print(\"actor losses\", len(actor_losses))\n                #print(\"critic losses\", len(critic_losses))\n                #print(\"total loss\", len(total_loss))\n\n                #print(\"actionprob_history\", len(actionprob_history))\n                #print(\"episodes\", e)\n        if(agent.RENDER == True):\n            print(\"episode:\", e, \"  score:\", score)\n        if(e%100 == 0):\n            print(\"history length is\", len(actionprob_history))\n            print(\"episode:\", e, \"  score:\", score, \"global_step\", global_step,\"average\", average)\n            scores.append(score)\n            score_average.append(average)\n            episodes.append(e)\n            #\ub9e4 1000\ud68c\ub9c8\ub2e4 average \ucd08\uae30\ud654.\n            average = 0\n            model_json_actor = agent.model.to_json()\n            with open(\".\/201027ActorA2c.json\", \"w\") as json_file:\n                json_file.write(model_json_actor)\n            agent.model.save_weights(\".\/201027weightCriticA2c.h5\")\n            plt.plot(episodes, score_average, 'b')\n            #plt.show()\n            plt.savefig(\".\/history.png\")\n        #\ube44\uc5b4\uc788\ub294 history\ub85c gradients\ub97c \uacc4\uc0b0\ud558\uc9c0 \uc54a\ub3c4\ub85d..\n        #print(\"episode\", e)\n            actionprob_history.clear()\n            critic_history.clear()\n            reward_history.clear()\n\n    plt.plot(episodes, score_average, 'b')\n    #plt.show()\n    plt.savefig(\".\/history.png\")\n<\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"480\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2020\/11\/history.png\" alt=\"\" 
class=\"wp-image-4252\"\/><\/figure>\n\n\n\n<p>\uc26c\uc6b4 \uc791\uc5c5\uc740 \uc798 \ud574\ub0b4\uc5b4 \uc810\uc218\ub97c \ub0b4\ub294\ub370, \ub300\ucc28\uac00 \uad6c\uc11d\uc5d0 \ucc98\ubc15\ud600 \uc788\uc73c\uba74 \uc0bd\uc9c8\ub9cc \ud558\ub2e4 \uc810\uc218\ub97c \ubaa8\ub450 \uae4c \uba39\ub294\ub2e4. DQN\ubcf4\ub2e4 \uc131\ub2a5\uc774 \ub0ae\ub2e4. \uc774\uc81c \ub300\ub9dd\uc758 A3C\ub85c \uace0\uace0.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>a2c\ub97c keras\ub85c \uc0ac\uc6a9\ud558\ub824\uba74 loss function\uc744 \uc0c8\ub86d\uac8c \uc815\uc758\ud574\uc57c \ud55c\ub2e4. \ubcf4\ud1b5 fit\uc73c\ub85c \ub118\uc5b4\uc624\ub294 \uc778\uc790\uac00 input, output \uac01 \ud55c \uac1c\uc529 \uc0ac\uc6a9\ud55c\ub2e4. input\uc774\ub098 output\uc73c\ub85c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_post_was_ever_published":false},"categories":[33],"tags":[820,819,271,109],"class_list":["post-4197","post","type-post","status-publish","format-standard","hentry","category-tensorflow","tag-a2c","tag-keras","tag-reinforcementlearning","tag-tensorflow"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4197","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/comments?post=4197"}],"version-history":[{"count":12,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4197\/revisions"}],"predecessor-version":[{"id":4254,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/4197\/revisions\/4254"}],"wp:attachment":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media?parent=4197"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/categories?post=4197"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/tags?post=4197"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}