{"id":3319,"date":"2019-12-21T23:14:09","date_gmt":"2019-12-21T14:14:09","guid":{"rendered":"https:\/\/now0930.pe.kr\/wordpress\/?p=3319"},"modified":"2019-12-21T23:20:08","modified_gmt":"2019-12-21T14:20:08","slug":"carppole-deep-q-network-%ec%9d%b4%ed%95%b4","status":"publish","type":"post","link":"https:\/\/now0930.pe.kr\/wordpress\/carppole-deep-q-network-%ec%9d%b4%ed%95%b4\/","title":{"rendered":"CartPole Deep Q Network \uc774\ud574"},"content":{"rendered":"\n<p><a href=\"http:\/\/www.kyobobook.co.kr\/product\/detailViewKor.laf?mallGb=KOR&amp;ejkGb=KOR&amp;linkClass=&amp;barcode=9791158390723\">\uac15\uc544\uc9c0 \ucc45<\/a>\uc5d0 \ub098\uc628 \uc608\uc81c\ub97c \uc2e4\ud589\ud588\ub2e4. \ucc45 \uc2e4\ud589 \ud658\uacbd\uacfc \ub0b4 \uadf8\uac83\uc774 \ub2ec\ub77c \uc2e4\ud589\ud560 \uc218 \uc5c6\uc5c8\ub2e4. \ub098\ub294 docker\ub85c tensorflow\ub97c \uc0ac\uc6a9\ud55c\ub2e4. \uadf8\uac83\ub3c4 cpu\uac00 avx \ub4f1 \uc744 \uc9c0\uc6d0\ud558\uc9c0 \uc54a\uc544 \uc9c1\uc811 \ucef4\ud30c\uc77c\ud588\ub2e4. openAi gym\uc740 GUI \ud658\uacbd\uc5d0\uc11c \uc2e4\ud589\ub418\uc5b4 docker\ub85c \uad6c\ub3d9\ud558\uae30 \ud798\ub4e4\ub2e4. \ub370\uc2a4\ud06c\ud0d1\uc744 \uac70\uc2e4\uc5d0 \uc124\uce58\ud558\uc5ec \ub2e4\ub978 \ub178\ud2b8\ubd81\uc5d0\uc11c ssh\ub85c \uc811\uc18d\ud558\uc5ec \uc0ac\uc6a9\ud55c\ub2e4. \uc774\ub7f0 \ubaa8\ub4e0 \ubb38\uc81c\ub97c jupyter notebook\ub85c \ud574\uacb0\ud588\ub2e4. 
\uc804 \uae00\uc5d0 \uc124\uba85\ud55c\ub300\ub85c docker\ub97c \uc544\ub798 \uba85\ub839\uc73c\ub85c \uad6c\ub3d9\ud55c\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">docker run -it -v \/run\/user\/1000:\/run\/user\/1000 -v \/dev:\/dev -v \/tmp\/.X11-unix:\/tmp\/.X11-unix:ro --privileged --ipc=host --shm-size=256m --net=host -e DISPLAY=$DISPLAY -e XDG_RUNTIME_DIR=\/run\/user\/1000 --runtime=nvidia -e LC_ALL=C.UTF-8 -v \/home\/now0930\/tensorflow\/:\/home\/mnt tensorflow\/tensorflow:1.12.0-rc2-gpu-py3-keras \/bin\/bash<\/pre>\n\n\n\n<p>jupyter notebook\uc744 \uc2e4\ud589\ud55c\ub2e4. uid 1000\uc73c\ub85c \uc2e4\ud589\ud558\ub294 \ubc29\ubc95\uc744 \ubaa8\ub974\uaca0\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">docker exec -w \/home\/mnt\/gym vibrant_banach \/usr\/bin\/xvfb-run -s \"-screen 0 1400x600x24\" jupyter notebook --allow-root<\/pre>\n\n\n\n<p><a href=\"https:\/\/github.com\/rlcode\/reinforcement-learning-kr\/blob\/master\/2-cartpole\/1-dqn\/cartpole_dqn.py\">\ucc45 \ucf54\ub4dc<\/a>\ub97c \uc2e4\ud589\ud558\uae30 \uc704\ud574 \uc544\ub798\uc640 \uac19\uc774 \uc218\uc815\ud588\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import sys\nimport gym\nimport pylab\nimport random\nimport numpy as np\nfrom collections import deque\nfrom keras.layers import Dense\nfrom keras.optimizers import Adam\nfrom keras.models import Sequential\nfrom gym 
import wrappers\n\nEPISODES = 300\n\n\n# \uce74\ud2b8\ud3f4 \uc608\uc81c\uc5d0\uc11c\uc758 DQN \uc5d0\uc774\uc804\ud2b8\nclass DQNAgent:\n    def __init__(self, state_size, action_size):\n        self.render = False\n        self.load_model = False\n\n        # \uc0c1\ud0dc\uc640 \ud589\ub3d9\uc758 \ud06c\uae30 \uc815\uc758\n        self.state_size = state_size\n        self.action_size = action_size\n        print(\"self.state_size\ub294\", self.state_size)\n        print(\"self.action_size\ub294\", self.action_size)\n        # DQN \ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\n        self.discount_factor = 0.99\n        self.learning_rate = 0.001\n        self.epsilon = 1.0\n        self.epsilon_decay = 0.999\n        self.epsilon_min = 0.01\n        self.batch_size = 64\n        #self.batch_size = 1\n        self.train_start = 1000\n\n        # \ub9ac\ud50c\ub808\uc774 \uba54\ubaa8\ub9ac, \ucd5c\ub300 \ud06c\uae30 2000\n        self.memory = deque(maxlen=2000)\n\n        # \ubaa8\ub378\uacfc \ud0c0\uae43 \ubaa8\ub378 \uc0dd\uc131\n        self.model = self.build_model()\n        self.target_model = self.build_model()\n\n        # \ud0c0\uae43 \ubaa8\ub378 \ucd08\uae30\ud654\n        self.update_target_model()\n\n        if self.load_model:\n            self.model.load_weights(\".\/save_model\/cartpole_dqn_trained.h5\")\n\n    # \uc0c1\ud0dc\uac00 \uc785\ub825, \ud050\ud568\uc218\uac00 \ucd9c\ub825\uc778 \uc778\uacf5\uc2e0\uacbd\ub9dd \uc0dd\uc131\n    def build_model(self):\n        model = Sequential()\n        model.add(Dense(24, input_dim=self.state_size, activation='relu',\n                        kernel_initializer='he_uniform'))\n        model.add(Dense(24, activation='relu',\n                        kernel_initializer='he_uniform'))\n        model.add(Dense(self.action_size, activation='linear',\n                        kernel_initializer='he_uniform'))\n        model.summary()\n        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))\n      
  return model\n\n    # \ud0c0\uae43 \ubaa8\ub378\uc744 \ubaa8\ub378\uc758 \uac00\uc911\uce58\ub85c \uc5c5\ub370\uc774\ud2b8\n    def update_target_model(self):\n        self.target_model.set_weights(self.model.get_weights())\n\n    # \uc785\uc2e4\ub860 \ud0d0\uc695 \uc815\ucc45\uc73c\ub85c \ud589\ub3d9 \uc120\ud0dd\n    def get_action(self, state):\n        if np.random.rand() &lt;= self.epsilon:\n            return random.randrange(self.action_size)\n        else:\n            q_value = self.model.predict(state)\n            #print(\"q_value\ub294\", q_value)\n            #print(\"argamx(q_value)\ub294\", np.argmax(q_value[0]))\n            #print(\"State\ub294\", state)\n            return np.argmax(q_value[0])\n\n    # \uc0d8\ud50c &lt;s, a, r, s'>\uc744 \ub9ac\ud50c\ub808\uc774 \uba54\ubaa8\ub9ac\uc5d0 \uc800\uc7a5\n    def append_sample(self, state, action, reward, next_state, done):\n        self.memory.append((state, action, reward, next_state, done))\n\n    # \ub9ac\ud50c\ub808\uc774 \uba54\ubaa8\ub9ac\uc5d0\uc11c \ubb34\uc791\uc704\ub85c \ucd94\ucd9c\ud55c \ubc30\uce58\ub85c \ubaa8\ub378 \ud559\uc2b5\n    def train_model(self):\n        if self.epsilon > self.epsilon_min:\n            self.epsilon *= self.epsilon_decay\n\n        # \uba54\ubaa8\ub9ac\uc5d0\uc11c \ubc30\uce58 \ud06c\uae30\ub9cc\ud07c \ubb34\uc791\uc704\ub85c \uc0d8\ud50c \ucd94\ucd9c\n        mini_batch = random.sample(self.memory, self.batch_size)\n        states = np.zeros((self.batch_size, self.state_size))\n        next_states = np.zeros((self.batch_size, self.state_size))\n        actions, rewards, dones = [], [], []\n\n        for i in range(self.batch_size):\n            states[i] = mini_batch[i][0]\n            actions.append(mini_batch[i][1])\n            rewards.append(mini_batch[i][2])\n            next_states[i] = mini_batch[i][3]\n            dones.append(mini_batch[i][4])\n\n        # \ud604\uc7ac \uc0c1\ud0dc\uc5d0 \ub300\ud55c \ubaa8\ub378\uc758 \ud050\ud568\uc218\n        
# \ub2e4\uc74c \uc0c1\ud0dc\uc5d0 \ub300\ud55c \ud0c0\uae43 \ubaa8\ub378\uc758 \ud050\ud568\uc218\n        target = self.model.predict(states)\n        target_val = self.target_model.predict(next_states)\n\n        # \ubca8\ub9cc \ucd5c\uc801 \ubc29\uc815\uc2dd\uc744 \uc774\uc6a9\ud55c \uc5c5\ub370\uc774\ud2b8 \ud0c0\uae43\n        for i in range(self.batch_size):\n            #print(\"target\uc740\", target[i])\n            if dones[i]:\n                target[i][actions[i]] = rewards[i]\n            else:\n                target[i][actions[i]] = rewards[i] + self.discount_factor * (\n                    np.amax(target_val[i]))\n\n        self.model.fit(states, target, batch_size=self.batch_size,\n                       epochs=10, verbose=0)\n\n\nif __name__ == \"__main__\":\n    # CartPole-v1 \ud658\uacbd, \ucd5c\ub300 \ud0c0\uc784\uc2a4\ud15d \uc218\uac00 500\n    env = gym.make('CartPole-v1')\n    env = wrappers.Monitor(env, \".\/gym-results-Cart\", force=True, video_callable=lambda episode_id: episode_id%20==0)\n\n    state_size = env.observation_space.shape[0]\n    action_size = env.action_space.n\n\n    # DQN \uc5d0\uc774\uc804\ud2b8 \uc0dd\uc131\n    agent = DQNAgent(state_size, action_size)\n\n    scores, episodes = [], []\n\n    for e in range(EPISODES):\n        done = False\n        score = 0\n        # env \ucd08\uae30\ud654\n        #env = wrappers.Monitor(env, \".\/gym-results-Cart\", force=True)\n        state = env.reset()\n        state = np.reshape(state, [1, state_size])\n        #print(\"state reshape\ub294\",state)\n\n        while not done:\n            \n            if agent.render:\n                env.render()\n\n            # \ud604\uc7ac \uc0c1\ud0dc\ub85c \ud589\ub3d9\uc744 \uc120\ud0dd\n            action = agent.get_action(state)\n            # \uc120\ud0dd\ud55c \ud589\ub3d9\uc73c\ub85c \ud658\uacbd\uc5d0\uc11c \ud55c \ud0c0\uc784\uc2a4\ud15d \uc9c4\ud589\n            next_state, reward, done, info = env.step(action)\n            
next_state = np.reshape(next_state, [1, state_size])\n            # \uc5d0\ud53c\uc18c\ub4dc\uac00 \uc911\uac04\uc5d0 \ub05d\ub098\uba74 -100 \ubcf4\uc0c1\n            reward = reward if not done or score == 499 else -100\n\n            # \ub9ac\ud50c\ub808\uc774 \uba54\ubaa8\ub9ac\uc5d0 \uc0d8\ud50c &lt;s, a, r, s'> \uc800\uc7a5\n            agent.append_sample(state, action, reward, next_state, done)\n            # \ub9e4 \ud0c0\uc784\uc2a4\ud15d\ub9c8\ub2e4 \ud559\uc2b5\n            if len(agent.memory) >= agent.train_start:\n                agent.train_model()\n\n            score += reward\n            state = next_state\n\n            if done:\n                # \uac01 \uc5d0\ud53c\uc18c\ub4dc\ub9c8\ub2e4 \ud0c0\uae43 \ubaa8\ub378\uc744 \ubaa8\ub378\uc758 \uac00\uc911\uce58\ub85c \uc5c5\ub370\uc774\ud2b8\n                agent.update_target_model()\n\n                score = score if score == 500 else score + 100\n                # \uc5d0\ud53c\uc18c\ub4dc\ub9c8\ub2e4 \ud559\uc2b5 \uacb0\uacfc \ucd9c\ub825\n                scores.append(score)\n                episodes.append(e)\n                #pylab.plot(episodes, scores, 'b')\n                #pylab.savefig(\".\/save_graph\/cartpole_dqn.png\")\n                print(\"episode:\", e, \"  score:\", score, \"  memory length:\",\n                      len(agent.memory), \"  epsilon:\", agent.epsilon)\n\n                # \uc774\uc804 10\uac1c \uc5d0\ud53c\uc18c\ub4dc\uc758 \uc810\uc218 \ud3c9\uade0\uc774 490\ubcf4\ub2e4 \ud06c\uba74 \ud559\uc2b5 \uc911\ub2e8\n                if np.mean(scores[-min(10, len(scores)):]) > 490:\n                    agent.model.save_weights(\".\/save_model\/cartpole_dqn.h5\")\n                    sys.exit()\n<\/pre>\n\n\n\n<p>\ub2e4\uc74c \ub0b4\uc6a9\uc744 \uc218\uc815\ud588\ub2e4.<\/p>\n\n\n\n<ul class=\"wp-block-list\"><li>10 \ud589: from gym import wrappers: wrappers \uc0ac\uc6a9.<\/li><li>120 \ud589: env = wrappers.Monitor(env, &#8220;.\/gym-results-Cart&#8221;, force=True, 
video_callable=lambda episode_id: episode_id%20==0): 20\ubc88\ub9c8\ub2e4 \ub3d9\uc601\uc0c1\uc73c\ub85c \uc800\uc7a5. \uc5ec\uae30\ub97c \uc124\uc815\ud558\uc9c0 \uc54a\uc73c\uba74 64\ubc88\uc9f8 \uc5d0\ud53c\uc18c\ub4dc \ub3d9\uc601\uc0c1\uc744 \uc800\uc7a5\ud558\uace0, 1,000\ubc88\uc9f8 \uc5d0\ud53c\uc18c\ub4dc\ub85c \ub118\uc5b4\uac04\ub2e4. \ud559\uc2b5 \uacfc\uc815\uc744 \uc54c \uc218 \uc5c6\ub2e4.<\/li><\/ul>\n\n\n\n<p>cartPole-v1\uc740 10\ucd08 \ub3d9\uc548 \ub9c9\ub300\uae30\uac00 \ub118\uc5b4\uc9c0\uc9c0 \uc54a\uc73c\uba74 \ub05d\ub098\ub294 \ubaa8\ub378\uc774\ub2e4. 10\ucd08 \uc804 \ub9c9\ub300\uae30\uac00 12\ub3c4 \ub118\uac8c \uae30\uc6b8\uc5b4\uc9c0\uba74 \ub05d\ub09c\ub2e4. Q-learning\uc740 \uc544\ub798 \uc2dd\uc73c\ub85c \uc815\uc758\ub41c\ub2e4. $latex \\alpha$ \ub294 learning rate, $latex \\gamma $\ub294 discount factor.<\/p>\n\n\n\n<p>$latex Q(S_{t},A_{t})\\leftarrow Q(S_{t},A_{t})+\\alpha \\left [R_{t+1}+\\gamma\\max_{a}Q(S_{t+1},a)-Q(S_{t},A_{t}) \\right ] $<\/p>\n\n\n\n<p>\uc704 \uc2dd\uc5d0\uc11c $latex Q(S_{t},A_{t})$\uc640 $latex R_{t+1}+\\gamma \\max_{a}Q(S_{t+1},a) $\ub85c \uc624\ucc28\ub97c \uc904\uc5ec \ub098\uac04\ub2e4. \uc624\ucc28\ub294 $latex MSE = ($ \uc815\ub2f5 &#8211; \uc608\uce21 $latex ) ^{2} = (R_{t+1} + \\gamma \\max_{a}Q(S_{t+1},a) - Q(S_{t},A_{t}))^{2} $\ub85c \uc815\uc758\ud55c\ub2e4. keras.fit\uc5d0\uc11c state\uc5d0 \ub530\ub978 \uc608\uce21$latex Q(S_{t},A_{t}) $\uacfc \uc815\ub2f5\uc778 target[i]\ub85c \ud559\uc2b5\ud55c\ub2e4. keras.fit\uc744 \ubd80\ub974\uae30 \uc804\uc5d0 miniBatch \ud06c\uae30 \ubaa8\ub4e0 target[size]\ub97c \uc5c5\ub370\uc774\ud2b8\ud55c\ub2e4. 
print\ub85c target[i]\uac12\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">episode: 73   score: 64.0   memory length: 2000   epsilon: 0.2699131774597243\nupdate\uc804 target[20] [41.376812 37.715374]\nupdate\ud6c4 target[20] [41.376812 40.977367]\n#action\uc5d0 \ub530\ub978 \uc815\ud655\ud55c Q \uac12\uc744 \uc218\uc815.\n##\uc5ec\uae30\uc5d0\uc11c \ubb34\uc791\uc704\ub85c \ub2e4\uc2dc \ubf51\uc74c.\nupdate\uc804 target[20] [46.597427 39.114082]\nupdate\ud6c4 target[20] [46.597427 42.882465]\n...\nepisode: 132   score: 419.0   memory length: 2000   epsilon: 0.009998671593271896\nupdate\uc804 target[20] [113.11228 112.72044]\nupdate\ud6c4 target[20] [113.11228 113.5387 ]\nupdate\uc804 target[20] [109.97069 109.24642]\nupdate\ud6c4 target[20] [109.97069 109.48601]\nupdate\uc804 target[20] [112.96742 112.18164]\nupdate\ud6c4 target[20] [112.92011 112.18164]\n# 132\ubc88 episode\uac00 73\ubc88 episode\ubcf4\ub2e4 \uc815\ub2f5\uc5d0 \ub354 \uadfc\uc811\ud558\uc5ec \uc5c5\ub370\uc774\ud2b8 \ub7c9\uc774 \uc904\uc5c8\ub2e4.<\/pre>\n\n\n\n<p>\uc774\ub807\uac8c 10\uac1c \ud3c9\uade0 score\uac00 490 \uc774\uc0c1\uc774\uba74 \ud559\uc2b5\uc744 \uc911\ub2e8\ud55c\ub2e4. \uc801\ub2f9\ud55c Q \uac12\uc744 \ucc3e\ub294 \ubb38\uc81c\ub77c value based Reinforce Learning\uc774\ub77c \ud55c\ub2e4. 
\ubaa8\ub378\uc744 \uc815\uc758\ud560 \ub54c Q \uac12\uc744 \uc81c\ud55c\ud558\uc9c0 \uc54a\uc73c\ub824 activation\uc744 linear\ub85c \uc124\uc815\ud588\ub2e4.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"378\" height=\"252\" src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/12\/index.png\" alt=\"\" class=\"wp-image-3346\" srcset=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/12\/index.png 378w, https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/12\/index-300x200.png 300w\" sizes=\"auto, (max-width: 378px) 100vw, 378px\" \/><figcaption>300\ubc88 \ud559\uc2b5 \uacb0\uacfc,\uc2a4\ucf54\uc5b4 vs \ud69f\uc218.<\/figcaption><\/figure>\n\n\n\n<figure class=\"wp-block-video\"><video height=\"400\" style=\"aspect-ratio: 600 \/ 400;\" width=\"600\" controls src=\"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/12\/openaigym.video_.5.59.video000280.mp4\"><\/video><\/figure>\n\n\n\n<p>\uc815\ucc45\uc744 \uc9c1\uc811 \uadfc\uc0ac\ud558\ub294 \ubc29\ubc95\uc740 policy based Reinforce Learning\uc778\ub370, \ub9c8\uc9c0\ub9c9\uc5d0 softmax \ub85c \ud655\uc728\uc744 \ub098\uc624\ub3c4\ub85d \ud574\uc57c \ud55c\ub2e4.<\/p>\n\n\n\n<p>Sutton &amp; Barto \uac00 \uc4f4 \ucc45\uc744 \ubcf4\uba74 \uc798 \uc774\ud574\uac00\uc9c0 \uc54a\ub294\ub370, \uc9c1\uc811 \ub3cc\ub824 \ud655\uc778\ud558\uba74 \uc774\ud574\ud588\ub2e4 \ucc29\uac01\ud55c\ub2e4.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\uac15\uc544\uc9c0 \ucc45\uc5d0 \ub098\uc628 \uc608\uc81c\ub97c \uc2e4\ud589\ud588\ub2e4. \ucc45 \uc2e4\ud589 \ud658\uacbd\uacfc \ub0b4 \uadf8\uac83\uc774 \ub2ec\ub77c \uc2e4\ud589\ud560 \uc218 \uc5c6\uc5c8\ub2e4. \ub098\ub294 docker\ub85c tensorflow\ub97c \uc0ac\uc6a9\ud55c\ub2e4. 
\uadf8\uac83\ub3c4 cpu\uac00 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[33],"tags":[700,699,271,701,698],"class_list":["post-3319","post","type-post","status-publish","format-standard","hentry","category-tensorflow","tag-gym","tag-openai","tag-reinforcementlearning","tag-701","tag-698"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3319","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/comments?post=3319"}],"version-history":[{"count":27,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3319\/revisions"}],"predecessor-version":[{"id":3348,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3319\/revisions\/3348"}],"wp:attachment":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media?parent=3319"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/categories?post=3319"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/tags?post=3319"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}