{"id":3134,"date":"2019-09-08T23:28:43","date_gmt":"2019-09-08T14:28:43","guid":{"rendered":"https:\/\/now0930.pe.kr\/wordpress\/?p=3134"},"modified":"2019-09-26T21:17:42","modified_gmt":"2019-09-26T12:17:42","slug":"keras%eb%a1%9c-%ed%82%a4%ec%9b%8c%eb%93%9c-%eb%b6%84%ec%84%9d","status":"publish","type":"post","link":"https:\/\/now0930.pe.kr\/wordpress\/keras%eb%a1%9c-%ed%82%a4%ec%9b%8c%eb%93%9c-%eb%b6%84%ec%84%9d\/","title":{"rendered":"keras\ub85c \ud0a4\uc6cc\ub4dc \ubd84\uc11d(1\/5)"},"content":{"rendered":"\n<p>\uc778\uacf5\uc9c0\ub2a5\uc73c\ub85c \uc790\uc5f0\uc5b4 \ubd84\uc11d\uc744 \ub9ce\uc774\ud55c\ub2e4. \ub098\ub3c4 \ub0a8 \ub530\ub77c \ud574\ubcf4\uae30\ub85c \ud588\ub2e4. \uc870\uae08\uc529 \ubc30\uc6cc\ud558\ubbc0\ub85c  \ucf54\ub4dc\ub294 \ubcc4\uac70 \uc5c6\uc9c0\ub9cc \ub9ce\uc740 \uc2dc\uac04\uc744 \uc4f0\uace0\uc788\ub2e4. \uc0dd\uac01\uc740 \uc774\ub807\ub2e4. <\/p>\n\n\n\n<ol class=\"wp-block-list\"><li>\ub098\ub294 \uc124\ube44 \ud0dc\uadf8\ub97c \uac00\uc9c0\uace0 \uc788\ub2e4. \uc0ac\ub78c\ub9c8\ub2e4 \ud0dc\uadf8 \uc791\uc131\uc744 \ub2e4\ub974\uac8c \ud55c\ub2e4. \uc774\ub807\uac8c \ub418\uba74 \uc77c\uad00\uc131\uc5c6\uc5b4 \ubd84\uc11d\ud558\uae30 \uc5b4\ub835\ub2e4. \uc774\ub97c \ud45c\uc900\ud654? &#8211; \uc801\uc5b4\ub3c4 \ud615\uc2dd\uc5d0 \ubc97\uc5b4\ub09c \ud0dc\uadf8\ub97c \ucc3e\uae30\uc704\ud574 &#8211; \ud558\ub294 \ubb34\uc5c7\uc778\uac00 \uc788\uc73c\uba74 \uc88b\uaca0\ub2e4.<\/li><li>\ud0dc\uadf8\ub97c \uc900\ube44\ud55c\ub2e4. \ud55c\uae00+\uc601\uc5b4\ub85c \uad6c\uc131\ub41c\ub2e4.<\/li><li>\uac04\ub2e8\ud55c \ubd84\ub958\uae30\ub97c \uad6c\uc131\ud55c\ub2e4. \ucc98\uc74c\uacfc \ub05d \ub2e8\uc5b4\ub97c \uae30\uc900\uc73c\ub85c OK, NG\ub97c \uc784\uc758\ub85c \ub123\ub294\ub2e4.<\/li><li>\ud55c\uae00\uc744 \uc0ac\uc6a9\ud558\uc5ec \ud615\ud0dc\uc18c\ub97c \uae30\uc900\uc73c\ub85c \uc790\ub978\ub2e4. 
konlpy\ub85c \ud0dc\uadf8\ub97c \uc790\ub978\ub2e4.<\/li><li>\ud56b\ud55c word2vec\ub97c \ud65c\uc6a9\ud55c\ub2e4.<\/li><li>\ub0b4 \ub9d8\ub300\ub85c \ub124\ud2b8\uc6cd\uc744 \uad6c\uc131\ud558\uc5ec \ud559\uc2b5\ud558\uace0 \uacb0\uacfc\ub97c \ubcf8\ub2e4.<\/li><\/ol>\n\n\n\n<p>\uc77c\ub2e8 word2vec\ub97c \ub9cc\ub4e4\uc5c8\ub2e4. \uc778\ud130\ub137\uc5d0 \uc774 \uc720\uba85\ud55c word2vec\uc5d0 \ub300\ud55c \uc124\uba85\uc744 \uc27d\uac8c \ucc3e\ub294\ub2e4. \uc774\ub860\uc740 \uadf8\ub807\ub2e4\uce58\uace0, \uc751\uc6a9\uc744 \uc5b4\ub5bb\uac8c \ud560\uc9c0 \ud655\uc778\ud588\ub2e4. https:\/\/wikidocs.net\/book\/2155 \ub97c \uadf8\ub300\ub85c \ucc38\uc870\ud588\ub2e4.<\/p>\n\n\n\n<p>\ud0dc\uadf8\ub97c word2vec\uc5d0\uc11c \uc0ac\uc6a9\ud55c vector\ub85c \ub098\ud0c0\ub0b4\uc57c \ud55c\ub2e4. \uc778\ud130\ub137\uc5d0 \uc704\ud0a4\ud53c\ub514\uc544 \ub4f1 \ubc29\ub300\ud55c \ubb38\uc11c\ub85c \ud559\uc2b5\ud55c word2vec\ub97c \uad6c\ud560 \uc218 \uc788\uace0, \uc27d\uac8c \uc801\uc6a9\ud560 \uc218 \uc788\ub2e4. \uadf8\ub7ec\ub098 \ub0b4\uac00 \uac00\uc9c4 \ud0dc\uadf8\ub294 \uc774\ucabd \uc5c5\uacc4 \uc790\uc758\uc801, \ub2e8\ucd95 \ub2e8\uc5b4\ub85c \uc774\ub904\uc84c\ub2e4. \uc608\ub97c \ub4e4\uc5b4 \ucc28\uc885, \uc808\ud658 \ub4f1 \uc77c\ubcf8\uc5b4 \ube44\uc2b7\ud55c \ub2e8\uc5b4\ub97c \uc8fc\ub85c \uc0ac\uc6a9\ud55c\ub2e4. \uc774 \ub2e8\uc5b4\ub294 \ud45c\uc900\uc5b4\ub3c4 \uc544\ub2c8\uace0, \uc2ec\uc9c0\uc5b4 \uc704\ud0a4\ud53c\ub514\uc544\uc5d0\uc11c \uc0ac\uc6a9\ud558\uc9c0 \uc54a\ub294\ub2e4. \uc774\ub97c \uad6c\ubd84\ud558\ub824\uba74 \uc0ac\uc6a9\uc790 \uc815\uc758 \uc0ac\uc804\uc5d0 \uc785\ub825\ud574\uc57c \ud55c\ub2e4. <a href=\"https:\/\/cromboltz.tistory.com\/18\">\uc778\ud130\ub137 \ub2a5\ub825\uc790<\/a>\uc758 \ub3c4\uc6c0\uc73c\ub85c \uba85\uc0ac\ub85c \ucd94\uac00\ud588\ub2e4. \uc704 \ub9c1\ud06c \ud575\uc2ec\uc740 1. \uc0ac\uc6a9\ud560 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 \uc555\ucd95\uc744 \ud47c\ub2e4. 2. 
\uc0ac\uc804 \ud30c\uc77c\uc5d0 \ub2e8\uc5b4\ub97c \ucd94\uac00\ud55c\ub2e4. 3. \ub2e4\uc2dc jar\ub85c \uc555\ucd95\ud55c\ub2e4. \uc774\ub2e4.<\/p>\n\n\n\n<p>\ub2e4\uc2dc konlpy\ub85c \ub2e8\uc5b4, \uc22b\uc790,\uc601\uc5b4\ub85c \ubd84\ub9ac\ud55c\ub2e4. \uc544\ub798 \ucf54\ub4dc\uc758 word[0]\uc740 \ub2e8\uc5b4\uc774\uace0, word[1]\uc740 \ud488\uc0ac \uc815\ub3c4 \ub41c\ub2e4.<\/p>\n\n\n\n<p>\uc774\uc81c \ubd84\ub9ac\ud55c \ub2e8\uc5b4\ub97c word2vec \ud568\uc218\ub85c vector\ub97c \ub9cc\ub4e4\uc5b4 \ud30c\uc77c\ub85c \uc800\uc7a5\ud55c\ub2e4. \uc544\ub798 \ucf54\ub4dc\ub294 \uc778\ud130\ub137\uc5d0\uc11c \uad6c\ud588\uace0 \ub098\ub294 \uc815\ub9d0 \uc870\uae08\ub9cc \uc218\uc815\ud588\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from konlpy.tag import Okt\nokt=Okt()\nfrom gensim.models import Word2Vec\n\ntargetFile = open(\".\/tagv4\ud0dc\uadf8\ubd99\uc778\ud30c\uc77c.csv\", \"r\", encoding='UTF-8')\n#lines=targetFile.readline()\n\ni=0\nresult=[]\nwhile True:\n\n    lines = targetFile.readline()\n    firstColumn = lines.split(',')\n    #print(lines)\n    #print(lines[0])\n    #print(firstColumn[1])\n    #print(firstColumn[4])\n    #firstColumn[4]\uac00 \ub9c8\uc9c0\ub9c9 OK, NG \ud50c\ub798\uadf8\n\n    #if not lines:break\n    #1000\ub77c\uc778\uc5d0\uc11c \uc27d\uac8c \ub04a\uae30 \uc704\ud574 \ub123\uc5b4\uc90c.\n    if i == 1000:\n        break\n    #print(lines)\n    i=i+1\n    if i%1000 == 0:\n        print(\"%d\ubc88\uc9f8 while\ubb38\"%i)\n    tokenlist = okt.pos(firstColumn[1], stem=True, norm=True)\n    temp=[]\n\n    for word in tokenlist:\n        if word[1] in [\"Noun\",\"Alpha\",\"Number\"]:\n            temp.append((word[0]))\n    if temp:\n        result.append(temp)\n\n#    
print(result)\ntargetFile.close()\n#print(result[100:])\nprint(result)\n#print(tokenlist)\n\nmodel=Word2Vec(sentences=result, size=200, window=4, min_count=2, workers=6, sg=0, iter=100)\n\nmodel.save('myModel')\na=model.wv.most_similar(\"\uc13c\uc11c\")\nprint(a)\n\"\"\"\nsent_text = sent_tokenize(lines)\n\nnormalized_text = []\nfor string in sent_text:\n    tokens = re.sub(r\"[^a-z0-9]+\", \" \", string.lower())\n    normalized_text.append(tokens)\n\nresult=[]\nresult=[word_tokenize(sentence) for sentence in normalized_text]\n\n#print(result)\n\n#print(lines)\nmodel=Word2Vec(sentences=result, size=100, window=5, min_count=3, workers=6, sg=0, iter=10000)\nprint(model)\n\na=model.wv.most_similar(\"you\")\n#print(model['man'])\nprint(a)\n#print(normalized_text)\n\n#print(result[:10])\n\"\"\"<\/pre>\n\n\n\n<p>\uc774\uc81c vector\ub97c \ubd88\ub7ec\uc640 keras\ub85c \ud559\uc2b5\uc744 \uc2dc\ucf1c\uc57c \ud55c\ub2e4. rnn\uc774\ub098 lstm \uc774\ub7f0 \ubaa8\ub4c8\uc744 \uc0ac\uc6a9\ud560 \uc608\uc815\uc774\ub2e4. \ud559\uc2b5\ud560 \uc785\ub825\uc744 vector \ud615\uc2dd\uc73c\ub85c \ub124\ud2b8\uc6cd\uc5d0 \uc9d1\uc5b4 \ub123\uc5b4\uc57c\ud558\ub294\ub370, embedding\uc73c\ub85c \uc27d\uac8c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub294 \ub4ef \ud558\ub2e4. \uc785\ub825\uc73c\ub85c \ub123\uae30 \uc704\ud574 \ud06c\uae30\ub97c \uc77c\uc815\ud558\uac8c \ub9de\ucdb0\uc57c\ud55c\ub2e4. pad_sequences\ub85c \uc27d\uac8c \uac00\ub2a5\ud558\ub2e4. \ub2e4\ub9cc dtype=&#8217;float32&#8217;\ub85c \uc18c\uc218\uc810 \ubc84\ub9bc\uc744 \ub9c9\uc544\uc900\ub2e4.<\/p>\n\n\n\n<p>pad_sequences\ub97c \uc801\uc6a9\ud558\uae30 \uc804\uc5d0\ub294 \uc2a4\ud2b8\ub9c1\uc73c\ub85c \uc778\uc2dd\ud558\ub294\ub370, \uc774 \ud6c4\uc5d0 \ubc14\ub85c vector\ub85c \ubc14\uafd4\uc900\ub2e4. \uc5b4\ub514\uae4c\uc9c0 \ud574\uc904 \uc9c0 \ubaa8\ub974\uaca0\uc73c\ub098, \uc774 \ubd80\ubd84\uc744 \uc9c1\uc811 \uc785\ub825\uc73c\ub85c \ub123\uc5b4\ub3c4 \ub420 \ub4ef \ud558\ub2e4. 
vector \ud06c\uae30\ub97c 200\uc73c\ub85c \ud588\ub294\ub370 \ub108\ubb34 \ud06c\uac8c \uc7a1\uc740 \ub4ef \ud558\ub2e4. \ub098\uc911\uc5d0 \uc904\uc5ec\uc57c\uaca0\ub2e4.<\/p>\n\n\n\n<p>\uc911\uac04\uc5d0 \uc2e4\uc81c 6\uac1c(\ub0b4\uac00 \uc815\ud55c \ucd5c\ub300\uac12)\ub85c \uc77c\uce58\ub418\ub294\uc9c0\uc640 \uac01 \ub2e8\uc5b4\ub97c vector\ub85c print\ub85c \ubf51\uc558\ub2e4. \uc5ed\uc2dc word2vec\ub97c \ub9cc\ub4e0 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30\ub97c \uadf8\ub300\ub85c \uc0ac\uc6a9\ud588\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from konlpy.tag import Okt\nokt=Okt()\nfrom gensim.models import Word2Vec\nfrom keras.layers import Dense, LSTM, Dropout\nfrom keras.models import Sequential\nfrom keras.preprocessing.text import Tokenizer\nfrom keras.preprocessing.sequence import pad_sequences\nimport numpy as np\n\nmodel=Word2Vec.load('.\/myModel')\n#b=model.wv.most_similar(positive=[\"\ud074\ub7a8\ud504\", \"\uc7a0\uae40\"])\nprint(model)\n\n#tokenizer \uc124\uc815.\n#Okt()\uc0ac\uc6a9.\n\ntargetFile = open(\".\/tagv4\ud0dc\uadf8\ubd99\uc778\ud30c\uc77c.csv\", \"r\", encoding='UTF-8')\n\ni=0\nresult=[]\nWORD_MAX=10\nWV_SIZE=200\nblankArray = np.zeros(WV_SIZE)\nwhile True:\n\n    lines = targetFile.readline()\n    firstColumn = lines.split(',')\n    if i == 100:\n        break\n    i=i+1\n    #word2vec\ub97c \ub9cc\ub4e0 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30\ub97c \uc0ac\uc6a9..\n    tokenlist = okt.pos(firstColumn[1], stem=True, norm=True)\n    temp=[]\n\n    for word in tokenlist:\n        #word[0]\uc740 \ub2e8\uc5b4.\n        #word[1]\uc740 \ud488\uc0ac.\n        #print(\"word[0]\uc740\",word[0])\n        #print(\"word[1]\uc740\",word[1])\n\n        if word[1] in [\"Noun\",\"Alpha\",\"Number\"]:\n            temp.append(model.wv[word[0]])\n\n  
  if temp:\n        result.append(temp)\ntargetFile.close()\n\n#\ucd5c\ub300 \ub2e8\uc5b4\ub97c 6\uc73c\ub85c \uc124\uc815.\n#\ud589 \uc218\ubcf4\ub2e4 6\uae4c\uc9c0 \ub4a4\ucabd\uc73c\ub85c 0\uc744 \ucc44\uc6c0.\n#word2Vec\uac00 \uc2e4\uc218\uc774\ubbc0\ub85c float32\ub85c \uc124\uc815\nfixed_result = pad_sequences(result, maxlen=6, padding='post', dtype='float32')\n\n\n#\uc815\ud655\ud558\uac8c \uc785\ub825\ub418\uc5b4 \uc788\ub294\uc9c0 \ud14c\uc2a4\ud2b8\ud558\ub294 \ubd80\ubd84.\nprint(len(fixed_result))\nj=0\nwhile True:\n    print(\"\uc5ec\uae30\uac00 \uc2dc\uc791\")\n    print(len(fixed_result[j]))\n    #print(fixed_result[j][0].shape)\n    print(result[j])\n    #print(fixed_result[j])\n    print(fixed_result[j][0])\n    print(fixed_result[j][1])\n    print(fixed_result[j][2])\n    print(fixed_result[j][3])\n    print(fixed_result[j][4])\n    print(fixed_result[j][5])\n\n    j=j+1\n\n    if j == 100:\n        break;\n#print(result[0][0])\n#print(result[0][1])\n\n#b1=model.wv[result[0][0]]\n#b2=model.wv[result[0][1]]\n#print(b1)\n#print(b2)\n#keras \ubaa8\ub378 \uc124\uc815.\nmodel = Sequential()\nDense(100, input_dim=200, kernel_initializer='uniform', activation='relu')\nDense(2, input_dim=100, activation='softmax')\nmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n<\/pre>\n\n\n\n<p>\uc774 \ud6c4\ub294 \ub098\uc911\uc5d0 \uacc4\uc18d \ud574\uc57c\ub41c\ub2e4.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\uc778\uacf5\uc9c0\ub2a5\uc73c\ub85c \uc790\uc5f0\uc5b4 \ubd84\uc11d\uc744 \ub9ce\uc774\ud55c\ub2e4. \ub098\ub3c4 \ub0a8 \ub530\ub77c \ud574\ubcf4\uae30\ub85c \ud588\ub2e4. \uc870\uae08\uc529 \ubc30\uc6cc\ud558\ubbc0\ub85c \ucf54\ub4dc\ub294 \ubcc4\uac70 \uc5c6\uc9c0\ub9cc \ub9ce\uc740 \uc2dc\uac04\uc744 \uc4f0\uace0\uc788\ub2e4. \uc0dd\uac01\uc740 \uc774\ub807\ub2e4. 
\ub098\ub294 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3135,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[33],"tags":[650,109,637,652,649,648,441,651],"class_list":["post-3134","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-tensorflow","tag-konlpy","tag-tensorflow","tag-word2vec","tag-652","tag-649","tag-648","tag-441","tag-651"],"jetpack_featured_media_url":"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/09\/Word2Vector.png","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3134","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/comments?post=3134"}],"version-history":[{"count":3,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3134\/revisions"}],"predecessor-version":[{"id":3185,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/3134\/revisions\/3185"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media\/3135"}],"wp:attachment":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media?parent=3134"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/categories?post=3134"},{
"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/tags?post=3134"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}