{"id":2639,"date":"2019-03-24T22:48:38","date_gmt":"2019-03-24T13:48:38","guid":{"rendered":"https:\/\/now0930.tk\/wordpress\/?p=2639"},"modified":"2019-04-11T19:27:48","modified_gmt":"2019-04-11T10:27:48","slug":"%ec%9b%b9-%ed%81%ac%eb%a1%a4%eb%9f%acwordcloud","status":"publish","type":"post","link":"https:\/\/now0930.pe.kr\/wordpress\/%ec%9b%b9-%ed%81%ac%eb%a1%a4%eb%9f%acwordcloud\/","title":{"rendered":"\uc6f9 \ud06c\ub864\ub7ec+\uc6cc\ub4dc\ud074\ub77c\uc6b0\ub4dc"},"content":{"rendered":"\n<p>python\uc73c\ub85c \ubcf4\uae30\uc2eb\uc740 \uc883\uc120, \uc911\uc559 \uc81c\uc548 \ud0a4\uc6cc\ub4dc\ub97c \ube7c\uace0 \ub274\uc2a4\ub97c \uac80\uc0c9\ud574 \ubcf4\uc790. \uacc4\ud68d\uc740 \uc544\ub798\uc640 \uac19\ub2e4.<\/p>\n\n\n\n<ul class=\"wp-block-list\"><li>\uc6f9 \ud06c\ub864\ub7ec\ub97c \ub9cc\ub4e4\uc5b4 \uc870\uc120, \uc911\uc559 \uc778\ud130\ub137 \ud398\uc774\uc9c0\ub97c \uc811\uc18d\ud55c\ub2e4.<\/li><li>\ud5e4\ub4dc\ub77c\uc778\uc744 \uae01\uc5b4 \ud30c\uc77c\ub85c \uc800\uc7a5\ud55c\ub2e4.<\/li><li>\ud30c\uc77c\uc744 \uc77d\uc5b4 \uc6cc\ub4dc\ud074\ub77c\uc6b0\ub4dc\ub85c \uc8fc\uc694 \ud0a4\uc6cc\ub4dc\ub97c \ud655\uc778\ud55c\ub2e4.<\/li><li>\uad6c\uae00\ub274\uc2a4\ub85c \ud0a4\uc6cc\ub4dc\ub97c \uac80\uc0c9\uc5b4 \uc81c\uc678\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<p>\uc6f9 \ud06c\ub864\ub7ec\ub294 \uc778\ud130\ub137\uc5d0 \ub9ce\uc774 \uacf5\uac1c\ub418\uc5b4 \uc788\uc5b4 \uc27d\uac8c \ub9cc\ub4e4\uc5c8\ub2e4. \ucf54\ub4dc \uba87 \uc904\ub85c \uc6d0\ud558\ub294 \uae30\ub2a5\uc744 \uad6c\ud604\ud588\ub2e4. \uc5b4\ub824\uc6e0\ub358 \uc810\uc740 \ubcf5\uc0ac\ud55c \ucf54\ub4dc\ub97c \uc218\uc815\ud55c \uc810\uc774\ub2e4. \uc778\ud130\ub137 \ucf54\ub4dc\uac00 json \ud615\uc2dd\uc73c\ub85c \ud30c\uc77c\uc744 \uc800\uc7a5\ud588\ub2e4. \uc774\ub97c \ud14d\uc2a4\ud2b8\ub85c \ubcc0\uacbd\ud558\ub294 \uacfc\uc815\uc5d0 \ubb38\uc81c\uac00 \uc788\uc5c8\ub2e4. 
csv\uc758 writerow\ub97c dictionary \uc778\uc790\ub85c \ub123\uc73c\uba74 \ud55c \uae00\uc790\ub9c8\ub2e4 \ucef4\ub9c8\ub97c \ucc0d\ub294\ub2e4. dictionary\ub97c [] \uad04\ud638\ub85c \uac10\uc2f8\uc57c \ub41c\ub2e4.<\/p>\n\n\n\n<p>\uc6cc\ub4dc\ud074\ub77c\uc6b0\ub4dc \uc5ed\uc2dc \uc27d\ub2e4. \ud55c\uae00\ud3f0\ud2b8\ub97c \uba85\uc2dc\ud558\uc5ec generate\ud558\uba74 \ubc14\ub85c \ub41c\ub2e4. \ub098\uba38\uc9c0 \ubd80\uc871\ud55c \ubd80\ubd84(plot\uc744 \ud30c\uc77c\ub85c \uc800\uc7a5 \ub4f1)\uc744 \uacfc\uac70 \ucf54\ub4dc\uc5d0\uc11c \ubcf5\uc0ac\ud574\uc11c \ud574\uacb0\ud588\ub2e4. <\/p>\n\n\n\n<p>\uad6c\uae00\ub274\uc2a4\uac00 &#8211; \uae30\ud638\ub85c \uac80\uc0c9\uc5b4\ub97c \uc81c\uc678\ud558\ub294 \uae30\ub2a5\uc744 \uc81c\uacf5\ud55c\ub2e4. +\ud0a4\uc6cc\ub4dc\uc5c6\uc774 \ubaa8\ub450 -\ub85c \ub123\uc73c\uba74 \ubc94\uc704\ub97c \ub108\ubb34 \ud06c\uac8c \uc7a1\ub294\ub2e4. \ud55c\uad6d\uc5b4, \ucd5c\uadfc 1\uc8fc\uc77c \ub4f1 \ubc94\uc704\ub97c \uc881\ud614\ub2e4. \uac80\uc0c9\uc5b4 \uc81c\uc678\uc640 \ubcf4\ud1b5 \uac80\uc0c9\uc744 \ube44\uad50\ud574 \ubcf4\uba74 \uc880 \ud6a8\uacfc\uac00 \uc788\ub294 \ub4ef \ud558\ub2e4.<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># -*- coding: utf-8 -*-\nimport requests\nfrom bs4 import BeautifulSoup\nimport os\nimport csv\n\nfrom wordcloud import WordCloud\n\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\n\n\nBASE_DIR = os.path.dirname(os.path.abspath(__file__))\n\nreq = requests.get('http:\/\/www.chosun.com\/')\nreq.encoding=None\nhtml = req.text\n\n\nsoup = BeautifulSoup(html, 'html.parser')\n\n\n#\uc870\uc120\uc77c\ubcf4\ub294 dl\ud615\uc2dd\uc73c\ub85c \ud5e4\ub4dc\ub77c\uc778\uc744 \uc791\uc131\ud55c\ub2e4.\nmy_contents = soup.find_all('dl', {'class':'news_item'}, 'dt')\n\n\ndata = {}\n\n\n#\uc784\uc758\uc758 \ud0a4\ub97c \ub9cc\ub4e4\uc5b4\uc11c \uc800\uc7a5.\nindex = 0\n\nfor content in my_contents:\n    data[index] = content.text\n    index = index + 
1\n\n\n###\uc911\uc559\uc77c\ubcf4\nreq = requests.get('https:\/\/joongang.joins.com\/')\nreq.encoding=None\nhtml = req.text\nsoup = BeautifulSoup(html, 'html.parser')\n\n#\uc911\uc559\uc77c\ubcf4\ub294 li\ud615\uc2dd\uc73c\ub85c \ud5e4\ub4dc\ub77c\uc778\uc744 \ub9cc\ub4e0\ub2e4.\nmy_contents = soup.find_all('li')\n\n\nfor content in my_contents:\n    data[index] = content.text\n    index = index + 1\n\nprint(data.values())\n\nwith open(os.path.join(BASE_DIR, 'result.csv'), 'w', encoding='utf8') as csv_file:\n    writer = csv.writer(csv_file)\n    for key in data.keys():\n    #writer = csv.DictWriter(csv_file, data.keys())\n    #writer = csv.writer(csv_file, delimiter=',')\n        #print(data[key])\n        writer.writerow([data[key]])\n\ncsv_file.close()\n\n##word cloud\n\n\ntext = open(os.path.join(BASE_DIR, 'result.csv'), 'r', encoding='utf8').read()\n\n#\ud55c\uae00 \ud3f0\ud2b8\ub97c \uc0ac\uc6a9\ud558\uae30 \uc704\ud574, \uba85\uc2dc\nwordcloud = WordCloud(font_path='\/usr\/share\/fonts\/truetype\/nanum\/NanumBarunGothic.ttf', background_color='white').generate(text)\n\nplt.figure()\nplt.imshow(wordcloud, interpolation=\"bilinear\")\nplt.axis(\"off\")\nplt.show()\nplt.savefig('.\/wordcloud.jpg',format='jpg', dpi=300)\n<\/code><\/pre>\n\n\n\n<p>\uace0\ud574\uc0c1\ub3c4 \uc774\ubbf8\uc9c0\ub97c \uc6d0\ud558\uba74 \uc544\ub798\uc640 \uac19\uc774 \uc791\uc5c5\ud55c\ub2e4.<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>wordcloud = WordCloud(width=800, height=400).generate(text)\nplt.figure( figsize=(20,10) )\nplt.imshow(wordcloud)<\/code><\/pre>\n\n\n\n<p>\ucc38\uc870 \uc0ac\uc774\ud2b8<\/p>\n\n\n\n<p><a href=\"https:\/\/datamod.tistory.com\/104\">https:\/\/datamod.tistory.com\/104<\/a><\/p>\n\n\n\n<p>json\uc73c\ub85c \uc800\uc7a5\ud560 \uacbd\uc6b0, utf8 \ubc29\uc2dd\uc73c\ub85c \uc800\uc7a5.<br><\/p>\n\n\n\n<p><a 
href=\"https:\/\/beomi.github.io\/2017\/01\/20\/HowToMakeWebCrawler\/\">https:\/\/beomi.github.io\/2017\/01\/20\/HowToMakeWebCrawler\/<\/a><\/p>\n\n\n\n<p>\uc6f9 \ud06c\ub864\ub7ec \uae30\ubcf8.<\/p>\n\n\n\n<p><a href=\"http:\/\/pythonstudy.xyz\/python\/article\/403-%ED%8C%8C%EC%9D%B4%EC%8D%AC-Web-Scraping\">http:\/\/pythonstudy.xyz\/python\/article\/403-%ED%8C%8C%EC%9D%B4%EC%8D%AC-Web-Scraping<\/a><\/p>\n\n\n\n<p>requests \ud55c\uae00 \uc0ac\uc6a9.<\/p>\n\n\n\n<p><a href=\"https:\/\/twpower.github.io\/84-how-to-use-beautiful-soup\">https:\/\/twpower.github.io\/84-how-to-use-beautiful-soup<\/a><\/p>\n\n\n\n<p>beautifulsoup \uae30\ubcf8.<\/p>\n\n\n\n<p><a href=\"https:\/\/medium.com\/@gis10kwo\/converting-nested-json-data-to-csv-using-python-pandas-dc6eddc69175\">https:\/\/medium.com\/@gis10kwo\/converting-nested-json-data-to-csv-using-python-pandas-dc6eddc69175<\/a><\/p>\n\n\n\n<p>json to csv, pandas<\/p>\n\n\n\n<p><a href=\"https:\/\/www.programiz.com\/python-programming\/working-csv-files\">https:\/\/www.programiz.com\/python-programming\/working-csv-files<\/a><\/p>\n\n\n\n<p>write to csv<\/p>\n\n\n\n<p><a href=\"https:\/\/stackoverflow.com\/questions\/1816880\/why-does-csvwriter-writerow-put-a-comma-after-each-character\">https:\/\/stackoverflow.com\/questions\/1816880\/why-does-csvwriter-writerow-put-a-comma-after-each-character<\/a><\/p>\n\n\n\n<p>writerow \ud560 \ub54c, \uac01 \uae00\uc790\ub4a4\uc5d0 comma \uc0bd\uc785\ub420 \ub54c.<\/p>\n\n\n\n<p><a href=\"https:\/\/myjamong.tistory.com\/48\">https:\/\/myjamong.tistory.com\/48<\/a><\/p>\n\n\n\n<p>wordcloud \uc0ac\uc6a9\uc5d0 \ud55c\uae00 \ud3f0\ud2b8 \uc124\uc815<\/p>\n\n\n\n<p><a href=\"https:\/\/stackoverflow.com\/questions\/28786534\/increase-resolution-with-word-cloud-and-remove-empty-border\/28795577\">https:\/\/stackoverflow.com\/questions\/28786534\/increase-resolution-with-word-cloud-and-remove-empty-border\/28795577<\/a><\/p>\n\n\n\n<p>\uace0\ud574\uc0c1\ub3c4 
wordcloud<\/p>\n","protected":false},"excerpt":{"rendered":"<p>python\uc73c\ub85c \ubcf4\uae30\uc2eb\uc740 \uc883\uc120, \uc911\uc559 \uc81c\uc548 \ud0a4\uc6cc\ub4dc\ub97c \ube7c\uace0 \ub274\uc2a4\ub97c \uac80\uc0c9\ud574 \ubcf4\uc790. \uacc4\ud68d\uc740 \uc544\ub798\uc640 \uac19\ub2e4. \uc6f9 \ud06c\ub864\ub7ec\ub97c \ub9cc\ub4e4\uc5b4 \uc870\uc120, \uc911\uc559 \uc778\ud130\ub137 \ud398\uc774\uc9c0\ub97c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":2647,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[12],"tags":[110,532,531],"class_list":["post-2639","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-12","tag-python","tag-532","tag-531"],"jetpack_featured_media_url":"https:\/\/now0930.pe.kr\/wordpress\/wp-content\/uploads\/2019\/03\/wordcloud.jpg","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/2639","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/comments?post=2639"}],"version-history":[{"count":12,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/2639\/revisions"}],"predecessor-version":[{"id":2691,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/posts\/2639\/revisions\/2691"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media\/2647"}],"wp:attachment":[{"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/media?parent=2639"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/categories?post=2639"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/now0930.pe.kr\/wordpress\/wp-json\/wp\/v2\/tags?post=2639"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}