rubus0304 님의 블로그
[최종프로젝트 1일차] 본문
기획서 의견조율 및 웹크롤링 시도
개인 맞춤형 여행코스 추천 플랫폼: 대전을 중심으로
https://docs.google.com/document/d/1ZZM8FRppvcXzF-aNP0ZY9xeCc5QgL2FcR2eS5ugMu0s/edit?tab=t.pln2up5i1p5nMYRO - AI 여행 플래너
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
import random
import re
class Colors:
    """ANSI escape sequences used to colorize the crawler's console output.

    Wrap text as ``Colors.RED + text + Colors.RESET`` so the terminal
    returns to its default color afterwards.
    """

    RED = '\033[91m'
    GREEN = '\033[92m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'
    RESET = '\033[0m'  # clears any previously applied color
def switch_left():
    """Move the driver's focus into the ``searchIframe`` frame.

    Steps up to the parent frame first, then waits up to 10 seconds for the
    iframe with id ``searchIframe`` to be present and switches into it.
    The crawl loop reads the store list from this frame.

    Uses the module-level ``driver``.
    """
    driver.switch_to.parent_frame()
    iframe = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="searchIframe"]'))
    )
    driver.switch_to.frame(iframe)
def switch_right():
    """Move the driver's focus into the ``entryIframe`` frame.

    Steps up to the parent frame first, then waits up to 10 seconds for the
    iframe with id ``entryIframe`` to be present and switches into it.
    The crawl loop reads a store's detail page from this frame.

    Uses the module-level ``driver``.
    """
    driver.switch_to.parent_frame()
    iframe = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="entryIframe"]'))
    )
    driver.switch_to.frame(iframe)
# Browser setup: build the Chrome options, start the driver, and open the
# search page that the crawl loop below walks through.
options = webdriver.ChromeOptions()
for chrome_arg in (
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'window-size=1380,900',
):
    options.add_argument(chrome_arg)
driver = webdriver.Chrome(options=options)

# Naver Map URL
# NOTE(review): URL is not assigned anywhere in the visible source; it must
# be defined before this call or the script fails with NameError.
driver.get(url=URL)
# Element lookups retry for up to 3 seconds before failing.
driver.implicitly_wait(3)
# Main crawl loop: one iteration per page of search results. For every store
# on the page, open its detail frame, collect the fields, and print them.
while True:
    switch_left()

    # State of the "next page" button: 'true' means it is disabled.
    # The current page is still crawled in that case; the loop only stops
    # at the bottom, so the last page of results is not skipped.
    next_page = driver.find_element(By.XPATH, '//*[@id="app-root"]/div/div[3]/div[2]/a[7]').get_attribute('aria-disabled')

    # Store list of the current page
    page_no = driver.find_element(By.XPATH, '//a[contains(@class, "mBN2s qxokY")]').text
    elements = driver.find_elements(By.XPATH, '//*[@id="_pcmap_list_scroll_container"]//li')
    if page_no == '1':
        # The first two <li> items are dropped on page 1 only
        # (presumably non-store entries such as ads; TODO confirm).
        elements = elements[2:]

    for index, e in enumerate(elements, start=1):
        # Reset the per-store fields
        store_name = ''
        category = ''
        new_open = ''
        rating = 0.0
        visited_review = 0
        blog_review = 0
        store_id = ''
        address = ''
        business_hours = []
        phone_num = ''
        menu = []
        top_reviews = []
        theme_keywords = {'분위기': [], '인기토픽': [], '찾는목적': []}

        # Click the store and move to its detail page
        e.find_element(By.CLASS_NAME, 'CHC5F').find_element(By.XPATH, ".//a/div/div/span").click()
        sleep(2)
        switch_right()

        try:
            # Crawl the detail information
            title = driver.find_element(By.XPATH, '//div[@class="zD5Nm undefined"]')
            store_name = title.find_element(By.XPATH, './/div[1]/div[1]/span[1]').text
            category = title.find_element(By.XPATH, './/div[1]/div[1]/span[2]').text
            # A third span is read into new_open only when it exists.
            if len(title.find_elements(By.XPATH, './/div[1]/div[1]/span')) > 2:
                new_open = title.find_element(By.XPATH, './/div[1]/div[1]/span[3]').text

            # Review information (only filled when all three spans are present)
            review_elements = title.find_elements(By.XPATH, './/div[2]/span')
            if len(review_elements) > 2:
                rating = review_elements[0].text
                visited_review = review_elements[1].text
                blog_review = review_elements[2].text

            # Menu information
            menu_elements = driver.find_elements(By.XPATH, '//*[@id="app-root"]/div/div/div/div[5]/div/div[3]/div[1]')
            menu = [menu_element.text for menu_element in menu_elements]

            # Visitor reviews (at most the first five matches)
            review_elements = driver.find_elements(By.XPATH, '//*[@id="app-root"]/div/div/div/div[6]/div/div[7]/div[1]/div[1]/div/a/ul')
            top_reviews = [review_element.text for review_element in review_elements[:5]]

            # Theme keywords, grouped under the category label shown on the page
            theme_elements = driver.find_elements(By.XPATH, '//*[@id="app-root"]/div/div/div/div[6]/div/div[9]/div[1]')
            for theme_element in theme_elements:
                category_name = theme_element.find_element(By.CLASS_NAME, 'pNnVF').text
                keywords = theme_element.find_element(By.CLASS_NAME, 'sJgQj').find_elements(By.TAG_NAME, 'span')
                theme_keywords[category_name] = [keyword.text for keyword in keywords]

            # Store address and business hours
            address = driver.find_element(By.XPATH, '//span[@class="LDgIH"]').text
            try:
                # Click to expand the business-hours section before reading it.
                driver.find_element(By.XPATH, '//div[@class="y6tNq"]//span').click()
                sleep(2)
                parent_element = driver.find_element(By.XPATH, '//a[@class="gKP9i RMgN0"]')
                child_elements = parent_element.find_elements(By.XPATH, './*[@class="w9QyJ" or @class="w9QyJ undefined"]')
                for child in child_elements:
                    business_hours.append(child.text)
                phone_num = driver.find_element(By.XPATH, '//span[@class="xlx7Q"]').text
            except Exception:
                # Best effort: hours and phone number are optional, so a
                # failure here leaves them at their defaults. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                pass
        except Exception as ex:
            print(Colors.RED + '------------ 데이터 크롤링 오류 ------------' + Colors.RESET)
            print(ex)

        # Print the collected data
        print(Colors.BLUE + f'{index}. {store_name}' + Colors.RESET + ' · ' + category)
        print('메뉴:', menu)
        print('상위 5개 리뷰:', top_reviews)
        print('테마 키워드:', theme_keywords)
        print(Colors.MAGENTA + '-' * 50 + Colors.RESET)
        # str() is required: the review counts stay at their int default 0
        # when the review spans are missing, and int cannot be concatenated
        # to str.
        print('평점 ' + Colors.RED + str(rating) + Colors.RESET + ' / ' + str(visited_review) + ' · ' + str(blog_review))
        print(f'가게 고유 번호 -> {store_id}')
        print('가게 주소 ' + Colors.GREEN + str(address) + Colors.RESET)
        print(Colors.CYAN + '가게 영업 시간' + Colors.RESET)
        # NOTE(review): the source lost its indentation, so whether the blank
        # line belongs inside or after this loop is an assumption.
        for i in business_hours:
            print(i)
            print('')
        print('가게 번호 ' + Colors.GREEN + phone_num + Colors.RESET)
        print(Colors.MAGENTA + "-"*50 + Colors.RESET)

        # Return to the list frame before handling the next store.
        switch_left()
        sleep(2)

    # Move to the next page; stop when the button is disabled or its state
    # could not be read as 'false'.
    if next_page == 'false':
        driver.find_element(By.XPATH, '//*[@id="app-root"]/div/div[3]/div[2]/a[7]').click()
    else:
        break
크롤링 결과, 메뉴 등 일부 항목은 생각보다 리스트에 수집되지 않는 경우가 많았음.
'Data Analyst > daily' 카테고리의 다른 글
| [QCC 6회차] (0) | 2025.01.10 |
|---|---|
| [프로젝트 2일차] (0) | 2025.01.08 |
| [데이터 파이프라인 1강] (0) | 2025.01.07 |
| [라이브세션] '5분 기록 보드'로 20시간 절약하기 (0) | 2025.01.06 |
| [QCC 5회차] (1) | 2025.01.03 |