新乡营销网站建设公司四川seo哪里有
该楼层疑似违规已被系统折叠 隐藏此楼查看此楼
用python抓取网页数据,抓取完最后一页进了无限循环,应该该怎么办,跪求大神
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pymysql
def yes24Search():
url = "http://www.yes24.com/"
driver = webdriver.Chrome("./chromedriver.exe")
driver.get(url)
yes24Service = driver.find_elements_by_css_selector(".yesCornerLi li")
yes24Service[0].click()
time.sleep(1)
yes24Category = driver.find_elements_by_css_selector("#bestMenu .dpth1 li")
yes24Category[4].click()
time.sleep(1)
search = input("search : ")
inputBox = driver.find_element_by_id("query")
inputBox.send_keys(search)
inputBox.send_keys(Keys.ENTER)
searchPages(driver)
def yes24Info(driver):
time.sleep(1)
productTitle = driver.find_elements_by_css_selector(".goods_infogrp .goods_name strong")
productPrice = driver.find_elements_by_css_selector(".goods_infogrp .goods_price strong")
for index in range(len(productTitle)):
print("product :" , productTitle[index].text)
print("price : ",productPrice[index].text)
print("==================================================================")
dbData = [[productTitle[index].text,productPrice[index].text]]
connectDB(dbData)
def searchPages(driver):
yes24Info(driver)
while True:
nextBtn = driver.find_elements_by_css_selector(".pagen.pat30.pab15 .bw.ne")
if nextBtn == []:
pageNo = driver.find_elements_by_css_selector(".pagen.pat30.pab15 a")
for no in range(len(pageNo)):
pageNo = driver.find_elements_by_css_selector(".pagen.pat30.pab15 a")
pageNo[no].click()
print("next page")
time.sleep(1)
yes24Info(driver)
print("last page")
break
else:
pageNo = driver.find_elements_by_css_selector(".paginate a")
for no in range(len(pageNo)):
pageNo = driver.find_elements_by_css_selector(".paginate a")
pageNo[no].click()
print("next page")
time.sleep(1)
abcmartInfo(driver)
nextBtn = driver.find_elements_by_css_selector(".paginate .next")
nextBtn[0].click()
yes24Search()