如何解决 Python 中 CSS 选择器返回 None(NoneType)的问题
我在尝试抓取标题时,选择器总是返回 None(NoneType),这是为什么?我已经折腾了一整天,查了很多资料,仍然一无所获。
import requests
from bs4 import BeautifulSoup
# Search-results URL (page 1, bounding radius 120 mi). It is not referenced by
# the code visible here; presumably meant as the input to get_page_link -- TODO confirm.
urlz = "http://www.weedmaps.com/search?entryType=home%20page%20product%20card&filter%5BboundingRadius%5D=120mi&page=1"
def get_page_link(url):
    """Return absolute product links found on a search-results page.

    Args:
        url: Address of the search-results page to download.

    Returns:
        A list of absolute URLs, one per matching anchor that carries an
        ``href`` attribute. The list is empty when the selector matches
        nothing.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example ``406 Not Acceptable``), instead of silently
            parsing the error page and returning an empty list.
    """
    from urllib.parse import urljoin

    baseurl = "https://weedmaps.com"
    # A bare requests client was answered with "406 Not Acceptable"; send
    # browser-like headers so the server is more likely to return the page.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error status rather than scraping the error body.
    r.raise_for_status()
    sp = BeautifulSoup(r.text, 'html')
    # Full path of one matching anchor, kept for reference:
    # #menu-tab-wrapper > div:nth-child(2) > div:nth-child(2) > div > div > div > div.styles__NameRatingWrap-j5iyiv-15.eaLQmf > a:nth-child(2)
    # NOTE(review): these class names look auto-generated and may change
    # between site builds -- verify the selector against the live markup.
    links = sp.select("#menu-tab-wrapper div.styles__NameRatingWrap-j5iyiv-15.eaLQmf > a:nth-child(2)")
    # urljoin copes with relative and absolute hrefs alike; anchors without
    # an href are skipped instead of raising KeyError.
    return [urljoin(baseurl, link['href']) for link in links if link.has_attr('href')]
def product_data(url):
    """Download a product page and extract its title element.

    Args:
        url: Address of the product page to download.

    Returns:
        A dict with the single key ``'Title: '`` mapped to the matched
        ``<h1>`` element, or to ``None`` when the selector matches nothing
        in the downloaded HTML.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example ``406 Not Acceptable``). Previously the error body
            was parsed as if it were the product page, which made every
            selector return ``None`` without explanation.
    """
    # A bare requests client was answered with "406 Not Acceptable"; send
    # browser-like headers so the server is more likely to return the page.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error status rather than scraping the error body.
    r.raise_for_status()
    sp = BeautifulSoup(r.text, 'html')
    # NOTE(review): the class names in these selectors look auto-generated and
    # may change between site builds; the content may also be rendered by
    # JavaScript, in which case it is absent from r.text -- verify.
    product = {
        'Title: ': sp.select_one("#content > div.content-wrapper__ContentWrapper-ljfebg-0.efqrNq > div > div > div.src__Box-sc-1sbtrzs-0.src__Flex-sc-1sbtrzs-1.bQaUiS.eSfsMV > div.src__Box-sc-1sbtrzs-0.jvkycj > div > h1 "),
        # Fields not enabled yet. Before enabling any of them, guard against
        # select_one returning None, since `.text` on None raises AttributeError.
        #'Brand': sp.select_one("div.styled-components__ProductCategoryBrand-sc-1fbw3xt-6 a").text
        #'Price': sp.select_one("div.styled-components__Price-sc-1fbw3xt-15 ").text
        #'Distance': sp.select_one("div.styled-components__FromListing-sc-1fbw3xt-20 span").text
        #'Pick_up_location': sp.select_one("div.styled-components__FromListing-sc-1fbw3xt-20 span").text
        #'Obj_type': sp.select_one("div.styled-components__ProductCategoryBrand-sc-1fbw3xt-6").text
    }
    return product
# Fetch a single product page. The returned dict is discarded by this bare
# call; wrap it in print(...) to see the result when running as a script.
product_data("https://weedmaps.com/brands/stiiizy/products/stiiizy-battery-starter-kit?filter%5BanyWeights%5D%5Bunit%5D%5B0%5D=1&filter%5BboundingRadius%5D=120mi&boost%5Blisting_wmid%5D=587311662&origin=search")
解决方法
>>> import requests
>>> url = "https://weedmaps.com/brands/stiiizy/products/stiiizy-battery-starter-kit?filter%5BanyWeights%5D%5Bunit%5D%5B0%5D=1&filter%5BboundingRadius%5D=120mi&boost%5Blisting_wmid%5D=587311662&origin=search"
>>> from bs4 import BeautifulSoup
>>> r = requests.get(url)
>>> sp = BeautifulSoup(r.text,'html')
>>> sp.prettify()
'<html>\n <body>\n <p>\n 406 Not Acceptable\n </p>\n </body>\n</html>'
>>> print(r.text)
406 Not Acceptable
这是我得到的结果:服务器返回的是 “406 Not Acceptable”,而不是商品页面。看起来服务器有某种反爬虫机制,或者需要额外的验证。
很明显,你无法从一个根本不包含该元素的响应中把它取出来,所以选择器只能返回 None。
我想你需要换一种抓取方式。
感谢阅读。
这是一个可能有用的链接。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。