import re
from urllib import parse
from urllib import request
# Base address of the Baidu search endpoint; the keyword is appended below.
url = "http://www.baidu.com/s?wd="
key = "python"  # search keyword
# Total number of search results. Starts at 0 and is overwritten by
# getInfosByPage when the first result page is fetched.
pageCount = 0
# Percent-encode the keyword: non-ASCII text (e.g. Chinese) has to be escaped
# before it can be placed in a URL query string.
key_code = parse.quote(key)
# Search URL including the keyword; the result offset is appended after "pn=".
url = url + key_code + "&ie=utf-8&pn="
# Request headers: a browser User-Agent string is sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
    ),
}
def getInfosByPage(page):
    """Fetch one page of Baidu search results and print each title and URL.

    Args:
        page: 1-based result page number. The offset appended to the
            module-level ``url`` is ``page * 10 - 10``.

    Side effects:
        When ``page`` is 1, the module-level ``pageCount`` is replaced by the
        total result count parsed from the page (0 if the page reports none).
        For every result found, prints the title, the URL and a separator
        line of 100 ``=`` characters.

    Raises:
        ValueError: if the reported result count is not an integer after
            removing thousands separators.
        urllib.error.URLError: if the HTTP request fails.
    """
    global pageCount
    offset = page * 10 - 10
    req = request.Request(url + str(offset), headers=headers)
    # Use the response as a context manager so the connection is closed even
    # if reading or decoding fails.
    with request.urlopen(req) as res:
        html = res.read().decode("utf-8", "ignore")
    if page == 1:
        pageCount = _parse_total(html)
    # NOTE(review): the guard in the original source was truncated to
    # "if pageCount"; it is reconstructed here as "stop once the requested
    # offset is at or past the total number of results" - confirm intent.
    if pageCount <= offset:
        return
    for title, link in _extract_results(html):
        print("标题:" + title)
        print("网址:" + link)
        print("=" * 100)


def _parse_total(html):
    """Return the total result count reported in *html*, or 0 if absent."""
    found = re.search("百度为您找到相关结果约(.*?)个", html)
    if found is None:
        # No result-count banner on the page: treat it as "no results"
        # rather than failing with an IndexError on an empty match list.
        return 0
    # The count is rendered with thousands separators, e.g. "1,234,567".
    return int(found.group(1).replace(",", ""))


def _extract_results(html):
    """Return the (title, url) pairs found in ``data-tools`` attributes."""
    pat = 'data-tools="{"title":"(.*?)","url":"(.*?)"}"'
    # Make every quote optional and accept either quote character, so the
    # pattern matches however the attribute value happens to be quoted.
    pat = pat.replace('"', '[\'"]?')
    return re.findall(pat, html, re.S | re.M)
if __name__ == "__main__":
    # Script entry point: request result pages 1 through 9 in order. Page 1
    # must come first because it is the page that records the total count.
    for page in range(1, 10):
        getInfosByPage(page)
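# (Web-page advertisement residue, not part of the script) 领取专属 10元无门槛券
# (Web-page advertisement residue, not part of the script) 私享最新 技术干货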