# coding=utf-8
"""
作者:gaojs
功能:
新增功能:
日期:2022/5/13 20:14
"""
import os.path
import json
import pprint
import re
import requests
from faker import Factory
def get_random_ua():
    """
    Return a randomly generated User-Agent string.

    A new faker generator is created on every call and the result of its
    ``user_agent()`` method is returned.
    """
    return Factory.create().user_agent()
def get_photo():
    """
    Download the images of the mengkai.fun album into the ``photo/`` directory.

    Listing pages 1 through 9 of the album are fetched using a single random
    User-Agent. For every ``<img>`` tag on a page that matches the thumbnail
    pattern, the download URL is built by replacing the last path segment of
    the tag's ``src`` with its ``alt`` text, and the response body is written
    to ``photo/<alt>``.

    A listing page or image whose request fails (connection error, timeout or
    HTTP error status) is reported on stdout and skipped, so one bad request
    does not abort the run and error pages are never saved as images.
    """
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs('photo/', exist_ok=True)

    # The headers and the pattern do not change between pages; build them once.
    headers = {'user-agent': get_random_ua()}
    img_pattern = re.compile(
        r'<img src="(.*?)" alt="(.*?)" width="500" height="281">'
    )

    for page in range(1, 10):
        url = f'https://mengkai.fun:88/album/C7X/?sort=date_desc&page={page}'
        try:
            rsp = requests.get(url=url, headers=headers, timeout=50)
            rsp.raise_for_status()
        except requests.RequestException as err:
            print(f'Skipping page {page}: {err}')
            continue

        for src, alt in img_pattern.findall(rsp.text):
            # The alt text comes from remote HTML; keep only its final path
            # component so the file can never be written outside photo/.
            file_name = os.path.basename(alt)
            if not file_name:
                print(f'Skipping image with unusable name: {alt!r}')
                continue

            # Swap the last path segment of src for the alt text.
            new_url_content = src.rpartition('/')[0] + '/' + alt
            print(new_url_content)

            # Download first so that a failed request leaves no empty file behind.
            try:
                img_rsp = requests.get(new_url_content, headers=headers, timeout=50)
                img_rsp.raise_for_status()
            except requests.RequestException as err:
                print(f'Skipping {new_url_content}: {err}')
                continue

            with open(os.path.join('photo', file_name), mode='wb') as f:
                print(f'******************************正在下载 {alt} ****************************')
                f.write(img_rsp.content)
def get_page():
    """
    Placeholder for pagination handling; currently performs no work.
    """
    return None
# Run the scraper only when this file is executed as a script, so that
# importing the module does not start network downloads as a side effect.
if __name__ == '__main__':
    get_photo()