一、
1,python version: 3.6.5;
2,Django version: 2.0.5;
3,web 应用测试工具selenium库: pip install selenium
4,浏览器驱动: webdriver。我用的是chrome浏览器,需要下载与浏览器版本对应的驱动程序(chromedriver),参考https://blog.csdn.net/huilan_same/article/details/51896672
5,用bs4解析网页;
6,用mysql存储数据-注意修改settings的配置;
7,房天下成都天府新区二手房信息:http://cd.esf.fang.com/house-a016418/
8,网页html格式用到了bootstrap;
二、
原理:
1,用web自动化测试工具selenium驱动chrome浏览器访问网页,得到目标网页;
2,用BeautifulSoup解析网页,提取需要的信息,将提取出的信息存储在mysql数据库里,然后关闭数据库连接和浏览器;
3,最后从mysql数据库里读取存储的数据,并展示在网页上。
三、
代码实现:
这里只放置了views.py的代码(省略了redirect、render以及room模型的导入语句),其他代码简单容易实现。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import pymysql
# First result page of the listing that is scraped.
_FIRST_PAGE_URL = 'http://cd.esf.fang.com/house-a016418/'

# Prefix joined with the href of each listing's first <a> to build its link.
_LISTING_URL_PREFIX = 'http://cd.esf.fang.com'

# Selectors for the "next page" link, tried in this order: the second one is
# the fallback used when the first one cannot be clicked.
_NEXT_PAGE_SELECTORS = (
    '#list_D10_15 > p:nth-child(10) > a',
    '#list_D10_15 > p:nth-child(12) > a',
)

# Columns of user_room filled from one listing, in insert order.
_ROOM_FIELDS = (
    'house_title', 'house_room_number', 'house_size', 'house_floor',
    'house_diretion', 'house_location', 'house_total_price',
    'house_per_price', 'house_link',
)

_CREATE_ROOM_TABLE_SQL = (
    "create table if not exists user_room ("
    "id INT PRIMARY KEY AUTO_INCREMENT,"
    "house_title VARCHAR(200),house_room_number VARCHAR(200),"
    "house_size VARCHAR(200),house_floor VARCHAR(200),"
    "house_diretion VARCHAR(200),house_location VARCHAR(200),"
    "house_total_price VARCHAR(200),house_per_price VARCHAR(200),"
    "house_link VARCHAR(200))"
)

# Placeholders are bound by the driver, so quotes inside scraped text can
# neither break the statement nor inject SQL.
_INSERT_ROOM_SQL = (
    "insert into user_room (" + ",".join(_ROOM_FIELDS) + ") "
    "VALUES (" + ",".join(["%s"] * len(_ROOM_FIELDS)) + ")"
)


def _parse_listing(item):
    """Extract the user_room fields from one ``<dl class="clearfix">`` tag.

    Returns a dict keyed by ``_ROOM_FIELDS``. A field whose markup is missing
    is set to ``False``; a field whose text is empty stays an empty string.
    """
    row = {}

    try:
        row['house_title'] = item.find('span').text
    except AttributeError:
        row['house_title'] = False

    try:
        details = item.find('p', class_='tel_shop').text
    except AttributeError:
        details = None
    if details is None:
        row['house_room_number'] = False
        row['house_size'] = False
        row['house_floor'] = False
        row['house_diretion'] = False
    else:
        # Fixed character offsets into the rendered text of the details
        # paragraph; they depend on the exact whitespace layout of the page.
        row['house_room_number'] = details[40:50].strip()
        row['house_size'] = details[91:95].strip()
        row['house_floor'] = details[100:200].strip()
        row['house_diretion'] = details[270:300].strip()

    try:
        row['house_location'] = item.find('p', class_='add_shop').find('span').text
    except AttributeError:
        row['house_location'] = False

    price_box = item.find('dd', class_='price_right')
    try:
        row['house_total_price'] = price_box.find('span', class_='red').text.strip()
    except AttributeError:
        row['house_total_price'] = False
    try:
        row['house_per_price'] = price_box.find('span', class_='').text.strip()
    except AttributeError:
        row['house_per_price'] = False

    try:
        row['house_link'] = _LISTING_URL_PREFIX + str(item.find('a')['href'])
    except (TypeError, KeyError):
        # No <a> tag (None is not subscriptable) or no href attribute.
        row['house_link'] = False

    return row


def _store_listings(page_source, cursor, connection):
    """Parse one result page and insert every complete listing.

    Each listing's fields are printed; a listing is inserted only when all
    nine fields are truthy. The inserts are committed once per page.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    for item in soup.find_all('dl', class_='clearfix'):
        row = _parse_listing(item)
        if row['house_link']:
            print(row['house_link'])
        for field in _ROOM_FIELDS[:-1]:
            print(row[field])
        if all(row[field] for field in _ROOM_FIELDS):
            cursor.execute(_INSERT_ROOM_SQL, [row[field] for field in _ROOM_FIELDS])
    connection.commit()


def _click_next_page(wait):
    """Click the "next page" link, falling back to the second selector.

    A failure on the fallback selector propagates to the caller.
    """
    # Local import: keeps this block independent of the file's import list.
    from selenium.common.exceptions import WebDriverException

    first, fallback = _NEXT_PAGE_SELECTORS
    try:
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, first))).click()
    except WebDriverException:
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, fallback))).click()


def _scrape_pages(browser, cursor, connection):
    """Load the first result page, then two further pages, storing each."""
    # Local import: keeps this block independent of the file's import list.
    from selenium.common.exceptions import WebDriverException

    wait = WebDriverWait(browser, 5)
    browser.get(_FIRST_PAGE_URL)
    try:
        # Best effort: dismiss the '#closemengceng' element when it shows up.
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#closemengceng'))).click()
        print('done')
    except WebDriverException:
        pass
    time.sleep(1)
    # Parse only once the pager link is present in the DOM.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, _NEXT_PAGE_SELECTORS[0])))
    _store_listings(browser.page_source, cursor, connection)

    for _ in range(2):
        _click_next_page(wait)
        _store_listings(browser.page_source, cursor, connection)


def houseinfo(request):
    """Scrape three pages of listings into MySQL, then redirect to the list view.

    Recreates the ``user_room`` table in the ``studyuser`` database, drives
    Chrome through the first result page and the two following pages, and
    inserts every listing for which all fields could be extracted.

    The database connection, the cursor and the browser are released even
    when a step fails; the exception itself still propagates.

    Returns:
        The response of ``redirect('/user/soufangwang/')``.
    """
    # No default database is selected here: connecting with db='studyuser'
    # fails when the database does not exist yet, which would make the
    # "create database if not exists" statement below unreachable.
    connect = pymysql.connect(user='root', password='xxxxxx', host='localhost', port=3306,
                              charset='utf8')
    try:
        conn = connect.cursor()
        try:
            conn.execute("create database if not exists studyuser character set utf8;")
            conn.execute("use studyuser;")
            conn.execute('drop table if exists user_room;')
            conn.execute(_CREATE_ROOM_TABLE_SQL)

            browser = webdriver.Chrome()
            try:
                _scrape_pages(browser, conn, connect)
            finally:
                # quit() ends the whole driver session; close() would only
                # close the current window.
                browser.quit()
        finally:
            conn.close()
    finally:
        connect.close()
    return redirect('/user/soufangwang/')
def soufangwang(request):
    """Render 'user/soufangwang.html' with all ``room`` records.

    The template context carries the same two entries as the view's local
    namespace: ``request`` and ``house`` (the ``room.objects.all()`` result).
    """
    context = {
        'request': request,
        'house': room.objects.all(),
    }
    return render(request, 'user/soufangwang.html', context)
四、
Django工作流程原理图:
网页效果图:
有兴趣也可以看看网站其他未完成页面:
用户名:admin
密码:123
http://xiaomokuaipao.com/user/index/1/