I've had some free time lately and have been learning Python web scraping, and for an old hand like me, pictures of pretty girls are always the best motivation to learn. @(小乖)
Among all the sexy-photo sites out there, mm131 is in a class of its own; their models are genuinely gorgeous @(你懂的), which is what prompted this post.
Here's a picture first #(邪恶)
![[NSFW] Scraping mm131 girl photo sets with Python (Baidu Cloud link included)](https://ae01.alicdn.com/kf/Uf8bc8505674e4a058d3c52481e6fc567E.png)
Got you a little excited, didn't it @(滑稽)
Easy now, let's look at the code.
```python
#!/usr/bin/env python3
import os
import re

import requests
from bs4 import BeautifulSoup


def downloadpic(url):
    # The image host checks Referer/UA, so image requests carry a full header set
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': 'UM_distinctid=160c072721f36a-049309acceadc2-e323462-144000-160c0727220f67; CNZZDATA3866066=cnzz_eid%3D1829424698-1494676185-%26ntime%3D1494676185; bdshare_firstime=1515057214243; Hm_lvt_9a737a8572f89206db6e9c301695b55a=1515057214,1515074260,1515159455; Hm_lpvt_9a737a8572f89206db6e9c301695b55a=1515159455',
        'Host': 'img1.mm131.me',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Referer': 'http://www.mm131.com/'
    }
    # e.g. url = 'http://www.mm131.com/xinggan/3561.html'
    r = requests.get(url)
    # The site is served as GB2312; let requests detect the encoding
    r.encoding = r.apparent_encoding
    html = r.text
    # Album title
    title = BeautifulSoup(html, 'lxml').find('h5').get_text()
    # Page-count text like "共56页"; extract the number
    page = BeautifulSoup(html, 'lxml').find('span', {'class': 'page-ch'}).get_text()
    print(page)
    page = re.search(r'\d+', page).group()
    # Create a folder named after the album title
    path = 'E:\\pic\\'
    os.makedirs(path + title + page + 'P', exist_ok=True)
    # URL of the first image
    a = re.search(r'img alt=.* src="(.*?)" /', html, re.S)
    print(a.group(1))
    pic = requests.get(a.group(1), headers=headers)
    # Save the first image
    with open(path + title + page + 'P' + '\\' + '1.jpg', 'wb') as f:
        f.write(pic.content)
    # Download the remaining images
    after = int(page) + 1
    for i in range(2, after):
        # Rewrite the URL: 3561.html -> 3561_2.html, 3561_3.html, ...
        url1 = url[:-5] + '_' + str(i) + '.html'
        html = requests.get(url1).text
        a = re.search(r'img alt=.* src="(.*?)" /', html, re.S)
        pic = requests.get(a.group(1), headers=headers)
        print(a.group(1))
        with open(path + title + page + 'P' + '\\' + str(i) + '.jpg', 'wb') as f:
            f.write(pic.content)


if __name__ == '__main__':
    # Page 1 of the gallery index
    url = 'http://www.mm131.com/xinggan/'
    html = requests.get(url).text
    urls = BeautifulSoup(html, 'lxml').find('dl', {'class': 'list-left public-box'}).find_all('a', {'target': '_blank'})
    for url in urls:
        url = url['href']
        print(url)
        downloadpic(url)
    # Pages 2 through 121 of the index
    for i in range(2, 122):
        print('Page ' + str(i))
        url = 'http://www.mm131.com/xinggan/list_6_' + str(i) + '.html'
        html = requests.get(url).text
        urls = BeautifulSoup(html, 'lxml').find('dl', {'class': 'list-left public-box'}).find_all('a', {'target': '_blank'})
        for url in urls:
            url = url['href']
            print(url)
            downloadpic(url)
```
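One detail worth highlighting: the HTML pages are fetched with no special headers, but every image request passes the full `headers` dict. The `Referer` line is presumably what gets past the image host's hotlink protection; without it the server is likely to refuse the request or serve a placeholder. A minimal sketch of just that trick (the image URL below is a made-up example):

```python
import requests

# Hypothetical image URL, for illustration only
img_url = 'http://img1.mm131.me/pic/3561/1.jpg'

# Claiming to come from the main site is what satisfies the hotlink check
resp = requests.get(img_url, headers={'Referer': 'http://www.mm131.com/'})
resp.raise_for_status()

with open('1.jpg', 'wb') as f:
    f.write(resp.content)
```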
P.S.:
- Make sure the required modules are installed: `pip install requests beautifulsoup4 lxml`
- Remember to change the save path, i.e. the `path = 'E:\\pic\\'` line inside `downloadpic`
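If you're not on Windows, a more portable way to build that path (my suggestion, not part of the original script) is `os.path.join`:

```python
import os

# Hypothetical base directory and album values, for illustration
path = os.path.join(os.path.expanduser('~'), 'pic')
title, page = 'example-album', '56'

album_dir = os.path.join(path, title + page + 'P')
os.makedirs(album_dir, exist_ok=True)  # no error if the folder already exists
print(os.path.join(album_dir, '1.jpg'))  # where the first image would land
```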
@(呵呵) Look here: it's open source on GitHub, go give it a star!
Baidu Cloud link to the photo sets: https://pan.baidu.com/s/4dFQ7Tdv
Original article by Y4er. Reproduction without permission is prohibited; to republish, please contact the author: Y4er.