3DM images

from bs4 import BeautifulSoup
import requests
'''
Target articles:
https://www.3dmgame.com/bagua/2192.html
https://www.3dmgame.com/bagua/2216.html

If the images go stale, swap new article URLs into the list below.
'''
list_head_url = ['https://www.3dmgame.com/bagua/2216', 'https://www.3dmgame.com/bagua/2192']

for head_url in list_head_url:
    # head_url = 'https://www.3dmgame.com/bagua/1570'
    tail_url = '.html'

    for page in range(1, 5):
        # Page 1 is the bare article URL; later pages follow the _N.html pattern.
        if page == 1:
            url = head_url + tail_url
        else:
            url = head_url + '_' + str(page) + tail_url

        head = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0'}

        re_url = requests.get(url, headers=head)
        bs_url = BeautifulSoup(re_url.text, 'html.parser')
        items = bs_url.find_all('p', align='center')

        for item in items:
            src = item.find('img')['src']
            # print(src)
            res = requests.get(src).content
            # Name the file after the last six characters of the URL stem.
            with open(src[-10:-4] + '.jpg', 'wb') as photo:
                photo.write(res)

input('Scraping finished; press Enter to exit.')
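The src[-10:-4] slice assumes every image URL ends in a six-character stem plus a four-character extension. A sketch of a more robust alternative (filename_from_url is a hypothetical helper, not part of the original script) derives the name from the URL path instead:

import os
from urllib.parse import urlparse

def filename_from_url(url):
    # '.../uploads/abc123.jpg' -> 'abc123.jpg'
    return os.path.basename(urlparse(url).path)

With it, the save line becomes open(filename_from_url(src), 'wb'), which keeps working if the site changes its naming scheme.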

Netbian wallpapers

from bs4 import BeautifulSoup
import requests
"""
Target: http://pic.netbian.com/4kfengjing/

Edit list_head_url below to change the category;
edit the page loop's range to change how many pages are scraped.
"""
list_head_url = ['http://pic.netbian.com/4kyingshi/']

first_head_url = 'http://pic.netbian.com'  # site prefix for building detail-page URLs

# headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Cache-Control': 'max-age=0', 'Connection': 'keep-alive', 'Host': 'pic.netbian.com', 'Referer': 'http://pic.netbian.com/4kmeinv/', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'}

# proxies = {
#     "http": "183.3.218.34"  # proxy IP
# }

for head_url in list_head_url:
    tail_url = '.html'

    for page in range(1, 20):
        # Page 1 is the bare category URL; later pages use index_N.html.
        if page == 1:
            url = head_url
        else:
            url = head_url + 'index_' + str(page) + tail_url

        print(url)
        re_url = requests.get(url)
        # re_url = requests.get(url, headers=headers, proxies=proxies)
        print(re_url.status_code)
        bs_url = BeautifulSoup(re_url.text, 'html.parser')

        items = bs_url.find_all('ul', class_='clearfix')

        # Extract each thumbnail's detail-page link, then join it with the site prefix.
        for item in items:
            list_a = item.find_all('a')
            for a_tag in list_a:
                # print(a_tag['href'])
                first_tail_url = a_tag['href']
                first_url = first_head_url + first_tail_url

                # Fetch the detail page and pull the full-size image path from it.
                re_second_url = requests.get(first_url)
                # re_second_url = requests.get(first_url, headers=headers, proxies=proxies)
                bs_second_url = BeautifulSoup(re_second_url.text, 'html.parser')
                second_items = bs_second_url.find_all('div', class_='photo-pic')

                for pic in second_items:
                    second_tail_url = pic.find('img')['src']
                    second_url = first_head_url + second_tail_url
                    res = requests.get(second_url).content
                    # res = requests.get(second_url, headers=headers, proxies=proxies).content
                    with open(second_url[-10:-4] + '.jpg', 'wb') as photo:
                        photo.write(res)
                    print('Downloaded one image')

print('All downloads finished')
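The commented-out headers and proxies suggest the site sometimes rejects bare requests. A minimal retry/delay wrapper is one hedged way to cope; polite_get is a hypothetical helper, and the retry count and delay are arbitrary assumptions, not known site limits:

import time
import requests

def polite_get(url, headers=None, retries=3, delay=1.0, timeout=10):
    # Browser-like header by default; back off briefly between failed attempts.
    headers = headers or {'User-Agent': 'Mozilla/5.0'}
    for attempt in range(retries):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()
            return resp
        except requests.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(delay)

Swapping the bare requests.get calls above for polite_get would add timeouts and retries without changing any of the parsing logic.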

Photography works (Chiphell)

import os

import requests
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning


class chiphell:
    def __init__(self, page):
        self.page = page

    def get_forumaddr(self):
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
        self.forumaddr = []
        self.cookiesdict = {}
        self.proxies = {'http': '127.0.0.1:8888', 'https': '127.0.0.1:8888'}  # local debug proxy; not currently passed to requests
        self.headers = \
            {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
             "referer": "https://www.chiphell.com/forum-62-1.html"}
        self.address = "https://www.chiphell.com/forum.php"
        self.params = {"mod": "forumdisplay",
                       "fid": "62",
                       "filter": "typeid",
                       "orderby": "lastpost",
                       "typeid": "277",
                       "page": "1"}
        for i in range(1, self.page + 1):
            self.params['page'] = i
            resdata = requests.get(self.address, headers=self.headers, params=self.params, cookies=self.cookiesdict)
            # Fold any cookies set by this response into the running cookie dict.
            self.cookiesdict = dict(self.cookiesdict, **requests.utils.dict_from_cookiejar(resdata.cookies))
            data = BeautifulSoup(resdata.text, 'html.parser').find('ul', class_='ml').find_all('li')
            for ii in data:
                self.forumaddr.append('https://www.chiphell.com/' + ii.find('h3', class_='xw0').find('a')['href'])

    def get_forum_picture(self, address):
        print(address)
        # Use the thread id from the URL as the per-thread output folder name.
        dic = address[address.find('viewthread&tid=') + 15:address.find('&extra=page')]
        os.makedirs(os.path.join('output', dic), exist_ok=True)
        self.get_picture_headers = \
            {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
             "referer": "https://www.chiphell.com/forum.php?mod=forumdisplay&fid=62&filter=typeid&typeid=277"}
        self.get_picture_address = address
        resdata = requests.get(self.get_picture_address, headers=self.get_picture_headers, cookies=self.cookiesdict)
        data = BeautifulSoup(resdata.text, 'html.parser').find('div', class_='t_fsz').find_all('img', class_='zoom')
        for i in data:
            try:
                zoomfile = i.get('zoomfile')
                if zoomfile:
                    print(zoomfile)
                    filename = zoomfile[zoomfile.rfind('/') + 1:]
                    print(filename)
                    pic = requests.get(zoomfile, headers=self.get_picture_headers, cookies=self.cookiesdict)
                    with open(os.path.join('output', dic, filename), 'wb') as file:
                        file.write(pic.content)
            except Exception:
                # Skip attachments that are missing or fail to download.
                pass

    def Start(self):
        self.get_forumaddr()
        for i in self.forumaddr:
            self.get_forum_picture(i)


a = chiphell(10)
a.Start()
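get_forumaddr threads cookies by hand, merging dict_from_cookiejar output after every request. A requests.Session does the same bookkeeping automatically; a minimal sketch of the equivalent setup, using the same URLs and headers as above:

import requests

session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
    "referer": "https://www.chiphell.com/forum-62-1.html",
})
# Cookies from every response are stored on the session and sent
# automatically with subsequent requests; no manual merging needed.
resp = session.get("https://www.chiphell.com/forum.php",
                   params={"mod": "forumdisplay", "fid": "62", "page": "1"})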