 '''
 @File    :   test.py
 @Time    :   2021/03/06 00:19:09
 @Author  :   Zhu Zhouyue
 @Version :   1.0
 @Contact :   zhuzhouyue2005@outlook.com
@Desc    :   Batch-download the Bing daily wallpapers archived at https://bing.ioliu.cn
 '''
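# The script reads the total page count from the site's pager ("1 / N"), then walks
# every gallery page and saves each full-size wallpaper to disk as "<date>__<title>.jpg".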
 
 
 import requests
 import re
 from bs4 import BeautifulSoup
 import time
 
def GetHtmlText(url):
    # Fetch a page and return the requests Response, or None if the request fails.
    user_agent = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}
    try:
        rspon = requests.get(url, headers=user_agent)
        rspon.encoding = rspon.apparent_encoding
        rspon.raise_for_status()
    except requests.RequestException as err:
        print('Failed to fetch page:', err)
        return None
    return rspon
 
def GetMaxPageCount():
    # Read the pager text ("1 / N") on the first gallery page to get the total page count.
    max_page_count = 0
    url = 'https://bing.ioliu.cn/'
    soup = BeautifulSoup(GetHtmlText(url).text, "html.parser")
    tag_page = soup.find('div', {'class': 'page'})
    page_txt = None
    for tag_child in tag_page.children:
        if tag_child.name == 'span':
            page_txt = tag_child.string
    match = re.search(r'(?<=1 / )\d*', page_txt)
    max_page_count = int(match.group(0))
    time.sleep(0.5)
    return max_page_count
 
def SavePictureInUrl(pic_url, pic_name, pic_path):
    # Download a single image and save it to pic_path as "<pic_name>.jpg".
    source = GetHtmlText(pic_url)
    if source is None:
        return
    file_name = '{}.jpg'.format(pic_name)
    with open(pic_path + file_name, "wb") as file:
        file.write(source.content)
 
def GetOnePageJpg(page_count, pic_path):
    # Download every wallpaper listed on one gallery page.
    url = 'https://bing.ioliu.cn/?p={}'.format(page_count)
    soup = BeautifulSoup(GetHtmlText(url).text, 'html.parser')
    tag_container = soup.find_all('div', {'class': 'container'})
    tag_item = tag_container[1]
    url_photo = 'https://bing.ioliu.cn'
    # Iterate only over the direct <div> children (the picture cards), skipping stray text nodes.
    for tag_pic in tag_item.find_all('div', recursive=False):
        # Picture title, with characters that are illegal in file names removed.
        tag_title = tag_pic.find('h3')
        text_title = tag_title.string
        a = re.findall(r'[^\*"/:?\\|<>]', text_title, re.S)
        text_title = ''.join(a)
        # Date shown on the card, used as the file-name prefix.
        tag_calendar = tag_pic.find('p', {'class': 'calendar'})
        tag_em = tag_calendar.find('em')
        text_calendar = tag_em.string
        text_pic_name = text_calendar + '__' + text_title
        # Download link of the full-size image.
        tag_download = tag_pic.find('a', {'class': 'ctrl download'})
        url_pic = url_photo + tag_download['href']
        SavePictureInUrl(url_pic, text_pic_name, pic_path)
        print('.', end='', flush=True)
        time.sleep(5)

def GetAllPageJpg(pic_path):
    # Walk every gallery page and download its wallpapers, printing overall progress.
    max_page_count = GetMaxPageCount()
    for page_index in range(1, max_page_count + 1):
        GetOnePageJpg(page_index, pic_path)
        print('\r', 'Fetching, {:.2f} % done'.format(page_index / max_page_count * 100), end='', flush=True)
 
def main():
    # Target directory for the downloaded wallpapers; adjust it to your own system.
    pic_path = '/Volumes/ZZY/bing/'
    GetAllPageJpg(pic_path)


if __name__ == '__main__':
    main()
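# Usage: run the script directly, e.g. `python test.py`. Before starting, make sure
# pic_path in main() points to an existing, writable directory, since the script
# writes the images there without creating the directory itself.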
 