From 1b5efdbf3ed1fde116c60e941fab2369048121d6 Mon Sep 17 00:00:00 2001 From: sss011317 Date: Fri, 11 Dec 2020 14:39:34 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E5=8F=96169=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E7=BD=91=E7=AB=99=EF=BC=8C=E8=B5=B7=E5=A7=8B=E9=A0=81=E6=95=B8?= =?UTF-8?q?=E8=88=87=E7=B5=90=E6=9D=9F=E9=A0=81=E6=95=B8=E7=84=A1=E6=B3=95?= =?UTF-8?q?=E5=B0=8D=E6=87=89class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...76\347\211\207\347\275\221\347\253\231.py" | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git "a/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py" "b/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py" index 4546759..8680ae7 100644 --- "a/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py" +++ "b/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py" @@ -7,6 +7,8 @@ headers={ #下载图片的模块 def Download_the_module(file,tehurl): + # 輸入您要放置的檔案位置 + savePath = "C:/Users/user-50/source/python/crawlerResource/" count = 1 # 进入网站下载图片 The_second_request = requests.get(tehurl, headers=headers).text @@ -19,36 +21,37 @@ def Download_the_module(file,tehurl): save=i.attr('src') #print(save) The_sponse=requests.get(save,headers=headers) - The_name='F:/图片/'+file + The_name=savePath+file Save_the_address = str(The_name) # 检测是否有image目录没有则创建 if not os.path.exists(Save_the_address): - os.makedirs('F:/图片/' + file) + os.makedirs(savePath + file) else: with open(Save_the_address+'/%s.jpg'%count,'wb')as f: f.write(The_sponse.content) - print('已经下载了%s张'%count) + print('將資料下載到%s | 並下載了%s張'%(Save_the_address,count)) count += 1 #爬取地址 -def Climb_to_address(page): - - URL='https://www.169tp.com/gaogensiwa/list_3_%s.html'%page - sponse=requests.get(URL,headers=headers) - sponse.encoding='gbk' - encodin=sponse.text - doc=pq(encodin) - extract=doc('.pic').items() - for i in extract: - #文件名 - The_file_name=i.text() - #提取到的网站 - The_url=i.attr('href') - - Download_the_module(The_file_name,The_url) +def Climb_to_address(Startpage,Endpage): + print("起始頁數:%s,結束頁數:%s,總共找尋頁數:%s" %(Startpage,Endpage,Endpage-Startpage)) + for page in range(Startpage,Endpage): + URL='https://www.169tp.com/gaogensiwa/list_3_%s.html'%page + sponse=requests.get(URL,headers=headers) + sponse.encoding='gbk' + encodin=sponse.text + doc=pq(encodin) + extract=doc('.pic').items() + for i in extract: + #文件名 + The_file_name=i.text() + #提取到的网站 + The_url=i.attr('href') + + Download_the_module(The_file_name,The_url) #一共有616页 a=int(input('请输入开始爬取的页数:')) -- Gitee