diff --git "a/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py" "b/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py"
index 4546759858710581f72e5a63e2360da11179c2b9..8680ae77c5bf001402711300fb9780942be5a000 100644
--- "a/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py"
+++ "b/\347\210\254\345\217\226169\345\233\276\347\211\207\347\275\221\347\253\231.py"
@@ -7,6 +7,8 @@ headers={
 # Module that downloads the images
 def Download_the_module(file,tehurl):
+    # Set this to the directory where downloaded files should be saved
+    savePath = "C:/Users/user-50/source/python/crawlerResource/"
     count = 1
     # Request the gallery page and download its images
     The_second_request = requests.get(tehurl, headers=headers).text
@@ -19,36 +21,37 @@ def Download_the_module(file,tehurl):
         save=i.attr('src')
         #print(save)
         The_sponse=requests.get(save,headers=headers)
-        The_name='F:/图片/'+file
+        The_name=savePath+file
         Save_the_address = str(The_name)
         # Create the image directory if it does not exist yet
         if not os.path.exists(Save_the_address):
-            os.makedirs('F:/图片/' + file)
+            os.makedirs(savePath + file)
         else:
             with open(Save_the_address+'/%s.jpg'%count,'wb') as f:
                 f.write(The_sponse.content)
-                print('Downloaded %s images so far'%count)
+                print('Saved data to %s | downloaded %s images'%(Save_the_address,count))
                 count += 1
 
 # Crawl the listing pages for gallery addresses
-def Climb_to_address(page):
-
-    URL='https://www.169tp.com/gaogensiwa/list_3_%s.html'%page
-    sponse=requests.get(URL,headers=headers)
-    sponse.encoding='gbk'
-    encodin=sponse.text
-    doc=pq(encodin)
-    extract=doc('.pic').items()
-    for i in extract:
-        # File name
-        The_file_name=i.text()
-        # Extracted gallery URL
-        The_url=i.attr('href')
-
-        Download_the_module(The_file_name,The_url)
+def Climb_to_address(Startpage,Endpage):
+    print("Start page: %s, end page: %s, total pages to crawl: %s" %(Startpage,Endpage,Endpage-Startpage))
+    for page in range(Startpage,Endpage):
+        URL='https://www.169tp.com/gaogensiwa/list_3_%s.html'%page
+        sponse=requests.get(URL,headers=headers)
+        sponse.encoding='gbk'
+        encodin=sponse.text
+        doc=pq(encodin)
+        extract=doc('.pic').items()
+        for i in extract:
+            # File name
+            The_file_name=i.text()
+            # Extracted gallery URL
+            The_url=i.attr('href')
+
+            Download_the_module(The_file_name,The_url)
 
 # There are 616 pages in total
 a=int(input('Enter the page number to start crawling from: '))
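
One thing worth flagging in the patch above: the file write sits in the `else:` branch of the `os.path.exists` check, so on the pass that first creates a gallery's directory the current image is never written and `count` is not advanced. Below is a minimal sketch of a save step that creates the directory and still writes every image; `save_image` is a hypothetical helper name, not part of the patch, and `response` stands for the `requests` response the script calls `The_sponse`:

    import os

    def save_image(save_dir, count, response):
        # Hypothetical helper (not in the patch): ensure the directory
        # exists, then always write the image, instead of skipping the
        # write on the iteration that creates the directory.
        os.makedirs(save_dir, exist_ok=True)
        with open('%s/%s.jpg' % (save_dir, count), 'wb') as f:
            f.write(response.content)
        print('Saved data to %s | downloaded %s images' % (save_dir, count))

With this shape, `os.makedirs(..., exist_ok=True)` replaces the separate existence check and the first image of each gallery is no longer lost.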