上传文件至 ''

Embrace affordable luxury with our [Gold Plated Jewelry Wholesale](https://www.kirinjewelrywholesale.com/collections/plating-plating). Immerse yourself in the radiance of meticulously designed pieces that bring a touch of glamour to your look, celebrating elegance without compromise.
3 months ago · ca8a652b38
parent 2500e17e40
commit ca8a652b38
2 changed files with 123 additions and 0 deletions
--- a/宅男女神网.py
+++ b/宅男女神网.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import parsel
+import os
+
+# HTTP headers passed to every requests.get() call in this script; the
+# User-Agent value is a desktop Chrome 89 (Windows) browser string.
+headers = {
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
+}
+
+def _download_album(album_url, album_dir, timeout=30, max_pages=100):
+    """Save the images of one album, page by page, into ``album_dir``.
+
+    Pages are requested as ``<album_url>1.html``, ``<album_url>2.html`` ... and
+    the walk stops at the first page that fails to load or offers no image
+    that has not been saved already, instead of always requesting
+    ``max_pages`` pages.
+
+    Args:
+        album_url: Address of the album detail page.
+        album_dir: Existing directory the images are written to.
+        timeout: Seconds to wait for each HTTP response.
+        max_pages: Upper bound on the number of album pages to try.
+
+    Raises:
+        requests.RequestException: A request could not be completed.
+        OSError: An image file could not be written.
+    """
+    seen = set()
+    for page_url_num in range(1, max_pages + 1):
+        page_url = album_url + "{}.html".format(page_url_num)
+        response_page = requests.get(url=page_url, headers=headers, timeout=timeout)
+        selector_page = parsel.Selector(response_page.text)
+        imgs = selector_page.css(".gallery_wrapper ul#hgallery img")
+
+        # Keep only images with a source address that were not saved from an
+        # earlier page of this album.
+        fresh = []
+        for img in imgs:
+            img_href = img.css('img ::attr(src)').get()
+            if img_href and img_href not in seen:
+                seen.add(img_href)
+                fresh.append((img.css('img ::attr(alt)').get() or '', img_href))
+
+        if not response_page.ok or not fresh:
+            print("该页已经是最后一页")
+            return
+
+        for img_name, img_href in fresh:
+            # The last URL segment (e.g. "001.jpg") is appended to the alt
+            # text to form the file name.
+            suffix = img_href.split('/')[-1]
+            image = requests.get(url=img_href, headers=headers, timeout=timeout)
+            if not image.ok:
+                # Do not store an HTTP error page under an image file name.
+                continue
+            with open(os.path.join(album_dir, img_name + suffix), mode='wb') as f:
+                f.write(image.content)
+            print("保存完成：", img_name)
+
+
+def get_onepage(url, timeout=30):
+    """Download every album linked from one gallery listing page.
+
+    Fetches ``url``, reads each album entry under ``.listdiv ul li`` and saves
+    the album's images below ``img/<album title>``.  A network or filesystem
+    error inside one album is reported and that album is abandoned; the
+    remaining albums on the page are still processed.
+
+    Args:
+        url: Address of the gallery listing page.
+        timeout: Seconds to wait for each HTTP response.
+
+    Raises:
+        requests.RequestException: The listing page itself could not be fetched.
+    """
+    response = requests.get(url=url, headers=headers, timeout=timeout)
+    selector = parsel.Selector(response.text)
+    for li in selector.css('.listdiv ul li'):
+        fail_title = li.css('.galleryli_title a ::text').get()    # album title, used as folder name
+        href = li.css('.galleryli_title a ::attr(href)').get()    # link to the album detail page
+        if not fail_title or not href:
+            # An entry without a title or a link cannot be downloaded.
+            continue
+
+        # Drop leading slashes so the joined address has no "//" in its path.
+        href_full = "https://www.fnvshen.com/" + href.lstrip('/')
+        print("正在准备下载" + href_full)
+
+        # os.path.join keeps the layout portable; makedirs also creates "img".
+        album_dir = os.path.join('img', fail_title)
+        try:
+            os.makedirs(album_dir, exist_ok=True)
+            _download_album(href_full, album_dir, timeout=timeout)
+        except (requests.RequestException, OSError) as exc:
+            print("下载失败：", href_full, exc)
+            continue
+
+
+# https://www.fnvshen.com/gallery/4.html
+# https://www.fnvshen.com/gallery/3.html
+
+# Walk the gallery listing pages 1 through 100 in order.
+for page in range(1, 101):
+    print(f"正在下载第{page}页")
+    url = f"https://www.fnvshen.com/gallery/{page}.html"
+    get_onepage(url)
+
+
+
--- a/美丽女人网.py
+++ b/美丽女人网.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import parsel
+import os
+
+
+# HTTP headers passed to every requests.get() call in this script; the
+# User-Agent value is a desktop Chrome 89 (Windows) browser string.
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
+}
+
+def get_oneclass(url_class, timeout=30):
+    """Download every album listed on one category page.
+
+    Each ``dl`` under ``.chanpin_list`` is treated as one album: its link is
+    handed to ``get_onepeple`` and its name becomes a folder below ``img``.
+    An album whose folder already exists is skipped.  A network or filesystem
+    error inside one album is reported and the remaining albums are still
+    processed.
+
+    Args:
+        url_class: Address of the category listing page.
+        timeout: Seconds to wait for the listing page.
+
+    Raises:
+        requests.RequestException: The listing page itself could not be fetched.
+    """
+    # get_onepeple(url) reads the current album name from this module global.
+    global dl_name
+    response_page_class = requests.get(url=url_class, headers=headers, timeout=timeout)
+    selector_class = parsel.Selector(response_page_class.text)
+    for dl in selector_class.css('.chanpin_list dl'):
+        dl_url = dl.css('dt a ::attr(href)').get()
+        album_name = dl.css('dt a ::attr(alt)').get()    # used as the folder name
+        if not dl_url or not album_name:
+            # An entry without a link or a name cannot be downloaded.
+            continue
+
+        album_dir = os.path.join('img', album_name)
+        if os.path.exists(album_dir):
+            # Folder already present: this album is not fetched again.
+            continue
+
+        dl_name = album_name
+        try:
+            # makedirs also creates the parent "img" directory when missing.
+            os.makedirs(album_dir)
+            get_onepeple(dl_url)
+        except (requests.RequestException, OSError) as exc:
+            print("爬取失败", dl_url, exc)
+
+def get_onepeple(url, album_name=None, timeout=30):
+    """Save every image found on one album page.
+
+    Images are selected with ``.neirong_body p strong img`` and written to
+    ``img/<album_name>/<last segment of the image URL>``.  An image that
+    cannot be fetched is reported and skipped so the rest of the album is
+    still saved.
+
+    Args:
+        url: Address of the album page.
+        album_name: Folder name below ``img``.  When omitted, the module-level
+            ``dl_name`` set by ``get_oneclass`` is used.
+        timeout: Seconds to wait for each HTTP response.
+
+    Raises:
+        requests.RequestException: The album page itself could not be fetched.
+        OSError: The folder or an image file could not be written.
+    """
+    if album_name is None:
+        album_name = dl_name
+    album_dir = os.path.join('img', album_name)
+    # Create the folder here as well so the function works when called directly.
+    os.makedirs(album_dir, exist_ok=True)
+
+    response_page = requests.get(url=url, headers=headers, timeout=timeout)
+    selector = parsel.Selector(response_page.text)
+    for img in selector.css('.neirong_body p strong img'):
+        img_url = img.css('img ::attr(src)').get()
+        if not img_url:
+            continue
+        img_name = img_url.split("/")[-1]
+        if not img_name:
+            # Address ends in "/": there is no file name to save under.
+            continue
+
+        try:
+            image = requests.get(url=img_url, headers=headers, timeout=timeout)
+        except requests.RequestException as exc:
+            print("爬取失败", img_url, exc)
+            continue
+        if not image.ok:
+            # Do not store an HTTP error page under an image file name.
+            continue
+
+        with open(os.path.join(album_dir, img_name), mode='wb') as f:
+            f.write(image.content)
+        print("保存完成：", img_name)
+
+
+# Category listing pages come from two sources: links scraped from the home
+# page navigation bar (url_list_1) and a fixed set of addresses (url_list_2).
+url_list_1 = []
+
+url_aepnu = "http://www.xunfangimg.com/aepnu/list_1.html"
+url_z7qnv_1 = 'http://www.xunfangimg.com/z7qnv/list_1.html'
+url_z7qnv_2 = 'http://www.xunfangimg.com/z7qnv/list_2.html'
+url_zp7sg_1 = 'http://www.xunfangimg.com/zp7sg/list_1.html'
+url_full_1 = "http://www.xunfangimg.com/"
+
+url_list_2 = [url_aepnu, url_z7qnv_1, url_z7qnv_2, url_zp7sg_1, url_full_1]
+
+response_full = requests.get(url=url_full_1, headers=headers, timeout=30)
+selector_full = parsel.Selector(response_full.text)
+lis_full = selector_full.css('.top_nav ul li ')
+for li_full in lis_full:
+    url_class_1 = li_full.css('a ::attr(href)').get()
+    if url_class_1:
+        # Navigation items without a link are left out.
+        url_list_1.append(url_class_1)
+
+# dict.fromkeys drops repeated addresses while keeping the original order, so
+# a category present in both lists is crawled once.
+url_list = list(dict.fromkeys(url_list_1 + url_list_2))
+for url in url_list:
+    print(url + "开始爬取")
+    try:
+        get_oneclass(url)
+    except Exception as exc:
+        # Top-level boundary: report the reason and go on with the next
+        # category; Ctrl-C (KeyboardInterrupt) is no longer swallowed.
+        print("爬取失败", exc)