You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kirin_jewelry/kirinjewelry2006视频spider.py

49 lines
1.7 KiB
Python

import csv
import os
import time
from urllib.parse import urljoin

import parsel
import requests
from tqdm import tqdm
# HTTP headers sent with every request; the user-agent string identifies the
# client as a desktop Chrome browser.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36"
    ),
}
def get_vidio(pageurl):
    """Download the video embedded in one product-detail page.

    The page is fetched and parsed. The video title is the text after the
    last "-" in the page's <h1>, and the video address is read from the
    ``video-src`` attribute of an <img> inside the same container element.
    The video is saved as ``vidio/<title>.mp4`` and the title is appended
    as one row to ``vidio.csv``. After a successful download the function
    sleeps for 5 seconds before returning.

    Args:
        pageurl: Absolute URL of the product-detail page, e.g.
            "https://www.kirinjewelry2006.com/video/products-detail-967094".

    Returns:
        None. A page or video that cannot be fetched or located is reported
        on stdout and skipped.

    Raises:
        requests.RequestException: If an HTTP request fails at the network
            level (connection error, timeout, ...).
        OSError: If the video file or the CSV file cannot be written.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url=pageurl, headers=header, timeout=30)
    if response.status_code != 200:
        print("无法获取视频")
        return

    select = parsel.Selector(response.text)
    title = select.xpath('//*[@id="v620a34694dad7"]//h1/text()').get()
    vidio_src = select.xpath('//*[@id="v620a34694dad7"]//img/@video-src').get()
    # .get() returns None when the XPath matches nothing (a page without a
    # video, or a changed layout); bail out instead of failing on None.
    if not title or not vidio_src:
        print("无法获取视频")
        return

    vidio_name = title.split("-")[-1]
    # Resolve relative or protocol-relative addresses against the page URL;
    # an already absolute address is returned unchanged.
    vidio_url = urljoin(pageurl, vidio_src)
    vidio_response = requests.get(url=vidio_url, headers=header, timeout=60)
    if vidio_response.status_code != 200:
        # Do not store an error page under an .mp4 name.
        print("无法获取视频")
        return

    # Make sure the output directory exists, and build the path portably
    # rather than hard-coding a Windows separator.
    os.makedirs("vidio", exist_ok=True)
    with open(os.path.join("vidio", "{}.mp4".format(vidio_name)), "wb") as file:
        file.write(vidio_response.content)
    print("{}视频已保存".format(vidio_name))

    # Keep a running log of every downloaded title.
    with open('vidio.csv', encoding="utf-8", mode='a', newline="") as f:
        csv.writer(f).writerow([vidio_name])

    # Pause between downloads to go easy on the server.
    time.sleep(5)
# Walk the product listing pages and download the video of every product
# linked from each page.
# NOTE(review): the crawl starts at listing page 2 and stops at page 86 —
# confirm that page 1 is skipped on purpose.
for page in tqdm(range(2, 87)):
    print("正在爬取第{}".format(page))
    url = "https://www.kirinjewelry2006.com/products-list-{}".format(page)
    try:
        # A timeout keeps one stalled listing request from hanging the crawl.
        response = requests.get(url=url, headers=header, timeout=30).text
    except requests.RequestException as exc:
        # One unreachable listing page should not abort the remaining pages.
        print("无法获取第{}页: {}".format(page, exc))
        continue

    select = parsel.Selector(response)
    lis = select.xpath('//*[@id="v6204665765bf2"]/div/div[2]/div[2]//div[1]/ul/li')
    for li in lis:
        href = li.xpath('div/a/@href').get()
        if not href:
            # List item without a product link; nothing to download.
            continue
        pageurl = "https://www.kirinjewelry2006.com" + href
        try:
            get_vidio(pageurl)
        except Exception as exc:
            # Best effort: report the failure and move on to the next product.
            # Catching Exception (not a bare except) lets Ctrl-C stop the crawl.
            print("视频下载失败 {}: {}".format(pageurl, exc))
            continue