成都网站建设设计

将想法与焦点和您一起共享

抖音网页版高清视频抓取教程(Selenium)

废话不多说,直接上代码。

站在用户的角度思考问题,与客户深入沟通,找到南关网站设计与南关网站推广的解决方案,凭借多年的经验,让设计与互联网技术结合,创造个性化、用户体验好的作品,建站类型包括:成都做网站、网站设计、企业官网、英文网站、手机端网站、网站推广、域名注册、虚拟空间、企业邮箱。业务覆盖南关地区。



from selenium import webdriver
from selenium.webdriver import ChromeOptions
import time
import re
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import uuid
import os
import requests

# Module-level Chrome session shared by the functions below.

# Command-line argument that overrides the browser's user-agent string.
_USER_AGENT_ARGUMENT = (
    'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"'
)

# Payload for the CDP call: a script evaluated on every new document that
# redefines ``navigator.webdriver`` so that it reads as ``undefined``.
_HIDE_WEBDRIVER_PAYLOAD = {
    'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
}

option = ChromeOptions()
option.add_argument(_USER_AGENT_ARGUMENT)
# Keep the site from detecting that an automation tool is driving the browser.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)

browser = webdriver.Chrome(options=option)
browser.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    _HIDE_WEBDRIVER_PAYLOAD,
)

# Maximise the browser window.
browser.maximize_window()


def douyincrawler(keyword, scroll_rounds=10, login_wait=15):
    """Search Douyin for *keyword* and download every video that is found.

    Loads the video search page in the module-level ``browser``, pauses so the
    operator can log in by scanning the QR code with the Douyin mobile app,
    scrolls the result list a number of times, then opens each video detail
    page in a new tab and passes the media URL to ``savevideo``.

    A video whose player does not appear in time, or whose download fails
    with a ``requests`` error, is reported and skipped; its tab is always
    closed and focus returned to the search tab.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the URL path.
        scroll_rounds: How many scroll-to-bottom passes to make over the
            result list.
        login_wait: Seconds to wait for the QR-code login to be completed.
    """
    from urllib.parse import quote

    from selenium.common.exceptions import TimeoutException

    url = (
        'https://www.douyin.com/search/'
        + quote(keyword, safe='')
        + '?publish_time=0&sort_type=0&source=switch_tab&type=video'
    )
    browser.get(url)

    # Click the login button so the QR code is shown; the operator then scans
    # it with the Douyin mobile app. ``find_elements`` returns an empty list
    # instead of raising when the button is absent.
    login_buttons = browser.find_elements(By.XPATH, '//*[@id="qdblhsHs"]/button')
    if login_buttons:
        login_buttons[0].click()
    time.sleep(login_wait)  # time allowed for scanning the QR code

    # Scroll down repeatedly so more results are loaded.
    # NOTE(review): the scroll target in the original script was truncated
    # ("scrollTop="); scrolling to the full document height is assumed.
    js_bottom = (
        'document.documentElement.scrollTop = '
        'document.documentElement.scrollHeight;'
    )
    for _ in range(scroll_rounds):
        time.sleep(5)
        browser.execute_script(js_bottom)
        if '服务出现异常' in browser.page_source:
            # The page reports a service error: reload it.
            browser.refresh()
        if '服务异常,重新' in browser.page_source:
            # The page offers a "load again" control: click it if present.
            retry_buttons = browser.find_elements(
                By.XPATH, '//*[@id="dark"]/div[2]/div/div[3]/div[2]/div/div/span'
            )
            if retry_buttons:
                retry_buttons[0].click()

    # NOTE(review): the original ID-extraction regex was lost from the source
    # text. This pattern assumes result links contain ".../video/<digits>" --
    # confirm against the live page markup.
    video_id_lists = sorted(set(re.findall(r'/video/(\d+)', browser.page_source)))
    detail_url_lists = ['https://www.douyin.com/video/' + i for i in video_id_lists]
    print('共计获取到{}个视频'.format(len(detail_url_lists)))

    main_handle = browser.current_window_handle
    for detail_url in detail_url_lists:
        # Open the detail page in a new tab; the URL is passed as a script
        # argument rather than formatted into the JavaScript source.
        browser.execute_script('window.open(arguments[0])', detail_url)
        new_handles = [h for h in browser.window_handles if h != main_handle]
        if not new_handles:
            print('无法打开新标签页:{}'.format(detail_url))
            continue
        browser.switch_to.window(new_handles[-1])
        try:
            # Explicit wait until the <video> tag is present in the player.
            video_element = WebDriverWait(browser, 10).until(
                EC.presence_of_element_located((
                    By.XPATH, "//xg-video-container[@class='xg-video-container']/video"
                ))
            )
            video_url = _video_source_url(video_element)
            if video_url is None:
                print('未找到视频地址:{}'.format(detail_url))
            else:
                savevideo(video_url)
        except (TimeoutException, requests.RequestException) as exc:
            print('跳过视频 {}:{}'.format(detail_url, exc))
        finally:
            # Close the detail tab and return to the search tab; later
            # iterations depend on focus being back on the main window.
            browser.close()
            browser.switch_to.window(main_handle)


def _video_source_url(video_element):
    """Return the first HTTP(S) media URL of a ``<video>`` element, or None.

    NOTE(review): the original URL-extraction regex was lost from the source
    text. This assumes the URL is the ``src`` of a child ``<source>`` tag or
    of the ``<video>`` tag itself -- confirm against the live player markup.
    """
    candidates = [
        source.get_attribute('src')
        for source in video_element.find_elements(By.TAG_NAME, 'source')
    ]
    candidates.append(video_element.get_attribute('src'))
    for src in candidates:
        if not src:
            continue
        if src.startswith('//'):
            # Protocol-relative URL: the original script prefixed 'https:'.
            src = 'https:' + src
        if src.startswith('http'):
            return src
    return None



def savevideo(video_url, video_dir=r'F:\2022_09_01\yan_ming\jian_kong_video\douyin_video'):
    """Download *video_url* into *video_dir* under a random ``.mp4`` name.

    The response is streamed to disk in chunks instead of being held in
    memory, the target directory is created when missing, and a non-success
    HTTP status is reported instead of being written out as a video file.

    Args:
        video_url: Direct URL of the video file.
        video_dir: Directory that receives the file; defaults to the path the
            script was originally written for.

    Returns:
        The full path of the saved file, or ``None`` when the server answered
        with an error status.

    Raises:
        requests.RequestException: On connection failures or timeouts.
        OSError: When the directory or file cannot be written.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36",
    }
    os.makedirs(video_dir, exist_ok=True)
    # A UUID file name avoids collisions between downloads.
    video_full_path = os.path.join(video_dir, str(uuid.uuid4()) + '.mp4')

    # The timeout keeps a stalled connection from hanging the whole crawl.
    with requests.get(url=video_url, headers=headers, stream=True, timeout=30) as response:
        if not response.ok:
            print('下载失败(HTTP {}):{}'.format(response.status_code, video_url))
            return None
        with open(video_full_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)

    print('已下载:{}'.format(video_url))
    return video_full_path


if __name__ == '__main__':
    try:
        douyincrawler('街拍美女')
    finally:
        # Always shut down the module-level browser and its driver process,
        # including when the crawl stops with an exception.
        browser.quit()

网站栏目:抖音网页版高清视频抓取教程selenium
转载源于:
http://chengdu.cdxwcx.cn/article/dsoghji.html