# Python爬虫

# Flag

xpath和css选择器

HeadlessBrowser

# 开源脚本

签到

京东

联通

# selenium

# 函数或变量

函数或变量 说明
def file_detector_context(self, file_detector_class, *args, **kwargs):
def mobile(self):
def name(self):
def start_client(self):
def stop_client(self):
def start_session(self, capabilities, browser_profile=None):
def create_web_element(self, element_id):
def execute(self, driver_command, params=None):
def get(self, url):
def title(self):
def find_element_by_id(self, id_):
def find_elements_by_id(self, id_):
def find_element_by_xpath(self, xpath):
def find_elements_by_xpath(self, xpath):
def find_element_by_link_text(self, link_text):
def find_elements_by_link_text(self, text):
def find_element_by_partial_link_text(self, link_text):
def find_elements_by_partial_link_text(self, link_text):
def find_element_by_name(self, name):
def find_elements_by_name(self, name):
def find_element_by_tag_name(self, name):
def find_elements_by_tag_name(self, name):
def find_element_by_class_name(self, name):
def find_elements_by_class_name(self, name):
def find_element_by_css_selector(self, css_selector):
def find_elements_by_css_selector(self, css_selector):
def execute_script(self, script, *args):
def execute_async_script(self, script, *args):
def current_url(self):
def page_source(self):
def close(self):
def quit(self):
def current_window_handle(self):
def window_handles(self):
def maximize_window(self):
def fullscreen_window(self):
def minimize_window(self):
def switch_to(self):
def switch_to_active_element(self):
def switch_to_window(self, window_name):
def switch_to_frame(self, frame_reference):
def switch_to_default_content(self):
def switch_to_alert(self):
def back(self):
def forward(self):
def refresh(self):
def get_cookies(self):
def get_cookie(self, name):
def delete_cookie(self, name):
def delete_all_cookies(self):
def add_cookie(self, cookie_dict):
def implicitly_wait(self, time_to_wait):
def set_script_timeout(self, time_to_wait):
def set_page_load_timeout(self, time_to_wait):
def find_element(self, by=By.ID, value=None):
def find_elements(self, by=By.ID, value=None):
def desired_capabilities(self):
def get_screenshot_as_file(self, filename):
def save_screenshot(self, filename):
def get_screenshot_as_png(self):
def get_screenshot_as_base64(self):
def set_window_size(self, width, height, windowHandle='current'):
def get_window_size(self, windowHandle='current'):
def set_window_position(self, x, y, windowHandle='current'):
def get_window_position(self, windowHandle='current'):
def get_window_rect(self):
def set_window_rect(self, x=None, y=None, width=None, height=None):
def file_detector(self):
def file_detector(self, detector):
def orientation(self):
def orientation(self, value):
def application_cache(self):
def log_types(self):
def get_log(self, log_type):

# 浏览器下载设置

# Register Chrome's "send_command" endpoint in the Selenium WebDriver command table.
driver.command_executor._commands["send_command"] = (
    "POST",
    "/session/$sessionId/chromium/send_command",
)
# Behavior values: "allow" = download automatically, "deny" = forbid downloads,
# "default" = browser default.
download_settings = {"behavior": "deny", "downloadPath": "D:\\"}
params = {"cmd": "Page.setDownloadBehavior", "params": download_settings}
driver.execute("send_command", params)
# The same Page.setDownloadBehavior command, sent through execute_cdp_cmd.
driver.execute_cdp_cmd("Page.setDownloadBehavior", download_settings)

# 打开新标签页

# Remember the main window's handle so we can return to it afterwards.
main_window = driver.current_window_handle
# Snapshot the handles that exist before the new tab is opened.
existing_handles = set(driver.window_handles)
# Open a new tab via JavaScript and load url in it. The URL is passed as a
# script argument instead of being interpolated into the JavaScript source,
# so a quote or backslash inside url cannot break (or inject into) the script.
driver.execute_script("window.open(arguments[0])", url)
# Alternative: open a blank page in a new tab.
#driver.execute_script("window.open('','_blank')")
# Find the new tab as the handle that was not present before, rather than
# assuming it is the last entry of window_handles.
new_handles = [h for h in driver.window_handles if h not in existing_handles]
if not new_handles:
    # Without this check the main window itself would be closed below.
    raise RuntimeError("window.open did not create a new window")
driver.switch_to.window(new_handles[-1])
# Alternative: navigate the new tab explicitly.
#driver.get(url)
# Close the current window (the new tab).
driver.close()
# After closing the new tab, switch back to the main window; this step is
# required, otherwise later commands raise an error.
driver.switch_to.window(main_window)

使用组合键

该方式在Chrome下无效

在增加了设置下载路径的代码后,无法打开新标签页,但是捕获到的窗口句柄(handle)是两个,可以进行切换,只是没有切换的动态效果了,实际上是切换了的。

OSX操作系统通过组合键 COMMAND + T / COMMAND + W 来实现选项卡的打开/关闭

在其他操作系统上,可以使用CONTROL + T / CONTROL + W

# On Windows, Keys.CONTROL + 't' acts like pressing Ctrl+T to open a new tab.
page_body = driver.find_element_by_tag_name('body')
page_body.send_keys(Keys.CONTROL + 't')
# Alternative: send <CTRL> + <T> through an action chain.
# ActionChains(driver).key_down(Keys.CONTROL).send_keys("t").key_up(Keys.CONTROL).perform()
# Take the last of the current window handles (windows A and B) and switch
# to it as the new tab.
newest_handle = driver.window_handles[-1]
driver.switch_to.window(newest_handle)
# Load url in the tab that is now active.
driver.get(url)
# Alternative: on Windows, Keys.CONTROL + 'w' acts like Ctrl+W to close the tab.
#driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
# Close the current window.
driver.close()

# 执行JavaScript

# Scroll to the very bottom of the page via JavaScript. The target offset is
# the document's own scrollHeight rather than a hard-coded 10000, so pages
# taller than 10000px are also scrolled all the way down.
driver.execute_script(
    "document.documentElement.scrollTop = document.documentElement.scrollHeight"
)
# Fetch the complete HTML of the page via JavaScript and keep the result.
page_html = driver.execute_script("return document.documentElement.outerHTML")

# m3u8解析下载解密合并

M3U8有两层:第一层存放的是流信息(EXT-X-STREAM-INF)和第二层的下载链接;第二层存放的才是加密信息(EXT-X-KEY)和ts文件的下载地址。