Source code for selene.core.selenium.tasks

from IPython.display import Image, display
import os
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains

from selene.core.config import *
from selene.core.selenium.scripts import *
from selene.core.selenium.conditions import *

import random


[docs] def task_navigate_to_url(driver, url, string="", wait=WAIT_NORMAL, logger=None): """ Navigate to a new url and check that the url is correct. Checking the correct url can be done in 2 ways: 1. Checking for an exact match 2. Checking whether the url contains a specified string. Parameters ---------- driver : selenium.webdriver a selenium webdriver instance url : str the url to navigate to string : str a specified string for the new url to contain wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : bool True if the operation was successful, False otherwise """ if logger: logger.debug(f'task_navigate_to_url: {"; ".join([url, string])}') # Get the original url url_prev = driver.current_url # If the original url is the same as the expected url, return True if not bool_url_unexpected(driver, WAIT_TINY, logger, url): return True # Navigate to new url, catching Webdriver failures try: driver.get(url) except WebDriverException as e: if logger: logger.exception(f"{e}") return False # Check that the url has changed if not bool_url_changed(driver, wait, logger, url_prev): return False # Check that the url is correct if string == "": return bool_url_expected(driver, wait, logger, url) else: return bool_url_contains(driver, wait, logger, string)
[docs] def task_navigate_to_url_in_new_tab( driver, url, string="", wait=WAIT_NORMAL, logger=None ): """ Navigate to a new url in a new tab, and check that the url is correct. Checking the correct url can be done in 2 ways: 1. Checking for an exact match 2. Checking whether the url contains a specified string. Parameters ---------- driver : selenium.webdriver a selenium webdriver instance url : str the url to navigate to string : str a specified string for the new url to contain wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : bool True if the operation was successful, False otherwise """ if logger: logger.debug(f'task_navigate_to_url_in_new_tab: {"; ".join([url, string])}') # Get the current number of handles handles_prev = driver.window_handles n_handles_prev = len(handles_prev) # Open url in new tab try: driver.execute_script(f'window.open("{url}");') except JavascriptException as e: if logger: logger.exception(f"{e}") return False # Check that new handle has been created if not bool_new_handle(driver, n_handles_prev, WAIT_SMALL, logger): return False # Switch to new tab handle_new = [x for x in driver.window_handles if x not in handles_prev][0] try: driver.switch_to.window(handle_new) except WebDriverException as e: if logger: logger.exception(f"{e}") return False # Check that the current handle is now changed if not bool_correct_handle(driver, handle_new, WAIT_SMALL, logger): return False # Check that the url is corrects if string == "": return bool_url_expected(driver, wait, logger, url) else: return bool_url_contains(driver, wait, logger, string)
[docs] def task_close_tab_return_to_url_and_handle( driver, url, handle, string="", wait=WAIT_NORMAL, logger=None ): """ Close the current tab and check that the driver is back at the expected handle and url. Checking the correct url can be done in 2 ways: 1. Checking for an exact match 2. Checking whether the url contains a specified string. Parameters ---------- driver : selenium.webdriver a selenium webdriver instance handle : str the handle to navigate to url : str the url to navigate to string : str a specified string for the new url to contain wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : bool True if the operation was successful, False otherwise """ if logger: logger.debug( f'task_close_tab_return_to_url_and_handle: {"; ".join([url, string])}' ) # Close tab try: driver.execute_script(f"window.close();") except JavascriptException as e: if logger: logger.exception(f"{e}") return False # Switch to handle try: driver.switch_to.window(handle) except WebDriverException as e: if logger: logger.exception(f"{e}") return False # Check that the handle is the expected one if not bool_correct_handle(driver, handle, WAIT_SMALL, logger): return False # Check that the url is the expected one if string == "": return bool_url_expected(driver, wait, logger, url) else: return bool_url_contains(driver, wait, logger, string)
[docs] def task_find(parent, by, identifier, wait=WAIT_NORMAL, logger=None): """ Find an element using a By. selector and an identifier. For more info, see: https://selenium-python.readthedocs.io/locating-elements.html If the operation is to find an element on the whole page, then the parent variable is a Selenium Webdriver instance (usually named driver). If the operation is to find an element on the whole page, then the parent variable is a Selenium WebElement instance (NOT a core.selenium.element.Element instance). Parameters ---------- parent : EITHER selenium.webdriver OR selenium.webdriver.remote.webelement.WebElement where to search for the element by : selenium.webdriver.common.by.By see https://selenium-python.readthedocs.io/locating-elements.html identifier : str see https://selenium-python.readthedocs.io/locating-elements.html wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : [None, selenium.webdriver.remote.webelement.WebElement] returns the webelement if it is found; None otherwise """ if logger: logger.debug(f"task_find: {identifier}") try: return WebDriverWait(parent, wait).until( EC.presence_of_element_located((by, identifier)), message=f"Element not found: {identifier}", ) except TimeoutException as e: if logger: logger.exception(e) return None
[docs] def task_find_all(parent, by, identifier, wait=WAIT_NORMAL, logger=None): """ Find a list of elements using a By. selector and an identifier. For more info, see: https://selenium-python.readthedocs.io/locating-elements.html If the operation is to find an element on the whole page, then the parent variable is a Selenium Webdriver instance (usually named driver). If the operation is to find an element on the whole page, then the parent variable is a Selenium WebElement instance (NOT a core.selenium.element.Element instance). Parameters ---------- parent : EITHER selenium.webdriver OR selenium.webdriver.remote.webelement.WebElement where to search for the element by : selenium.webdriver.common.by.By see https://selenium-python.readthedocs.io/locating-elements.html identifier : str see https://selenium-python.readthedocs.io/locating-elements.html wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : list returns a list of webelements if one or more are found; an empty list otherwise """ if logger: logger.debug(f"task_find_all: {identifier}") try: WebDriverWait(parent, wait).until( EC.presence_of_element_located((by, identifier)), message=f"No elements found: {identifier}", ) except TimeoutException as e: if logger: logger.exception(e) return [] return parent.find_elements(by, identifier)
[docs] def task_click(driver, by, identifier, wait=WAIT_NORMAL, logger=None): """ Click an element using a By. selector and an identifier. For more info, see: https://selenium-python.readthedocs.io/locating-elements.html Parameters ---------- driver : selenium.webdriver a selenium webdriver instance by : selenium.webdriver.common.by.By see https://selenium-python.readthedocs.io/locating-elements.html identifier : str see https://selenium-python.readthedocs.io/locating-elements.html wait : int a number of seconds to wait before raising a TimeoutException logger : logging.Logger a logger instance (see core.logger.py) Returns ---------- output : bool True if the operation was successful, False otherwise """ if logger: logger.debug(f"task_click: {identifier}") if not bool_clickable(driver, by, identifier, wait=wait, logger=logger): return False element = task_find(driver, by, identifier, wait=wait, logger=logger) return script_click_element(driver, element)
[docs] def task_screenshot_to_notebook(driver, width, height, logger): """ Display a browser screenshot in a Jupyter notebook. Parameters ---------- driver : selenium.webdriver a selenium webdriver instance width : int the width of the image height : int the height of the image logger : logging.Logger a logger instance (see core.logger.py) """ if logger: logger.debug(f"screenshot_to_notebook") image = Image(driver.get_screenshot_as_png(), width=width, height=height) display(image)
[docs] def task_screenshot_to_local(driver, dirpath, filestem, logger): """ Save a browser screenshot to a local directory Parameters ---------- driver : selenium.webdriver a selenium webdriver instance dirpath : str the directory path filestem : str a string to add to a datetime to create the filename TODO tidy up workflow? logger : logging.Logger a logger instance (see core.logger.py) """ if logger: logger.debug(f'task_screenshot_to_local: {"; ".join([dirpath, filestem])}') if not os.path.exists(dirpath): os.makedirs(dirpath) str_datetime = datetime.now().strftime("%Y%m%d%H%M%S%f") filename = f"{str_datetime}_{filestem}.png" filepath = f"{dirpath}/{filename}" driver.save_screenshot(filepath)
[docs] def mouse_move(driver, max_mouse_moves=10): """performs mouse move, for help with bot mitigation, partially ported from OpenWPM""" # bot mitigation: move the mouse randomly around a number of times window_size = driver.get_window_size() num_moves = 0 num_fails = 0 while num_moves < max_mouse_moves and num_fails < max_mouse_moves: try: if num_moves == 0: # move to the center of the screen x = int(round(window_size["height"] / 2)) y = int(round(window_size["width"] / 2)) else: # move a random amount in some direction move_max = random.randint(0, 500) x = random.randint(-move_max, move_max) y = random.randint(-move_max, move_max) action = ActionChains(driver) action.move_by_offset(x, y) action.perform() num_moves += 1 except: num_fails += 1 pass return num_moves