From b1e87476c69369a7a4cab24f136578242760b087 Mon Sep 17 00:00:00 2001 From: Robin Carlsson <crcarl@utu.fi> Date: Mon, 26 Feb 2024 16:03:22 +0200 Subject: [PATCH] 19: added easyocr component for cookie rejection; wip --- ida_selenium.py | 60 +++++++++++++++++++++++++++++++++++-------------- t3.txt | 3 +++ 2 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 t3.txt diff --git a/ida_selenium.py b/ida_selenium.py index 7cfa971..5d24b6d 100644 --- a/ida_selenium.py +++ b/ida_selenium.py @@ -6,10 +6,12 @@ from selenium.webdriver.common.by import By from urllib.parse import urlparse, parse_qs import pychrome import htmlreport +import pyautogui +import easyocr from datetime import datetime import os import time -import json +#import json import sys import re @@ -117,7 +119,7 @@ def find_inputs(): try: input_list.append(i) except Exception as err: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") for d in driver.find_elements(By.TAG_NAME, "div"): # define recursive function for finding child input fields if is_search_related(d, "bar"): @@ -131,7 +133,7 @@ def find_inputs(): try: input_list.append(child) except Exception as err: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") find_child_from_div(d) return input_list @@ -146,7 +148,7 @@ def find_buttons(): try: button_list.append(e) except Exception as err: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") for d in driver.find_elements(By.TAG_NAME, "div"): if is_search_related(d, "button"): # define recursive function for finding child button equivalents @@ -159,7 +161,7 @@ def find_buttons(): try: button_list.append(child) except Exception as err: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") find_child_from_div(d) return button_list @@ -192,7 +194,7 @@ def is_search_related(element, mode): return False return True except Exception as err: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") @@ -232,6 +234,7 @@ def get_interceptor_attrs(errormsg): print(err) + # use javascript execution to remove obstructing elements def handle_interception(msg, button): try: @@ -254,7 +257,27 @@ def handle_interception(msg, button): input_search_term(inputs[0]) except Exception as err: print("\033[91m \nFAILED TO DEAL WITH INTERCEPTOR\n\033[00m") - print(err.msg) + print(err) + + + +# WORK-IN-PROGRESS +# use easyocr reject cookies, thus closing the obstructing window +def ocr_deny_cookies(): + words_of_denial = ["reject", "deny", "hylkaa"] + reader = easyocr.Reader(["en"]) + pyautogui.hotkey('ctrl', '1') + # full screenshot so the coordinates match + pyautogui.screenshot("./temp.jpg") + result = reader.readtext("./temp.jpg") + if ("cookie" in str(result).lower()) or ("evaste" in str(result).lower()): + for word in words_of_denial: + for (bbox, text, prob) in result: + if word in text.lower(): + print(f"EasyOCR found: {word}") + pyautogui.click(bbox[0][0], bbox[0][1]) + os.remove("./temp.jpg") + time.sleep(0.2) @@ -319,6 +342,9 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: time.sleep(3) + ocr_deny_cookies() + + driver.get_screenshot_as_file(f"screenshots_{datestamp}/{log_title}.png") # these might help avoiding certain elements getting in the way @@ -353,9 +379,9 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: break except Exception as err: try: - print(f"Error Message: {err.msg}") + print(f"Error Message: {err}") # sue me lmao - if "click intercepted" in err.msg: + if "click intercepted" in str(err): handle_interception(err.msg, b) except Exception as err: print(err) @@ -369,8 +395,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: break except Exception as err: try: - print(f"Error Message: {err.msg}") - if "click intercepted" in err.msg: + print(f"Error Message: {err}") + if "click intercepted" in str(err): handle_interception(err.msg, b) except Exception as err: print(err) @@ -384,8 +410,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: break except Exception as err: try: - print(f"Error Message: {err.msg}") - if "click intercepted" in err.msg: + print(f"Error Message: {err}") + if "click intercepted" in str(err): handle_interception(err.msg, b) except Exception as err: print(err) @@ -401,8 +427,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: break except Exception as err: try: - print(f"Error Message: {err.msg}") - if "click intercepted" in err.msg: + print(f"Error Message: {err}") + if "click intercepted" in str(err): handle_interception(err.msg, b) except Exception as err: print(err) @@ -446,7 +472,7 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: # parameter_list = parse_qs(query_string) # event["message"]["params"]["request"]["query_string_parameters"] = parameter_list # except Exception as err: - # print("Error Message: " + err.msg) + # print("Error Message: " + err) # # separate query string parameters into list in case of ExtraInfo # elif event["message"]["method"] == "Network.requestWillBeSentExtraInfo": # try: @@ -456,7 +482,7 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test: # event["message"]["params"]["query_string_parameters"] = parameter_list # except Exception as err: # if ":path" not in str(err): - # print("Error Message: " + err.msg) + # print("Error Message: " + err) # # write events related to sent network requests # if ("Network.requestWillBeSent" in event["message"]["method"]): # f.write(json.dumps(event["message"], indent=4) + ",") diff --git a/t3.txt b/t3.txt new file mode 100644 index 0000000..ad8c5b9 --- /dev/null +++ b/t3.txt @@ -0,0 +1,3 @@ +https://www.slideshare.net/ +https://www.cookiebot.com/ +https://www.google.nl \ No newline at end of file -- GitLab