From b1e87476c69369a7a4cab24f136578242760b087 Mon Sep 17 00:00:00 2001
From: Robin Carlsson <crcarl@utu.fi>
Date: Mon, 26 Feb 2024 16:03:22 +0200
Subject: [PATCH] 19: added easyocr component for cookie rejection; wip

---
 ida_selenium.py | 60 +++++++++++++++++++++++++++++++++++--------------
 t3.txt          |  3 +++
 2 files changed, 46 insertions(+), 17 deletions(-)
 create mode 100644 t3.txt

diff --git a/ida_selenium.py b/ida_selenium.py
index 7cfa971..5d24b6d 100644
--- a/ida_selenium.py
+++ b/ida_selenium.py
@@ -6,10 +6,12 @@ from selenium.webdriver.common.by import By
 from urllib.parse import urlparse, parse_qs
 import pychrome
 import htmlreport
+import pyautogui
+import easyocr
 from datetime import datetime
 import os
 import time
-import json
+#import json
 import sys
 import re
 
@@ -117,7 +119,7 @@ def find_inputs():
             try:
                 input_list.append(i)
             except Exception as err:
-                print(f"Error Message: {err.msg}")
+                print(f"Error Message: {err}")
     for d in driver.find_elements(By.TAG_NAME, "div"):
         # define recursive function for finding child input fields
         if is_search_related(d, "bar"):
@@ -131,7 +133,7 @@ def find_inputs():
                         try:
                             input_list.append(child)
                         except Exception as err:
-                            print(f"Error Message: {err.msg}")
+                            print(f"Error Message: {err}")
             find_child_from_div(d)
     return input_list
 
@@ -146,7 +148,7 @@ def find_buttons():
             try:
                 button_list.append(e)
             except Exception as err:
-                print(f"Error Message: {err.msg}")
+                print(f"Error Message: {err}")
     for d in driver.find_elements(By.TAG_NAME, "div"):
         if is_search_related(d, "button"):
             # define recursive function for finding child button equivalents
@@ -159,7 +161,7 @@ def find_buttons():
                         try:
                             button_list.append(child)
                         except Exception as err:
-                            print(f"Error Message: {err.msg}")
+                            print(f"Error Message: {err}")
             find_child_from_div(d)
     return button_list
 
@@ -192,7 +194,7 @@ def is_search_related(element, mode):
                     return False
         return True
     except Exception as err:
-        print(f"Error Message: {err.msg}")
+        print(f"Error Message: {err}")
 
 
 
@@ -232,6 +234,7 @@ def get_interceptor_attrs(errormsg):
         print(err)
 
 
+
 # use javascript execution to remove obstructing elements
 def handle_interception(msg, button):
     try:
@@ -254,7 +257,27 @@ def handle_interception(msg, button):
         input_search_term(inputs[0])
     except Exception as err:
         print("\033[91m \nFAILED TO DEAL WITH INTERCEPTOR\n\033[00m")
-        print(err.msg)
+        print(err)
+
+
+
+# WORK-IN-PROGRESS: use easyocr to reject cookies, thus closing the obstructing
+# window; every OCR box containing a denial word is clicked at its centre
+def ocr_deny_cookies():
+    reader = easyocr.Reader(["en"])
+    pyautogui.hotkey('ctrl', '1')
+    # full screenshot so the OCR coordinates match the screen coordinates
+    pyautogui.screenshot("./temp.jpg")
+    result = reader.readtext("./temp.jpg")
+    os.remove("./temp.jpg")
+    ocr_text = str(result).lower()
+    if ("cookie" in ocr_text) or ("evaste" in ocr_text):
+        for word in ("reject", "deny", "hylkaa"):
+            for (bbox, text, prob) in result:
+                if word in text.lower():
+                    print(f"EasyOCR found: {word}")
+                    pyautogui.click(int((bbox[0][0] + bbox[2][0]) / 2), int((bbox[0][1] + bbox[2][1]) / 2))
+    time.sleep(0.2)
 
 
 
@@ -319,6 +342,9 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
 
         time.sleep(3)
 
+        ocr_deny_cookies()
+
+
         driver.get_screenshot_as_file(f"screenshots_{datestamp}/{log_title}.png")
 
         # these might help avoiding certain elements getting in the way
@@ -353,9 +379,9 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
                         break
                     except Exception as err:
                         try:
-                            print(f"Error Message: {err.msg}")
+                            print(f"Error Message: {err}")
                             # sue me lmao
-                            if "click intercepted" in err.msg:
+                            if "click intercepted" in str(err):
                                 handle_interception(err.msg, b)
                         except Exception as err:
                             print(err)
@@ -369,8 +395,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
                         break
                     except Exception as err:
                         try:
-                            print(f"Error Message: {err.msg}")
-                            if "click intercepted" in err.msg:
+                            print(f"Error Message: {err}")
+                            if "click intercepted" in str(err):
                                 handle_interception(err.msg, b)
                         except Exception as err:
                             print(err)
@@ -384,8 +410,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
                             break
                         except Exception as err:
                             try:
-                                print(f"Error Message: {err.msg}")
-                                if "click intercepted" in err.msg:
+                                print(f"Error Message: {err}")
+                                if "click intercepted" in str(err):
                                     handle_interception(err.msg, b)
                             except Exception as err:
                                 print(err)
@@ -401,8 +427,8 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
                         break
                     except Exception as err:
                         try:
-                            print(f"Error Message: {err.msg}")
-                            if "click intercepted" in err.msg:
+                            print(f"Error Message: {err}")
+                            if "click intercepted" in str(err):
                                 handle_interception(err.msg, b)
                         except Exception as err:
                             print(err)
@@ -446,7 +472,7 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
         #                    parameter_list = parse_qs(query_string)
         #                    event["message"]["params"]["request"]["query_string_parameters"] = parameter_list
         #            except Exception as err:
-        #                print("Error Message: " + err.msg)
+        #                print("Error Message: " + str(err))
         #        # separate query string parameters into list in case of ExtraInfo
         #        elif event["message"]["method"] == "Network.requestWillBeSentExtraInfo":
         #            try:
@@ -456,7 +482,7 @@ with open(sys.argv[1], encoding="utf-8") as pages_to_test:
         #                    event["message"]["params"]["query_string_parameters"] = parameter_list
         #            except Exception as err:
         #                if ":path" not in str(err):
-        #                    print("Error Message: " + err.msg)
+        #                    print("Error Message: " + str(err))
         #        # write events related to sent network requests
         #        if ("Network.requestWillBeSent" in event["message"]["method"]):
         #            f.write(json.dumps(event["message"], indent=4) + ",")
diff --git a/t3.txt b/t3.txt
new file mode 100644
index 0000000..ad8c5b9
--- /dev/null
+++ b/t3.txt
@@ -0,0 +1,3 @@
+https://www.slideshare.net/
+https://www.cookiebot.com/
+https://www.google.nl
\ No newline at end of file
-- 
GitLab