diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b85fe55 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +./data/logs/*.log +./data/data.db +.vscode +.gitignore +.idea/ +./__pycache__/ +./GlobalExamBot/__pycache__/ \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3909079 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +configuration.yml +GlobalExamBot/__pycache__/ +data/profiles/prof_* +.idea/ +__pycache__/ +data/logs/*.log +data/data.db +.vscode \ No newline at end of file diff --git a/ChromeDriver/chromedriver b/ChromeDriver/chromedriver new file mode 100755 index 0000000..00138d5 Binary files /dev/null and b/ChromeDriver/chromedriver differ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0e2d6b3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.11-slim + +RUN apt-get update && apt-get install -yq wget + +# download and install a pinned version of Google Chrome +RUN wget --no-verbose -O /tmp/chrome.deb http://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_104.0.5112.79-1_amd64.deb +RUN apt-get install -yf /tmp/chrome.deb + +# set display port to avoid crash +ENV DISPLAY=:99 + +# copy the script +COPY . 
/app/ + +# set the working directory +WORKDIR /app + +# install selenium +RUN pip install -r requirements.txt + +# run the script +ENTRYPOINT ["python", "main.py"] \ No newline at end of file diff --git a/GlobalExamBot/Sheets.py b/GlobalExamBot/Sheets.py new file mode 100644 index 0000000..507ada3 --- /dev/null +++ b/GlobalExamBot/Sheets.py @@ -0,0 +1,38 @@ +import logging +from math import floor + +from GlobalExamBot.helpers import wait_between +from GlobalExamBot.database import Database + +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as ec +from selenium.webdriver.support.ui import WebDriverWait + +class Sheets: + def __init__(self, driver, action, configuration): + self.driver = driver + self.actions = action + self.configuration = configuration + self.pagecard_xpath = '//a[@class="mb-4 w-full lg:w-auto lg:mb-0 button-solid-primary-small"]' + self.Sheetscard_xpath = '//div[@class="container py-8 lg:pt-12 lg:pb-12"]' + self.manageSheets = Database() + + def search(self): + WebDriverWait(self.driver, 15).until(ec.visibility_of_element_located((By.XPATH, self.pagecard_xpath))) + page_cards = self.driver.find_elements(by=By.XPATH, value=self.pagecard_xpath) + card_list = [] + for card in page_cards : + if not self.manageSheets.link_exist(card.get_attribute('href')): + card_list.append(card) + return card_list + + def watch(self, Sheets_el): + self.actions.move_to_element(Sheets_el).click(Sheets_el).perform() + WebDriverWait(self.driver, 15).until(ec.visibility_of_element_located((By.XPATH, self.Sheetscard_xpath))) + max_height = self.driver.execute_script("return document.body.scrollHeight") + for height in range(0, max_height, floor(max_height/10)) : + self.driver.execute_script(f"window.scrollTo(0, { height })") + logging.info(f'Position : { height } | MaxPosition: { max_height }') + wait_between(25,30) + logging.info(f'Add new url in database: { self.driver.current_url }') + 
self.manageSheets.add_link(self.driver.current_url) diff --git a/GlobalExamBot/bot.py b/GlobalExamBot/bot.py new file mode 100644 index 0000000..77cd3d1 --- /dev/null +++ b/GlobalExamBot/bot.py @@ -0,0 +1,68 @@ +import logging +import os + +from GlobalExamBot.helpers import TypeInField, element_exists, wait_between +from GlobalExamBot.Sheets import Sheets + + +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as ec +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.common.keys import Keys + +class Bot: + def __init__(self, driver, action, configuration): + self.driver = driver + self.actions = action + self.configuration = configuration + self.email_xpath = '//input[@name="email"]' + self.password_xpath = '//input[@name="password"]' + self.index = 0 + self.scrollcount = 0 + self.categories = ['https://exam.global-exam.com/library/study-sheets/categories/grammar', + 'https://exam.global-exam.com/library/study-sheets/categories/language-functions', + 'https://exam.global-exam.com/library/study-sheets/categories/vocabulary'] + + def login(self): + email_el = WebDriverWait(self.driver, 10).until(ec.visibility_of_element_located((By.XPATH, self.email_xpath))) + self.actions.move_to_element(email_el).click(email_el).perform() + TypeInField(self.driver, self.email_xpath, self.configuration.username) + password_el = WebDriverWait(self.driver, 10).until(ec.visibility_of_element_located((By.XPATH, self.password_xpath))) + self.actions.move_to_element(password_el).click(password_el).perform() + TypeInField(self.driver, self.password_xpath, self.configuration.password) + password_el.send_keys(Keys.RETURN) + + def run(self): + + profile = f'prof_{self.configuration.username}' + + if not os.path.exists(f'./Profiles/{profile}'): + self.driver.get('https://auth.global-exam.com/login') + self.login() + else : + self.driver.get('https://exam.global-exam.com/library/study-sheets/categories/grammar') 
class Database:
    """Small SQLite-backed store of the sheet links that were already read."""

    def __init__(self, database_link='./data/data.db'):
        """
        Database constructor

        :param database_link String: path of the SQLite database file
        """
        self.database_link = database_link

    def add_link(self, link):
        """
        Add a link to the database

        :param link String: link of a sheet
        :raises sqlite3.Error: if the insert fails (e.g. the table is missing)
        """
        connection = sqlite3.connect(self.database_link)
        try:
            # Positional "?" placeholder: binding a named placeholder from a
            # tuple is deprecated by the sqlite3 module.
            connection.execute('INSERT INTO sheet_links (link) VALUES (?);', (link,))
            connection.commit()
        finally:
            # One connection is opened per call, so it must be released here.
            connection.close()

    def link_exist(self, link):
        """
        Returns true if the link exists in the database.

        :param link String: Link of a sheet
        :return: Boolean
        :raises sqlite3.Error: if the query fails (e.g. the table is missing)
        """
        connection = sqlite3.connect(self.database_link)
        try:
            # Only existence matters, so fetch at most one row.
            row = connection.execute(
                'SELECT 1 FROM sheet_links WHERE link = ? LIMIT 1;', (link,)
            ).fetchone()
        finally:
            connection.close()
        return row is not None


def create_table_sheets(database_link='./data/data.db'):
    """
    Create the table used to save the sheets links, if it does not exist yet.

    :param database_link String: path of the SQLite database file
    """
    connection = sqlite3.connect(database_link)
    try:
        connection.execute('''CREATE TABLE IF NOT EXISTS sheet_links
            (id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, link text);''')
        connection.commit()
    finally:
        connection.close()
class Helpers:
    """Prompt, command-line and logging helpers used by the bot entry point."""

    # Name given to the stdout handler so a repeated configuration can find it.
    _STDOUT_HANDLER_NAME = 'globalexambot-stdout'

    def __init__(self):
        """
        Helpers class constructor
        """

    def ask_to_exit(self):
        """
        The user is asked if he wants to leave

        :return: Boolean, True when the application must stop
        """
        try:
            user_input = input('Type "STOP" to stop the application:\n')
        except EOFError:
            # stdin is closed (e.g. a container started without a terminal):
            # nobody can answer, so honour the stop request instead of
            # reporting "continue" on every interruption.
            return True
        except Exception:
            logging.exception('Could not read the answer')
            return False

        # Exit
        if user_input.strip().upper() == "STOP":
            return True
        # Continue
        print('/!\\ Please type "STOP" to exit the bot execution /!\\')
        return False

    def load_configuration(self):
        """
        Display the header, then parse the command-line arguments.

        :return: args, the namespace holding ``password``, ``username`` and
                 ``noheadless``
        """
        header()
        # Load all configuration variables
        parser = argparse.ArgumentParser()
        parser.add_argument('-p', '--password', help='Set GlobalExam password', type=str, required=True)
        parser.add_argument('-u', '--username', help='Set GlobalExam username', type=str, required=True)
        parser.add_argument('--noheadless', help='Desactivate Chrome headless mode', required=False, action='store_true')
        return parser.parse_args()

    def logging_configuration(self, logging_level=logging.INFO, filename='data/logs/bot_globalexam.log'):
        """
        Send log records to ``filename`` and to the standard output.

        Calling this method several times attaches the standard output
        handler only once, so records are not printed in duplicate.

        :param logging_level: level applied to the root logger
        :param filename String: path of the log file
        """
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(filename=filename,
                            level=logging_level,
                            format=log_format)

        root_logger = logging.getLogger()
        root_logger.setLevel(logging_level)

        already_attached = any(
            existing.get_name() == self._STDOUT_HANDLER_NAME
            for existing in root_logger.handlers
        )
        if already_attached:
            return

        handler = logging.StreamHandler(sys.stdout)
        handler.set_name(self._STDOUT_HANDLER_NAME)
        handler.setLevel(logging.INFO)
        handler.setFormatter(logging.Formatter(log_format))
        root_logger.addHandler(handler)
def main():
    """Configure the application, then run the bot until the user stops it.

    On Ctrl-C the user is asked whether to stop. If not, the browser is
    closed and a new session is started. The restart is a loop rather than
    a recursive call, so repeated restarts do not deepen the call stack or
    re-run the logging setup.

    :raises SystemExit: with status 1 when the user confirms the stop
    """
    helpers = Helpers()

    # Configuration of the logging library
    helpers.logging_configuration()

    # Load all configuration variables
    config = helpers.load_configuration()

    logging.info('Starting bot ...')

    create_table_sheets()

    profile = f'prof_{config.username}'

    logging.info(f'Username : {config.username}')

    while True:
        # Reset on every session: the interrupt may arrive before the
        # browser has been created.
        driver = None
        try:
            # Initialize driver and actions
            driver, action = Driver(profile).setup(headless=not config.noheadless)

            # Start bot actions
            Bot(driver, action, config).run()
            return
        except KeyboardInterrupt:
            if helpers.ask_to_exit():
                logging.info('Bye bye !')
                sys.exit(1)
            logging.info('Restart bot ...')
        finally:
            # Runs on restart, on exit and on errors, so the browser is
            # always released.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    # The browser may already be gone; that must not
                    # prevent a restart or hide the original error.
                    logging.exception('Could not close the browser')
file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c1bc13d --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +selenium==4.5.0 \ No newline at end of file