first commit

This commit is contained in:
CAJNA Jarod 2023-02-05 18:03:00 +01:00
parent 2afd24cce6
commit 7dfed35b3e
15 changed files with 414 additions and 0 deletions

7
.dockerignore Normal file
View File

@ -0,0 +1,7 @@
./data/logs/*.log
./data/data.db
.vscode
.gitignore
.idea/
./__pycache__/
./GlobalExamBot/__pycache__/

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
configuration.yml
GlobalExamBot/__pycache__/
data/profiles/prof_*
.idea/
__pycache__/
data/logs/*.log
data/data.db
.vscode

BIN
ChromeDriver/chromedriver Executable file

Binary file not shown.

22
Dockerfile Normal file
View File

@ -0,0 +1,22 @@
FROM python:3.11-slim

# Install wget, download the pinned Google Chrome build over HTTPS and install it.
# Everything happens in one layer so the downloaded package and the apt lists
# can be removed again and do not end up in the final image.
RUN apt-get update \
    && apt-get install -yq wget \
    && wget --no-verbose -O /tmp/chrome.deb https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_104.0.5112.79-1_amd64.deb \
    && apt-get install -yf /tmp/chrome.deb \
    && rm -f /tmp/chrome.deb \
    && rm -rf /var/lib/apt/lists/*

# set display port to avoid crash
ENV DISPLAY=:99

# set the working directory
WORKDIR /app

# install the Python dependencies (selenium) before copying the sources, so this
# layer stays cached until requirements.txt itself changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# copy the script
COPY . /app/

# run the script
ENTRYPOINT ["python", "main.py"]

38
GlobalExamBot/Sheets.py Normal file
View File

@ -0,0 +1,38 @@
import logging
from math import floor
from GlobalExamBot.helpers import wait_between
from GlobalExamBot.database import Database
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
class Sheets:
    """Locate unread study sheets on a category page and "read" them.

    A sheet counts as read once its URL has been stored through the
    ``Database`` helper; ``search`` filters those out and ``watch`` scrolls
    through one sheet before recording it.
    """

    def __init__(self, driver, action, configuration):
        """
        :param driver: Selenium WebDriver used to browse the site
        :param action: ActionChains instance bound to ``driver``
        :param configuration: parsed command-line configuration
        """
        self.driver = driver
        self.actions = action
        self.configuration = configuration
        # Link ("card") that opens a sheet from the category page
        self.pagecard_xpath = '//a[@class="mb-4 w-full lg:w-auto lg:mb-0 button-solid-primary-small"]'
        # Container that is visible once a sheet page has loaded
        self.Sheetscard_xpath = '//div[@class="container py-8 lg:pt-12 lg:pb-12"]'
        self.manageSheets = Database()

    @staticmethod
    def _scroll_positions(max_height, steps=10):
        """
        Return the vertical offsets to scroll to, from 0 up to ``max_height``.

        The step is clamped to at least 1 so that a page shorter than
        ``steps`` pixels does not produce ``range(..., 0)``, which raises
        ``ValueError``.

        :param max_height: total scroll height of the page, in pixels
        :param steps: approximate number of scroll stops
        :return: list of int offsets
        """
        step = max(1, floor(max_height / steps))
        return list(range(0, max_height, step))

    def search(self):
        """
        Collect the sheet cards of the current page that are not yet in the database.

        :return: list of WebElements whose ``href`` is unknown to the database
        """
        WebDriverWait(self.driver, 15).until(
            ec.visibility_of_element_located((By.XPATH, self.pagecard_xpath)))
        page_cards = self.driver.find_elements(by=By.XPATH, value=self.pagecard_xpath)
        return [
            card for card in page_cards
            if not self.manageSheets.link_exist(card.get_attribute('href'))
        ]

    def watch(self, Sheets_el):
        """
        Open a sheet, scroll through it slowly, then record its URL as read.

        :param Sheets_el: WebElement of the card to click
        """
        self.actions.move_to_element(Sheets_el).click(Sheets_el).perform()
        WebDriverWait(self.driver, 15).until(
            ec.visibility_of_element_located((By.XPATH, self.Sheetscard_xpath)))
        max_height = self.driver.execute_script("return document.body.scrollHeight")
        for height in self._scroll_positions(max_height):
            self.driver.execute_script(f"window.scrollTo(0, { height })")
            logging.info(f'Position : { height } | MaxPosition: { max_height }')
            # Long pause at every stop to imitate someone reading the sheet
            wait_between(25, 30)
        logging.info(f'Add new url in database: { self.driver.current_url }')
        self.manageSheets.add_link(self.driver.current_url)

68
GlobalExamBot/bot.py Normal file
View File

@ -0,0 +1,68 @@
import logging
import os
from GlobalExamBot.helpers import TypeInField, element_exists, wait_between
from GlobalExamBot.Sheets import Sheets
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
class Bot:
    """Log in to GlobalExam and keep reading the study sheets that are still unread."""

    def __init__(self, driver, action, configuration):
        """
        :param driver: Selenium WebDriver used to browse the site
        :param action: ActionChains instance bound to ``driver``
        :param configuration: parsed command-line configuration (``username``, ``password``)
        """
        self.driver = driver
        self.actions = action
        self.configuration = configuration
        self.email_xpath = '//input[@name="email"]'
        self.password_xpath = '//input[@name="password"]'
        self.index = 0
        self.scrollcount = 0
        self.categories = ['https://exam.global-exam.com/library/study-sheets/categories/grammar',
                           'https://exam.global-exam.com/library/study-sheets/categories/language-functions',
                           'https://exam.global-exam.com/library/study-sheets/categories/vocabulary']

    def _profile_path(self):
        """
        Return the Chrome profile directory of the configured user.

        This is the directory Driver.setup() passes to Chrome as
        ``user-data-dir`` (``./data/profiles/<profile>``).
        """
        return f'./data/profiles/prof_{self.configuration.username}'

    def login(self):
        """Fill in the e-mail and password fields one by one and submit the form."""
        email_el = WebDriverWait(self.driver, 10).until(
            ec.visibility_of_element_located((By.XPATH, self.email_xpath)))
        self.actions.move_to_element(email_el).click(email_el).perform()
        TypeInField(self.driver, self.email_xpath, self.configuration.username)
        password_el = WebDriverWait(self.driver, 10).until(
            ec.visibility_of_element_located((By.XPATH, self.password_xpath)))
        self.actions.move_to_element(password_el).click(password_el).perform()
        TypeInField(self.driver, self.password_xpath, self.configuration.password)
        password_el.send_keys(Keys.RETURN)

    def run(self):
        """
        Log in when needed, then loop forever over the grammar category.

        Each iteration reloads the category page, reads the first unread sheet,
        or scrolls down when every visible sheet has already been read.
        """
        sheets_url = self.categories[0]
        if not os.path.exists(self._profile_path()):
            # No stored profile: go straight to the login form
            self.driver.get('https://auth.global-exam.com/login')
            self.login()
        else:
            # A stored profile may still hold a session; log in only if the form shows up
            self.driver.get(sheets_url)
            if element_exists(self.email_xpath, self.driver):
                self.login()
        logging.info('Logged in')
        Sheets_action = Sheets(self.driver, self.actions, self.configuration)
        while True:
            self.driver.get(sheets_url)
            Sheets_list = Sheets_action.search()
            if Sheets_list:
                logging.info(f'Sheets n°{ self.index }')
                Sheets_action.watch(Sheets_list[0])
                self.index += 1
                self.scrollcount = 0
                wait_between(3, 10)
            else:
                logging.info('All visible Sheets have already been read. Need to scroll down ...')
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                self.scrollcount += 1
                wait_between(5, 15)
                if self.scrollcount > 10:
                    logging.info('End of page or network error.')
                    self.scrollcount = 0
                    logging.info(self.driver.get_log('browser'))

View File

@ -0,0 +1,4 @@
def init():
    """Publish the application name and version as module-level globals."""
    globals().update(
        VERSION='1.0.0',
        APP_NAME='GlobalExamBot',
    )

48
GlobalExamBot/database.py Normal file
View File

@ -0,0 +1,48 @@
import sqlite3
class Database:
    """SQLite-backed store of the sheet links that have already been read."""

    def __init__(self, database_link='./data/data.db'):
        """
        Database constructor
        :param database_link String: path of the SQLite database file
        """
        self.database_link = database_link

    def add_link(self, link):
        """
        Add a link to the database
        :param link String: link of a sheet
        """
        connection = sqlite3.connect(self.database_link)
        try:
            # The connection context manager commits on success and rolls back on error
            with connection:
                connection.execute('INSERT INTO sheet_links (link) VALUES (?);', (link,))
        finally:
            # Leaving the "with" block does not close the connection, so do it explicitly
            connection.close()

    def link_exist(self, link):
        """
        Returns true if the link exists in the database.
        :param link String: Link of a sheet
        :return: Boolean
        """
        connection = sqlite3.connect(self.database_link)
        try:
            # One matching row is enough to answer the question
            row = connection.execute(
                'SELECT 1 FROM sheet_links WHERE link = ? LIMIT 1;', (link,)).fetchone()
        finally:
            connection.close()
        return row is not None
def create_table_sheets(database_link='./data/data.db'):
    """
    Create the table used to save the sheets links, if it does not exist yet.
    :param database_link String: path of the SQLite database file
    """
    connection = sqlite3.connect(database_link)
    try:
        # The connection context manager commits on success and rolls back on error
        with connection:
            connection.execute('''CREATE TABLE IF NOT EXISTS sheet_links
            (id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, link text);''')
    finally:
        connection.close()

65
GlobalExamBot/driver.py Normal file
View File

@ -0,0 +1,65 @@
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
class Driver:
    """Build a Chrome WebDriver (and its ActionChains) bound to a user profile."""

    # Switches applied to every session to lower CPU usage and silence Chrome
    _RESOURCE_FLAGS = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-renderer-backgrounding",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-client-side-phishing-detection",
        "--disable-crash-reporter",
        "--disable-oopr-debug-crash-dump",
        "--no-crash-upload",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-low-res-tiling",
        "--log-level=3",
        "--silent",
    )

    def __init__(self, profile):
        """
        :param profile: name of the Chrome profile directory under ./data/profiles/
        """
        self.profile = profile
        self.chrome_options = None
        self.driver = None
        self.action = None

    def setup(self, log_path='./data/logs/', headless=True):
        """
        Configure Chrome and start the browser.

        :param log_path: directory prefix (with trailing slash) for ChromeDriver.log
        :param headless: run Chrome without a visible window when True
        :return: tuple (driver, action)
        """
        # Local import: the Service object replaces the deprecated positional
        # driver path / service_args arguments of webdriver.Chrome.
        from selenium.webdriver.chrome.service import Service

        options = Options()
        self.chrome_options = options
        # Anti bot detection
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_experimental_option('useAutomationExtension', False)
        options.add_argument('--disable-blink-features=AutomationControlled')
        # Language Browser
        options.add_argument('--lang=fr-FR')
        # Maximize Browser
        options.add_argument('--start-maximized')
        # Headless Mode
        if headless:
            options.add_argument('--headless')
            options.add_argument("window-size=1400,2100")
        # Optimize CPU
        for flag in self._RESOURCE_FLAGS:
            options.add_argument(flag)
        # Disable save password
        prefs = {'credentials_enable_service': False,
                 'profile.password_manager_enabled': False}
        options.add_experimental_option('prefs', prefs)
        # Set profile
        options.add_argument(f'user-data-dir=./data/profiles/{self.profile}')
        service = Service(executable_path='./ChromeDriver/chromedriver',
                          service_args=[f'--log-path={log_path}ChromeDriver.log'])
        self.driver = webdriver.Chrome(service=service, options=options)
        self.action = ActionChains(self.driver)
        return self.driver, self.action

    def get_driver(self):
        """Return the WebDriver created by setup(), or None before setup()."""
        return self.driver

    def get_action(self):
        """Return the ActionChains created by setup(), or None before setup()."""
        return self.action

104
GlobalExamBot/helpers.py Normal file
View File

@ -0,0 +1,104 @@
# Standard libraries
import sys
import logging
from time import sleep
from random import uniform
import argparse
from selenium.webdriver.common.by import By
import GlobalExamBot.constants as const
class Helpers:
    """Start-up utilities: exit prompt, command-line arguments and logging setup."""

    def __init__(self):
        """
        Helpers class constructor (there is no state to initialise)
        """

    def ask_to_exit(self):
        """
        Ask the user whether the application should stop
        :return: Boolean, True only when the user typed "STOP" (any case)
        """
        try:
            answer = input('Type "STOP" to stop the application:\n')
            if answer.upper() != "STOP":
                # Anything else keeps the bot alive
                print('/!\\ Please type "STOP" to exit the bot execution /!\\')
                return False
            return True
        except Exception:
            # A failing prompt is treated as "do not exit"
            return False

    def load_configuration(self):
        """
        Display the header, then parse the command-line arguments.
        :return: args
        """
        header()
        cli = argparse.ArgumentParser()
        cli.add_argument('-p', '--password', type=str, required=True,
                         help='Set GlobalExam password')
        cli.add_argument('-u', '--username', type=str, required=True,
                         help='Set GlobalExam username')
        cli.add_argument('--noheadless', action='store_true', required=False,
                         help='Desactivate Chrome headless mode')
        return cli.parse_args()

    def logging_configuration(self, logging_level=logging.INFO, filename='data/logs/bot_globalexam.log'):
        """
        Send log records to a file and mirror them on standard output.
        :param logging_level: level given to basicConfig and to the root logger
        :param filename: path of the log file
        """
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(filename=filename, level=logging_level, format=log_format)
        root = logging.getLogger()
        root.setLevel(logging_level)
        # Console copy of the log; its own threshold is fixed at INFO
        console = logging.StreamHandler(sys.stdout)
        console.setLevel(logging.INFO)
        console.setFormatter(logging.Formatter(log_format))
        root.addHandler(console)
# Utilities methods
def header():
    """
    Log the start-up banner (application name and version)
    """
    const.init()
    border = '==\t=============================================================\t=='
    banner = (
        border,
        '==\t ' + const.APP_NAME + ' \t==',
        '==\t version : ' + const.VERSION + ' \t==',
        border,
    )
    for line in banner:
        logging.info(line)
def wait_between(min, max):
    """
    Sleep for a random duration, in seconds, drawn uniformly between min and max,
    so that the bot's timing is irregular and looks more human.
    """
    sleep(uniform(min, max))
def TypeInField(element, xpath, myValue):
    """Type myValue into the field found at xpath, one character at a time."""
    field = element.find_element(by=By.XPATH, value=xpath)
    for character in myValue:
        field.send_keys(character)
        # Short random pause between keystrokes
        wait_between(0.2, 0.4)
    # Slightly longer pause once the whole value has been typed
    wait_between(0.4, 0.7)
def element_exists(xpath, element, by=By.XPATH):
    """
    Check if an element exist
    :param xpath: locator value handed to find_element
    :param element: driver or WebElement to search from
    :param by: locator strategy, XPath by default
    :return: Boolean
    """
    # Give the page a moment to render before looking the element up
    wait_between(2, 3)
    try:
        element.find_element(by=by, value=xpath)
    except Exception:
        # Any lookup failure means "not there". KeyboardInterrupt and SystemExit
        # are not caught, so Ctrl+C still reaches the caller.
        return False
    return True

0
data/.gitkeep Normal file
View File

0
data/logs/.gitkeep Normal file
View File

0
data/profiles/.gitkeep Normal file
View File

49
main.py Normal file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
import sys
import logging
from GlobalExamBot.helpers import Helpers
from GlobalExamBot.database import create_table_sheets
from GlobalExamBot.driver import Driver
from GlobalExamBot.bot import Bot
def main():
    """
    Configure logging, then run the bot until the user asks to stop.

    On Ctrl+C the user is asked whether to stop. If not, the browser is closed
    and the bot is started again; otherwise the process exits with status 1.
    """
    helpers = Helpers()
    # Configuration of the logging library. Done once: repeating it on every
    # restart would add one more console handler each time and duplicate lines.
    helpers.logging_configuration()
    while True:
        # Reset on every attempt so the handler below never sees a stale or unbound driver
        driver = None
        try:
            # Load all configuration variables
            config = helpers.load_configuration()
            logging.info('Starting bot ...')
            create_table_sheets()
            profile = f'prof_{config.username}'
            logging.info(f'Username : {config.username}')
            # Initialize driver and actions
            driver, action = Driver(profile).setup(headless=not config.noheadless)
            # Start bot actions
            Bot(driver, action, config).run()
            return
        except KeyboardInterrupt:
            stop_requested = helpers.ask_to_exit()
            # Close the browser in both cases so no Chrome process is left behind
            if driver is not None:
                driver.quit()
            if stop_requested:
                logging.info('Bye bye !')
                sys.exit(1)
            logging.info('Restart bot ...')


if __name__ == "__main__":
    main()

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
selenium==4.5.0