diff --git a/scrape.py b/scrape.py
index 5caba2863b8e0aad71205ab450d62f7f19246a55..ed9e81b5a17a40676ea2c9df997764f359ff9061 100644
--- a/scrape.py
+++ b/scrape.py
@@ -1,24 +1,103 @@
-import requests
-from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support import expected_conditions as EC
+
+### 0. SETUP
 
 # Define the URL
 url = "https://cloud.timeedit.net/liu/web/schema/"
 
-# Send an HTTP GET request to the URL
-response = requests.get(url)
-
-# Check if the request was successful
-if response.status_code == 200:
-    # Parse the HTML content of the page
-    soup = BeautifulSoup(response.text, 'html.parser')
-
-    # Locate the element with the selector "h2.greenlink"
-    greenlink_element = soup.select_one("h2.greenlink")
-
-    if greenlink_element:
-        # Extract and print the text content of the element
-        print("Text Content of h2.greenlink:", greenlink_element.text.strip())
-    else:
-        print("Element not found on the page.")
-else:
-    print("Failed to retrieve the webpage. Status code:", response.status_code)
+# Search parameters; the labels must match the text shown in the TimeEdit UI.
+COURSE_CODE = "TSTE24"
+OBJECT_TYPE = "Kurs"
+LECTURE_LABEL = "Föreläsning"
+TERM_START_MONTH = "Augusti"
+
+
+def count_lectures(wait):
+    """Return the number of "td.column1" cells whose text is LECTURE_LABEL.
+
+    Blocks on ``wait`` until at least one such cell is present in the page.
+    """
+    cells = wait.until(
+        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "td.column1"))
+    )
+    return sum(1 for cell in cells if cell.text.strip() == LECTURE_LABEL)
+
+
+# Start headless webdriver!
+options = webdriver.ChromeOptions()
+options.add_argument('--headless')
+driver = webdriver.Chrome(options=options)
+
+# Everything below runs inside try/finally so that the browser is shut down
+# even when an element lookup fails or a wait times out.
+try:
+    # Load the start page
+    driver.get(url)
+    wait = WebDriverWait(driver, 5)  # Adjust the timeout as needed
+
+    ### 1. ENTER THE SCHEDULE SEARCH
+
+    # Click the element with the selector "h2.greenlink" once it is clickable
+    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h2.greenlink"))).click()
+
+    # Wait for the URL to change (indicating the redirection)
+    wait.until(EC.url_changes(url))
+
+    ### 2. SEARCH FOR THE COURSE
+
+    # Select the option with visible text "Kurs" in the type selector
+    type_selector = Select(driver.find_element(By.ID, "fancytypeselector"))
+    type_selector.select_by_visible_text(OBJECT_TYPE)
+
+    # Input the course code in the search box
+    driver.find_element(By.ID, "ffsearchname").send_keys(COURSE_CODE)
+
+    # Click "Sök"
+    driver.find_element(By.CLASS_NAME, "ffsearchbutton").click()
+
+    # Click "Lägg till alla"; wait for clickability, not mere presence
+    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "addallbutton"))).click()
+
+    # Click "Visa Schema"
+    wait.until(EC.element_to_be_clickable((By.ID, "objectbasketgo"))).click()
+
+    ### 3. COUNT THE LECTURES IN THE INITIAL DATE RANGE
+
+    # NOTE(review): presumably the initial range starts today, so these are
+    # the lectures still to come - confirm against the site.
+    future_count = count_lectures(wait)
+
+    ### 4. SET THE START OF THE DATE RANGE TO 1 AUGUST
+
+    # Open start range popup
+    driver.find_element(By.ID, "openStartRangeButton").click()
+
+    # Pick August in the month drop-down
+    month_picker = wait.until(
+        EC.presence_of_element_located((By.CLASS_NAME, "ui-datepicker-month"))
+    )
+    Select(month_picker).select_by_visible_text(TERM_START_MONTH)
+
+    # Click the 1st of the month: pick the first cell labelled "1" rather than
+    # relying on a fixed position in the list of matches.
+    day_elements = wait.until(
+        EC.presence_of_all_elements_located((By.CLASS_NAME, "ui-state-default"))
+    )
+    first_day = next((day for day in day_elements if day.text.strip() == "1"), None)
+    if first_day is None:
+        first_day = day_elements[1]  # fall back to the original fixed position
+    first_day.click()
+
+    # NOTE(review): if the schedule refreshes asynchronously, the old cells may
+    # still be in the page here and be counted instead - confirm.
+    total_count = count_lectures(wait)
+
+    # Lectures added by widening the range / all lectures in the widened range
+    print(total_count-future_count, "/", total_count)
+finally:
+    # Close the WebDriver
+    driver.quit()