diff --git a/Seleniumparser.py b/Seleniumparser.py index 71c66cc6b9068ae4310e9872b36c228a41b9edcd..d41ea9b077ef7071ccc68d5d742adc5592ec36f3 100644 --- a/Seleniumparser.py +++ b/Seleniumparser.py @@ -83,8 +83,8 @@ def scraper(urllist): try: WebDriverWait(driver=driver, timeout=60, poll_frequency = 5).until( EC.presence_of_element_located((By.CSS_SELECTOR, '[role="article"]'))) - driver.execute_script("window.scrollTo(0, 1500)") - + driver.execute_script("window.scrollTo(0, 2500)") + time.sleep(5) WebDriverWait(driver=driver, timeout=60).until( EC.presence_of_element_located((By.CSS_SELECTOR, '[role="article"]'))) CommentSections = driver.find_elements(by=By.PARTIAL_LINK_TEXT, value=" Comment") @@ -168,7 +168,6 @@ def scraper(urllist): elif "hr" in Date: HourShift = int(re.sub(" hr.*", "", Date)) PostHour = datetime.datetime.now() - timedelta(hours=HourShift, minutes=0) - print(PostHour) CoarseDate = PostHour.replace(year=datetime.date.today().year).strftime("%Y-%m-%d-%H:%M") elif "Today" in Date: CoarseDate = datetime.datetime.strptime(Date, "Today at %I:%M %p") @@ -238,7 +237,9 @@ def postformatter(pagebunch): if ext == "jpeg": ext = "jpg" CheckedImagePath = f"{ImagePath}.{ext}" + trustedImagePaths = [] if CheckedImagePath.rsplit("/", 1)[-1] in os.listdir("html/images"): + trustedImagePaths.append(CheckedImagePath) os.remove(f"html/{ImagePath}") print(f"{CheckedImagePath} exists, removing {ImagePath}") else: @@ -258,9 +259,12 @@ def postformatter(pagebunch): heuristicHashMatch = heuristicHashMatch.encode('utf-8') hashedPost = hashlib.sha256(heuristicHashMatch).hexdigest() print(hashedPost) - - for i in ImagePaths[1:]: - postString += f" \n\n" + if ImagePaths: + for i in ImagePaths[1:]: + postString += f" \n\n" + else: + for i in trustedImagePaths[1:]: + postString += f" \n\n" postString = re.sub("(?<=.)\n(?=.)", "<br />", postString) for text, link in post[3].items(): @@ -293,6 +297,7 @@ def postformatter(pagebunch): postString = f" ## {TitleDate} \n {postString}" except: print("ImagePaths empty") + postString = f" ## {TitleDate} \n {postString}" pass