"""Selenium scraper for the b2b-center.ru market.

Logs in, searches the market for ``Parser.keyword``, opens the first result
whose description contains "LTL" (any letter case) and requests every
``.xlsx`` link found in that result's documentation block.
"""
import pathlib
import re
import time
from typing import Self

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


class Parser:
    """Context manager that drives a headless Chrome session.

    Use as ``with Parser() as parser: ...``. The browser is started in
    ``__enter__`` and shut down in ``__exit__``.
    """

    # Search keyword appended to the market URL as the ``f_keyword`` parameter.
    keyword = "Велесстрой"
    # Base URL of the market; also the page the login modal is opened from.
    url = "https://www.b2b-center.ru/market"

    # Compiled once; matches "LTL" in any letter case.
    _ltl_pattern = re.compile(r"[Ll][Tt][Ll]")

    _driver: webdriver.Chrome
    _options: webdriver.ChromeOptions
    _service: webdriver.ChromeService

    def __init__(self) -> None:
        """Prepare Chrome options and the chromedriver service.

        The browser itself is not started here; see ``__enter__``.
        """
        # Options are created per instance. A single class-level ChromeOptions
        # would accumulate duplicate arguments every time a Parser is built.
        self._options = webdriver.ChromeOptions()
        download_dir = pathlib.Path('./downloads').absolute()
        prefs = {"download.default_directory": str(download_dir)}
        self._options.add_experimental_option("prefs", prefs)
        self._options.add_argument("--disable-extensions")
        self._options.add_argument("--disable-gpu")
        self._options.add_argument("--headless=new")
        self._service = webdriver.ChromeService(
            executable_path=ChromeDriverManager().install()
        )

    def __enter__(self) -> Self:
        """Start the Chrome session and return this parser."""
        self._driver = webdriver.Chrome(
            service=self._service, options=self._options
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Shut the browser down; exceptions from the ``with`` body propagate."""
        print("Gracefully shutting down...")
        # quit() ends the whole session and the chromedriver process;
        # close() would only close the current window and leak the rest.
        self._driver.quit()

    def find_elem(self, by: str, value: str) -> WebElement:
        """Wait up to 10 seconds for an element to be present and return it.

        Args:
            by: Locator strategy (a ``By`` constant).
            value: Locator value for that strategy.

        Raises:
            NoSuchElementException: If the element does not appear in time.
        """
        locator = (by, value)
        try:
            return WebDriverWait(self._driver, 10).until(
                EC.presence_of_element_located(locator)
            )
        except TimeoutException as exc:
            # Chain the timeout so the original cause stays in the traceback.
            raise NoSuchElementException("Element not found") from exc

    def login(self, username: str | None = None, password: str | None = None) -> None:
        """Open the market page and submit the login form.

        Args:
            username: Account login; falls back to the built-in default.
            password: Account password; falls back to the built-in default.
        """
        # NOTE(review): credentials are hard-coded in source. They are kept as
        # defaults for backward compatibility, but should come from the caller
        # or a secret store instead.
        if username is None:
            username = "jde2015"
        if password is None:
            password = "Bel8#Ans3"

        self._driver.get(self.url)
        time.sleep(3)

        # Open login modal
        self.find_elem(By.ID, "auth_ajax_modal_trigger").click()

        login_control = self.find_elem(By.ID, "login_control")
        password_control = self.find_elem(By.ID, "password_control")

        self._fill(login_control, username)
        self._fill(password_control, password)

        time.sleep(5)
        password_control.send_keys(Keys.RETURN)

    @staticmethod
    def _fill(control: WebElement, text: str) -> None:
        """Focus an input, clear it and type ``text`` into it."""
        control.click()
        control.clear()
        control.send_keys(text)

    def search(self) -> None:
        """Search the market for ``keyword`` and process the first LTL result.

        Only the first result whose description matches is opened; the
        remaining results are not inspected.
        """
        time.sleep(5)
        self._driver.get(self.url + f'/?f_keyword={self.keyword}')
        time.sleep(10)

        table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
        links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")

        for link in links:
            href = link.get_attribute("href")
            description = link.find_element(By.CSS_SELECTOR, 'div').text

            # Check LTL
            if self._ltl_pattern.search(description):
                self.accept_documentation(href)
                # Navigation invalidates the remaining link elements, so stop.
                break

    def accept_documentation(self, url: str) -> None:
        """Open a result page, press its submit button if present, then download.

        Args:
            url: Address of the result page to open.

        Raises:
            NoSuchElementException: If the documentation block is not found.
        """
        time.sleep(3)
        self._driver.get(url)

        # Download documentation
        try:
            download_documentation_button = self.find_elem(
                By.CSS_SELECTOR,
                '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]',
            )
        except NoSuchElementException:
            # No submit button on this page (presumably nothing left to
            # confirm; verify against the site); go straight to the download.
            pass
        else:
            time.sleep(1)
            download_documentation_button.click()

        # Called outside the try so a failure here is reported once instead of
        # being caught and silently triggering a second download attempt.
        self.download_documentation()

    def download_documentation(self) -> None:
        """Request every ``.xlsx`` link inside the ``#download_documentation`` block.

        Raises:
            NoSuchElementException: If the documentation block is not found.
        """
        time.sleep(5)
        documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
        docs = documentation_block.find_elements(By.CSS_SELECTOR, 'a')

        # Read all hrefs before navigating: once the driver leaves the page the
        # anchor elements may become stale. Anchors without an href yield None.
        hrefs = [doc.get_attribute('href') for doc in docs]

        for href in hrefs:
            if not href or not href.endswith('.xlsx'):
                continue

            # Presumably Chrome saves the file to the configured download
            # directory; the pause gives the transfer time to start.
            self._driver.get(href)
            time.sleep(3)


if __name__ == "__main__":
    with Parser() as parser:
        parser.login()
        parser.search()