class Parser:
    """Selenium-driven scraper for the b2b-center.ru marketplace.

    The parser logs in, searches the market for :attr:`keyword`, opens the
    first result whose description mentions "LTL" and navigates to every
    ``.xlsx`` link in that result's documentation block. Chrome is configured
    to save downloads under ``./downloads`` (resolved against the current
    working directory).

    Use it as a context manager so the browser session is always ended::

        with Parser() as parser:
            parser.login()
            parser.search()
    """

    keyword = "Велесстрой"
    url = "https://www.b2b-center.ru/market"

    # SECURITY: these credentials are committed to the repository. They are
    # kept only as a last-resort fallback so existing callers keep working;
    # rotate them and supply B2B_LOGIN / B2B_PASSWORD (or explicit arguments
    # to ``login``) instead.
    _default_login = "jde2015"
    _default_password = "Bel8#Ans3"

    # Compiled once; matches "ltl" in any letter case.
    _LTL_PATTERN = re.compile(r"[Ll][Tt][Ll]")

    _driver: "webdriver.Chrome"
    _options: "webdriver.ChromeOptions"
    _service: "webdriver.ChromeService"

    def __init__(self):
        """Prepare Chrome options and the chromedriver service.

        No browser is started here; that happens in ``__enter__``.
        """
        download_dir = pathlib.Path('./downloads').absolute()
        # Options are built per instance: a class-level ChromeOptions object
        # would be shared (and mutated again) by every Parser created.
        self._options = webdriver.ChromeOptions()
        self._options.add_experimental_option(
            "prefs", {"download.default_directory": str(download_dir)}
        )
        self._service = webdriver.ChromeService(
            executable_path=ChromeDriverManager().install()
        )

    def __enter__(self) -> "Parser":
        """Start Chrome and return the parser itself."""
        self._driver = webdriver.Chrome(service=self._service, options=self._options)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End the browser session; exceptions from the body propagate."""
        print("Gracefully shutting down...")
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and leave the driver process running.
        self._driver.quit()

    def find_elem(self, by: str, value: str, timeout: float = 10):
        """Wait until an element is present in the DOM and return it.

        Args:
            by: Locator strategy (one of the ``By`` constants).
            value: Locator value for that strategy.
            timeout: Maximum number of seconds to wait.

        Raises:
            NoSuchElementException: The element did not appear in time. The
                underlying ``TimeoutException`` is attached as the cause.
        """
        try:
            return WebDriverWait(self._driver, timeout).until(
                EC.presence_of_element_located((by, value))
            )
        except TimeoutException as exc:
            raise NoSuchElementException("Element not found") from exc

    def _resolve_credentials(self, username, password):
        """Return ``(username, password)`` to log in with.

        Precedence for each value: explicit argument, then the
        ``B2B_LOGIN`` / ``B2B_PASSWORD`` environment variable, then the
        class-level fallback.
        """
        import os

        if username is None:
            username = os.environ.get("B2B_LOGIN", self._default_login)
        if password is None:
            password = os.environ.get("B2B_PASSWORD", self._default_password)
        return username, password

    def login(self, username=None, password=None):
        """Open the market page and submit the login form.

        Args:
            username: Account login; see ``_resolve_credentials`` for the
                value used when omitted.
            password: Account password; resolved the same way.

        Raises:
            NoSuchElementException: A login control could not be located.
        """
        username, password = self._resolve_credentials(username, password)

        self._driver.get(self.url)

        # NOTE(review): fixed pauses are kept from the original flow;
        # presumably they give the page time to settle -- confirm.
        time.sleep(3)
        # Open login modal
        self.find_elem(By.ID, "auth_ajax_modal_trigger").click()

        login_control = self.find_elem(By.ID, "login_control")
        password_control = self.find_elem(By.ID, "password_control")

        login_control.click()
        login_control.clear()
        login_control.send_keys(username)

        password_control.click()
        password_control.clear()
        password_control.send_keys(password)
        time.sleep(5)
        password_control.send_keys(Keys.RETURN)

    def _search_url(self) -> str:
        """Build the search URL with the keyword percent-encoded."""
        from urllib.parse import quote

        # Encoding keeps characters such as '&', '#' or spaces in the keyword
        # from breaking or truncating the query string.
        return self.url + '/?f_keyword=' + quote(self.keyword, safe='')

    @classmethod
    def _mentions_ltl(cls, description: str) -> bool:
        """Return True when *description* contains "ltl" in any case."""
        return cls._LTL_PATTERN.search(description) is not None

    def search(self):
        """Search the market and process the first LTL result.

        Iterates the links in the first column of the results table and, for
        the first one whose description mentions LTL, calls
        ``accept_documentation`` with its URL, then stops.
        """
        time.sleep(5)
        self._driver.get(self._search_url())
        time.sleep(10)
        table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
        links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
        for link in links:
            href = link.get_attribute("href")
            description = link.find_element(By.CSS_SELECTOR, 'div').text

            # Check LTL
            if self._mentions_ltl(description):
                self.accept_documentation(href)
                break

    def accept_documentation(self, url: str):
        """Open a result page, press its submit button if present, then download.

        Args:
            url: Address of the result page to open.
        """
        time.sleep(3)
        self._driver.get(url)

        # Download the documentation.
        # Only the button lookup is guarded: a missing button is expected,
        # whereas a failure inside download_documentation() must not be
        # mistaken for it and trigger a second download attempt.
        try:
            download_documentation_button = self.find_elem(
                By.CSS_SELECTOR,
                '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]',
            )
        except NoSuchElementException:
            download_documentation_button = None

        if download_documentation_button is not None:
            time.sleep(1)
            download_documentation_button.click()

        self.download_documentation()

    @staticmethod
    def _is_xlsx_link(href) -> bool:
        """Return True when *href* is a string ending in ``.xlsx``.

        ``get_attribute('href')`` yields ``None`` for anchors without an
        ``href``, so non-string values are rejected instead of crashing.
        """
        return isinstance(href, str) and href.endswith('.xlsx')

    def download_documentation(self):
        """Navigate to every ``.xlsx`` link inside ``#download_documentation``.

        Raises:
            NoSuchElementException: The documentation block is not on the page.
        """
        time.sleep(5)
        documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
        for doc in documentation_block.find_elements(By.CSS_SELECTOR, 'a'):
            href = doc.get_attribute('href')
            if not self._is_xlsx_link(href):
                continue
            self._driver.get(href)
            time.sleep(3)


if __name__ == "__main__":
    with Parser() as parser:
        parser.login()
        parser.search()
b/requirements.in @@ -0,0 +1,2 @@ +selenium +webdriver-manager \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e69de29..cf0cbb8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,59 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile +# +attrs==23.2.0 + # via + # outcome + # trio +certifi==2023.11.17 + # via + # requests + # selenium +charset-normalizer==3.3.2 + # via requests +exceptiongroup==1.2.0 + # via + # trio + # trio-websocket +h11==0.14.0 + # via wsproto +idna==3.6 + # via + # requests + # trio +outcome==1.3.0.post0 + # via trio +packaging==23.2 + # via webdriver-manager +pysocks==1.7.1 + # via urllib3 +python-dotenv==1.0.1 + # via webdriver-manager +requests==2.31.0 + # via webdriver-manager +selenium==4.17.2 + # via -r requirements.in +sniffio==1.3.0 + # via trio +sortedcontainers==2.4.0 + # via trio +trio==0.24.0 + # via + # selenium + # trio-websocket +trio-websocket==0.11.1 + # via selenium +typing-extensions==4.9.0 + # via selenium +urllib3[socks]==2.1.0 + # via + # requests + # selenium + # urllib3 +webdriver-manager==4.0.1 + # via -r requirements.in +wsproto==1.2.0 + # via trio-websocket