112 lines
3.8 KiB
Python
112 lines
3.8 KiB
Python
import pathlib
|
|
import time
|
|
import re
|
|
from typing import Self
|
|
|
|
from selenium.webdriver import Keys
|
|
from selenium.webdriver.common.by import By
|
|
from webdriver_manager.chrome import ChromeDriverManager
|
|
from selenium import webdriver
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
|
|
|
|
|
class Parser:
    """Selenium-driven scraper for the b2b-center.ru marketplace.

    Intended to be used as a context manager: entering the context starts a
    headless Chrome session, leaving it shuts the session down.

        with Parser() as parser:
            parser.login()
            parser.search()
    """

    # Search keyword appended to the market URL as the ``f_keyword`` parameter.
    keyword = "Велесстрой"
    # Base URL of the marketplace listing.
    url = "https://www.b2b-center.ru/market"

    # NOTE(security): credentials are committed to source. They are kept as
    # defaults only so that existing ``login()`` callers keep working; move
    # them to a secret store / environment and pass them to ``login()``.
    _default_login = "jde2015"
    _default_password = "Bel8#Ans3"

    # Case-insensitive "LTL" marker looked up in lot descriptions; compiled
    # once instead of on every loop iteration.
    _ltl_pattern = re.compile(r"[Ll][Tt][Ll]")

    _driver: webdriver.Chrome
    _options: webdriver.ChromeOptions
    _service: webdriver.ChromeService

    def __init__(self):
        """Prepare Chrome options and the chromedriver service.

        The browser itself is not started here; see ``__enter__``.
        """
        # Options are created per instance: a class-level ChromeOptions object
        # would be shared, and every new Parser would append duplicate
        # arguments to it.
        self._options = webdriver.ChromeOptions()
        download_dir = pathlib.Path("./downloads").absolute()
        self._options.add_experimental_option(
            "prefs", {"download.default_directory": str(download_dir)}
        )
        self._options.add_argument("--disable-extensions")
        self._options.add_argument("--disable-gpu")
        self._options.add_argument("--headless=new")
        self._service = webdriver.ChromeService(
            executable_path=ChromeDriverManager().install()
        )

    def __enter__(self) -> Self:
        """Start the Chrome session and return this parser."""
        self._driver = webdriver.Chrome(service=self._service, options=self._options)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Shut the browser session down; exceptions are not suppressed."""
        print("Gracefully shutting down...")
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        self._driver.quit()

    def find_elem(self, by: str, value: str):
        """Wait up to 10 seconds for an element to be present and return it.

        Args:
            by: Locator strategy (a ``By`` constant).
            value: Locator value for that strategy.

        Raises:
            NoSuchElementException: If the element did not appear in time.
        """
        try:
            return WebDriverWait(self._driver, 10).until(
                EC.presence_of_element_located((by, value))
            )
        except TimeoutException as exc:
            raise NoSuchElementException("Element not found") from exc

    def login(self, username: str | None = None, password: str | None = None):
        """Open the market page and submit the login form.

        Args:
            username: Account login; falls back to the built-in default.
            password: Account password; falls back to the built-in default.

        Raises:
            NoSuchElementException: If a login form control is not found.
        """
        if username is None:
            username = self._default_login
        if password is None:
            password = self._default_password

        self._driver.get(self.url)

        time.sleep(3)
        # Open login modal
        self.find_elem(By.ID, "auth_ajax_modal_trigger").click()

        login_control = self.find_elem(By.ID, "login_control")
        password_control = self.find_elem(By.ID, "password_control")

        login_control.click()
        login_control.clear()
        login_control.send_keys(username)

        password_control.click()
        password_control.clear()
        password_control.send_keys(password)
        time.sleep(5)
        password_control.send_keys(Keys.RETURN)

    def search(self):
        """Search the market by ``keyword`` and process the first LTL lot.

        Scans the first-column links of the results table; for the first link
        whose description contains "LTL" (any letter case) the lot page is
        opened via ``accept_documentation`` and scanning stops.

        Raises:
            NoSuchElementException: If the results table is not found.
        """
        time.sleep(5)
        self._driver.get(self.url + f'/?f_keyword={self.keyword}')
        time.sleep(10)
        table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
        links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
        for link in links:
            href = link.get_attribute("href")
            # find_elements returns [] instead of raising, so a single row
            # without a description block no longer aborts the whole scan.
            description_nodes = link.find_elements(By.CSS_SELECTOR, 'div')
            if not href or not description_nodes:
                continue

            # Check LTL
            if self._ltl_pattern.search(description_nodes[0].text):
                self.accept_documentation(href)
                # The driver has navigated away, so the remaining link
                # elements must not be touched any more.
                break

    def accept_documentation(self, url: str):
        """Open a lot page, press its submit button if present, then download.

        Args:
            url: Address of the lot page.

        Raises:
            NoSuchElementException: If the documentation block is not found.
        """
        time.sleep(3)
        self._driver.get(url)

        # Download documentation
        # NOTE(review): presumably this submit button requests/accepts access
        # to the documentation - confirm against the page markup.
        try:
            download_documentation_button = self.find_elem(
                By.CSS_SELECTOR,
                '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]',
            )
        except NoSuchElementException:
            # No button on the page: go straight to the download step.
            download_documentation_button = None

        if download_documentation_button is not None:
            time.sleep(1)
            download_documentation_button.click()

        # Called outside the try block so that a failure inside the download
        # step is not caught above and retried a second time.
        self.download_documentation()

    def download_documentation(self):
        """Fetch every ``.xlsx`` link found in the ``#download_documentation`` block.

        Raises:
            NoSuchElementException: If the documentation block is not found.
        """
        time.sleep(5)
        documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
        # Read all hrefs before navigating: driver.get() may replace the page
        # and turn the remaining anchor elements stale.
        hrefs = [
            doc.get_attribute('href')
            for doc in documentation_block.find_elements(By.CSS_SELECTOR, 'a')
        ]
        for href in hrefs:
            # Anchors without an href yield None; skip them along with
            # anything that is not an .xlsx file.
            if not href or not href.endswith('.xlsx'):
                continue
            self._driver.get(href)
            time.sleep(3)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run one login + search pass inside a managed
    # browser session, which is shut down when the with-block exits.
    with Parser() as session:
        session.login()
        session.search()
|
|
|