auth + downloading xlsx files
parent
26aaaf672a
commit
45b540b753
|
@ -0,0 +1,108 @@
|
|||
import pathlib
|
||||
import time
|
||||
import re
|
||||
from typing import Self
|
||||
|
||||
from selenium.webdriver import Keys
|
||||
from selenium.webdriver.common.by import By
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
||||
|
||||
|
||||
class Parser:
|
||||
keyword = "Велесстрой"
|
||||
url = "https://www.b2b-center.ru/market"
|
||||
|
||||
_driver: webdriver.Chrome
|
||||
_options: webdriver.ChromeOptions = webdriver.ChromeOptions()
|
||||
_service: webdriver.ChromeService
|
||||
|
||||
def __init__(self):
|
||||
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())};
|
||||
self._options.add_experimental_option("prefs", prefs)
|
||||
self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
self._driver = webdriver.Chrome(service=self._service, options=self._options)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
print("Gracefully shutting down...")
|
||||
self._driver.close()
|
||||
|
||||
def find_elem(self, by: str, value: str):
|
||||
try:
|
||||
return WebDriverWait(self._driver, 10).until(EC.presence_of_element_located((by, value)))
|
||||
except TimeoutException:
|
||||
raise NoSuchElementException("Element not found")
|
||||
|
||||
def login(self):
|
||||
self._driver.get(self.url)
|
||||
|
||||
time.sleep(3)
|
||||
# Open login modal
|
||||
element = self.find_elem(By.ID, "auth_ajax_modal_trigger")
|
||||
element.click()
|
||||
|
||||
login_control = self.find_elem(By.ID, "login_control")
|
||||
password_control = self.find_elem(By.ID, "password_control")
|
||||
|
||||
login_control.click()
|
||||
login_control.clear()
|
||||
login_control.send_keys("jde2015")
|
||||
|
||||
password_control.click()
|
||||
password_control.clear()
|
||||
password_control.send_keys("Bel8#Ans3")
|
||||
time.sleep(5)
|
||||
password_control.send_keys(Keys.RETURN)
|
||||
|
||||
def search(self):
|
||||
time.sleep(5)
|
||||
self._driver.get(self.url + f'/?f_keyword={self.keyword}')
|
||||
time.sleep(10)
|
||||
table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
|
||||
links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
|
||||
for link in links:
|
||||
href = link.get_attribute("href")
|
||||
description = link.find_element(By.CSS_SELECTOR, 'div').text
|
||||
|
||||
# Check LTL
|
||||
if len(re.findall(r"[Ll][Tt][Ll]", description)) != 0:
|
||||
self.accept_documentation(href)
|
||||
break
|
||||
|
||||
def accept_documentation(self, url: str):
|
||||
time.sleep(3)
|
||||
self._driver.get(url)
|
||||
|
||||
# Скачать документацию
|
||||
try:
|
||||
download_documentation_button = self.find_elem(By.CSS_SELECTOR, '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]')
|
||||
time.sleep(1)
|
||||
download_documentation_button.click()
|
||||
self.download_documentation()
|
||||
|
||||
except NoSuchElementException:
|
||||
self.download_documentation()
|
||||
|
||||
def download_documentation(self):
|
||||
time.sleep(5)
|
||||
documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
|
||||
docs = documentation_block.find_elements(By.CSS_SELECTOR, 'a')
|
||||
for doc in docs:
|
||||
href = doc.get_attribute('href')
|
||||
if not href.endswith('.xlsx'):
|
||||
continue
|
||||
self._driver.get(href)
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run one browser session that logs in and then
    # performs the keyword search; the context manager shuts the browser
    # down afterwards.
    with Parser() as session:
        session.login()
        session.search()
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
selenium
|
||||
webdriver-manager
|
|
@ -0,0 +1,59 @@
|
|||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.10
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile
|
||||
#
|
||||
attrs==23.2.0
|
||||
# via
|
||||
# outcome
|
||||
# trio
|
||||
certifi==2023.11.17
|
||||
# via
|
||||
# requests
|
||||
# selenium
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
exceptiongroup==1.2.0
|
||||
# via
|
||||
# trio
|
||||
# trio-websocket
|
||||
h11==0.14.0
|
||||
# via wsproto
|
||||
idna==3.6
|
||||
# via
|
||||
# requests
|
||||
# trio
|
||||
outcome==1.3.0.post0
|
||||
# via trio
|
||||
packaging==23.2
|
||||
# via webdriver-manager
|
||||
pysocks==1.7.1
|
||||
# via urllib3
|
||||
python-dotenv==1.0.1
|
||||
# via webdriver-manager
|
||||
requests==2.31.0
|
||||
# via webdriver-manager
|
||||
selenium==4.17.2
|
||||
# via -r requirements.in
|
||||
sniffio==1.3.0
|
||||
# via trio
|
||||
sortedcontainers==2.4.0
|
||||
# via trio
|
||||
trio==0.24.0
|
||||
# via
|
||||
# selenium
|
||||
# trio-websocket
|
||||
trio-websocket==0.11.1
|
||||
# via selenium
|
||||
typing-extensions==4.9.0
|
||||
# via selenium
|
||||
urllib3[socks]==2.1.0
|
||||
# via
|
||||
# requests
|
||||
# selenium
|
||||
# urllib3
|
||||
webdriver-manager==4.0.1
|
||||
# via -r requirements.in
|
||||
wsproto==1.2.0
|
||||
# via trio-websocket
|
Loading…
Reference in New Issue