auth + downloading xlsx files

master
Ernest Litvinenko 2024-01-26 16:21:44 +03:00
parent 26aaaf672a
commit 45b540b753
4 changed files with 169 additions and 0 deletions

0
README.md Normal file
View File

108
main.py Normal file
View File

@ -0,0 +1,108 @@
import os
import pathlib
import re
import time
from typing import Self
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
class Parser:
keyword = "Велесстрой"
url = "https://www.b2b-center.ru/market"
_driver: webdriver.Chrome
_options: webdriver.ChromeOptions = webdriver.ChromeOptions()
_service: webdriver.ChromeService
def __init__(self):
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())};
self._options.add_experimental_option("prefs", prefs)
self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
def __enter__(self) -> Self:
self._driver = webdriver.Chrome(service=self._service, options=self._options)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
print("Gracefully shutting down...")
self._driver.close()
def find_elem(self, by: str, value: str):
try:
return WebDriverWait(self._driver, 10).until(EC.presence_of_element_located((by, value)))
except TimeoutException:
raise NoSuchElementException("Element not found")
def login(self):
self._driver.get(self.url)
time.sleep(3)
# Open login modal
element = self.find_elem(By.ID, "auth_ajax_modal_trigger")
element.click()
login_control = self.find_elem(By.ID, "login_control")
password_control = self.find_elem(By.ID, "password_control")
login_control.click()
login_control.clear()
login_control.send_keys("jde2015")
password_control.click()
password_control.clear()
password_control.send_keys("Bel8#Ans3")
time.sleep(5)
password_control.send_keys(Keys.RETURN)
def search(self):
time.sleep(5)
self._driver.get(self.url + f'/?f_keyword={self.keyword}')
time.sleep(10)
table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
for link in links:
href = link.get_attribute("href")
description = link.find_element(By.CSS_SELECTOR, 'div').text
# Check LTL
if len(re.findall(r"[Ll][Tt][Ll]", description)) != 0:
self.accept_documentation(href)
break
def accept_documentation(self, url: str):
time.sleep(3)
self._driver.get(url)
# Скачать документацию
try:
download_documentation_button = self.find_elem(By.CSS_SELECTOR, '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]')
time.sleep(1)
download_documentation_button.click()
self.download_documentation()
except NoSuchElementException:
self.download_documentation()
def download_documentation(self):
time.sleep(5)
documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
docs = documentation_block.find_elements(By.CSS_SELECTOR, 'a')
for doc in docs:
href = doc.get_attribute('href')
if not href.endswith('.xlsx'):
continue
self._driver.get(href)
time.sleep(3)
def main() -> None:
    """Run one browser session: log in, then run the search."""
    with Parser() as session:
        session.login()
        session.search()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,2 @@
selenium
webdriver-manager

View File

@ -0,0 +1,59 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile
#
attrs==23.2.0
# via
# outcome
# trio
certifi==2023.11.17
# via
# requests
# selenium
charset-normalizer==3.3.2
# via requests
exceptiongroup==1.2.0
# via
# trio
# trio-websocket
h11==0.14.0
# via wsproto
idna==3.6
# via
# requests
# trio
outcome==1.3.0.post0
# via trio
packaging==23.2
# via webdriver-manager
pysocks==1.7.1
# via urllib3
python-dotenv==1.0.1
# via webdriver-manager
requests==2.31.0
# via webdriver-manager
selenium==4.17.2
# via -r requirements.in
sniffio==1.3.0
# via trio
sortedcontainers==2.4.0
# via trio
trio==0.24.0
# via
# selenium
# trio-websocket
trio-websocket==0.11.1
# via selenium
typing-extensions==4.9.0
# via selenium
urllib3[socks]==2.1.0
# via
# requests
# selenium
# urllib3
webdriver-manager==4.0.1
# via -r requirements.in
wsproto==1.2.0
# via trio-websocket