master
Ernest Litvinenko 2024-02-15 13:26:32 +03:00
parent 7beec4238b
commit 1741d3e1e0
6 changed files with 61 additions and 29 deletions

6
.gitignore vendored
View File

@ -158,3 +158,9 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/ .idea/
database.db
test*
probe*
*.xlsx

View File

@ -1,12 +1,12 @@
import json import json
import pathlib import pathlib
from urllib.parse import urlparse, parse_qs
import numpy as np import numpy as np
from functools import reduce from functools import reduce
import requests import requests
from openpyxl import load_workbook from openpyxl import load_workbook
from pathlib import Path
import pandas as pd import pandas as pd
from storage import Storage from storage import Storage
@ -35,7 +35,6 @@ class ExcelParser:
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx'))) pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
self._wb = load_workbook(str(path.with_suffix('.xlsx'))) self._wb = load_workbook(str(path.with_suffix('.xlsx')))
@classmethod
def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame: def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"] columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
@ -54,6 +53,7 @@ class ExcelParser:
not_existed_columns = list(set(columns) - set(df.columns)) not_existed_columns = list(set(columns) - set(df.columns))
if len(not_existed_columns) > 0: if len(not_existed_columns) > 0:
self.add_link_to_database()
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}") raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
df = df.loc[:, columns] df = df.loc[:, columns]
@ -62,7 +62,8 @@ class ExcelParser:
isna_values_y = list(set(np.where(df.isna())[1])) isna_values_y = list(set(np.where(df.isna())[1]))
if isna_values_y: if isna_values_y:
raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}") raise ValueError(
f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
return df return df
@ -106,15 +107,27 @@ class ExcelParser:
"vat": int(data['percent_vat']), "vat": int(data['percent_vat']),
"max_days": int(data['maxdays']), "max_days": int(data['maxdays']),
"transport_delivery_date": df["Дата загрузки"]} "transport_delivery_date": df["Дата загрузки"]}
self.add_link_to_database(query, answer=data)
return None return None
def add_link_to_database(self, query: dict, answer: dict): def add_link_to_database(self, query: dict | None = None, answer: dict | None = None):
idx = int(parse_qs(urlparse(self.url).query).get('id')[0])
if answer is None:
price = None
else:
price = answer.get('price', None)
answer = json.dumps(answer)
if query:
query = json.dumps(query)
[self.storage.add_link( [self.storage.add_link(
idx,
str(file.absolute()), str(file.absolute()),
self.url, price,
int(answer["price"]), query,
json.dumps(query), answer) for file in self._paths
json.dumps(answer)) for file in self._paths
] ]

23
main.py
View File

@ -3,6 +3,7 @@ import os
import time import time
import re import re
from typing import Self from typing import Self
from urllib.parse import urlparse, parse_qs
from selenium.webdriver import Keys, ActionChains from selenium.webdriver import Keys, ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
@ -16,6 +17,11 @@ from storage import Storage
from telegram_logs import logger from telegram_logs import logger
import dotenv
dotenv.load_dotenv('.env')
IS_PROD = os.environ.get('PROD_ENV') == '1'
class Parser: class Parser:
keyword = "Велесстрой" keyword = "Велесстрой"
@ -30,10 +36,11 @@ class Parser:
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())} prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
self._options.add_experimental_option("prefs", prefs) self._options.add_experimental_option("prefs", prefs)
self._options.add_argument("--disable-extensions") if IS_PROD:
self._options.add_argument("--disable-gpu") self._options.add_argument("--disable-extensions")
self._options.add_argument("--headless=new") self._options.add_argument("--disable-gpu")
self._options.add_argument("window-size=1920,1080") self._options.add_argument("--headless=new")
self._options.add_argument("window-size=1920,1080")
self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install()) self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
self.storage = Storage() self.storage = Storage()
@ -88,7 +95,9 @@ class Parser:
links = [link.get_attribute("href") for link in links if links = [link.get_attribute("href") for link in links if
self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)] self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
links = [link for link in links if link not in self.storage.get_links()] links = [link for link in links if
f"https://www.b2b-center.ru/market/view.html?id=" + parse_qs(urlparse(link).query).get('id')[
0] not in self.storage.get_links()]
for link in links: for link in links:
logger.info("Обработка заявки: " + link) logger.info("Обработка заявки: " + link)
@ -190,8 +199,10 @@ class Parser:
row_15.click() row_15.click()
time.sleep(10) time.sleep(10)
self.apply_offer() if IS_PROD:
self.apply_offer()
logger.success("Заявка успешно отправлена") logger.success("Заявка успешно отправлена")
time.sleep(10)
except Exception as exc: except Exception as exc:
logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}") logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")

View File

@ -11,3 +11,4 @@ pyexcel
pyexcel-xls pyexcel-xls
pyexcel-xlsx pyexcel-xlsx
loguru loguru
pydotenv

View File

@ -59,6 +59,8 @@ pandas==2.2.0
# via -r requirements.in # via -r requirements.in
pillow==10.2.0 pillow==10.2.0
# via xls2xlsx # via xls2xlsx
pydotenv==0.0.7
# via -r requirements.in
pyexcel==0.7.0 pyexcel==0.7.0
# via -r requirements.in # via -r requirements.in
pyexcel-io==0.6.6 pyexcel-io==0.6.6

View File

@ -19,17 +19,23 @@ class Storage:
with self.get_cursor() as cur: with self.get_cursor() as cur:
cur.execute(""" cur.execute("""
CREATE TABLE IF NOT EXISTS ltl ( CREATE TABLE IF NOT EXISTS ltl (
id INTEGER PRIMARY KEY, id INTEGER,
doc_filepath TEXT, doc_filepath TEXT,
link TEXT, link TEXT,
total_cost INTEGER, total_cost INTEGER,
query TEXT, query TEXT,
answer TEXT answer TEXT,
PRIMARY KEY (id, doc_filepath)
); );
""") """)
cur.execute(""" cur.execute("""
create unique index if not exists fp_link_index on ltl(doc_filepath, link); create trigger if not exists add_link_trig
after insert
on ltl
begin
update ltl set link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id where id=new.id;
end;
""") """)
cur.execute(""" cur.execute("""
@ -43,19 +49,12 @@ CREATE TABLE IF NOT EXISTS ltl (
where link = new.link; where link = new.link;
end; end;
""") """)
cur.execute("""
create table if not exists users (
id INTEGER PRIMARY KEY,
tg_user_id TEXT
);
""")
self.con.commit() self.con.commit()
def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str): def add_link(self, id: int, doc_filepath: str, total_cost: int | None, query: str | None, answer: str | None):
with self.get_cursor() as cur: with self.get_cursor() as cur:
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)", cur.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
(doc_filepath, link, total_cost, query, answer)) (id, doc_filepath, total_cost, query, answer))
self.con.commit() self.con.commit()
def get_links(self): def get_links(self):