master
Ernest Litvinenko 2024-02-15 13:26:32 +03:00
parent 7beec4238b
commit 1741d3e1e0
6 changed files with 61 additions and 29 deletions

6
.gitignore vendored
View File

@ -158,3 +158,9 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
database.db
test*
probe*
*.xlsx

View File

@ -1,12 +1,12 @@
import json
import pathlib
from urllib.parse import urlparse, parse_qs
import numpy as np
from functools import reduce
import requests
from openpyxl import load_workbook
from pathlib import Path
import pandas as pd
from storage import Storage
@ -35,7 +35,6 @@ class ExcelParser:
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
self._wb = load_workbook(str(path.with_suffix('.xlsx')))
@classmethod
def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
@ -54,6 +53,7 @@ class ExcelParser:
not_existed_columns = list(set(columns) - set(df.columns))
if len(not_existed_columns) > 0:
self.add_link_to_database()
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
df = df.loc[:, columns]
@ -62,7 +62,8 @@ class ExcelParser:
isna_values_y = list(set(np.where(df.isna())[1]))
if isna_values_y:
raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
raise ValueError(
f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
return df
@ -106,15 +107,27 @@ class ExcelParser:
"vat": int(data['percent_vat']),
"max_days": int(data['maxdays']),
"transport_delivery_date": df["Дата загрузки"]}
self.add_link_to_database(query, answer=data)
return None
def add_link_to_database(self, query: dict, answer: dict):
def add_link_to_database(self, query: dict | None = None, answer: dict | None = None):
idx = int(parse_qs(urlparse(self.url).query).get('id')[0])
if answer is None:
price = None
else:
price = answer.get('price', None)
answer = json.dumps(answer)
if query:
query = json.dumps(query)
[self.storage.add_link(
idx,
str(file.absolute()),
self.url,
int(answer["price"]),
json.dumps(query),
json.dumps(answer)) for file in self._paths
price,
query,
answer) for file in self._paths
]

23
main.py
View File

@ -3,6 +3,7 @@ import os
import time
import re
from typing import Self
from urllib.parse import urlparse, parse_qs
from selenium.webdriver import Keys, ActionChains
from selenium.webdriver.common.by import By
@ -16,6 +17,11 @@ from storage import Storage
from telegram_logs import logger
import dotenv
dotenv.load_dotenv('.env')
IS_PROD = os.environ.get('PROD_ENV') == '1'
class Parser:
keyword = "Велесстрой"
@ -30,10 +36,11 @@ class Parser:
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
self._options.add_experimental_option("prefs", prefs)
self._options.add_argument("--disable-extensions")
self._options.add_argument("--disable-gpu")
self._options.add_argument("--headless=new")
self._options.add_argument("window-size=1920,1080")
if IS_PROD:
self._options.add_argument("--disable-extensions")
self._options.add_argument("--disable-gpu")
self._options.add_argument("--headless=new")
self._options.add_argument("window-size=1920,1080")
self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
self.storage = Storage()
@ -88,7 +95,9 @@ class Parser:
links = [link.get_attribute("href") for link in links if
self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
links = [link for link in links if link not in self.storage.get_links()]
links = [link for link in links if
f"https://www.b2b-center.ru/market/view.html?id=" + parse_qs(urlparse(link).query).get('id')[
0] not in self.storage.get_links()]
for link in links:
logger.info("Обработка заявки: " + link)
@ -190,8 +199,10 @@ class Parser:
row_15.click()
time.sleep(10)
self.apply_offer()
if IS_PROD:
self.apply_offer()
logger.success("Заявка успешно отправлена")
time.sleep(10)
except Exception as exc:
logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")

View File

@ -11,3 +11,4 @@ pyexcel
pyexcel-xls
pyexcel-xlsx
loguru
python-dotenv

View File

@ -59,6 +59,8 @@ pandas==2.2.0
# via -r requirements.in
pillow==10.2.0
# via xls2xlsx
python-dotenv==1.0.1
# via -r requirements.in
pyexcel==0.7.0
# via -r requirements.in
pyexcel-io==0.6.6

View File

@ -19,17 +19,23 @@ class Storage:
with self.get_cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS ltl (
id INTEGER PRIMARY KEY,
id INTEGER,
doc_filepath TEXT,
link TEXT,
total_cost INTEGER,
query TEXT,
answer TEXT
answer TEXT,
PRIMARY KEY (id, doc_filepath)
);
""")
cur.execute("""
create unique index if not exists fp_link_index on ltl(doc_filepath, link);
create trigger if not exists add_link_trig
after insert
on ltl
begin
update ltl set link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id where id=new.id;
end;
""")
cur.execute("""
@ -43,19 +49,12 @@ CREATE TABLE IF NOT EXISTS ltl (
where link = new.link;
end;
""")
cur.execute("""
create table if not exists users (
id INTEGER PRIMARY KEY,
tg_user_id TEXT
);
""")
self.con.commit()
def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str):
def add_link(self, id: int, doc_filepath: str, total_cost: int | None, query: str | None, answer: str | None):
with self.get_cursor() as cur:
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
(doc_filepath, link, total_cost, query, answer))
cur.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
(id, doc_filepath, total_cost, query, answer))
self.con.commit()
def get_links(self):