hotfix 1
parent
7beec4238b
commit
1741d3e1e0
|
@ -157,4 +157,10 @@ cython_debug/
|
|||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
.idea/
|
||||
|
||||
database.db
|
||||
|
||||
test*
|
||||
probe*
|
||||
*.xlsx
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
import json
|
||||
import pathlib
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
import numpy as np
|
||||
from functools import reduce
|
||||
|
||||
import requests
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
from storage import Storage
|
||||
|
@ -35,7 +35,6 @@ class ExcelParser:
|
|||
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
|
||||
self._wb = load_workbook(str(path.with_suffix('.xlsx')))
|
||||
|
||||
@classmethod
|
||||
def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
|
||||
columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
|
||||
|
||||
|
@ -54,6 +53,7 @@ class ExcelParser:
|
|||
not_existed_columns = list(set(columns) - set(df.columns))
|
||||
|
||||
if len(not_existed_columns) > 0:
|
||||
self.add_link_to_database()
|
||||
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
|
||||
df = df.loc[:, columns]
|
||||
|
||||
|
@ -62,7 +62,8 @@ class ExcelParser:
|
|||
isna_values_y = list(set(np.where(df.isna())[1]))
|
||||
|
||||
if isna_values_y:
|
||||
raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
|
||||
raise ValueError(
|
||||
f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
|
||||
|
||||
return df
|
||||
|
||||
|
@ -106,15 +107,27 @@ class ExcelParser:
|
|||
"vat": int(data['percent_vat']),
|
||||
"max_days": int(data['maxdays']),
|
||||
"transport_delivery_date": df["Дата загрузки"]}
|
||||
|
||||
self.add_link_to_database(query, answer=data)
|
||||
return None
|
||||
|
||||
def add_link_to_database(self, query: dict, answer: dict):
|
||||
def add_link_to_database(self, query: dict | None = None, answer: dict | None = None):
|
||||
idx = int(parse_qs(urlparse(self.url).query).get('id')[0])
|
||||
if answer is None:
|
||||
price = None
|
||||
else:
|
||||
price = answer.get('price', None)
|
||||
answer = json.dumps(answer)
|
||||
|
||||
if query:
|
||||
query = json.dumps(query)
|
||||
|
||||
[self.storage.add_link(
|
||||
idx,
|
||||
str(file.absolute()),
|
||||
self.url,
|
||||
int(answer["price"]),
|
||||
json.dumps(query),
|
||||
json.dumps(answer)) for file in self._paths
|
||||
price,
|
||||
query,
|
||||
answer) for file in self._paths
|
||||
]
|
||||
|
||||
|
||||
|
|
23
main.py
23
main.py
|
@ -3,6 +3,7 @@ import os
|
|||
import time
|
||||
import re
|
||||
from typing import Self
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
from selenium.webdriver import Keys, ActionChains
|
||||
from selenium.webdriver.common.by import By
|
||||
|
@ -16,6 +17,11 @@ from storage import Storage
|
|||
|
||||
from telegram_logs import logger
|
||||
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv('.env')
|
||||
IS_PROD = os.environ.get('PROD_ENV') == '1'
|
||||
|
||||
|
||||
class Parser:
|
||||
keyword = "Велесстрой"
|
||||
|
@ -30,10 +36,11 @@ class Parser:
|
|||
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
|
||||
|
||||
self._options.add_experimental_option("prefs", prefs)
|
||||
self._options.add_argument("--disable-extensions")
|
||||
self._options.add_argument("--disable-gpu")
|
||||
self._options.add_argument("--headless=new")
|
||||
self._options.add_argument("window-size=1920,1080")
|
||||
if IS_PROD:
|
||||
self._options.add_argument("--disable-extensions")
|
||||
self._options.add_argument("--disable-gpu")
|
||||
self._options.add_argument("--headless=new")
|
||||
self._options.add_argument("window-size=1920,1080")
|
||||
self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
|
||||
|
||||
self.storage = Storage()
|
||||
|
@ -88,7 +95,9 @@ class Parser:
|
|||
|
||||
links = [link.get_attribute("href") for link in links if
|
||||
self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
|
||||
links = [link for link in links if link not in self.storage.get_links()]
|
||||
links = [link for link in links if
|
||||
f"https://www.b2b-center.ru/market/view.html?id=" + parse_qs(urlparse(link).query).get('id')[
|
||||
0] not in self.storage.get_links()]
|
||||
|
||||
for link in links:
|
||||
logger.info("Обработка заявки: " + link)
|
||||
|
@ -190,8 +199,10 @@ class Parser:
|
|||
row_15.click()
|
||||
time.sleep(10)
|
||||
|
||||
self.apply_offer()
|
||||
if IS_PROD:
|
||||
self.apply_offer()
|
||||
logger.success("Заявка успешно отправлена")
|
||||
time.sleep(10)
|
||||
except Exception as exc:
|
||||
logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")
|
||||
|
||||
|
|
|
@ -10,4 +10,5 @@ xls2xlsx
|
|||
pyexcel
|
||||
pyexcel-xls
|
||||
pyexcel-xlsx
|
||||
loguru
|
||||
loguru
|
||||
pydotenv
|
|
@ -59,6 +59,8 @@ pandas==2.2.0
|
|||
# via -r requirements.in
|
||||
pillow==10.2.0
|
||||
# via xls2xlsx
|
||||
pydotenv==0.0.7
|
||||
# via -r requirements.in
|
||||
pyexcel==0.7.0
|
||||
# via -r requirements.in
|
||||
pyexcel-io==0.6.6
|
||||
|
|
25
storage.py
25
storage.py
|
@ -19,17 +19,23 @@ class Storage:
|
|||
with self.get_cursor() as cur:
|
||||
cur.execute("""
|
||||
CREATE TABLE IF NOT EXISTS ltl (
|
||||
id INTEGER PRIMARY KEY,
|
||||
id INTEGER,
|
||||
doc_filepath TEXT,
|
||||
link TEXT,
|
||||
total_cost INTEGER,
|
||||
query TEXT,
|
||||
answer TEXT
|
||||
answer TEXT,
|
||||
PRIMARY KEY (id, doc_filepath)
|
||||
);
|
||||
|
||||
""")
|
||||
cur.execute("""
|
||||
create unique index if not exists fp_link_index on ltl(doc_filepath, link);
|
||||
create trigger if not exists add_link_trig
|
||||
after insert
|
||||
on ltl
|
||||
begin
|
||||
update ltl set link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id where id=new.id;
|
||||
end;
|
||||
""")
|
||||
|
||||
cur.execute("""
|
||||
|
@ -43,19 +49,12 @@ CREATE TABLE IF NOT EXISTS ltl (
|
|||
where link = new.link;
|
||||
end;
|
||||
""")
|
||||
|
||||
cur.execute("""
|
||||
create table if not exists users (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tg_user_id TEXT
|
||||
);
|
||||
""")
|
||||
self.con.commit()
|
||||
|
||||
def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str):
|
||||
def add_link(self, id: int, doc_filepath: str, total_cost: int | None, query: str | None, answer: str | None):
|
||||
with self.get_cursor() as cur:
|
||||
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
|
||||
(doc_filepath, link, total_cost, query, answer))
|
||||
cur.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
|
||||
(id, doc_filepath, total_cost, query, answer))
|
||||
self.con.commit()
|
||||
|
||||
def get_links(self):
|
||||
|
|
Loading…
Reference in New Issue