complete bot

master
Ernest Litvinenko 2024-02-15 10:56:32 +03:00
parent f69c4377bc
commit 2ba3fb1de0
7 changed files with 270 additions and 85 deletions

BIN
database.db Normal file

Binary file not shown.

View File

@ -1,13 +1,16 @@
import json
import pathlib import pathlib
from sqlite3 import Row import numpy as np
from functools import reduce
import requests import requests
from openpyxl import load_workbook, Workbook from openpyxl import load_workbook
from openpyxl.cell import Cell
from openpyxl.worksheet.worksheet import Worksheet
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
from storage import Storage
API_CALC_URL = "https://api.jde.ru/vD/calculator/PriceAddress" API_CALC_URL = "https://api.jde.ru/vD/calculator/PriceAddress"
TYPE = "1" TYPE = "1"
PICKUP = "1" PICKUP = "1"
@ -17,14 +20,12 @@ TOKEN = "67065749269910593"
class ExcelParser: class ExcelParser:
def __init__(self, path: Path | str): def __init__(self, paths: list[pathlib.Path] | list[str], url: str):
self._wb: Workbook | None = None self.url = url
if isinstance(path, str): self._paths = [pathlib.Path(p) for p in paths]
path = Path(path) self.storage = Storage()
for path in self._paths:
assert path.is_file() is True assert path.is_file() is True, "Файл не найден"
self.convert_to_xlsx(path)
self._sheet: Worksheet = self._wb.active
def convert_to_xlsx(self, path: pathlib.Path): def convert_to_xlsx(self, path: pathlib.Path):
import pyexcel as pe import pyexcel as pe
@ -34,54 +35,94 @@ class ExcelParser:
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx'))) pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
self._wb = load_workbook(str(path.with_suffix('.xlsx'))) self._wb = load_workbook(str(path.with_suffix('.xlsx')))
def clean_up_wb(self) -> pd.DataFrame: @classmethod
triggered = False def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
for row in self._sheet.rows: columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
for cell in row:
cell: Cell df = pd.read_excel(path)
row: Row cond = reduce(lambda x, y: x | y, (df == val for val in columns))
if isinstance(cell.value, str) and cell.value.startswith('№ заявки'):
self._sheet.delete_rows(1, cell.row - 1) for row in df[cond].iterrows():
triggered = True if row[1].any():
df = df[row[0]:]
break break
df: pd.DataFrame = df.loc[:, df.iloc[0].dropna().index]
if triggered: df.columns = df.iloc[0]
break df = df.drop(df.index[0], axis=0)
df = pd.DataFrame(self._sheet.values) not_existed_columns = list(set(columns) - set(df.columns))
not_nullable_cols = df.iloc[0].dropna().index
df = df.iloc[:, not_nullable_cols]
df.columns = df.iloc[0, :]
df: pd.DataFrame = df.drop(0, axis=0)
df.to_excel('./text.xlsx') if len(not_existed_columns) > 0:
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
df = df.loc[:, columns]
return df[['Адрес отгрузки', 'Адрес склада', 'Масса', 'Объем']].to_dict(orient='records')[0] df = df.drop([idx for idx, row in df.iterrows() if row.isna().all()], axis=0)
def calculate(self) -> int | None: isna_values_y = list(set(np.where(df.isna())[1]))
if isna_values_y:
raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
return df
def clean_up_wb(self):
    """Merge the cleaned tables of every source file into one summary row.

    Each path in ``self._paths`` is parsed with ``self._clean_up_wb``. The
    two addresses and the loading date are taken from the first row of the
    combined table, while mass and volume are summed over all rows.

    Returns:
        A one-row ``pd.DataFrame`` with the columns 'Адрес отгрузки',
        'Адрес разгрузки', 'Дата загрузки', 'Масса' and 'Объем'.

    Raises:
        ValueError: if any file yields a table without rows.
    """
    frames: list[pd.DataFrame] = []
    for source in self._paths:
        frame = self._clean_up_wb(source)
        if len(frame) < 1:
            raise ValueError("Требуется ручная обработка, не удалось считать одно из полей.")
        frames.append(frame)

    merged = pd.concat(frames)
    # Renumber rows 0..n-1 so that label 0 is the first row of the first file.
    merged = merged.set_index(pd.RangeIndex(stop=len(merged)))

    summary = {}
    for column in ('Адрес отгрузки', 'Адрес разгрузки', 'Дата загрузки'):
        summary[column] = [merged[column][0]]
    for column in ('Масса', 'Объем'):
        summary[column] = [merged[column].sum()]
    return pd.DataFrame(summary)
def calculate(self) -> dict | None:
df = self.clean_up_wb() df = self.clean_up_wb()
query = { query = {
"type": TYPE, "type": TYPE,
"token": TOKEN, "token": TOKEN,
"delivery": DELIVERY, "delivery": DELIVERY,
"pickup": PICKUP, "pickup": PICKUP,
"user": USER, "user": USER,
"addr_from": df['Адрес отгрузки'], "addr_from": df['Адрес отгрузки'].iloc[0],
"addr_to": df['Адрес склада'], "addr_to": df['Адрес разгрузки'].iloc[0],
"weight": df['Масса'], "weight": float(df['Масса'].iloc[0]) * 1000,
"volume": df['Объем'], "volume": df['Объем'].iloc[0],
"pr_vat": "1"
} }
if query['volume'] is None:
query['volume'] = 0.01
print(query)
data = requests.get(API_CALC_URL, params=query).json() data = requests.get(API_CALC_URL, params=query).json()
print(data)
if data.get('price', None) is not None: if data.get('price', None) is not None:
return int(data['price']) self.add_link_to_database(query, answer=data)
return {"price": int(data['price']),
"vat": int(data['percent_vat']),
"max_days": int(data['maxdays']),
"transport_delivery_date": df["Дата загрузки"]}
return None return None
def add_link_to_database(self, query: dict, answer: dict):
    """Record the calculator request and response for every source file.

    One row is stored through ``self.storage.add_link`` per path in
    ``self._paths``; all rows share ``self.url``, the price and the
    JSON-encoded query and answer.

    Args:
        query: parameters that were sent to the calculator API.
        answer: decoded API response; ``answer["price"]`` must be
            convertible with ``int()``.
    """
    # These values do not depend on the file, so compute them once.
    total_cost = int(answer["price"])
    query_json = json.dumps(query)
    answer_json = json.dumps(answer)
    # A plain loop: the calls are made for their side effect only, so there
    # is no reason to build a list of their return values.
    for file in self._paths:
        self.storage.add_link(str(file.absolute()), self.url, total_cost, query_json, answer_json)
if __name__ == '__main__': if __name__ == '__main__':
parser = ExcelParser('./downloads/1342AWP1A.xls') pass
print(parser.calculate()) # parser = ExcelParser('./downloads/130224-РС.xls')
# print(parser.calculate())
#
# df1 = pd.read_excel('./downloads/Тендер 3574955 КГП165.1.xls')
# df2 = pd.read_excel('./downloads/Тендер 3574955 КГП165.2.xls')
# df3 = pd.read_excel('./downloads/Тендер 3574955 КГП165.xls')

113
main.py
View File

@ -14,6 +14,8 @@ from selenium.common.exceptions import NoSuchElementException, TimeoutException
from excel_parser import ExcelParser from excel_parser import ExcelParser
from storage import Storage from storage import Storage
from telegram_logs import logger
class Parser: class Parser:
keyword = "Велесстрой" keyword = "Велесстрой"
@ -24,7 +26,9 @@ class Parser:
_service: webdriver.ChromeService _service: webdriver.ChromeService
def __init__(self): def __init__(self):
logger.info("Бот запущен")
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())} prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
self._options.add_experimental_option("prefs", prefs) self._options.add_experimental_option("prefs", prefs)
# self._options.add_argument("--disable-extensions") # self._options.add_argument("--disable-extensions")
# self._options.add_argument("--disable-gpu") # self._options.add_argument("--disable-gpu")
@ -39,7 +43,9 @@ class Parser:
return self return self
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
logger.error("Бот остановлен. Причина: " + str(exc_val))
print("Gracefully shutting down...") print("Gracefully shutting down...")
self._driver.close() self._driver.close()
def find_elem(self, by: str, value: str): def find_elem(self, by: str, value: str):
@ -69,20 +75,35 @@ class Parser:
time.sleep(5) time.sleep(5)
password_control.send_keys(Keys.RETURN) password_control.send_keys(Keys.RETURN)
def check_ltl(self, text):
    """Return True when ``text`` contains the letters "ltl" in any letter case."""
    return re.search(r"[Ll][Tt][Ll]", text) is not None
def search(self): def search(self):
time.sleep(5) time.sleep(5)
self._driver.get(self.url + f'/?f_keyword={self.keyword}') self._driver.get(self.url + f'/?f_keyword={self.keyword}')
time.sleep(10) time.sleep(10)
table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody") table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a") links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
for link in links:
href = link.get_attribute("href")
description = link.find_element(By.CSS_SELECTOR, 'div').text
# Check LTL links = [link.get_attribute("href") for link in links if
if len(re.findall(r"[Ll][Tt][Ll]", description)) != 0: self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
self.accept_documentation(href) links = [link for link in links if link not in self.storage.get_links()]
break
for link in links:
logger.info("Обработка заявки: " + link)
try:
self.accept_documentation(link)
except Exception as exc:
logger.error("Не удалось обработать заявку. Подробности: " + str(exc))
def parse(self, url: str | None = None) -> dict | None:
    """Download the tender documentation and run the price calculation on it.

    Args:
        url: tender page URL, forwarded to ``ExcelParser``.

    Returns:
        Whatever ``ExcelParser.calculate()`` returns. A falsy result is
        logged as an error and still handed back unchanged, so callers must
        be prepared for a non-dict value.
    """
    fp = self.download_documentation()
    price = ExcelParser(fp, url).calculate()
    if not price:
        logger.error("Не удалось расcчитать цену, переходим далее")
    return price
def accept_documentation(self, url: str): def accept_documentation(self, url: str):
time.sleep(3) time.sleep(3)
@ -93,29 +114,27 @@ class Parser:
download_documentation_button = self.find_elem(By.CSS_SELECTOR, download_documentation_button = self.find_elem(By.CSS_SELECTOR,
'#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]') '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]')
time.sleep(5) time.sleep(5)
ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0, 100).perform() ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0,
100).perform()
download_documentation_button.click() download_documentation_button.click()
fp = self.download_documentation()
for file in fp: price = self.parse(url)
e_parser = ExcelParser(file) self.send_offer_link(price['price'], nds=price['vat'],
price = e_parser.calculate() delivery_time=price['transport_delivery_date'],
if not price: delivery_range=price['max_days'])
break
self.storage.add_link(str(file.absolute()), url, price)
except NoSuchElementException: except NoSuchElementException:
fp = self.download_documentation() # logger.info("Отсутствует кнопка скачивания документации, переходим к документации")
for file in fp: price = self.parse(url)
e_parser = ExcelParser(file) self.send_offer_link(price['price'], nds=price['vat'],
price = e_parser.calculate() delivery_time=price['transport_delivery_date'],
if not price: delivery_range=price['max_days'])
break
self.storage.add_link(str(file.absolute()), url, price)
def download_documentation(self) -> list[pathlib.Path]: def download_documentation(self) -> list[pathlib.Path]:
all_files_1 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) try:
all_files_1 = set(
pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
time.sleep(5) time.sleep(5)
documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation') documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
@ -127,11 +146,57 @@ class Parser:
self._driver.get(href) self._driver.get(href)
time.sleep(3) time.sleep(3)
all_files_2 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) all_files_2 = set(
pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
fp = all_files_2 - all_files_1 fp = all_files_2 - all_files_1
logger.debug(fp)
return [file for file in fp] return [file for file in fp]
except Exception as exc:
logger.info(f"Не удалось скачать документацию. Подробности: {type(exc)} {str(exc)}")
raise KeyboardInterrupt()
def send_offer_link(self, price: int, nds: int, delivery_range: str, delivery_time: str):
    """Open the offer form on the current page, fill it in and submit it.

    Args:
        price: offer price, typed into the ``price_id`` field.
        nds: VAT percent; ``0`` clicks the ``price_lot_no_tax`` control
            instead of typing a value.
        delivery_range: delivery duration, typed as "<value> дней".
        delivery_time: vehicle arrival date, typed as given.

    Any exception raised while driving the page is logged and suppressed.
    """
    def scroll_to(element):
        # Scroll the element into view and then a further 100 px down.
        ActionChains(self._driver).scroll_to_element(element).scroll_by_amount(0, 100).perform()

    try:
        logger.info(
            f"Предварительные данные по заявке: Цена: {price}, НДС: {nds}%, Доставка: {delivery_range} дн., Подача машины {delivery_time}")

        open_form = self.find_elem(By.ID, "send_offer_link")
        scroll_to(open_form)
        open_form.click()

        self.find_elem(By.ID, 'price_id').send_keys(str(price))

        # The VAT input is located before branching, as in the original flow.
        vat_input = self.find_elem(By.CSS_SELECTOR, 'input[name="AUCTION_OFFER[price_vat]"]')
        if nds != 0:
            vat_input.send_keys(str(nds))
        else:
            self.find_elem(By.ID, 'price_lot_no_tax').click()

        # All form rows are looked up first, then filled in.
        range_area = self.find_elem(By.CSS_SELECTOR, "tr#row_id_11 > td:nth-child(2) > textarea")
        date_area = self.find_elem(By.CSS_SELECTOR, "tr#row_id_12 > td:nth-child(2) > textarea")
        note_area = self.find_elem(By.CSS_SELECTOR, "tr#row_id_13 > td:nth-child(2) > textarea")
        submit_button = self.find_elem(By.CSS_SELECTOR, "tr#row_id_15 > td:nth-child(2) > input[type=submit]")

        range_area.send_keys(str(delivery_range) + " дней")
        date_area.send_keys(delivery_time)
        note_area.send_keys(
            "Дата подачи транспорта предварительная, согласовывается по звонку оператора ЖДЭ клиенту-отправителю")

        time.sleep(10)
        scroll_to(submit_button)
        submit_button.click()
        time.sleep(10)

        self.apply_offer()
        logger.success("Заявка успешно отправлена")
    except Exception as exc:
        logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")
def apply_offer(self):
    """Click the button inside the control table of the AUCTION_OFFER form."""
    self.find_elem(By.CSS_SELECTOR, "form[name=AUCTION_OFFER] table.form-control_table button").click()
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -10,3 +10,4 @@ xls2xlsx
pyexcel pyexcel
pyexcel-xls pyexcel-xls
pyexcel-xlsx pyexcel-xlsx
loguru

View File

@ -42,6 +42,8 @@ lml==0.1.0
# via # via
# pyexcel # pyexcel
# pyexcel-io # pyexcel-io
loguru==0.7.2
# via -r requirements.in
numpy==1.26.3 numpy==1.26.3
# via pandas # via pandas
openpyxl==3.1.2 openpyxl==3.1.2

View File

@ -22,15 +22,47 @@ class Storage:
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
doc_filepath TEXT, doc_filepath TEXT,
link TEXT, link TEXT,
total_cost INTEGER total_cost INTEGER,
);""") query TEXT,
answer TEXT
);
def add_link(self, doc_filepath: str, link: str, total_cost: int): """)
with self.get_cursor() as cur: cur.execute("""
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost) VALUES (?, ?, ?)", create unique index if not exists fp_link_index on ltl(doc_filepath, link);
(doc_filepath, link, total_cost)) """)
cur.execute("""
create trigger if not exists query_answer_trig
after insert
on ltl
begin
update ltl set total_cost = new.total_cost,
query = new.query,
answer = new.answer
where link = new.link;
end;
""")
cur.execute("""
create table if not exists users (
id INTEGER PRIMARY KEY,
tg_user_id TEXT
);
""")
self.con.commit() self.con.commit()
def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str):
    """Insert one row into the ``ltl`` table and commit the connection.

    Args:
        doc_filepath: path of the processed document.
        link: tender link the document belongs to.
        total_cost: calculated price.
        query: request that was sent to the calculator, as text.
        answer: response received from the calculator, as text.
    """
    statement = "INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)"
    values = (doc_filepath, link, total_cost, query, answer)
    with self.get_cursor() as cursor:
        cursor.execute(statement, values)
    self.con.commit()
def get_links(self):
    """Return the distinct ``link`` values stored in the ``ltl`` table as a list."""
    with self.get_cursor() as cursor:
        rows = cursor.execute("SELECT DISTINCT link FROM ltl").fetchall()
    # Each row has a single column; unwrap it.
    return [record[0] for record in rows]
def is_link_exists(self, link: str) -> bool: def is_link_exists(self, link: str) -> bool:
with self.get_cursor() as cur: with self.get_cursor() as cur:
res = cur.execute("SELECT * FROM ltl WHERE link = ?", (link,)) res = cur.execute("SELECT * FROM ltl WHERE link = ?", (link,))
@ -40,3 +72,13 @@ class Storage:
with self.get_cursor() as cur: with self.get_cursor() as cur:
res = cur.execute("SELECT * FROM ltl WHERE doc_filepath = ?", (doc_filepath,)) res = cur.execute("SELECT * FROM ltl WHERE doc_filepath = ?", (doc_filepath,))
return res.fetchone() return res.fetchone()
def get_users(self):
    """Return the distinct ``tg_user_id`` values stored in the ``users`` table as a list."""
    with self.get_cursor() as cursor:
        rows = cursor.execute("SELECT DISTINCT tg_user_id FROM users").fetchall()
    # Each row has a single column; unwrap it.
    return [record[0] for record in rows]
if __name__ == "__main__":
s = Storage()
print(s.get_users())

34
telegram_logs.py Normal file
View File

@ -0,0 +1,34 @@
import os
import sys

import requests
from loguru import logger

from storage import Storage
# Storage handle used by the Telegram sink to look up the recipients.
s = Storage()
# SECURITY: a bot token must not live in source control. The value is taken
# from the TELEGRAM_BOT_TOKEN environment variable when it is set; the literal
# is kept only as a backward-compatible fallback and should be revoked and
# removed, since it has already been committed.
_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '6767909836:AAFpsqtWeBNIBgSSi2_19rltEHOF0mrvTg0')
# Base of the Bot API URL; the token and the method name are appended to it.
_URL = 'https://api.telegram.org/bot'
def _log(message):
    """Loguru sink: send a formatted log message to every stored Telegram user.

    Args:
        message: the formatted message passed by loguru; its ``record``
            mapping is read to pick a prefix based on the level name.

    Delivery problems are written to stderr rather than logged through
    ``logger``: this function is itself registered as the ERROR handler, so
    logging an error from here would re-enter the sink.
    """
    level_name = message.record.get('level').name
    # NOTE(review): the ERROR and SUCCESS prefixes are empty strings here,
    # possibly emoji lost somewhere along the way; confirm the intended values.
    if level_name == "ERROR":
        icon = ""
    elif level_name == "SUCCESS":
        icon = ""
    else:
        icon = " "

    endpoint = "{0}{1}/sendMessage".format(_URL, _TOKEN)
    for chat_id in s.get_users():
        try:
            # The timeout keeps an unreachable API from blocking the caller forever.
            r = requests.post(endpoint, {
                "chat_id": int(chat_id),
                "disable_notification": True,
                "text": icon + message
            }, timeout=10)
        except requests.RequestException as exc:
            # One failed recipient must not prevent delivery to the others.
            print("Failed to send message: {0}".format(exc), file=sys.stderr)
            continue
        if r.status_code >= 400:
            print("Failed to send message: {0} {1}".format(r.status_code, r.text), file=sys.stderr)
def _filter_info_only(record):
return record.get('level').name == "INFO" or record.get('level').name == "SUCCESS"
logger.add(_log, format="{message}", filter=_filter_info_only)
logger.add(_log, format=" {message}", filter=lambda record: record.get('level').name == "ERROR")