complete bot
parent
f69c4377bc
commit
2ba3fb1de0
Binary file not shown.
125
excel_parser.py
125
excel_parser.py
|
@ -1,13 +1,16 @@
|
|||
import json
|
||||
import pathlib
|
||||
from sqlite3 import Row
|
||||
import numpy as np
|
||||
from functools import reduce
|
||||
|
||||
import requests
|
||||
from openpyxl import load_workbook, Workbook
|
||||
from openpyxl.cell import Cell
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
from storage import Storage
|
||||
|
||||
API_CALC_URL = "https://api.jde.ru/vD/calculator/PriceAddress"
|
||||
TYPE = "1"
|
||||
PICKUP = "1"
|
||||
|
@ -17,14 +20,12 @@ TOKEN = "67065749269910593"
|
|||
|
||||
|
||||
class ExcelParser:
|
||||
def __init__(self, path: Path | str):
|
||||
self._wb: Workbook | None = None
|
||||
if isinstance(path, str):
|
||||
path = Path(path)
|
||||
|
||||
assert path.is_file() is True
|
||||
self.convert_to_xlsx(path)
|
||||
self._sheet: Worksheet = self._wb.active
|
||||
def __init__(self, paths: list[pathlib.Path] | list[str], url: str):
    """Remember the documents of one request and the link they belong to.

    Args:
        paths: Spreadsheet files of the request; strings are converted to
            ``pathlib.Path``.
        url: Link of the request; stored next to every file by
            ``add_link_to_database``.

    Raises:
        FileNotFoundError: If one of ``paths`` is not an existing file.
    """
    self.url = url
    self._paths = [pathlib.Path(p) for p in paths]
    self.storage = Storage()
    for path in self._paths:
        # Raised explicitly instead of asserted: ``assert`` statements are
        # removed under ``python -O``, which would silently drop the check.
        if not path.is_file():
            raise FileNotFoundError(f"Файл не найден: {path}")
|
||||
|
||||
def convert_to_xlsx(self, path: pathlib.Path):
|
||||
import pyexcel as pe
|
||||
|
@ -34,54 +35,94 @@ class ExcelParser:
|
|||
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
|
||||
self._wb = load_workbook(str(path.with_suffix('.xlsx')))
|
||||
|
||||
def clean_up_wb(self) -> pd.DataFrame:
|
||||
triggered = False
|
||||
for row in self._sheet.rows:
|
||||
for cell in row:
|
||||
cell: Cell
|
||||
row: Row
|
||||
if isinstance(cell.value, str) and cell.value.startswith('№ заявки'):
|
||||
self._sheet.delete_rows(1, cell.row - 1)
|
||||
triggered = True
|
||||
@classmethod
|
||||
def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
|
||||
columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
|
||||
|
||||
df = pd.read_excel(path)
|
||||
cond = reduce(lambda x, y: x | y, (df == val for val in columns))
|
||||
|
||||
for row in df[cond].iterrows():
|
||||
if row[1].any():
|
||||
df = df[row[0]:]
|
||||
break
|
||||
df: pd.DataFrame = df.loc[:, df.iloc[0].dropna().index]
|
||||
|
||||
if triggered:
|
||||
break
|
||||
df.columns = df.iloc[0]
|
||||
df = df.drop(df.index[0], axis=0)
|
||||
|
||||
df = pd.DataFrame(self._sheet.values)
|
||||
not_nullable_cols = df.iloc[0].dropna().index
|
||||
df = df.iloc[:, not_nullable_cols]
|
||||
df.columns = df.iloc[0, :]
|
||||
df: pd.DataFrame = df.drop(0, axis=0)
|
||||
not_existed_columns = list(set(columns) - set(df.columns))
|
||||
|
||||
df.to_excel('./text.xlsx')
|
||||
if len(not_existed_columns) > 0:
|
||||
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
|
||||
df = df.loc[:, columns]
|
||||
|
||||
return df[['Адрес отгрузки', 'Адрес склада', 'Масса', 'Объем']].to_dict(orient='records')[0]
|
||||
df = df.drop([idx for idx, row in df.iterrows() if row.isna().all()], axis=0)
|
||||
|
||||
def calculate(self) -> int | None:
|
||||
isna_values_y = list(set(np.where(df.isna())[1]))
|
||||
|
||||
if isna_values_y:
|
||||
raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
|
||||
|
||||
return df
|
||||
|
||||
def clean_up_wb(self) -> pd.DataFrame:
    """Merge all documents of the request into a single-row frame.

    Every path in ``self._paths`` is parsed with ``self._clean_up_wb``.
    Both addresses and the loading date are taken from the first parsed
    row; ``Масса`` and ``Объем`` are summed over all rows of all files.

    Returns:
        A one-row ``DataFrame`` with the columns ``Адрес отгрузки``,
        ``Адрес разгрузки``, ``Дата загрузки``, ``Масса`` and ``Объем``.

    Raises:
        ValueError: If there are no documents at all, or if one of them
            yields no data rows.
    """
    if not self._paths:
        # Without this guard pd.concat([]) fails with the unrelated
        # "No objects to concatenate" message.
        raise ValueError("Нет документов для обработки.")

    frames: list[pd.DataFrame] = []
    for path in self._paths:
        frame = self._clean_up_wb(path)
        if len(frame) < 1:
            raise ValueError("Требуется ручная обработка, не удалось считать одно из полей.")
        frames.append(frame)

    # ignore_index renumbers the rows 0..n-1 across all files.
    merged = pd.concat(frames, ignore_index=True)
    return pd.DataFrame({
        'Адрес отгрузки': [merged['Адрес отгрузки'].iloc[0]],
        'Адрес разгрузки': [merged['Адрес разгрузки'].iloc[0]],
        'Дата загрузки': [merged['Дата загрузки'].iloc[0]],
        'Масса': [merged['Масса'].sum()],
        'Объем': [merged['Объем'].sum()],
    })
|
||||
|
||||
def calculate(self) -> dict | None:
    """Ask the price calculator for a quote for this request.

    Builds the query from the single row returned by ``clean_up_wb``,
    sends it to ``API_CALC_URL`` and, when the answer carries a price,
    records the exchange through ``add_link_to_database``.

    Returns:
        ``None`` when the answer has no ``price``; otherwise a dict with
        ``price``, ``vat`` and ``max_days`` (all ``int``) plus
        ``transport_delivery_date``, the loading date of the first row.
    """
    df = self.clean_up_wb()

    volume = df['Объем'].iloc[0]
    if pd.isna(volume):
        # pandas reports a missing cell as NaN/NA, never as None, so an
        # ``is None`` test could not trigger this fallback.
        volume = 0.01

    query = {
        "type": TYPE,
        "token": TOKEN,
        "delivery": DELIVERY,
        "pickup": PICKUP,
        "user": USER,
        "addr_from": df['Адрес отгрузки'].iloc[0],
        "addr_to": df['Адрес разгрузки'].iloc[0],
        # presumably converts tonnes to kilograms — TODO confirm with the API
        "weight": float(df['Масса'].iloc[0]) * 1000,
        "volume": volume,
        "pr_vat": "1"
    }
    print(query)

    # A timeout keeps a stalled connection from blocking the caller forever.
    data = requests.get(API_CALC_URL, params=query, timeout=30).json()
    print(data)

    if data.get('price', None) is not None:
        self.add_link_to_database(query, answer=data)
        return {"price": int(data['price']),
                "vat": int(data['percent_vat']),
                "max_days": int(data['maxdays']),
                # a single value, not the whole column
                "transport_delivery_date": df["Дата загрузки"].iloc[0]}
    return None
|
||||
|
||||
def add_link_to_database(self, query: dict, answer: dict) -> None:
    """Store the calculator exchange once for every document of the request.

    Args:
        query: Parameters that were sent to the calculator; saved as JSON.
        answer: Calculator response; saved as JSON. Its ``price`` entry is
            additionally stored as an ``int``.
    """
    # These values are the same for every file, so compute them once.
    price = int(answer["price"])
    query_json = json.dumps(query)
    answer_json = json.dumps(answer)

    for file in self._paths:
        self.storage.add_link(
            str(file.absolute()),
            self.url,
            price,
            query_json,
            answer_json,
        )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = ExcelParser('./downloads/1342AWP1A.xls')
|
||||
print(parser.calculate())
|
||||
pass
|
||||
# parser = ExcelParser('./downloads/130224-РС.xls')
|
||||
# print(parser.calculate())
|
||||
#
|
||||
# df1 = pd.read_excel('./downloads/Тендер 3574955 КГП165.1.xls')
|
||||
# df2 = pd.read_excel('./downloads/Тендер 3574955 КГП165.2.xls')
|
||||
# df3 = pd.read_excel('./downloads/Тендер 3574955 КГП165.xls')
|
||||
|
|
113
main.py
113
main.py
|
@ -14,6 +14,8 @@ from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
|||
from excel_parser import ExcelParser
|
||||
from storage import Storage
|
||||
|
||||
from telegram_logs import logger
|
||||
|
||||
|
||||
class Parser:
|
||||
keyword = "Велесстрой"
|
||||
|
@ -24,7 +26,9 @@ class Parser:
|
|||
_service: webdriver.ChromeService
|
||||
|
||||
def __init__(self):
|
||||
logger.info("Бот запущен")
|
||||
prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
|
||||
|
||||
self._options.add_experimental_option("prefs", prefs)
|
||||
# self._options.add_argument("--disable-extensions")
|
||||
# self._options.add_argument("--disable-gpu")
|
||||
|
@ -39,7 +43,9 @@ class Parser:
|
|||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
logger.error("Бот остановлен. Причина: " + str(exc_val))
|
||||
print("Gracefully shutting down...")
|
||||
|
||||
self._driver.close()
|
||||
|
||||
def find_elem(self, by: str, value: str):
|
||||
|
@ -69,20 +75,35 @@ class Parser:
|
|||
time.sleep(5)
|
||||
password_control.send_keys(Keys.RETURN)
|
||||
|
||||
def check_ltl(self, text: str) -> bool:
    """Return True when ``text`` contains "LTL" in any letter case."""
    # search() stops at the first hit instead of collecting every match.
    return re.search(r"[Ll][Tt][Ll]", text) is not None
|
||||
|
||||
def search(self):
|
||||
time.sleep(5)
|
||||
self._driver.get(self.url + f'/?f_keyword={self.keyword}')
|
||||
time.sleep(10)
|
||||
table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody")
|
||||
links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a")
|
||||
for link in links:
|
||||
href = link.get_attribute("href")
|
||||
description = link.find_element(By.CSS_SELECTOR, 'div').text
|
||||
|
||||
# Check LTL
|
||||
if len(re.findall(r"[Ll][Tt][Ll]", description)) != 0:
|
||||
self.accept_documentation(href)
|
||||
break
|
||||
links = [link.get_attribute("href") for link in links if
|
||||
self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
|
||||
links = [link for link in links if link not in self.storage.get_links()]
|
||||
|
||||
for link in links:
|
||||
logger.info("Обработка заявки: " + link)
|
||||
try:
|
||||
self.accept_documentation(link)
|
||||
except Exception as exc:
|
||||
logger.error("Не удалось обработать заявку. Подробности: " + str(exc))
|
||||
|
||||
def parse(self, url: str | None = None) -> dict | None:
    """Download the request documents and calculate a quote from them.

    Args:
        url: Link of the request; handed to ``ExcelParser`` together with
            the downloaded files.

    Returns:
        Whatever ``ExcelParser.calculate`` returns: the quote, or a falsy
        value when no price could be calculated (an error is logged then).
    """
    files = self.download_documentation()
    quote = ExcelParser(files, url).calculate()
    if not quote:
        logger.error("Не удалось расcчитать цену, переходим далее")
    return quote
|
||||
|
||||
def accept_documentation(self, url: str):
|
||||
time.sleep(3)
|
||||
|
@ -93,29 +114,27 @@ class Parser:
|
|||
download_documentation_button = self.find_elem(By.CSS_SELECTOR,
|
||||
'#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]')
|
||||
time.sleep(5)
|
||||
ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0, 100).perform()
|
||||
ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0,
|
||||
100).perform()
|
||||
download_documentation_button.click()
|
||||
fp = self.download_documentation()
|
||||
|
||||
for file in fp:
|
||||
e_parser = ExcelParser(file)
|
||||
price = e_parser.calculate()
|
||||
if not price:
|
||||
break
|
||||
self.storage.add_link(str(file.absolute()), url, price)
|
||||
price = self.parse(url)
|
||||
self.send_offer_link(price['price'], nds=price['vat'],
|
||||
delivery_time=price['transport_delivery_date'],
|
||||
delivery_range=price['max_days'])
|
||||
|
||||
except NoSuchElementException:
|
||||
fp = self.download_documentation()
|
||||
# logger.info("Отсутствует кнопка скачивания документации, переходим к документации")
|
||||
|
||||
for file in fp:
|
||||
e_parser = ExcelParser(file)
|
||||
price = e_parser.calculate()
|
||||
if not price:
|
||||
break
|
||||
self.storage.add_link(str(file.absolute()), url, price)
|
||||
price = self.parse(url)
|
||||
self.send_offer_link(price['price'], nds=price['vat'],
|
||||
delivery_time=price['transport_delivery_date'],
|
||||
delivery_range=price['max_days'])
|
||||
|
||||
def download_documentation(self) -> list[pathlib.Path]:
|
||||
all_files_1 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
|
||||
try:
|
||||
all_files_1 = set(
|
||||
pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
|
||||
|
||||
time.sleep(5)
|
||||
documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation')
|
||||
|
@ -127,11 +146,57 @@ class Parser:
|
|||
self._driver.get(href)
|
||||
time.sleep(3)
|
||||
|
||||
all_files_2 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
|
||||
all_files_2 = set(
|
||||
pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2])
|
||||
|
||||
fp = all_files_2 - all_files_1
|
||||
logger.debug(fp)
|
||||
|
||||
return [file for file in fp]
|
||||
except Exception as exc:
|
||||
logger.info(f"Не удалось скачать документацию. Подробности: {type(exc)} {str(exc)}")
|
||||
raise KeyboardInterrupt()
|
||||
|
||||
def send_offer_link(self, price: int, nds: int, delivery_range: int | str, delivery_time: object) -> None:
    """Open the offer form, fill it in from the quote and submit it.

    Any exception raised on the way is logged and swallowed, so a failed
    submission does not propagate to the caller.

    Args:
        price: Value typed into the ``price_id`` field.
        nds: VAT percentage; ``0`` clicks ``price_lot_no_tax`` instead of
            typing a value.
        delivery_range: Number of days, typed with the suffix " дней".
        delivery_time: Vehicle arrival date; converted with ``str`` before
            it is typed.
    """
    try:
        logger.info(
            f"Предварительные данные по заявке: Цена: {price}, НДС: {nds}%, Доставка: {delivery_range} дн., Подача машины {delivery_time}")
        offer_link = self.find_elem(By.ID, "send_offer_link")
        ActionChains(self._driver).scroll_to_element(offer_link).scroll_by_amount(0, 100).perform()
        offer_link.click()

        price_id = self.find_elem(By.ID, 'price_id')
        price_id.send_keys(str(price))
        nds_elem = self.find_elem(By.CSS_SELECTOR, 'input[name="AUCTION_OFFER[price_vat]"]')

        if nds == 0:
            self.find_elem(By.ID, 'price_lot_no_tax').click()
        else:
            nds_elem.send_keys(str(nds))

        row_11 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_11 > td:nth-child(2) > textarea")
        row_12 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_12 > td:nth-child(2) > textarea")
        row_13 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_13 > td:nth-child(2) > textarea")
        row_15 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_15 > td:nth-child(2) > input[type=submit]")

        row_11.send_keys(str(delivery_range) + " дней")
        # Callers pass the value taken from the parsed spreadsheet, which
        # is not guaranteed to be a str; send_keys() needs text.
        row_12.send_keys(str(delivery_time))
        row_13.send_keys(
            "Дата подачи транспорта предварительная, согласовывается по звонку оператора ЖДЭ клиенту-отправителю")
        time.sleep(10)

        ActionChains(self._driver).scroll_to_element(row_15).scroll_by_amount(0, 100).perform()
        row_15.click()
        time.sleep(10)

        self.apply_offer()
        logger.success("Заявка успешно отправлена")
    except Exception as exc:
        # Deliberate best effort: report the failure and carry on.
        logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")
|
||||
|
||||
def apply_offer(self):
    """Click the button in the control table of the AUCTION_OFFER form."""
    self.find_elem(
        By.CSS_SELECTOR,
        "form[name=AUCTION_OFFER] table.form-control_table button",
    ).click()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -10,3 +10,4 @@ xls2xlsx
|
|||
pyexcel
|
||||
pyexcel-xls
|
||||
pyexcel-xlsx
|
||||
loguru
|
|
@ -42,6 +42,8 @@ lml==0.1.0
|
|||
# via
|
||||
# pyexcel
|
||||
# pyexcel-io
|
||||
loguru==0.7.2
|
||||
# via -r requirements.in
|
||||
numpy==1.26.3
|
||||
# via pandas
|
||||
openpyxl==3.1.2
|
||||
|
|
56
storage.py
56
storage.py
|
@ -18,19 +18,51 @@ class Storage:
|
|||
def create_tables(self):
|
||||
with self.get_cursor() as cur:
|
||||
cur.execute("""
|
||||
CREATE TABLE IF NOT EXISTS ltl (
|
||||
CREATE TABLE IF NOT EXISTS ltl (
|
||||
id INTEGER PRIMARY KEY,
|
||||
doc_filepath TEXT,
|
||||
link TEXT,
|
||||
total_cost INTEGER
|
||||
);""")
|
||||
total_cost INTEGER,
|
||||
query TEXT,
|
||||
answer TEXT
|
||||
);
|
||||
|
||||
def add_link(self, doc_filepath: str, link: str, total_cost: int):
|
||||
with self.get_cursor() as cur:
|
||||
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost) VALUES (?, ?, ?)",
|
||||
(doc_filepath, link, total_cost))
|
||||
""")
|
||||
cur.execute("""
|
||||
create unique index if not exists fp_link_index on ltl(doc_filepath, link);
|
||||
""")
|
||||
|
||||
cur.execute("""
|
||||
create trigger if not exists query_answer_trig
|
||||
after insert
|
||||
on ltl
|
||||
begin
|
||||
update ltl set total_cost = new.total_cost,
|
||||
query = new.query,
|
||||
answer = new.answer
|
||||
where link = new.link;
|
||||
end;
|
||||
""")
|
||||
|
||||
cur.execute("""
|
||||
create table if not exists users (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tg_user_id TEXT
|
||||
);
|
||||
""")
|
||||
self.con.commit()
|
||||
|
||||
def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str):
|
||||
with self.get_cursor() as cur:
|
||||
cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
|
||||
(doc_filepath, link, total_cost, query, answer))
|
||||
self.con.commit()
|
||||
|
||||
def get_links(self):
    """Return every distinct ``link`` value stored in the ``ltl`` table."""
    with self.get_cursor() as cur:
        rows = cur.execute("SELECT DISTINCT link FROM ltl").fetchall()
        return [row[0] for row in rows]
|
||||
|
||||
def is_link_exists(self, link: str) -> bool:
|
||||
with self.get_cursor() as cur:
|
||||
res = cur.execute("SELECT * FROM ltl WHERE link = ?", (link,))
|
||||
|
@ -40,3 +72,13 @@ class Storage:
|
|||
with self.get_cursor() as cur:
|
||||
res = cur.execute("SELECT * FROM ltl WHERE doc_filepath = ?", (doc_filepath,))
|
||||
return res.fetchone()
|
||||
|
||||
def get_users(self):
    """Return every distinct ``tg_user_id`` stored in the ``users`` table."""
    with self.get_cursor() as cur:
        rows = cur.execute("SELECT DISTINCT tg_user_id FROM users").fetchall()
        return [row[0] for row in rows]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
s = Storage()
|
||||
print(s.get_users())
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
import requests
|
||||
from loguru import logger
|
||||
from storage import Storage
|
||||
|
||||
s = Storage()
|
||||
|
||||
_TOKEN = '6767909836:AAFpsqtWeBNIBgSSi2_19rltEHOF0mrvTg0'
|
||||
_URL = 'https://api.telegram.org/bot'
|
||||
|
||||
|
||||
def _log(message):
    """Loguru sink: forward one log message to every stored Telegram user.

    The message is prefixed with an icon chosen from the record's level
    (ERROR, SUCCESS, anything else) and posted to ``sendMessage`` once per
    id returned by ``s.get_users()``.

    Args:
        message: The formatted message handed over by loguru; its
            ``record`` mapping supplies the level.
    """
    import sys  # local import: only needed for the failure report below

    level_name = message.record["level"].name
    icon = {"ERROR": "❌ ", "SUCCESS": "✅ "}.get(level_name, "ℹ️ ")

    for chat_id in s.get_users():
        r = requests.post(
            "{0}{1}/sendMessage".format(_URL, _TOKEN),
            {
                "chat_id": int(chat_id),
                "disable_notification": True,
                "text": icon + message
            },
            timeout=10,  # keep a stalled request from blocking logging
        )

        if r.status_code >= 400:
            # Reported on stderr rather than through ``logger``: this function
            # is itself registered as a sink for ERROR records, so logging an
            # error from here would re-enter the sink.
            print("Failed to send message: {0} {1}".format(r.status_code, r.text), file=sys.stderr)
|
||||
|
||||
|
||||
def _filter_info_only(record):
    """Loguru filter: accept only records of level INFO or SUCCESS."""
    return record["level"].name in ("INFO", "SUCCESS")
|
||||
|
||||
|
||||
logger.add(_log, format="{message}", filter=_filter_info_only)
|
||||
logger.add(_log, format=" {message}", filter=lambda record: record.get('level').name == "ERROR")
|
Loading…
Reference in New Issue