diff --git a/database.db b/database.db new file mode 100644 index 0000000..030f06f Binary files /dev/null and b/database.db differ diff --git a/excel_parser.py b/excel_parser.py index 593dcee..9d81ba1 100644 --- a/excel_parser.py +++ b/excel_parser.py @@ -1,13 +1,16 @@ +import json import pathlib -from sqlite3 import Row +import numpy as np +from functools import reduce import requests -from openpyxl import load_workbook, Workbook -from openpyxl.cell import Cell -from openpyxl.worksheet.worksheet import Worksheet +from openpyxl import load_workbook + from pathlib import Path import pandas as pd +from storage import Storage + API_CALC_URL = "https://api.jde.ru/vD/calculator/PriceAddress" TYPE = "1" PICKUP = "1" @@ -17,14 +20,12 @@ TOKEN = "67065749269910593" class ExcelParser: - def __init__(self, path: Path | str): - self._wb: Workbook | None = None - if isinstance(path, str): - path = Path(path) - - assert path.is_file() is True - self.convert_to_xlsx(path) - self._sheet: Worksheet = self._wb.active + def __init__(self, paths: list[pathlib.Path] | list[str], url: str): + self.url = url + self._paths = [pathlib.Path(p) for p in paths] + self.storage = Storage() + for path in self._paths: + assert path.is_file() is True, "Файл не найден" def convert_to_xlsx(self, path: pathlib.Path): import pyexcel as pe @@ -34,54 +35,94 @@ class ExcelParser: pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx'))) self._wb = load_workbook(str(path.with_suffix('.xlsx'))) - def clean_up_wb(self) -> pd.DataFrame: - triggered = False - for row in self._sheet.rows: - for cell in row: - cell: Cell - row: Row - if isinstance(cell.value, str) and cell.value.startswith('№ заявки'): - self._sheet.delete_rows(1, cell.row - 1) - triggered = True - break + @classmethod + def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame: + columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"] - if triggered: + df = pd.read_excel(path) + cond = reduce(lambda x, y: x | y, (df == val for val in columns)) + + for row in df[cond].iterrows(): + if row[1].any(): + df = df[row[0]:] break + df: pd.DataFrame = df.loc[:, df.iloc[0].dropna().index] - df = pd.DataFrame(self._sheet.values) - not_nullable_cols = df.iloc[0].dropna().index - df = df.iloc[:, not_nullable_cols] - df.columns = df.iloc[0, :] - df: pd.DataFrame = df.drop(0, axis=0) + df.columns = df.iloc[0] + df = df.drop(df.index[0], axis=0) - df.to_excel('./text.xlsx') + not_existed_columns = list(set(columns) - set(df.columns)) - return df[['Адрес отгрузки', 'Адрес склада', 'Масса', 'Объем']].to_dict(orient='records')[0] + if len(not_existed_columns) > 0: + raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}") + df = df.loc[:, columns] - def calculate(self) -> int | None: + df = df.drop([idx for idx, row in df.iterrows() if row.isna().all()], axis=0) + + isna_values_y = list(set(np.where(df.isna())[1])) + + if isna_values_y: + raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}") + + return df + + def clean_up_wb(self): + dfs: list[pd.DataFrame] = [] + for path in self._paths: + df = self._clean_up_wb(path) + if len(df) < 1: + raise ValueError("Требуется ручная обработка, не удалось считать одно из полей.") + dfs.append(df) + concat = pd.concat(dfs) + concat = concat.set_index(pd.RangeIndex(stop=len(concat))) + return pd.DataFrame({ + 'Адрес отгрузки': [concat['Адрес отгрузки'][0]], + 'Адрес разгрузки': [concat['Адрес разгрузки'][0]], + 'Дата загрузки': [concat['Дата загрузки'][0]], + 'Масса': [concat['Масса'].sum()], + 'Объем': [concat['Объем'].sum()]}) + + def calculate(self) -> dict | None: df = self.clean_up_wb() + query = { "type": TYPE, "token": TOKEN, "delivery": DELIVERY, "pickup": PICKUP, "user": USER, - "addr_from": df['Адрес отгрузки'], - "addr_to": df['Адрес склада'], - "weight": df['Масса'], - "volume": df['Объем'], + "addr_from": df['Адрес отгрузки'].iloc[0], + "addr_to": df['Адрес разгрузки'].iloc[0], + "weight": float(df['Масса'].iloc[0]) * 1000, + "volume": df['Объем'].iloc[0], + "pr_vat": "1" } - if query['volume'] is None: - query['volume'] = 0.01 - print(query) + data = requests.get(API_CALC_URL, params=query).json() - print(data) if data.get('price', None) is not None: - return int(data['price']) + self.add_link_to_database(query, answer=data) + return {"price": int(data['price']), + "vat": int(data['percent_vat']), + "max_days": int(data['maxdays']), + "transport_delivery_date": df["Дата загрузки"]} return None + def add_link_to_database(self, query: dict, answer: dict): + [self.storage.add_link( + str(file.absolute()), + self.url, + int(answer["price"]), + json.dumps(query), + json.dumps(answer)) for file in self._paths + ] + if __name__ == '__main__': - parser = ExcelParser('./downloads/1342AWP1A.xls') - print(parser.calculate()) + pass + # parser = ExcelParser('./downloads/130224-РС.xls') + # print(parser.calculate()) + # + # df1 = pd.read_excel('./downloads/Тендер 3574955 КГП165.1.xls') + # df2 = pd.read_excel('./downloads/Тендер 3574955 КГП165.2.xls') + # df3 = pd.read_excel('./downloads/Тендер 3574955 КГП165.xls') diff --git a/main.py b/main.py index e3830e4..317b468 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,8 @@ from selenium.common.exceptions import NoSuchElementException, TimeoutException from excel_parser import ExcelParser from storage import Storage +from telegram_logs import logger + class Parser: keyword = "Велесстрой" @@ -24,7 +26,9 @@ class Parser: _service: webdriver.ChromeService def __init__(self): + logger.info("Бот запущен") prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())} + self._options.add_experimental_option("prefs", prefs) # self._options.add_argument("--disable-extensions") # self._options.add_argument("--disable-gpu") @@ -39,7 +43,9 @@ class Parser: return self def __exit__(self, exc_type, exc_val, exc_tb): + logger.error("Бот остановлен. Причина: " + str(exc_val)) print("Gracefully shutting down...") + self._driver.close() def find_elem(self, by: str, value: str): @@ -69,20 +75,35 @@ class Parser: time.sleep(5) password_control.send_keys(Keys.RETURN) + def check_ltl(self, text): + return len(re.findall(r"[Ll][Tt][Ll]", text)) != 0 + def search(self): time.sleep(5) self._driver.get(self.url + f'/?f_keyword={self.keyword}') time.sleep(10) table = self.find_elem(By.CSS_SELECTOR, ".search-results > tbody") links = table.find_elements(By.CSS_SELECTOR, "tr > td:nth-child(1) > a") - for link in links: - href = link.get_attribute("href") - description = link.find_element(By.CSS_SELECTOR, 'div').text - # Check LTL - if len(re.findall(r"[Ll][Tt][Ll]", description)) != 0: - self.accept_documentation(href) - break + links = [link.get_attribute("href") for link in links if + self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)] + links = [link for link in links if link not in self.storage.get_links()] + + for link in links: + logger.info("Обработка заявки: " + link) + try: + self.accept_documentation(link) + except Exception as exc: + logger.error("Не удалось обработать заявку. Подробности: " + str(exc)) + + def parse(self, url: str = None) -> dict: + fp = self.download_documentation() + e_parser = ExcelParser(fp, url) + price = e_parser.calculate() + if not price: + logger.error("Не удалось расcчитать цену, переходим далее") + + return price def accept_documentation(self, url: str): time.sleep(3) @@ -93,45 +114,89 @@ class Parser: download_documentation_button = self.find_elem(By.CSS_SELECTOR, '#auction_info_td > table > tbody > tr:nth-child(4) table input[type=submit]') time.sleep(5) - ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0, 100).perform() + ActionChains(self._driver).scroll_to_element(download_documentation_button).scroll_by_amount(0, + 100).perform() download_documentation_button.click() - fp = self.download_documentation() - for file in fp: - e_parser = ExcelParser(file) - price = e_parser.calculate() - if not price: - break - self.storage.add_link(str(file.absolute()), url, price) + price = self.parse(url) + self.send_offer_link(price['price'], nds=price['vat'], + delivery_time=price['transport_delivery_date'], + delivery_range=price['max_days']) except NoSuchElementException: - fp = self.download_documentation() + # logger.info("Отсутствует кнопка скачивания документации, переходим к документации") - for file in fp: - e_parser = ExcelParser(file) - price = e_parser.calculate() - if not price: - break - self.storage.add_link(str(file.absolute()), url, price) + price = self.parse(url) + self.send_offer_link(price['price'], nds=price['vat'], + delivery_time=price['transport_delivery_date'], + delivery_range=price['max_days']) def download_documentation(self) -> list[pathlib.Path]: - all_files_1 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) + try: + all_files_1 = set( + pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) - time.sleep(5) - documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation') - docs = documentation_block.find_elements(By.CSS_SELECTOR, 'a') - for doc in docs: - href = doc.get_attribute('href') - if not href.endswith('.xlsx') and not href.endswith('.xls'): - continue - self._driver.get(href) - time.sleep(3) + time.sleep(5) + documentation_block = self.find_elem(By.CSS_SELECTOR, '#download_documentation') + docs = documentation_block.find_elements(By.CSS_SELECTOR, 'a') + for doc in docs: + href = doc.get_attribute('href') + if not href.endswith('.xlsx') and not href.endswith('.xls'): + continue + self._driver.get(href) + time.sleep(3) - all_files_2 = set(pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) + all_files_2 = set( + pathlib.Path('./downloads') / pathlib.Path(file) for tree in os.walk('./downloads') for file in tree[2]) - fp = all_files_2 - all_files_1 + fp = all_files_2 - all_files_1 + logger.debug(fp) - return [file for file in fp] + return [file for file in fp] + except Exception as exc: + logger.info(f"Не удалось скачать документацию. Подробности: {type(exc)} {str(exc)}") + raise KeyboardInterrupt() + + def send_offer_link(self, price: int, nds: int, delivery_range: str, delivery_time: str): + try: + logger.info( + f"Предварительные данные по заявке: Цена: {price}, НДС: {nds}%, Доставка: {delivery_range} дн., Подача машины {delivery_time}") + offer_link = self.find_elem(By.ID, "send_offer_link") + ActionChains(self._driver).scroll_to_element(offer_link).scroll_by_amount(0, 100).perform() + offer_link.click() + + price_id = self.find_elem(By.ID, 'price_id') + price_id.send_keys(str(price)) + nds_elem = self.find_elem(By.CSS_SELECTOR, 'input[name="AUCTION_OFFER[price_vat]"]') + + if nds == 0: + self.find_elem(By.ID, 'price_lot_no_tax').click() + else: + nds_elem.send_keys(str(nds)) + + row_11 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_11 > td:nth-child(2) > textarea") + row_12 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_12 > td:nth-child(2) > textarea") + row_13 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_13 > td:nth-child(2) > textarea") + row_15 = self.find_elem(By.CSS_SELECTOR, "tr#row_id_15 > td:nth-child(2) > input[type=submit]") + + row_11.send_keys(str(delivery_range) + " дней") + row_12.send_keys(delivery_time) + row_13.send_keys( + "Дата подачи транспорта предварительная, согласовывается по звонку оператора ЖДЭ клиенту-отправителю") + time.sleep(10) + + ActionChains(self._driver).scroll_to_element(row_15).scroll_by_amount(0, 100).perform() + row_15.click() + time.sleep(10) + + self.apply_offer() + logger.success("Заявка успешно отправлена") + except Exception as exc: + logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}") + + def apply_offer(self): + btn = self.find_elem(By.CSS_SELECTOR, "form[name=AUCTION_OFFER] table.form-control_table button") + btn.click() if __name__ == "__main__": diff --git a/requirements.in b/requirements.in index b2966c9..b478422 100644 --- a/requirements.in +++ b/requirements.in @@ -9,4 +9,5 @@ xlutils xls2xlsx pyexcel pyexcel-xls -pyexcel-xlsx \ No newline at end of file +pyexcel-xlsx +loguru \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b92c535..ca36794 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,6 +42,8 @@ lml==0.1.0 # via # pyexcel # pyexcel-io +loguru==0.7.2 + # via -r requirements.in numpy==1.26.3 # via pandas openpyxl==3.1.2 diff --git a/storage.py b/storage.py index 25e306c..6a7f13d 100644 --- a/storage.py +++ b/storage.py @@ -18,19 +18,51 @@ class Storage: def create_tables(self): with self.get_cursor() as cur: cur.execute(""" - CREATE TABLE IF NOT EXISTS ltl ( +CREATE TABLE IF NOT EXISTS ltl ( id INTEGER PRIMARY KEY, doc_filepath TEXT, link TEXT, - total_cost INTEGER - );""") + total_cost INTEGER, + query TEXT, + answer TEXT + ); - def add_link(self, doc_filepath: str, link: str, total_cost: int): - with self.get_cursor() as cur: - cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost) VALUES (?, ?, ?)", - (doc_filepath, link, total_cost)) +""") + cur.execute(""" + create unique index if not exists fp_link_index on ltl(doc_filepath, link); + """) + + cur.execute(""" + create trigger if not exists query_answer_trig + after insert + on ltl + begin + update ltl set total_cost = new.total_cost, + query = new.query, + answer = new.answer + where link = new.link; + end; + """) + + cur.execute(""" + create table if not exists users ( + id INTEGER PRIMARY KEY, + tg_user_id TEXT +); + """) self.con.commit() + def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str): + with self.get_cursor() as cur: + cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)", + (doc_filepath, link, total_cost, query, answer)) + self.con.commit() + + def get_links(self): + with self.get_cursor() as cur: + res = cur.execute("SELECT DISTINCT link FROM ltl") + return [row[0] for row in res.fetchall()] + def is_link_exists(self, link: str) -> bool: with self.get_cursor() as cur: res = cur.execute("SELECT * FROM ltl WHERE link = ?", (link,)) @@ -40,3 +72,13 @@ class Storage: with self.get_cursor() as cur: res = cur.execute("SELECT * FROM ltl WHERE doc_filepath = ?", (doc_filepath,)) return res.fetchone() + + def get_users(self): + with self.get_cursor() as cur: + res = cur.execute("SELECT DISTINCT tg_user_id FROM users") + return [row[0] for row in res.fetchall()] + + +if __name__ == "__main__": + s = Storage() + print(s.get_users()) diff --git a/telegram_logs.py b/telegram_logs.py new file mode 100644 index 0000000..379dce2 --- /dev/null +++ b/telegram_logs.py @@ -0,0 +1,34 @@ +import requests +from loguru import logger +from storage import Storage + +s = Storage() + +_TOKEN = '6767909836:AAFpsqtWeBNIBgSSi2_19rltEHOF0mrvTg0' +_URL = 'https://api.telegram.org/bot' + + +def _log(message): + if message.record.get('level').name == "ERROR": + icon = "❌ " + elif message.record.get('level').name == "SUCCESS": + icon = "✅ " + else: + icon = "ℹ️ " + for chat_id in s.get_users(): + r = requests.post("{0}{1}/sendMessage".format(_URL, _TOKEN), { + "chat_id": int(chat_id), + "disable_notification": True, + "text": icon + message + }) + + if r.status_code >= 400: + logger.error("Failed to send message: {0} {1}".format(r.status_code, r.text)) + + +def _filter_info_only(record): + return record.get('level').name == "INFO" or record.get('level').name == "SUCCESS" + + +logger.add(_log, format="{message}", filter=_filter_info_only) +logger.add(_log, format=" {message}", filter=lambda record: record.get('level').name == "ERROR")