diff --git a/.gitignore b/.gitignore index b0b6f3a..e3c6518 100644 --- a/.gitignore +++ b/.gitignore @@ -157,4 +157,10 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ \ No newline at end of file +.idea/ + +database.db + +test* +probe* +*.xlsx diff --git a/excel_parser.py b/excel_parser.py index 9d81ba1..54d4e59 100644 --- a/excel_parser.py +++ b/excel_parser.py @@ -1,12 +1,12 @@ import json import pathlib +from urllib.parse import urlparse, parse_qs import numpy as np from functools import reduce import requests from openpyxl import load_workbook -from pathlib import Path import pandas as pd from storage import Storage @@ -35,7 +35,6 @@ class ExcelParser: pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx'))) self._wb = load_workbook(str(path.with_suffix('.xlsx'))) - @classmethod def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame: columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"] @@ -54,6 +53,7 @@ class ExcelParser: not_existed_columns = list(set(columns) - set(df.columns)) if len(not_existed_columns) > 0: + self.add_link_to_database() raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}") df = df.loc[:, columns] @@ -62,7 +62,8 @@ class ExcelParser: isna_values_y = list(set(np.where(df.isna())[1])) if isna_values_y: - raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}") + raise ValueError( + f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}") return df @@ -106,15 +107,27 @@ class ExcelParser: "vat": int(data['percent_vat']), "max_days": int(data['maxdays']), 
"transport_delivery_date": df["Дата загрузки"]} + + self.add_link_to_database(query, answer=data) return None - def add_link_to_database(self, query: dict, answer: dict): + def add_link_to_database(self, query: dict | None = None, answer: dict | None = None): + idx = int(parse_qs(urlparse(self.url).query).get('id')[0]) + if answer is None: + price = None + else: + price = answer.get('price', None) + answer = json.dumps(answer) + + if query: + query = json.dumps(query) + [self.storage.add_link( + idx, str(file.absolute()), - self.url, - int(answer["price"]), - json.dumps(query), - json.dumps(answer)) for file in self._paths + price, + query, + answer) for file in self._paths ] diff --git a/main.py b/main.py index 98e99ca..ccff220 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ import os import time import re from typing import Self +from urllib.parse import urlparse, parse_qs from selenium.webdriver import Keys, ActionChains from selenium.webdriver.common.by import By @@ -16,6 +17,11 @@ from storage import Storage from telegram_logs import logger +import dotenv + +dotenv.load_dotenv('.env') +IS_PROD = os.environ.get('PROD_ENV') == '1' + class Parser: keyword = "Велесстрой" @@ -30,10 +36,11 @@ class Parser: prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())} self._options.add_experimental_option("prefs", prefs) - self._options.add_argument("--disable-extensions") - self._options.add_argument("--disable-gpu") - self._options.add_argument("--headless=new") - self._options.add_argument("window-size=1920,1080") + if IS_PROD: + self._options.add_argument("--disable-extensions") + self._options.add_argument("--disable-gpu") + self._options.add_argument("--headless=new") + self._options.add_argument("window-size=1920,1080") self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install()) self.storage = Storage() @@ -88,7 +95,9 @@ class Parser: links = [link.get_attribute("href") for link in links if 
self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)] - links = [link for link in links if link not in self.storage.get_links()] + links = [link for link in links if + f"https://www.b2b-center.ru/market/view.html?id=" + parse_qs(urlparse(link).query).get('id')[ + 0] not in self.storage.get_links()] for link in links: logger.info("Обработка заявки: " + link) @@ -190,8 +199,10 @@ class Parser: row_15.click() time.sleep(10) - self.apply_offer() + if IS_PROD: + self.apply_offer() logger.success("Заявка успешно отправлена") + time.sleep(10) except Exception as exc: logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}") diff --git a/requirements.in b/requirements.in index b478422..4f03502 100644 --- a/requirements.in +++ b/requirements.in @@ -10,4 +10,5 @@ xls2xlsx pyexcel pyexcel-xls pyexcel-xlsx -loguru \ No newline at end of file +loguru +python-dotenv \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ca36794..95dfbef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -59,6 +59,8 @@ pandas==2.2.0 # via -r requirements.in pillow==10.2.0 # via xls2xlsx +python-dotenv==1.0.1 + # via -r requirements.in pyexcel==0.7.0 # via -r requirements.in pyexcel-io==0.6.6 diff --git a/storage.py b/storage.py index 6a7f13d..83f8f0d 100644 --- a/storage.py +++ b/storage.py @@ -19,17 +19,23 @@ class Storage: with self.get_cursor() as cur: cur.execute(""" CREATE TABLE IF NOT EXISTS ltl ( - id INTEGER PRIMARY KEY, + id INTEGER, doc_filepath TEXT, link TEXT, total_cost INTEGER, query TEXT, - answer TEXT + answer TEXT, + PRIMARY KEY (id, doc_filepath) ); """) cur.execute(""" - create unique index if not exists fp_link_index on ltl(doc_filepath, link); + create trigger if not exists add_link_trig + after insert + on ltl + begin + update ltl set link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id where id=new.id; + end; """) cur.execute(""" @@ -43,19 +49,12 @@ CREATE TABLE IF NOT EXISTS ltl ( where link = new.link; 
end; """) - - cur.execute(""" - create table if not exists users ( - id INTEGER PRIMARY KEY, - tg_user_id TEXT -); - """) self.con.commit() - def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str): + def add_link(self, id: int, doc_filepath: str, total_cost: int | None, query: str | None, answer: str | None): with self.get_cursor() as cur: - cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)", - (doc_filepath, link, total_cost, query, answer)) + cur.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)", + (id, doc_filepath, total_cost, query, answer)) self.con.commit() def get_links(self):