hotfix 1

parent 7beec4238b
commit 1741d3e1e0

.gitignore
@@ -158,3 +158,9 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+
+database.db
+
+test*
+probe*
+*.xlsx

@@ -1,12 +1,12 @@
 import json
 import pathlib
+from urllib.parse import urlparse, parse_qs
 import numpy as np
 from functools import reduce
 
 import requests
 from openpyxl import load_workbook
 
-from pathlib import Path
 import pandas as pd
 
 from storage import Storage
@@ -35,7 +35,6 @@ class ExcelParser:
         pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
         self._wb = load_workbook(str(path.with_suffix('.xlsx')))
 
-    @classmethod
     def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
         columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
 
@@ -54,6 +53,7 @@ class ExcelParser:
         not_existed_columns = list(set(columns) - set(df.columns))
 
         if len(not_existed_columns) > 0:
+            self.add_link_to_database()
             raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
         df = df.loc[:, columns]
 
@@ -62,7 +62,8 @@ class ExcelParser:
         isna_values_y = list(set(np.where(df.isna())[1]))
 
         if isna_values_y:
-            raise ValueError(f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
+            raise ValueError(
+                f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
 
         return df
 
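
The two hunks above tighten template validation: missing columns raise a KeyError before any row is read, and NaN cells raise a ValueError naming the offending columns. A minimal sketch of both checks on a toy DataFrame (the sample data is illustrative, not from the repository):

```python
import numpy as np
import pandas as pd

columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
df = pd.DataFrame({"Адрес отгрузки": ["Москва"], "Масса": [np.nan]})

# Template columns absent from the sheet (order is unspecified for sets).
not_existed_columns = list(set(columns) - set(df.columns))
print(not_existed_columns)  # e.g. ['Адрес разгрузки', 'Дата загрузки', 'Объем']

# Column indices holding NaN values: np.where(df.isna())[1] yields them.
isna_values_y = list(set(np.where(df.isna())[1]))
print(list(df.columns[isna_values_y]))  # ['Масса']
```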
@@ -106,15 +107,27 @@ class ExcelParser:
             "vat": int(data['percent_vat']),
             "max_days": int(data['maxdays']),
             "transport_delivery_date": df["Дата загрузки"]}
 
+        self.add_link_to_database(query, answer=data)
         return None
 
-    def add_link_to_database(self, query: dict, answer: dict):
+    def add_link_to_database(self, query: dict | None = None, answer: dict | None = None):
+        idx = int(parse_qs(urlparse(self.url).query).get('id')[0])
+        if answer is None:
+            price = None
+        else:
+            price = answer.get('price', None)
+            answer = json.dumps(answer)
+
+        if query:
+            query = json.dumps(query)
+
         [self.storage.add_link(
+            idx,
             str(file.absolute()),
-            self.url,
-            int(answer["price"]),
-            json.dumps(query),
-            json.dumps(answer)) for file in self._paths
+            price,
+            query,
+            answer) for file in self._paths
         ]
 
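
With this change each record is keyed by the numeric tender id parsed from the request URL, and the price/query/answer fields may be None when a request fails validation. A minimal sketch of the id extraction (the sample URL is illustrative):

```python
from urllib.parse import urlparse, parse_qs

url = "https://www.b2b-center.ru/market/view.html?id=12345"
# parse_qs returns a dict of value lists, e.g. {'id': ['12345']}.
idx = int(parse_qs(urlparse(url).query).get('id')[0])
print(idx)  # 12345

# The canonical link is later rebuilt from this id, so any URL variant
# pointing at the same tender compares equal during deduplication.
canonical = "https://www.b2b-center.ru/market/view.html?id=" + str(idx)
```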

main.py
@@ -3,6 +3,7 @@ import os
 import time
 import re
 from typing import Self
+from urllib.parse import urlparse, parse_qs
 
 from selenium.webdriver import Keys, ActionChains
 from selenium.webdriver.common.by import By
@@ -16,6 +17,11 @@ from storage import Storage
 
 from telegram_logs import logger
 
+import dotenv
+
+dotenv.load_dotenv('.env')
+IS_PROD = os.environ.get('PROD_ENV') == '1'
+
 
 class Parser:
     keyword = "Велесстрой"
@@ -30,10 +36,11 @@ class Parser:
         prefs = {"download.default_directory": str(pathlib.Path('./downloads').absolute())}
 
         self._options.add_experimental_option("prefs", prefs)
-        self._options.add_argument("--disable-extensions")
-        self._options.add_argument("--disable-gpu")
-        self._options.add_argument("--headless=new")
-        self._options.add_argument("window-size=1920,1080")
+        if IS_PROD:
+            self._options.add_argument("--disable-extensions")
+            self._options.add_argument("--disable-gpu")
+            self._options.add_argument("--headless=new")
+            self._options.add_argument("window-size=1920,1080")
         self._service = webdriver.ChromeService(executable_path=ChromeDriverManager().install())
 
         self.storage = Storage()
@@ -88,7 +95,9 @@ class Parser:
 
         links = [link.get_attribute("href") for link in links if
                  self.check_ltl(link.find_element(By.CSS_SELECTOR, 'div').text)]
-        links = [link for link in links if link not in self.storage.get_links()]
+        links = [link for link in links if
+                 f"https://www.b2b-center.ru/market/view.html?id=" + parse_qs(urlparse(link).query).get('id')[
+                     0] not in self.storage.get_links()]
 
         for link in links:
             logger.info("Обработка заявки: " + link)
@@ -190,8 +199,10 @@ class Parser:
             row_15.click()
             time.sleep(10)
 
-            self.apply_offer()
+            if IS_PROD:
+                self.apply_offer()
             logger.success("Заявка успешно отправлена")
+            time.sleep(10)
         except Exception as exc:
             logger.error(f"Не удалось отправить заявку. Ошибка: {type(exc)} : {str(exc)}")
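
Both the headless Chrome flags and the final apply_offer() call are now gated behind IS_PROD, so a local run drives a visible browser and never submits a real offer. The dotenv module imported here is the one provided by the python-dotenv distribution. A minimal sketch of the toggle, assuming a .env file next to the script:

```python
import os

import dotenv

dotenv.load_dotenv('.env')  # e.g. a .env file containing the line PROD_ENV=1
IS_PROD = os.environ.get('PROD_ENV') == '1'

if IS_PROD:
    print("production: headless browser, offers are submitted")
else:
    print("development: visible browser, dry run")
```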

requirements.in
@@ -11,3 +11,4 @@ pyexcel
 pyexcel-xls
 pyexcel-xlsx
 loguru
+pydotenv

requirements.txt
@@ -59,6 +59,8 @@ pandas==2.2.0
     # via -r requirements.in
 pillow==10.2.0
     # via xls2xlsx
+pydotenv==0.0.7
+    # via -r requirements.in
 pyexcel==0.7.0
     # via -r requirements.in
 pyexcel-io==0.6.6

storage.py
@@ -19,17 +19,23 @@ class Storage:
         with self.get_cursor() as cur:
             cur.execute("""
 CREATE TABLE IF NOT EXISTS ltl (
-    id INTEGER PRIMARY KEY,
+    id INTEGER,
     doc_filepath TEXT,
     link TEXT,
     total_cost INTEGER,
     query TEXT,
-    answer TEXT
+    answer TEXT,
+    PRIMARY KEY (id, doc_filepath)
 );
 
             """)
             cur.execute("""
-create unique index if not exists fp_link_index on ltl(doc_filepath, link);
+create trigger if not exists add_link_trig
+after insert
+on ltl
+begin
+update ltl set link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id where id=new.id;
+end;
             """)
 
             cur.execute("""
@@ -43,19 +49,12 @@ CREATE TABLE IF NOT EXISTS ltl (
 where link = new.link;
 end;
             """)
 
-            cur.execute("""
-create table if not exists users (
-    id INTEGER PRIMARY KEY,
-    tg_user_id TEXT
-);
-            """)
             self.con.commit()
 
-    def add_link(self, doc_filepath: str, link: str, total_cost: int, query: str, answer: str):
+    def add_link(self, id: int, doc_filepath: str, total_cost: int | None, query: str | None, answer: str | None):
         with self.get_cursor() as cur:
-            cur.execute("INSERT INTO ltl (doc_filepath, link, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
-                        (doc_filepath, link, total_cost, query, answer))
+            cur.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
+                        (id, doc_filepath, total_cost, query, answer))
             self.con.commit()
 
     def get_links(self):
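
The schema now uses a composite (id, doc_filepath) primary key, and the link column is no longer passed by add_link: an AFTER INSERT trigger derives it from the id. A standalone sketch of that behavior (in-memory database and sample values are illustrative):

```python
import sqlite3

con = sqlite3.connect(":memory:")
con.executescript("""
CREATE TABLE IF NOT EXISTS ltl (
    id INTEGER,
    doc_filepath TEXT,
    link TEXT,
    total_cost INTEGER,
    query TEXT,
    answer TEXT,
    PRIMARY KEY (id, doc_filepath)
);
CREATE TRIGGER IF NOT EXISTS add_link_trig
AFTER INSERT ON ltl
BEGIN
    UPDATE ltl SET link = 'https://www.b2b-center.ru/market/view.html?id=' || new.id
    WHERE id = new.id;
END;
""")

# add_link no longer supplies a link; the trigger fills it in from the id.
con.execute("INSERT INTO ltl (id, doc_filepath, total_cost, query, answer) VALUES (?, ?, ?, ?, ?)",
            (12345, "/tmp/request.xlsx", None, None, None))
print(con.execute("SELECT link FROM ltl").fetchone()[0])
# -> https://www.b2b-center.ru/market/view.html?id=12345
```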