b2bcenter-parser/excel_parser.py

142 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
import pathlib
from urllib.parse import urlparse, parse_qs
import numpy as np
from functools import reduce
import requests
from openpyxl import load_workbook
import pandas as pd
from storage import Storage
# Endpoint of the price calculator queried by ExcelParser.calculate().
API_CALC_URL = "https://api.jde.ru/vD/calculator/PriceAddress"
# Fixed request parameters sent as "type", "pickup" and "delivery" on every calculator call.
# NOTE(review): the meaning of the value "1" is defined by the remote API, not by this file —
# confirm against the API documentation.
TYPE = "1"
PICKUP = "1"
DELIVERY = "1"
# Account identifier and access token sent as the "user" and "token" query parameters.
# NOTE(review): these credentials are hard-coded in source control; consider loading them
# from the environment or a secrets store.
USER = "2252130929823409"
TOKEN = "67065749269910593"
class ExcelParser:
def __init__(self, paths: list[pathlib.Path] | list[str], url: str):
self.url = url
self._paths = [pathlib.Path(p) for p in paths]
self.storage = Storage()
for path in self._paths:
assert path.is_file() is True, "Файл не найден"
def convert_to_xlsx(self, path: pathlib.Path):
import pyexcel as pe
if path.suffix == '.xlsx':
self._wb = load_workbook(str(path))
return
pe.save_book_as(file_name=str(path), dest_file_name=str(path.with_suffix('.xlsx')))
self._wb = load_workbook(str(path.with_suffix('.xlsx')))
def _clean_up_wb(self, path: pathlib.Path) -> pd.DataFrame:
columns = ["Адрес отгрузки", "Адрес разгрузки", "Масса", "Объем", "Дата загрузки"]
df = pd.read_excel(path)
cond = reduce(lambda x, y: x | y, (df == val for val in columns))
for row in df[cond].iterrows():
if row[1].any():
df = df[row[0]:]
break
df: pd.DataFrame = df.loc[:, df.iloc[0].dropna().index]
df.columns = df.iloc[0]
df = df.drop(df.index[0], axis=0)
not_existed_columns = list(set(columns) - set(df.columns))
if len(not_existed_columns) > 0:
self.add_link_to_database()
raise KeyError(f"Не удалось обработать заявку по причине отсутствия полей в шаблоне: {not_existed_columns}")
df = df.loc[:, columns]
df = df.drop([idx for idx, row in df.iterrows() if row.isna().all()], axis=0)
isna_values_y = list(set(np.where(df.isna())[1]))
if isna_values_y:
raise ValueError(
f"Не удалось обработать заявку по причине отсутствия значений в полях: {list(df.columns[isna_values_y])}")
return df
def clean_up_wb(self):
dfs: list[pd.DataFrame] = []
for path in self._paths:
df = self._clean_up_wb(path)
if len(df) < 1:
raise ValueError("Требуется ручная обработка, не удалось считать одно из полей.")
dfs.append(df)
concat = pd.concat(dfs)
concat = concat.set_index(pd.RangeIndex(stop=len(concat)))
return pd.DataFrame({
'Адрес отгрузки': [concat['Адрес отгрузки'][0]],
'Адрес разгрузки': [concat['Адрес разгрузки'][0]],
'Дата загрузки': [concat['Дата загрузки'][0]],
'Масса': [concat['Масса'].sum()],
'Объем': [concat['Объем'].sum()]})
def calculate(self) -> dict | None:
df = self.clean_up_wb()
query = {
"type": TYPE,
"token": TOKEN,
"delivery": DELIVERY,
"pickup": PICKUP,
"user": USER,
"addr_from": df['Адрес отгрузки'].iloc[0],
"addr_to": df['Адрес разгрузки'].iloc[0],
"weight": float(df['Масса'].iloc[0]) * 1000,
"volume": df['Объем'].iloc[0],
"pr_vat": "1"
}
data = requests.get(API_CALC_URL, params=query).json()
if data.get('price', None) is not None:
self.add_link_to_database(query, answer=data)
return {"price": int(data['price']),
"vat": int(data['percent_vat']),
"max_days": int(data['maxdays']),
"transport_delivery_date": df["Дата загрузки"]}
self.add_link_to_database(query, answer=data)
return None
def add_link_to_database(self, query: dict | None = None, answer: dict | None = None):
idx = int(parse_qs(urlparse(self.url).query).get('id')[0])
if answer is None:
price = None
else:
price = answer.get('price', None)
answer = json.dumps(answer)
if query:
query = json.dumps(query)
[self.storage.add_link(
idx,
str(file.absolute()),
price,
query,
answer) for file in self._paths
]
if __name__ == '__main__':
    # Manual smoke run:
    #   python excel_parser.py <request-url-with-id> [spreadsheet ...]
    # ExcelParser requires both a list of files and the request URL (its ``id`` query
    # parameter is used when logging), so the URL must be supplied on the command line.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: python excel_parser.py <request-url-with-id> [spreadsheet ...]")
    files = sys.argv[2:] or ['./downloads/1297-РС.xls']
    parser = ExcelParser(files, sys.argv[1])
    print(parser.calculate())