|
|
import os
|
|
|
import datetime
|
|
|
import re
|
|
|
from docx import Document
|
|
|
import logging
|
|
|
from fastapi import HTTPException
|
|
|
from datetime import date, timedelta
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
# First run of eight consecutive digits in a file name, read as YYYYMMDD.
_DATE_IN_NAME_RE = re.compile(r"\d{8}")
# Captures the number inside an "过载<N>台" phrase (the overload count).
_OVERLOAD_COUNT_RE = re.compile(r"过载(\d+)台")


def extract_overload_info_from_previous_day(
    current_word=None, base_folder="temp_download_raw"
):
    """Return the overload count recorded in the previous day's Word report.

    The date is taken from the first eight consecutive digits found in
    ``current_word`` and interpreted as ``YYYYMMDD``. The previous day's
    document is then looked up at
    ``<base_folder>/<YYYYMM of the previous day>/<name>``, where ``<name>`` is
    ``current_word`` with that date string replaced by the previous day's date.
    Only ``doc.paragraphs`` is scanned; the first paragraph containing
    ``过载<N>台`` wins.

    Args:
        current_word (str): File name of the current Word document. Must be
            supplied by the caller; a falsy value makes the function return
            ``None``.
        base_folder (str): Root folder that holds one ``YYYYMM`` sub-folder
            per month. Defaults to ``"temp_download_raw"``.

    Returns:
        str | None: The digits of the overload count (e.g. ``"12"``), or
        ``None`` when the name carries no valid date, the month folder or the
        previous day's document is missing, or no paragraph mentions an
        overload count.

    Raises:
        HTTPException: With status 500 when locating or reading the previous
            day's document fails unexpectedly (e.g. the file is not a valid
            .docx). The original exception is chained as the cause.
    """
    # Diagnostics go through the logger instead of being printed to stdout.
    logger.debug(
        "current_word=%r (type=%s)", current_word, type(current_word).__name__
    )

    if not current_word:
        logger.error("需要提供当前 Word 文档名称")
        return None

    # Step 1: pull the YYYYMMDD date out of the file name.
    try:
        date_match = _DATE_IN_NAME_RE.search(current_word)
        if date_match is None:
            raise ValueError(f"no YYYYMMDD date found in {current_word!r}")
        current_date_str = date_match.group()
        current_date = date(
            int(current_date_str[:4]),
            int(current_date_str[4:6]),
            int(current_date_str[6:8]),
        )
    except (TypeError, ValueError) as e:
        # TypeError: current_word is not a string; ValueError: no date in the
        # name, or the digits do not form a real calendar date.
        logger.error(f"无法从当前文档名称中提取日期:{e}")
        return None

    # Step 2: locate and read the previous day's document.
    try:
        previous_date = current_date - timedelta(days=1)

        # Same file name, with the date swapped for the previous day's.
        previous_word_name = current_word.replace(
            current_date_str, previous_date.strftime("%Y%m%d")
        )

        # Documents are grouped in per-month folders named YYYYMM; the previous
        # day may fall in a different month than the current document.
        previous_folder_path = os.path.join(
            base_folder, previous_date.strftime("%Y%m")
        )

        # isdir (not exists): a regular file at this path cannot be listed.
        if not os.path.isdir(previous_folder_path):
            logger.error(f"前一天的文件夹不存在:{previous_folder_path}")
            return None

        # Exact-name match against the directory listing.
        if previous_word_name not in os.listdir(previous_folder_path):
            logger.error(f"前一天的 Word 文档不存在:{previous_word_name}")
            return None

        previous_word_path = os.path.join(previous_folder_path, previous_word_name)
        doc = Document(previous_word_path)

        # Take the count from the first paragraph that mentions it.
        overload_info = None
        for para in doc.paragraphs:
            count_match = _OVERLOAD_COUNT_RE.search(para.text)
            if count_match is not None:
                overload_info = count_match.group(1)
                break

        if overload_info is None:
            logger.error("未找到前一天的过载台数信息")
            return None

        logger.info(f"前一天的过载台数信息:{overload_info}")
        return overload_info

    except Exception as e:
        # Boundary handler: log the traceback and surface a 500 to the API
        # layer, keeping the original exception as the cause.
        logger.exception(f"提取前一天过载台数信息失败:{e}")
        raise HTTPException(
            status_code=500, detail=f"提取前一天过载台数信息失败:{e}"
        ) from e
|
|
|
|
|
|
|
|
|
# Example usage:
# year = "2025"
# month = "03"
# day = "23"
# current_word = f'南方电网公司停电抢修投诉服务舆情管控三工单联动监测日报{year}{month}{day}.docx'
# print(current_word)
# overload_info = extract_overload_info_from_previous_day(current_word)
#
# if overload_info:
#     print(f"前一天的过载台数信息:{overload_info}")
# else:
#     print("未找到前一天的过载台数信息")
|