import datetime
import logging
import os
import re
from datetime import date, timedelta
from typing import Optional

from docx import Document
from fastapi import HTTPException

logger = logging.getLogger(__name__)

# First run of eight consecutive digits in a file name, read as YYYYMMDD.
_DATE_PATTERN = re.compile(r"\d{8}")
# The marker text followed by digits and one unit character; group 1 holds
# just the digits.
_OVERLOAD_PATTERN = re.compile(r"过载(\d+)[条台]")


def extract_overload_info_from_previous_day(
    current_word: Optional[str] = None, base_folder: str = "temp_download_raw"
) -> Optional[str]:
    """Return the overload count found in the previous day's Word document.

    The date is taken from the first eight-digit run (YYYYMMDD) in
    ``current_word``. The previous day's document is expected at
    ``<base_folder>/<YYYYMM of the previous day>/<name>``, where ``<name>`` is
    ``current_word`` with every occurrence of the date string replaced by the
    previous day's date. The paragraphs of that document are scanned in order
    and the digits of the first overload marker found are returned.

    Args:
        current_word: File name of the current Word document. Must be supplied
            by the caller; a falsy value is rejected.
        base_folder: Folder that holds one sub-folder per month (``YYYYMM``).

    Returns:
        The matched digits as a string, or ``None`` when ``current_word`` is
        missing, contains no valid date, the month folder or the previous
        day's document does not exist, or no paragraph contains the marker.

    Raises:
        HTTPException: With status 500 if anything fails while locating or
            reading the previous day's document.
    """
    logger.debug(
        "current_word=%r (type %s)", current_word, type(current_word).__name__
    )

    if not current_word:
        logger.error("需要提供当前 Word 文档名称")
        return None

    try:
        date_match = _DATE_PATTERN.search(current_word)
        if date_match is None:
            raise ValueError(f"no 8-digit date found in {current_word!r}")
        current_date_str = date_match.group()
        # date() validates the calendar values (e.g. rejects month 13).
        current_date = date(
            int(current_date_str[:4]),
            int(current_date_str[4:6]),
            int(current_date_str[6:8]),
        )
    except (TypeError, ValueError) as e:
        # TypeError: current_word is not a str; ValueError: no/invalid date.
        logger.error(f"无法从当前文档名称中提取日期:{e}")
        return None

    try:
        previous_date = current_date - timedelta(days=1)

        # Same file name as today, with the date swapped for yesterday's.
        previous_word_name = current_word.replace(
            current_date_str, previous_date.strftime("%Y%m%d")
        )

        # Documents are grouped by month, so the first day of a month looks
        # in the previous month's folder.
        previous_folder_path = os.path.join(
            base_folder, previous_date.strftime("%Y%m")
        )
        if not os.path.exists(previous_folder_path):
            logger.error(f"前一天的文件夹不存在:{previous_folder_path}")
            return None

        # Exact-name match against the directory listing.
        if previous_word_name not in os.listdir(previous_folder_path):
            logger.error(f"前一天的 Word 文档不存在:{previous_word_name}")
            return None
        previous_word_path = os.path.join(previous_folder_path, previous_word_name)

        doc = Document(previous_word_path)

        # Only the first paragraph containing the marker is used.
        overload_info = None
        for para in doc.paragraphs:
            overload_match = _OVERLOAD_PATTERN.search(para.text)
            if overload_match:
                overload_info = overload_match.group(1)
                break

        if overload_info:
            logger.info(f"前一天的过载台数信息:{overload_info}")
            return overload_info

        logger.error("未找到前一天的过载台数信息")
        return None
    except Exception as e:
        # Boundary handler: surface any unexpected failure as an HTTP 500
        # while keeping the original exception as the cause.
        logger.exception(f"提取前一天过载台数信息失败:{e}")
        raise HTTPException(
            status_code=500, detail=f"提取前一天过载台数信息失败:{e}"
        ) from e


# Example usage:
# year = "2025"
# month = "03"
# day = "23"
# current_word = f"南方电网公司停电抢修投诉服务舆情管控三工单联动监测日报{year}{month}{day}.docx"
# print(current_word)
# overload_info = extract_overload_info_from_previous_day(current_word)
# if overload_info:
#     print(f"Previous day's overload count: {overload_info}")
# else:
#     print("No overload count found for the previous day")