You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
report_app/app/tools/find_before_word.py

111 lines
3.7 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import datetime
import re
from docx import Document
import logging
from fastapi import HTTPException
from datetime import date, timedelta
logger = logging.getLogger(__name__)

# Eight consecutive digits in the document name, read as YYYYMMDD.
_NAME_DATE_RE = re.compile(r"\d{8}")
# The overload phrase looked for in the previous day's paragraphs.
_OVERLOAD_RE = re.compile(r"过载\d+台")


def _parse_name_date(word_name):
    """Return ``(date_str, date)`` for the first 8-digit run in *word_name*.

    Raises:
        ValueError: If there is no 8-digit run, or it is not a valid
            calendar date.
        TypeError: If *word_name* is not a string.
    """
    found = _NAME_DATE_RE.search(word_name)
    if found is None:
        raise ValueError("no 8-digit YYYYMMDD date in document name")
    date_str = found.group()
    parsed = date(int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8]))
    return date_str, parsed


def extract_overload_info_from_previous_day(
    current_word=None, base_folder="temp_download_raw"
):
    """Return the overload-count text from the previous day's Word report.

    The first run of eight digits in ``current_word`` is read as a
    ``YYYYMMDD`` date.  The previous day's document is expected to have the
    same name with that date string replaced by the previous day's, and to
    live in ``<base_folder>/<YYYYMM>/`` where ``YYYYMM`` is the month of the
    previous day.  Its paragraphs are scanned in order for the first
    ``过载<digits>台`` phrase.

    Args:
        current_word: File name of the current Word document (a string
            containing a ``YYYYMMDD`` date).  The default ``None`` is
            rejected, so callers must pass a real name.
        base_folder: Root folder that holds the per-month document folders.

    Returns:
        The matched phrase with the ``过载`` prefix removed (for example
        ``"3台"``), or ``None`` when the name is empty or carries no valid
        date, when the month folder or the previous document is missing, or
        when no paragraph contains the phrase.

    Raises:
        HTTPException: Status 500 when locating, opening or scanning the
            previous document fails unexpectedly.
    """
    # Diagnostic only; kept off stdout so library callers are not polluted.
    logger.debug("current_word=%r (%s)", current_word, type(current_word))

    if not current_word:
        logger.error("需要提供当前 Word 文档名称")
        return None

    try:
        current_date_str, current_date = _parse_name_date(current_word)
    except (TypeError, ValueError) as e:
        # A name without a usable date is an expected condition, not a crash.
        logger.error("无法从当前文档名称中提取日期:%s", e)
        return None

    try:
        previous_date = current_date - timedelta(days=1)
        previous_word_name = current_word.replace(
            current_date_str, previous_date.strftime("%Y%m%d")
        )
        # The folder is keyed by the previous day's month, so the first day
        # of a month looks in the preceding month's folder.
        previous_folder_path = os.path.join(
            base_folder, previous_date.strftime("%Y%m")
        )

        if not os.path.exists(previous_folder_path):
            logger.error("前一天的文件夹不存在:%s", previous_folder_path)
            return None

        if previous_word_name not in os.listdir(previous_folder_path):
            logger.error("前一天的 Word 文档不存在:%s", previous_word_name)
            return None

        doc = Document(os.path.join(previous_folder_path, previous_word_name))

        for para in doc.paragraphs:
            found = _OVERLOAD_RE.search(para.text)
            if found:
                # NOTE(review): only the "过载" prefix is stripped, so the
                # result keeps the trailing "台".  The original also called
                # .replace("", ""), which is a no-op; confirm whether
                # stripping "台" was intended.
                overload_info = found.group().replace("过载", "")
                logger.info("前一天的过载台数信息:%s", overload_info)
                return overload_info

        logger.error("未找到前一天的过载台数信息")
        return None
    except Exception as e:
        # Boundary handler: surface any unexpected failure as an HTTP 500
        # while keeping the original exception as the cause.
        logger.exception("提取前一天过载台数信息失败:%s", e)
        raise HTTPException(
            status_code=500, detail=f"提取前一天过载台数信息失败:{e}"
        ) from e
# Example usage:
# year = "2025"
# month = "03"
# day = "23"
# current_word = f'南方电网公司停电抢修投诉服务舆情管控三工单联动监测日报{year}{month}{day}.docx'
# print(current_word)
# overload_info = extract_overload_info_from_previous_day(current_word)
# if overload_info:
#     print(f"前一天的过载台数信息:{overload_info}")
# else:
#     print("未找到前一天的过载台数信息")