Fix HTTP Request node to give priority to file extension of content-disposition (#12653)

pull/16110/head
Gen Sato 1 year ago committed by GitHub
parent 963b6f628a
commit 475b8d731e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -109,14 +109,12 @@ class Response:
3. MIME type analysis 3. MIME type analysis
""" """
content_type = self.content_type.split(";")[0].strip().lower() content_type = self.content_type.split(";")[0].strip().lower()
content_disposition = self.response.headers.get("content-disposition", "") parsed_content_disposition = self.parsed_content_disposition
# Check if it's explicitly marked as an attachment # Check if it's explicitly marked as an attachment
if content_disposition: if parsed_content_disposition:
msg = Message() disp_type = parsed_content_disposition.get_content_disposition() # Returns 'attachment', 'inline', or None
msg["content-disposition"] = content_disposition filename = parsed_content_disposition.get_filename() # Returns filename if present, None otherwise
disp_type = msg.get_content_disposition() # Returns 'attachment', 'inline', or None
filename = msg.get_filename() # Returns filename if present, None otherwise
if disp_type == "attachment" or filename is not None: if disp_type == "attachment" or filename is not None:
return True return True
@ -182,3 +180,12 @@ class Response:
return f"{(self.size / 1024):.2f} KB" return f"{(self.size / 1024):.2f} KB"
else: else:
return f"{(self.size / 1024 / 1024):.2f} MB" return f"{(self.size / 1024 / 1024):.2f} MB"
@property
def parsed_content_disposition(self) -> Optional[Message]:
    """Return the ``content-disposition`` header wrapped in a ``Message``.

    The raw header value is read from ``self.headers`` and stored on a fresh
    ``email.message.Message`` so callers can use its accessors (for example
    ``get_content_disposition()`` and ``get_filename()``) instead of parsing
    the header string by hand.

    Returns:
        A ``Message`` carrying the header, or ``None`` when the header is
        missing or empty.
    """
    header_value = self.headers.get("content-disposition", "")
    # Guard clause: an absent or empty header yields no parsed message.
    if not header_value:
        return None

    parsed = Message()
    parsed["content-disposition"] = header_value
    return parsed

@ -169,32 +169,44 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
""" """
Extract files from response by checking both Content-Type header and URL Extract files from response by checking both Content-Type header and URL
""" """
files = [] files: list[File] = []
is_file = response.is_file is_file = response.is_file
content_type = response.content_type content_type = response.content_type
content = response.content content = response.content
parsed_content_disposition = response.parsed_content_disposition
if is_file: content_disposition_type = None
# Guess file extension from URL or Content-Type header
filename = url.split("?")[0].split("/")[-1] or "" if not is_file:
mime_type = content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream" return files
tool_file = ToolFileManager.create_file_by_raw( if parsed_content_disposition:
user_id=self.user_id, content_disposition_filename = parsed_content_disposition.get_filename()
tenant_id=self.tenant_id, if content_disposition_filename:
conversation_id=None, # If filename is available from content-disposition, use it to guess the content type
file_binary=content, content_disposition_type = mimetypes.guess_type(content_disposition_filename)[0]
mimetype=mime_type,
) # Guess file extension from URL or Content-Type header
filename = url.split("?")[0].split("/")[-1] or ""
mapping = { mime_type = (
"tool_file_id": tool_file.id, content_disposition_type or content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
"transfer_method": FileTransferMethod.TOOL_FILE.value, )
}
file = file_factory.build_from_mapping( tool_file = ToolFileManager.create_file_by_raw(
mapping=mapping, user_id=self.user_id,
tenant_id=self.tenant_id, tenant_id=self.tenant_id,
) conversation_id=None,
files.append(file) file_binary=content,
mimetype=mime_type,
)
mapping = {
"tool_file_id": tool_file.id,
"transfer_method": FileTransferMethod.TOOL_FILE.value,
}
file = file_factory.build_from_mapping(
mapping=mapping,
tenant_id=self.tenant_id,
)
files.append(file)
return files return files

Loading…
Cancel
Save