From 47c9cdad6f700fad67f2347ee852ba111809581c Mon Sep 17 00:00:00 2001 From: Amir Mohsen Date: Mon, 19 May 2025 21:16:41 +0200 Subject: [PATCH] feat(watercrawl custom exception): add custom exceptions and error handling - Added WaterCrawlError as base exception class - Implemented WaterCrawlBadRequestError for 400-level errors - Added WaterCrawlPermissionError for 403 Forbidden errors - Added WaterCrawlAuthenticationError for 401 Unauthorized errors - Updated process_response to raise appropriate exceptions - Included detailed error messages and response handling --- api/core/rag/extractor/watercrawl/client.py | 16 ++++++++++ .../rag/extractor/watercrawl/exceptions.py | 32 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 api/core/rag/extractor/watercrawl/exceptions.py diff --git a/api/core/rag/extractor/watercrawl/client.py b/api/core/rag/extractor/watercrawl/client.py index 6eaede7dbc..e29878dbc3 100644 --- a/api/core/rag/extractor/watercrawl/client.py +++ b/api/core/rag/extractor/watercrawl/client.py @@ -6,6 +6,12 @@ from urllib.parse import urljoin import requests from requests import Response +from core.rag.extractor.watercrawl.exceptions import ( + WaterCrawlAuthenticationError, + WaterCrawlBadRequestError, + WaterCrawlPermissionError, +) + class BaseAPIClient: def __init__(self, api_key, base_url): @@ -53,6 +59,16 @@ class WaterCrawlAPIClient(BaseAPIClient): yield data def process_response(self, response: Response) -> dict | bytes | list | None | Generator: + + if response.status_code == 401: + raise WaterCrawlAuthenticationError(response) + + if response.status_code == 403: + raise WaterCrawlPermissionError(response) + + if 400 <= response.status_code < 500: + raise WaterCrawlBadRequestError(response) + response.raise_for_status() if response.status_code == 204: return None diff --git a/api/core/rag/extractor/watercrawl/exceptions.py b/api/core/rag/extractor/watercrawl/exceptions.py new file mode 100644 index 0000000000..3cb36f3857 
--- /dev/null
+++ b/api/core/rag/extractor/watercrawl/exceptions.py
@@ -0,0 +1,61 @@
+import json
+
+
+class WaterCrawlError(Exception):
+    """Base class for all errors raised by the WaterCrawl client."""
+
+
+class WaterCrawlBadRequestError(WaterCrawlError):
+    """Error built from a WaterCrawl API response with a 4xx status code.
+
+    The constructor reads ``message`` and ``errors`` from the JSON body of
+    *response*; when the body is missing, is not valid JSON, or is not a
+    JSON object, the defaults are used instead of failing.
+    """
+
+    def __init__(self, response):
+        """Capture the status code, the response and its error payload.
+
+        Args:
+            response: the ``requests`` response returned by the API.
+        """
+        self.status_code = response.status_code
+        self.response = response
+        data = self._load_payload(response)
+        self.message = data.get("message", "Unknown error occurred")
+        self.errors = data.get("errors", {})
+        super().__init__(self.message)
+
+    @staticmethod
+    def _load_payload(response):
+        """Return the body of *response* as a dict, or ``{}`` if unusable."""
+        try:
+            payload = response.json()
+        except ValueError:
+            # Raising here would hide the HTTP error behind a decoding error
+            # (e.g. an HTML error page served by a proxy).
+            return {}
+        # The constructor calls ``.get`` on the result, so a JSON list or
+        # scalar is ignored rather than passed through.
+        return payload if isinstance(payload, dict) else {}
+
+    @property
+    def flat_errors(self):
+        """The ``errors`` payload serialized as a JSON string."""
+        return json.dumps(self.errors)
+
+    def __str__(self):
+        return f"WaterCrawlBadRequestError: {self.message} \n {self.flat_errors}"
+
+
+class WaterCrawlPermissionError(WaterCrawlBadRequestError):
+    """Raised for 403 responses; reported to the user as exceeded API limits."""
+
+    def __str__(self):
+        return f"You are exceeding your WaterCrawl API limits. {self.message}"
+
+
+class WaterCrawlAuthenticationError(WaterCrawlBadRequestError):
+    """Raised for 401 responses; reported to the user as an API key problem."""
+
+    def __str__(self):
+        return "WaterCrawl API key is invalid or expired. Please check your API key and try again."