You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{"log":"Error getting embedding: Error code: 429 - {'error': {'message': 'Rate limit reached for text-embedding-3-small in organization org-XXXXXXXXX on tokens per min (TPM): Limit 1000000, Used 1000000, Requested 603. Please try again in 36ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}\n","stream":"stdout","time":"2025-03-06T11:04:24.131619492Z"}
Implementation Details (optional)
# Seconds represented by each unit suffix that may appear in the retry hint.
_RETRY_UNIT_SECONDS = {"ms": 0.001, "s": 1.0, "m": 60.0, "h": 3600.0}

# Length of the zero vector returned when no embedding could be obtained.
# NOTE(review): 1536 presumably matches the output size of the configured
# embedding model -- confirm if `embedding_model` is ever changed.
_FALLBACK_EMBEDDING_DIM = 1536


def _parse_retry_delay(error_message: str) -> "float | None":
    """Return the wait in seconds suggested by a rate-limit error message.

    Looks for the hint ``Please try again in <duration>`` where ``<duration>``
    is one or more ``<number><unit>`` pairs (units: ms, s, m, h), for example
    ``36ms``, ``1.5s`` or ``6m0s``.

    Args:
        error_message: The stringified exception.

    Returns:
        The suggested delay in seconds, or None when the message carries no
        hint or the hint is not in a recognised format.
    """
    import re  # local import: the rest of the file is not known to import re

    _, marker, tail = error_message.partition("Please try again in ")
    if not marker:
        return None

    # Match the whole duration token up front so that the sentence-ending "."
    # is not confused with a decimal point (the hint reads "...in 36ms. Visit").
    duration = re.match(r"(?:\d+(?:\.\d+)?(?:ms|h|m|s))+", tail)
    if duration is None:
        return None

    parts = re.findall(r"(\d+(?:\.\d+)?)(ms|h|m|s)", duration.group(0))
    return sum(float(value) * _RETRY_UNIT_SECONDS[unit] for value, unit in parts)


async def get_embedding(text: str, max_retries: int = 5) -> List[float]:
    """Get embedding vector from OpenAI with rate limit handling.

    Rate-limited requests (errors whose text contains ``rate_limit_exceeded``)
    are retried up to ``max_retries`` times. The wait before each retry is the
    duration suggested in the error message when one can be parsed, otherwise
    an exponential backoff with random jitter starting from 100ms.

    Args:
        text: The text to get embedding for
        max_retries: Maximum number of retry attempts for rate-limited requests

    Returns:
        The embedding vector, or a zero vector on any non-rate-limit error or
        once the retries are exhausted. This function does not raise for API
        failures; it is deliberately best-effort.
    """
    base_delay = 0.1  # Start with 100ms delay

    # One initial attempt plus up to `max_retries` retries.
    for attempt in range(max_retries + 1):
        try:
            response = await openai_client.embeddings.create(
                model=embedding_model,
                input=text,
            )
            return response.data[0].embedding
        except Exception as e:
            # Broad catch is intentional: callers rely on always getting a
            # vector back rather than an exception.
            error_message = str(e)
            can_retry = attempt < max_retries

            if "rate_limit_exceeded" not in error_message or not can_retry:
                # Not a rate limit error or we've exhausted retries
                print(f"Error getting embedding: {e}")
                return [0.0] * _FALLBACK_EMBEDDING_DIM

            retry_number = attempt + 1

            # Prefer the server-suggested wait; fall back to exponential
            # backoff with jitter when the message has no usable hint.
            wait_time = _parse_retry_delay(error_message)
            if wait_time is None:
                wait_time = base_delay * (2 ** retry_number) * (0.5 + random.random())

            print(f"Rate limit reached, waiting {wait_time:.2f}s before retry {retry_number}/{max_retries}")
            await asyncio.sleep(wait_time)

    # Only reachable when no attempt was made at all (negative max_retries).
    print(f"Failed to get embedding after {max_retries} retries")
    return [0.0] * _FALLBACK_EMBEDDING_DIM
The text was updated successfully, but these errors were encountered:
Describe the feature you'd like and why
Handle API rate limits from OpenAI
User Impact
Implementation Details (optional)
The text was updated successfully, but these errors were encountered: