BrainUs LogoBrainUs AI
Error Reference

Rate Limit Errors

Handling rate limit exceeded and quota errors

Complete guide to handling rate limit and quota errors.

Error: Rate Limit Exceeded

HTTP Status: 429 Too Many Requests

{
  "error": {
    "code": "rate_limit_exceeded",
    "message": "Rate limit exceeded",
    "retry_after": 30,
    "limit": {
      "type": "per_minute",
      "limit": 60,
      "remaining": 0,
      "reset": 1640995260
    }
  }
}

Understanding the Response

| Field | Description |
| --- | --- |
| retry_after | Seconds to wait before retrying |
| type | Which limit was hit (per_minute, etc.) |
| limit | Maximum requests allowed |
| remaining | Requests remaining (0 when exceeded) |
| reset | Unix timestamp when the limit resets |

Handling Rate Limits

Basic Retry

from brainus_ai import BrainusAI, RateLimitError
import asyncio
import os

# client = BrainusAI(api_key=os.getenv("BRAINUS_API_KEY"))

async def query_with_retry(query: str):
    """Run a query, retrying exactly once if the API reports a rate limit.

    The function opens its own ``BrainusAI`` client (API key read from the
    ``BRAINUS_API_KEY`` environment variable), so it does not depend on a
    module-level ``client`` having been created beforehand.

    Args:
        query: Query text sent to the ``"default"`` store.

    Returns:
        The value returned by ``client.query`` on the first attempt that
        succeeds.

    Raises:
        RateLimitError: If the single retry is rate limited as well.
    """
    async with BrainusAI(api_key=os.getenv("BRAINUS_API_KEY")) as client:
        try:
            return await client.query(query=query, store_id="default")
        except RateLimitError as e:
            # retry_after may be absent; fall back to one second rather than
            # handing None to asyncio.sleep().
            wait_time = e.retry_after or 1
            print(f"Rate limited. Waiting {wait_time}s...")
            await asyncio.sleep(wait_time)
            return await client.query(query=query, store_id="default")

# asyncio.run(query_with_retry("What is photosynthesis?"))
import { BrainusAI } from '@brainus/ai';

// const client = new BrainusAI({ apiKey: process.env.BRAINUS_API_KEY });

/**
 * Run a query against the 'default' store, retrying once after a rate limit.
 *
 * Errors whose `code` is not 'rate_limit_exceeded' are rethrown untouched.
 * On a rate limit the function waits `error.retryAfter` seconds (1 second
 * when that value is missing or falsy) and issues the same query again; a
 * failure of that second attempt propagates to the caller.
 *
 * Relies on a `client` being available in the enclosing scope.
 */
async function queryWithRetry(query) {
  // A fresh request object is built for every attempt.
  const send = () => client.query({ query, storeId: 'default' });

  try {
    return await send();
  } catch (error) {
    // Assuming RateLimitError is checkable or exposed
    if (error.code !== 'rate_limit_exceeded') {
      throw error;
    }

    const retryAfter = error.retryAfter || 1;
    console.log(`Rate limited. Waiting ${retryAfter}s...`);
    await new Promise(resolve => setTimeout(resolve, retryAfter * 1000));
    return await send();
  }
}

// const result = await queryWithRetry('What is photosynthesis?');

Exponential Backoff

from brainus_ai import BrainusAI, RateLimitError
import asyncio
import os

async def query_with_backoff(query: str, max_retries: int = 5):
    """Query the "default" store, backing off between rate-limited attempts.

    Args:
        query: Query text to send.
        max_retries: Total number of attempts made before giving up.

    Returns:
        The result of the first successful ``client.query`` call, or ``None``
        when ``max_retries`` allows no attempt at all.

    Raises:
        RateLimitError: Re-raised when the final allowed attempt is still
            rate limited.
    """
    async with BrainusAI(api_key=os.getenv("BRAINUS_API_KEY")) as client:
        final_attempt = max_retries - 1
        attempt = 0
        while attempt < max_retries:
            try:
                return await client.query(query=query, store_id="default")
            except RateLimitError as e:
                if attempt == final_attempt:
                    raise

                # Prefer the server's hint; otherwise wait 1, 2, 4, ... seconds.
                if e.retry_after:
                    wait_time = e.retry_after
                else:
                    wait_time = 2 ** attempt
                print(f"Attempt {attempt + 1}/{max_retries}: Waiting {wait_time}s...")
                await asyncio.sleep(wait_time)
            attempt += 1

    return None

# result = asyncio.run(query_with_backoff("What is photosynthesis?"))

Checking Rate Limit Status

Before Making Requests

# Check current status
# response = await client.query(query="test", store_id="default")

# Get rate limit info from headers
# Check updated SDK documentation for metadata access
# remaining = response.metadata.rate_limit_remaining
# limit = response.metadata.rate_limit_limit
# reset_timestamp = response.metadata.rate_limit_reset

# Calculate when to make next request
from datetime import datetime
# reset_time = datetime.fromtimestamp(reset_timestamp)
# print(f"Rate limit: {remaining}/{limit}")
# print(f"Resets at: {reset_time}")

# Proactive waiting
# if remaining < 5:
#     print("Low on rate limit, waiting...")
#     import asyncio
#     await asyncio.sleep(60)

Using Usage API

# Get detailed usage stats
# usage = await client.get_usage()

# print(f"Per-minute remaining: {usage.rate_limits.per_minute.remaining}")
# print(f"Daily remaining: {usage.rate_limits.per_day.remaining}")
# print(f"Monthly quota: {usage.quota.percentage_used}%")

Common Rate Limit Scenarios

1. Burst Traffic

Problem: Too many requests at once

# Bad: Sends requests back-to-back with no pause between them
# for query in queries:
#     result = await client.query(query=query, store_id="default")

# Good: Rate limiting
# for query in queries:
#     result = await client.query(query=query, store_id="default")
#     await asyncio.sleep(1)  # 1 second between requests

2. Multiple Processes

Problem: Multiple servers/processes share the same key

Solution: Use separate API keys

# Process 1
# client1 = BrainusAI(api_key="key_for_process_1")

# Process 2
# client2 = BrainusAI(api_key="key_for_process_2")

# Each gets its own 60/min limit

3. Batch Processing

Problem: Processing large datasets

import asyncio
from brainus_ai import BrainusAI, RateLimitError
import os

# client = BrainusAI(api_key=os.getenv("BRAINUS_API_KEY"))

async def process_batch(client, queries: list, batch_size: int = 50, max_retries: int = 3):
    """Run ``queries`` sequentially in batches, pausing between batches.

    Each query is sent to the ``"default"`` store. A rate-limited query is
    retried up to ``max_retries`` times, so one repeated rate limit does not
    abort the run and discard the results gathered so far. After every batch
    except the last, the function sleeps for 60 seconds.

    Args:
        client: Object exposing an awaitable ``query(query=..., store_id=...)``.
        queries: Queries to run, in order.
        batch_size: Number of queries per batch; must be at least 1.
        max_retries: Retries allowed per query after a rate limit.

    Returns:
        A list with one result per query, in the same order as ``queries``.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
        RateLimitError: If a query is still rate limited after
            ``max_retries`` retries.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently drop every query.
        raise ValueError("batch_size must be at least 1")

    results = []

    for start in range(0, len(queries), batch_size):
        batch = queries[start:start + batch_size]

        for query in batch:
            for attempt in range(max_retries + 1):
                try:
                    result = await client.query(query=query, store_id="default")
                except RateLimitError as e:
                    if attempt == max_retries:
                        raise
                    print(f"Rate limited in batch {start // batch_size + 1}")
                    # Use the server's hint when present, else back off
                    # exponentially (1s, 2s, 4s, ...).
                    await asyncio.sleep(e.retry_after or (2 ** attempt))
                else:
                    results.append(result)
                    break

        # Wait between batches
        if start + batch_size < len(queries):
            await asyncio.sleep(60)  # 1 minute

    return results

Monthly Quota Exceeded

{
  "error": {
    "code": "quota_exceeded",
    "message": "Monthly quota exceeded",
    "quota": {
      "used": 10000,
      "limit": 10000,
      "resets_at": "2024-02-01T00:00:00Z"
    }
  }
}

Solutions

  1. Wait for reset
from datetime import datetime

# if error.code == "quota_exceeded":
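#     # A trailing "Z" is only parsed by fromisoformat() on Python 3.11+; normalise it first
#     reset_date = datetime.fromisoformat(error.quota.resets_at.replace("Z", "+00:00"))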
#     print(f"Quota resets on: {reset_date}")
  2. Upgrade your plan

Visit Pricing to upgrade

  3. Optimize usage

See Rate Limit Optimization

Prevention Strategies

1. Implement Caching

# Use async caching or manual cache for async calls
# See Best Practices in other guides

2. Monitor Usage

async def monitored_query(client, query: str):
    """Send ``query`` to the "default" store and return the response.

    The monitoring step is kept as a commented-out sketch because the
    metadata attribute names need to be confirmed against the current SDK.

    Args:
        client: Object exposing an awaitable ``query(query=..., store_id=...)``.
        query: Query text to send.

    Returns:
        The value returned by ``client.query``, unchanged.
    """
    response = await client.query(query=query, store_id="default")

    # Monitoring sketch -- check the updated SDK for the access pattern
    # before enabling:
    # remaining = response.metadata.rate_limit_remaining
    # limit = response.metadata.rate_limit_limit
    #
    # usage_percent = ((limit - remaining) / limit) * 100
    #
    # if usage_percent > 80:
    #     print(f"⚠️ {usage_percent:.0f}% of rate limit used!")
    #     # Alert your team

    return response

3. Use Queue System

import asyncio

async def worker(queue, client):
    """Pull queries off ``queue`` forever, marking each one done.

    The processing step is a placeholder; ``client`` is accepted so that
    step can use it. The loop never exits on its own.
    """
    while True:
        pending_query = await queue.get()
        # process query...
        queue.task_done()

Rate Limits by Plan

| Plan | Per Minute | Per Day | Per Month |
| --- | --- | --- | --- |
| Free | 10 | 300 | 300 |
| Starter | 20 | 2,000 | 2,000 |
| Pro | 60 | 10,000 | 10,000 |
| Enterprise | 200 | 50,000 | 50,000 |

Upgrade to Pro for 6x higher rate limits: View Plans

Debug Checklist

When hitting rate limits:

  • Check which limit was exceeded (per-minute, daily, monthly)
  • Verify retry_after value and wait accordingly
  • Review request patterns for bursts
  • Check if multiple processes share the same key
  • Implement caching for duplicate queries
  • Consider upgrading plan
  • Add exponential backoff
  • Monitor usage proactively

Next Steps

On this page