API Rate Limiting Implementation
Implement rate limiting to protect APIs from abuse.
By EME. Published: February 20, 2025
Tags: rate limiting, api protection, throttling, abuse prevention
A Simple Analogy
Rate limiting is like a bouncer at a club: it controls how many people (requests) can enter per time period so the venue never exceeds capacity.
Why Rate Limiting?
- Abuse prevention: Stop malicious requests
- Resource protection: Prevent overload
- Fair usage: Ensure access for all users
- Cost control: Limit API costs
- SLA compliance: Maintain service quality
Token Bucket Algorithm
public class TokenBucket
{
    private readonly int _capacity;
    private readonly double _refillRate; // tokens added per second
    private double _tokens;
    private DateTime _lastRefillTime;
    private readonly object _lock = new();

    public TokenBucket(int capacity, double refillRate)
    {
        _capacity = capacity;
        _refillRate = refillRate;
        _tokens = capacity;
        _lastRefillTime = DateTime.UtcNow;
    }

    public bool TryConsumeToken(int tokensRequired = 1)
    {
        lock (_lock)
        {
            Refill();
            if (_tokens >= tokensRequired)
            {
                _tokens -= tokensRequired;
                return true;
            }
            return false;
        }
    }

    private void Refill()
    {
        var now = DateTime.UtcNow;
        var elapsed = (now - _lastRefillTime).TotalSeconds;
        _tokens = Math.Min(_capacity, _tokens + (elapsed * _refillRate));
        _lastRefillTime = now;
    }
}
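To see the bucket's burst-then-throttle behavior, here is a small standalone sketch. The capacity and refill rate are illustrative, and the class is repeated in compact form so the snippet compiles on its own:

```csharp
using System;

// Burst of 3 goes through immediately; the 4th request finds the bucket empty.
var bucket = new TokenBucket(capacity: 3, refillRate: 1.0);

for (int i = 1; i <= 4; i++)
    Console.WriteLine($"request {i}: {(bucket.TryConsumeToken() ? "allowed" : "throttled")}");
// typically: requests 1-3 allowed, request 4 throttled (refill of 1 token/s hasn't caught up)

// Compact copy of the TokenBucket from above so this snippet is self-contained
class TokenBucket
{
    private readonly int _capacity;
    private readonly double _refillRate; // tokens per second
    private double _tokens;
    private DateTime _last = DateTime.UtcNow;
    private readonly object _lock = new();

    public TokenBucket(int capacity, double refillRate)
    {
        _capacity = capacity;
        _refillRate = refillRate;
        _tokens = capacity;
    }

    public bool TryConsumeToken(int needed = 1)
    {
        lock (_lock)
        {
            var now = DateTime.UtcNow;
            _tokens = Math.Min(_capacity, _tokens + (now - _last).TotalSeconds * _refillRate);
            _last = now;
            if (_tokens < needed) return false;
            _tokens -= needed;
            return true;
        }
    }
}
```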
Per-User Rate Limiting
public class RateLimitMiddleware
{
    private readonly RequestDelegate _next;
    // Note: this dictionary grows unbounded; in production, evict idle buckets to cap memory
    private readonly ConcurrentDictionary<string, TokenBucket> _buckets;

    public RateLimitMiddleware(RequestDelegate next)
    {
        _next = next;
        _buckets = new ConcurrentDictionary<string, TokenBucket>();
    }

    public async Task InvokeAsync(HttpContext context)
    {
        // Prefer the authenticated user ID; fall back to the client IP for anonymous callers
        var userId = context.User?.FindFirst(ClaimTypes.NameIdentifier)?.Value ??
            context.Connection.RemoteIpAddress?.ToString();
        if (userId == null)
        {
            await _next(context);
            return;
        }

        var bucket = _buckets.GetOrAdd(userId, _ => new TokenBucket(
            capacity: 100,
            refillRate: 10 // 10 requests per second
        ));

        if (!bucket.TryConsumeToken())
        {
            context.Response.StatusCode = StatusCodes.Status429TooManyRequests;
            context.Response.Headers["Retry-After"] = "60";
            await context.Response.WriteAsync("Rate limit exceeded");
            return;
        }

        await _next(context);
    }
}
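Wiring RateLimitMiddleware into the pipeline is one line in Program.cs. This is a sketch of a minimal host; the important detail is ordering it after authentication so the user-ID claim is populated before the limit check:

```csharp
var builder = WebApplication.CreateBuilder(args);
builder.Services.AddControllers();

var app = builder.Build();

app.UseAuthentication();                   // populate context.User first
app.UseMiddleware<RateLimitMiddleware>();  // then enforce per-user limits
app.UseAuthorization();
app.MapControllers();

app.Run();
```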
Header-Based Rate Limits
public class RateLimitResponseMiddleware
{
    private readonly RequestDelegate _next;

    public RateLimitResponseMiddleware(RequestDelegate next)
    {
        _next = next;
    }

    public async Task InvokeAsync(HttpContext context, IRateLimitService rateLimitService)
    {
        var limit = await rateLimitService.GetLimitAsync(context);

        // Set headers before invoking the rest of the pipeline, so they are in place
        // before the response body starts streaming
        context.Response.Headers["X-RateLimit-Limit"] = limit.Limit.ToString();
        context.Response.Headers["X-RateLimit-Remaining"] = limit.Remaining.ToString();
        context.Response.Headers["X-RateLimit-Reset"] =
            new DateTimeOffset(limit.ResetTime).ToUnixTimeSeconds().ToString();

        await _next(context);
    }
}
Redis-Based Distributed Rate Limiting
public class RedisRateLimitService
{
    private readonly IConnectionMultiplexer _redis;

    public RedisRateLimitService(IConnectionMultiplexer redis)
    {
        _redis = redis;
    }

    // Fixed-window counter: INCR the per-user key, set the TTL on the first increment
    public async Task<bool> IsAllowedAsync(string userId, int limit, int windowSeconds)
    {
        var db = _redis.GetDatabase();
        var key = $"rate-limit:{userId}";
        var current = await db.StringIncrementAsync(key);
        if (current == 1)
        {
            await db.KeyExpireAsync(key, TimeSpan.FromSeconds(windowSeconds));
        }
        return current <= limit;
    }
}
// Usage
var isAllowed = await _rateLimitService.IsAllowedAsync(
    userId: "user-123",
    limit: 100,
    windowSeconds: 60
);

if (!isAllowed)
{
    return StatusCode(429);
}
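One caveat with the INCR-then-EXPIRE pattern: if the process crashes between the two calls, the key never expires and that user stays throttled forever. A sketch of an atomic variant using a Lua script with StackExchange.Redis (the class name is illustrative; the key format and method shape mirror the service above):

```csharp
using System;
using System.Threading.Tasks;
using StackExchange.Redis;

public class AtomicRedisRateLimitService
{
    // INCR and EXPIRE run atomically inside Redis, closing the crash window
    private const string Script = @"
        local current = redis.call('INCR', KEYS[1])
        if current == 1 then
            redis.call('EXPIRE', KEYS[1], ARGV[2])
        end
        return current";

    private readonly IConnectionMultiplexer _redis;

    public AtomicRedisRateLimitService(IConnectionMultiplexer redis) => _redis = redis;

    public async Task<bool> IsAllowedAsync(string userId, int limit, int windowSeconds)
    {
        var db = _redis.GetDatabase();
        var current = (long)await db.ScriptEvaluateAsync(
            Script,
            new RedisKey[] { $"rate-limit:{userId}" },
            new RedisValue[] { limit, windowSeconds });
        return current <= limit;
    }
}
```

This needs a live Redis instance to run, so treat it as a shape to adapt rather than a drop-in.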
Endpoint-Specific Limits
[ApiController]
[Route("api/[controller]")]
public class OrdersController : ControllerBase
{
    [HttpGet]
    [RateLimit(requests: 100, windowSeconds: 60)]
    public async Task<IActionResult> GetOrders()
    {
        return Ok(await _service.GetOrdersAsync());
    }

    [HttpPost]
    [RateLimit(requests: 10, windowSeconds: 60)] // Stricter limit for writes
    public async Task<IActionResult> CreateOrder(CreateOrderRequest request)
    {
        var order = await _service.CreateAsync(request);
        return CreatedAtAction(nameof(GetOrders), new { id = order.Id }, order);
    }
}
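Note that [RateLimit] is not a built-in ASP.NET Core attribute. One possible implementation is an action filter that reuses the TokenBucket class from earlier (not repeated here); the keying scheme and names below are illustrative:

```csharp
using System;
using System.Collections.Concurrent;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.Filters;

[AttributeUsage(AttributeTargets.Method)]
public class RateLimitAttribute : ActionFilterAttribute
{
    // One bucket per (endpoint path, client) pair, shared across requests
    private static readonly ConcurrentDictionary<string, TokenBucket> Buckets = new();

    private readonly int _requests;
    private readonly int _windowSeconds;

    public RateLimitAttribute(int requests, int windowSeconds)
    {
        _requests = requests;
        _windowSeconds = windowSeconds;
    }

    public override void OnActionExecuting(ActionExecutingContext context)
    {
        var client = context.HttpContext.Connection.RemoteIpAddress?.ToString() ?? "anonymous";
        var key = $"{context.HttpContext.Request.Path}:{client}";

        var bucket = Buckets.GetOrAdd(key, _ => new TokenBucket(
            capacity: _requests,
            refillRate: (double)_requests / _windowSeconds));

        if (!bucket.TryConsumeToken())
        {
            // Short-circuit the action with 429 Too Many Requests
            context.Result = new StatusCodeResult(429);
        }
    }
}
```

A static dictionary keeps buckets alive across requests; for a distributed deployment, back this with the Redis service instead.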
Best Practices
- User-based limits: Limit by user ID
- IP-based limits: Fallback for anonymous users
- Tiered limits: Different limits for different plans
- Clear headers: Inform clients of limits
- Graceful degradation: Queue or delay requests instead of rejecting them outright, where latency budgets allow
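Tiered limits from the list above can be as simple as a plan-to-bucket-parameters lookup; the plan names and numbers here are illustrative:

```csharp
using System;
using System.Collections.Generic;

// Token-bucket parameters per subscription plan (illustrative values)
var planLimits = new Dictionary<string, (int Capacity, double RefillPerSecond)>
{
    ["free"]       = (60, 1.0),     // 60-request burst, ~60 req/min sustained
    ["pro"]        = (600, 10.0),
    ["enterprise"] = (6000, 100.0),
};

string userPlan = "pro"; // would come from the authenticated user's subscription
var (capacity, refill) = planLimits.TryGetValue(userPlan, out var limits)
    ? limits
    : planLimits["free"]; // unknown plans fall back to the most restrictive tier

Console.WriteLine($"plan={userPlan}: capacity={capacity}, refill={refill} tokens/s");
// prints "plan=pro: capacity=600, refill=10 tokens/s"
```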
Related Concepts
- Throttling strategies
- Quota management
- Load balancing
- API gateway patterns
Summary
Rate limiting protects APIs from abuse and ensures fair resource allocation. Use a token bucket (or similar) algorithm, track limits per user, and return clear rate-limit headers so clients know when to back off.