Isaac.

Rate Limiting and Throttling

Protect APIs with rate limiting and throttling strategies.

By EME · Published: February 20, 2025
Tags: rate limiting, throttling, API protection, security

A Simple Analogy

Rate limiting is like a doorman controlling capacity. Too many people trying to enter? Doorman makes them wait in line, preventing overcrowding.


Why Rate Limiting?

  • Protection: Prevent abuse and DDoS attacks
  • Fairness: Ensure equal access for all users
  • Cost control: Limit expensive operations
  • Quality: Maintain performance under load
  • SLA compliance: Honor service levels

The Token Bucket Algorithm

/// <summary>
/// Token-bucket rate limiter: tokens refill continuously at a fixed rate,
/// and each request consumes one or more tokens. Burst capacity equals one
/// second's worth of tokens. Thread-safe.
/// </summary>
public class RateLimiter
{
    private readonly double _tokensPerSecond;
    private readonly double _maxTokens;
    // Guards _tokens/_lastRefill: IsAllowed is check-then-decrement, which
    // races without mutual exclusion when called from concurrent requests.
    private readonly object _gate = new object();
    private double _tokens;
    private DateTime _lastRefill;

    /// <param name="tokensPerSecond">
    /// Sustained rate; also used as the bucket capacity (max burst).
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="tokensPerSecond"/> is not positive.
    /// </exception>
    public RateLimiter(double tokensPerSecond)
    {
        if (tokensPerSecond <= 0 || double.IsNaN(tokensPerSecond))
        {
            throw new ArgumentOutOfRangeException(
                nameof(tokensPerSecond), "Rate must be a positive number.");
        }

        _tokensPerSecond = tokensPerSecond;
        _maxTokens = tokensPerSecond;  // burst capacity = 1 second of tokens
        _tokens = _maxTokens;          // start full so initial burst is allowed
        _lastRefill = DateTime.UtcNow;
    }

    /// <summary>
    /// Returns true and consumes <paramref name="tokens"/> tokens if enough
    /// are available; otherwise returns false and consumes nothing.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="tokens"/> is not positive.
    /// </exception>
    public bool IsAllowed(int tokens = 1)
    {
        if (tokens <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(tokens), "Token cost must be positive.");
        }

        lock (_gate)
        {
            Refill();

            if (_tokens >= tokens)
            {
                _tokens -= tokens;
                return true;
            }

            return false;
        }
    }

    // Adds tokens proportional to elapsed time, capped at bucket capacity.
    // Must be called while holding _gate.
    private void Refill()
    {
        var now = DateTime.UtcNow;
        var timePassed = (now - _lastRefill).TotalSeconds;
        var tokensToAdd = timePassed * _tokensPerSecond;

        _tokens = Math.Min(_maxTokens, _tokens + tokensToAdd);
        _lastRefill = now;
    }
}

// Usage: each limiter instance permits up to 10 requests per second.
var rateLimiter = new RateLimiter(10);

if (!rateLimiter.IsAllowed())
{
    // Over the limit: respond with HTTP 429 Too Many Requests
}
else
{
    // Within the limit: handle the request normally
}

ASP.NET Core Middleware

builder.Services.AddStackExchangeRedisCache(options =>
{
    options.Configuration = "localhost:6379";
});

var app = builder.Build();

// Fixed-window rate limiting middleware: 100 requests per client IP per minute.
// NOTE(review): the GET + SET pair below is not atomic, so concurrent requests
// can slightly overshoot the limit; for a strict guarantee use an atomic Redis
// INCR (StackExchange.Redis directly, or a Lua script).
app.Use(async (context, next) =>
{
    var cache = context.RequestServices.GetRequiredService<IDistributedCache>();
    // Keyed by client IP; "unknown" lumps together clients with no address.
    var clientId = context.Connection.RemoteIpAddress?.ToString() ?? "unknown";

    // Minute-scoped key: each calendar minute gets its own counter. The
    // original used a single key and reset its TTL on every write, which
    // kept the window open indefinitely for an active client ("100 requests
    // per minute of inactivity" instead of per minute).
    var key = $"rate-limit:{clientId}:{DateTime.UtcNow:yyyyMMddHHmm}";

    var count = await cache.GetStringAsync(key);
    var currentCount = string.IsNullOrEmpty(count) ? 0 : int.Parse(count);

    if (currentCount >= 100)  // 100 requests per minute
    {
        context.Response.StatusCode = StatusCodes.Status429TooManyRequests;
        // Indexer assignment overwrites an existing value; Headers.Add throws
        // ArgumentException if the header is already present.
        context.Response.Headers["Retry-After"] = "60";
        await context.Response.WriteAsync("Rate limit exceeded");
        return;
    }

    await cache.SetStringAsync(
        key,
        (currentCount + 1).ToString(),
        new DistributedCacheEntryOptions
        {
            // With a minute-scoped key the TTL is just garbage collection;
            // 2 minutes comfortably outlives the window it counts.
            AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(2)
        });

    await next();
});

User-Based Rate Limiting

// Tiered fixed-window rate limiting: anonymous clients share a small budget,
// authenticated users each get a larger per-user budget.
app.Use(async (context, next) =>
{
    var user = context.User?.FindFirst(ClaimTypes.NameIdentifier)?.Value;

    if (string.IsNullOrEmpty(user))
    {
        // Anonymous: 10 req/min.
        // NOTE(review): this key is shared by ALL anonymous clients; include
        // the client IP in the key if per-client fairness matters.
        var limit = 10;
        var windowKey = $"rate-limit:anon:{DateTime.UtcNow:yyyyMMddHHmm}";
        await CheckLimit(context, windowKey, limit);
    }
    else
    {
        // Authenticated: 1000 req/min, scoped per user id.
        var limit = 1000;
        var windowKey = $"rate-limit:user:{user}:{DateTime.UtcNow:yyyyMMddHHmm}";
        await CheckLimit(context, windowKey, limit);
    }

    // Bug fix: the original called next() unconditionally, so the rest of
    // the pipeline ran even after CheckLimit had written a 429 response.
    // Short-circuit once the limit has been exceeded.
    if (context.Response.StatusCode == StatusCodes.Status429TooManyRequests)
    {
        return;
    }

    await next();
});

Quota-Based Limiting

/// <summary>
/// Tracks per-user daily usage quotas (e.g. "100 email sends per day") in a
/// distributed cache so the count is shared across servers.
/// NOTE(review): HasQuotaAsync + ConsumeQuotaAsync is check-then-act and can
/// race under concurrency; an atomic increment-and-compare (Redis INCR) would
/// enforce the quota strictly.
/// </summary>
public class QuotaService
{
    private readonly IDistributedCache _cache;

    // The original never assigned _cache, leaving it null at runtime.
    public QuotaService(IDistributedCache cache)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
    }

    // Day-scoped key: a fresh key is used each UTC day, so the quota resets
    // at midnight UTC. The original used one key and refreshed its 1-day TTL
    // on every consume, so an active user's quota never reset.
    private static string BuildKey(string userId, string quotaName) =>
        $"quota:{userId}:{quotaName}:{DateTime.UtcNow:yyyyMMdd}";

    /// <summary>Returns true if the user still has quota remaining today.</summary>
    public async Task<bool> HasQuotaAsync(string userId, string quotaName)
    {
        var key = BuildKey(userId, quotaName);
        var used = await _cache.GetStringAsync(key);
        var usedCount = string.IsNullOrEmpty(used) ? 0 : int.Parse(used);

        var quota = GetQuotaLimit(quotaName);  // e.g., 100 email sends/day

        return usedCount < quota;
    }

    /// <summary>Records one unit of usage against today's quota.</summary>
    public async Task ConsumeQuotaAsync(string userId, string quotaName)
    {
        var key = BuildKey(userId, quotaName);
        var current = await _cache.GetStringAsync(key) ?? "0";
        var newCount = int.Parse(current) + 1;

        await _cache.SetStringAsync(
            key,
            newCount.ToString(),
            new DistributedCacheEntryOptions
            {
                // With a day-scoped key the TTL only garbage-collects stale
                // keys; the reset itself comes from the key rolling over.
                AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1)
            });
    }
}

// Usage
public class EmailController : ControllerBase
{
    [HttpPost("send")]
    public async Task<IActionResult> SendEmail(SendEmailRequest request)
    {
        var userId = User.FindFirst(ClaimTypes.NameIdentifier)?.Value;
        
        if (!await _quotaService.HasQuotaAsync(userId, "email-sends"))
        {
            return StatusCode(429, "Email quota exceeded. Try again tomorrow.");
        }
        
        await _emailService.SendAsync(request.To, request.Subject, request.Body);
        await _quotaService.ConsumeQuotaAsync(userId, "email-sends");
        
        return Ok();
    }
}

Best Practices

  1. Return 429 status: Standard for rate limiting
  2. Include Retry-After header: Tell clients when to retry
  3. Distribute limiting: Use Redis for multiple servers
  4. Different limits by tier: Premium vs free users
  5. Monitor violations: Alert on abuse patterns

Related Concepts

  • API gateway throttling
  • Leaky bucket algorithm
  • Sliding window logs
  • Circuit breakers

Summary

Rate limiting protects APIs from abuse and overload. Implement token bucket algorithms with Redis for distributed rate limiting across multiple servers.