Isaac.

Rate Limiting and Throttling

Protect APIs with rate limiting and throttling strategies.

By EME · Published: February 20, 2025
Tags: rate limiting, throttling, API protection, security

A Simple Analogy

Rate limiting is like a doorman controlling capacity. Too many people trying to enter? Doorman makes them wait in line, preventing overcrowding.


Why Rate Limiting?

  • Protection: Prevent abuse and DDoS attacks
  • Fairness: Ensure equal access for all users
  • Cost control: Limit expensive operations
  • Quality: Maintain performance under load
  • SLA compliance: Honor service levels

The Token Bucket Algorithm

/// <summary>
/// Token-bucket rate limiter: tokens refill continuously at a fixed rate,
/// and each request consumes one or more tokens. Burst capacity equals one
/// second's worth of tokens. Thread-safe.
/// </summary>
public class RateLimiter
{
    private readonly double _tokensPerSecond;
    private readonly double _maxTokens;
    // Guards _tokens/_lastRefill: IsAllowed is check-then-decrement, which
    // races without mutual exclusion when called from concurrent requests.
    private readonly object _gate = new object();
    private double _tokens;
    private DateTime _lastRefill;

    /// <param name="tokensPerSecond">
    /// Sustained rate; also used as the bucket capacity (max burst).
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="tokensPerSecond"/> is not positive.
    /// </exception>
    public RateLimiter(double tokensPerSecond)
    {
        if (tokensPerSecond <= 0 || double.IsNaN(tokensPerSecond))
        {
            throw new ArgumentOutOfRangeException(
                nameof(tokensPerSecond), "Rate must be a positive number.");
        }

        _tokensPerSecond = tokensPerSecond;
        _maxTokens = tokensPerSecond;  // burst capacity = 1 second of tokens
        _tokens = _maxTokens;          // start full so initial burst is allowed
        _lastRefill = DateTime.UtcNow;
    }

    /// <summary>
    /// Returns true and consumes <paramref name="tokens"/> tokens if enough
    /// are available; otherwise returns false and consumes nothing.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="tokens"/> is not positive.
    /// </exception>
    public bool IsAllowed(int tokens = 1)
    {
        if (tokens <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(tokens), "Token cost must be positive.");
        }

        lock (_gate)
        {
            Refill();

            if (_tokens >= tokens)
            {
                _tokens -= tokens;
                return true;
            }

            return false;
        }
    }

    // Adds tokens proportional to elapsed time, capped at bucket capacity.
    // Must be called while holding _gate.
    private void Refill()
    {
        var now = DateTime.UtcNow;
        var timePassed = (now - _lastRefill).TotalSeconds;
        var tokensToAdd = timePassed * _tokensPerSecond;

        _tokens = Math.Min(_maxTokens, _tokens + tokensToAdd);
        _lastRefill = now;
    }
}

// Usage: each limiter instance permits up to 10 requests per second.
var rateLimiter = new RateLimiter(10);

if (!rateLimiter.IsAllowed())
{
    // Over the limit: respond with HTTP 429 Too Many Requests
}
else
{
    // Within the limit: handle the request normally
}

ASP.NET Core Middleware

builder.Services.AddStackExchangeRedisCache(options =>
{
    options.Configuration = "localhost:6379";
});

var app = builder.Build();

// Fixed-window rate limiting middleware: 100 requests per client IP per minute.
// NOTE(review): the GET + SET pair below is not atomic, so concurrent requests
// can slightly overshoot the limit; for a strict guarantee use an atomic Redis
// INCR (StackExchange.Redis directly, or a Lua script).
app.Use(async (context, next) =>
{
    var cache = context.RequestServices.GetRequiredService<IDistributedCache>();
    // Keyed by client IP; "unknown" lumps together clients with no address.
    var clientId = context.Connection.RemoteIpAddress?.ToString() ?? "unknown";

    // Minute-scoped key: each calendar minute gets its own counter. The
    // original used a single key and reset its TTL on every write, which
    // kept the window open indefinitely for an active client ("100 requests
    // per minute of inactivity" instead of per minute).
    var key = $"rate-limit:{clientId}:{DateTime.UtcNow:yyyyMMddHHmm}";

    var count = await cache.GetStringAsync(key);
    var currentCount = string.IsNullOrEmpty(count) ? 0 : int.Parse(count);

    if (currentCount >= 100)  // 100 requests per minute
    {
        context.Response.StatusCode = StatusCodes.Status429TooManyRequests;
        // Indexer assignment overwrites an existing value; Headers.Add throws
        // ArgumentException if the header is already present.
        context.Response.Headers["Retry-After"] = "60";
        await context.Response.WriteAsync("Rate limit exceeded");
        return;
    }

    await cache.SetStringAsync(
        key,
        (currentCount + 1).ToString(),
        new DistributedCacheEntryOptions
        {
            // With a minute-scoped key the TTL is just garbage collection;
            // 2 minutes comfortably outlives the window it counts.
            AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(2)
        });

    await next();
});

User-Based Rate Limiting

// Tiered fixed-window rate limiting: anonymous clients share a small budget,
// authenticated users each get a larger per-user budget.
app.Use(async (context, next) =>
{
    var user = context.User?.FindFirst(ClaimTypes.NameIdentifier)?.Value;

    if (string.IsNullOrEmpty(user))
    {
        // Anonymous: 10 req/min.
        // NOTE(review): this key is shared by ALL anonymous clients; include
        // the client IP in the key if per-client fairness matters.
        var limit = 10;
        var windowKey = $"rate-limit:anon:{DateTime.UtcNow:yyyyMMddHHmm}";
        await CheckLimit(context, windowKey, limit);
    }
    else
    {
        // Authenticated: 1000 req/min, scoped per user id.
        var limit = 1000;
        var windowKey = $"rate-limit:user:{user}:{DateTime.UtcNow:yyyyMMddHHmm}";
        await CheckLimit(context, windowKey, limit);
    }

    // Bug fix: the original called next() unconditionally, so the rest of
    // the pipeline ran even after CheckLimit had written a 429 response.
    // Short-circuit once the limit has been exceeded.
    if (context.Response.StatusCode == StatusCodes.Status429TooManyRequests)
    {
        return;
    }

    await next();
});

Quota-Based Limiting

/// <summary>
/// Tracks per-user daily usage quotas (e.g. "100 email sends per day") in a
/// distributed cache so the count is shared across servers.
/// NOTE(review): HasQuotaAsync + ConsumeQuotaAsync is check-then-act and can
/// race under concurrency; an atomic increment-and-compare (Redis INCR) would
/// enforce the quota strictly.
/// </summary>
public class QuotaService
{
    private readonly IDistributedCache _cache;

    // The original never assigned _cache, leaving it null at runtime.
    public QuotaService(IDistributedCache cache)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
    }

    // Day-scoped key: a fresh key is used each UTC day, so the quota resets
    // at midnight UTC. The original used one key and refreshed its 1-day TTL
    // on every consume, so an active user's quota never reset.
    private static string BuildKey(string userId, string quotaName) =>
        $"quota:{userId}:{quotaName}:{DateTime.UtcNow:yyyyMMdd}";

    /// <summary>Returns true if the user still has quota remaining today.</summary>
    public async Task<bool> HasQuotaAsync(string userId, string quotaName)
    {
        var key = BuildKey(userId, quotaName);
        var used = await _cache.GetStringAsync(key);
        var usedCount = string.IsNullOrEmpty(used) ? 0 : int.Parse(used);

        var quota = GetQuotaLimit(quotaName);  // e.g., 100 email sends/day

        return usedCount < quota;
    }

    /// <summary>Records one unit of usage against today's quota.</summary>
    public async Task ConsumeQuotaAsync(string userId, string quotaName)
    {
        var key = BuildKey(userId, quotaName);
        var current = await _cache.GetStringAsync(key) ?? "0";
        var newCount = int.Parse(current) + 1;

        await _cache.SetStringAsync(
            key,
            newCount.ToString(),
            new DistributedCacheEntryOptions
            {
                // With a day-scoped key the TTL only garbage-collects stale
                // keys; the reset itself comes from the key rolling over.
                AbsoluteExpirationRelativeToNow = TimeSpan.FromDays(1)
            });
    }
}

// Usage
public class EmailController : ControllerBase
{
    [HttpPost("send")]
    public async Task<IActionResult> SendEmail(SendEmailRequest request)
    {
        var userId = User.FindFirst(ClaimTypes.NameIdentifier)?.Value;
        
        if (!await _quotaService.HasQuotaAsync(userId, "email-sends"))
        {
            return StatusCode(429, "Email quota exceeded. Try again tomorrow.");
        }
        
        await _emailService.SendAsync(request.To, request.Subject, request.Body);
        await _quotaService.ConsumeQuotaAsync(userId, "email-sends");
        
        return Ok();
    }
}

Best Practices

  1. Return 429 status: Standard for rate limiting
  2. Include Retry-After header: Tell clients when to retry
  3. Distribute limiting: Use Redis for multiple servers
  4. Different limits by tier: Premium vs free users
  5. Monitor violations: Alert on abuse patterns

Related Concepts

  • API gateway throttling
  • Leaky bucket algorithm
  • Sliding window logs
  • Circuit breakers

Summary

Rate limiting protects APIs from abuse and overload. Implement token bucket algorithms with Redis for distributed rate limiting across multiple servers.