Time Series Forecasting with SARIMAX: Predicting Climate Trends

Published on December 20, 2025

Climate data exhibits complex patterns: long-term trends, seasonal cycles, and irregular fluctuations. SARIMAX (Seasonal AutoRegressive Integrated Moving Average with eXogenous factors) is a powerful framework for modeling such data. In this post, I'll walk through forecasting Delhi's temperature trends using 20+ years of historical data.

Understanding SARIMAX Components

SARIMAX extends ARIMA with seasonal terms and external variables:

Data Preparation

import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

# Load Berkeley Earth climate data
df = pd.read_csv('delhi_temperature.csv', parse_dates=['date'])
df.set_index('date', inplace=True)

# Resample to monthly averages
monthly = df['temperature'].resample('M').mean()

# Handle missing values
monthly = monthly.interpolate(method='time')

print(f"Data range: {monthly.index[0]} to {monthly.index[-1]}")
print(f"Total observations: {len(monthly)}")

Seasonal Decomposition

# Decompose into trend, seasonal, and residual
decomposition = seasonal_decompose(monthly, model='additive', period=12)

fig, axes = plt.subplots(4, 1, figsize=(12, 10))
decomposition.observed.plot(ax=axes[0], title='Observed')
decomposition.trend.plot(ax=axes[1], title='Trend')
decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
decomposition.resid.plot(ax=axes[3], title='Residual')
plt.tight_layout()

Stationarity Testing

from statsmodels.tsa.stattools import adfuller, kpss

def test_stationarity(series):
    """Perform ADF and KPSS tests for stationarity."""
    # ADF Test
    adf_result = adfuller(series.dropna())
    print(f"ADF Statistic: {adf_result[0]:.4f}")
    print(f"p-value: {adf_result[1]:.4f}")
    
    # KPSS Test
    kpss_result = kpss(series.dropna(), regression='c')
    print(f"KPSS Statistic: {kpss_result[0]:.4f}")
    print(f"p-value: {kpss_result[1]:.4f}")
    
    return adf_result[1] < 0.05  # Returns True if stationary

# Test original series
print("Original Series:")
is_stationary = test_stationarity(monthly)

# Test differenced series if needed
if not is_stationary:
    print("\nFirst Difference:")
    test_stationarity(monthly.diff().dropna())

Model Selection with Auto-ARIMA

from pmdarima import auto_arima

# Automatic parameter selection
auto_model = auto_arima(
    monthly,
    seasonal=True,
    m=12,  # Monthly seasonality
    d=None,  # Auto-detect differencing
    D=None,  # Auto-detect seasonal differencing
    max_p=3, max_q=3,
    max_P=2, max_Q=2,
    information_criterion='aic',
    trace=True,
    error_action='ignore',
    suppress_warnings=True
)

print(f"\nBest model: SARIMAX{auto_model.order}x{auto_model.seasonal_order}")
print(f"AIC: {auto_model.aic():.2f}")

Training the SARIMAX Model

# Split data: 80% train, 20% test
train_size = int(len(monthly) * 0.8)
train, test = monthly[:train_size], monthly[train_size:]

# Fit SARIMAX model
model = SARIMAX(
    train,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False
)

results = model.fit(disp=False)
print(results.summary())

Model Diagnostics

# Diagnostic plots
fig = results.plot_diagnostics(figsize=(12, 8))
plt.tight_layout()

# Check residual autocorrelation
from statsmodels.stats.diagnostic import acorr_ljungbox
lb_test = acorr_ljungbox(results.resid, lags=[12, 24, 36])
print("Ljung-Box Test:")
print(lb_test)

Forecasting and Evaluation

# Generate forecasts
forecast = results.get_forecast(steps=len(test))
forecast_mean = forecast.predicted_mean
forecast_ci = forecast.conf_int()

# Calculate metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(test, forecast_mean)
rmse = np.sqrt(mean_squared_error(test, forecast_mean))
mape = np.mean(np.abs((test - forecast_mean) / test)) * 100

print(f"MAE: {mae:.3f}°C")
print(f"RMSE: {rmse:.3f}°C")
print(f"MAPE: {mape:.2f}%")

Visualization

plt.figure(figsize=(14, 6))
plt.plot(train.index, train, label='Training Data', color='blue')
plt.plot(test.index, test, label='Actual', color='green')
plt.plot(test.index, forecast_mean, label='Forecast', color='red')
plt.fill_between(
    test.index,
    forecast_ci.iloc[:, 0],
    forecast_ci.iloc[:, 1],
    color='red', alpha=0.2, label='95% CI'
)
plt.title('Delhi Temperature Forecast')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.legend()
plt.grid(True, alpha=0.3)

Long-Term Climate Trend Analysis

Our analysis revealed a warming trend of approximately 0.02°C per year over the past two decades—consistent with global climate change observations.

Conclusion

SARIMAX provides a robust framework for climate forecasting, capturing both seasonal patterns and long-term trends. While deep learning approaches like LSTMs offer alternatives, SARIMAX remains interpretable and effective for many time series applications.