Econometrics Julia

by hchulkim

data

Julia-based econometric and structural estimation for computationally intensive tasks. Use for structural models, maximum likelihood, GMM, numerical optimization, simulations, and high-performance computing. Covers DataFrames.jl, FixedEffectModels.jl, Optim.jl, and performance optimization.

Skill Details

Repository Files

1 file in this skill directory


name: econometrics-julia description: Julia-based econometric and structural estimation for computationally intensive tasks. Use for structural models, maximum likelihood, GMM, numerical optimization, simulations, and high-performance computing. Covers DataFrames.jl, FixedEffectModels.jl, Optim.jl, and performance optimization.

Julia Econometrics & Structural Estimation

Core Packages

using DataFrames, CSV       # Data handling
using FixedEffectModels     # Panel regressions
using Optim                 # Optimization
using Distributions         # Probability distributions
using LinearAlgebra         # Matrix operations
using ForwardDiff           # Automatic differentiation
using Random                # Seeding (Random.seed!) for reproducible simulation
using Statistics, StatsBase

Data Handling (DataFrames.jl)

# Read data (CSV.read parses straight into a DataFrame)
df = CSV.read("data/raw/file.csv", DataFrame)

# Basic operations — the trailing ! means df is mutated in place
transform!(df, :x => (x -> x .* 100) => :x_scaled)  # source => function => new column
subset!(df, :year => y -> y .>= 2000)               # keep only rows with year >= 2000

# Groupby operations
gdf = groupby(df, :id)
combine(gdf, :y => mean => :mean_y, nrow => :n)     # per-group mean of y and group size

# Efficient joins (leftjoin! adds other_df's columns into df without copying df)
leftjoin!(df, other_df, on = [:id, :year])

Panel Regressions (FixedEffectModels.jl)

# Two-way fixed effects; standard errors clustered by state
est = reg(df, @formula(y ~ treatment + fe(id) + fe(year)), 
          Vcov.cluster(:state))

# IV estimation: (endog ~ instrument) marks the endogenous regressor and its instrument
est_iv = reg(df, @formula(y ~ (endog ~ instrument) + fe(id) + fe(year)))

# Extract results
coef(est)        # Coefficients
vcov(est)        # Variance-covariance matrix
nobs(est)        # Observations
r2(est)          # R-squared

Performance Optimization

Memory Efficiency

# Use views instead of copies (plain slicing with [] allocates a new array)
@views y_subset = df.y[1:100]

# Preallocate the output once instead of growing it inside the loop
result = zeros(n, m)
@inbounds for i in 1:n  # @inbounds skips bounds checks — safe only because i ranges over result's rows
    result[i, :] .= compute_row(i)  # .= writes into the existing row, no allocation
end

# Use StaticArrays for small fixed-size arrays (stack-allocated, no GC pressure)
using StaticArrays
params = SVector{3, Float64}(1.0, 2.0, 3.0)

Broadcasting and Vectorization

# Good: fully fused broadcasting — @. dots every call and the assignment,
# so the whole expression runs in a single pass writing into result
@. result = x^2 + y^2 + z  # Single pass, zero temporaries

# Avoid: non-dotted + breaks fusion and allocates temporaries
result = x.^2 + y.^2 + z  # x.^2, y.^2, and each + allocate a fresh array

Type Stability

# Return-type annotations document intent but do not make code faster;
# for performance, write type-stable bodies and verify with @code_warntype
# Skeleton showing the ::Float64 return annotation: it asserts/converts the
# returned value, documenting that the likelihood is scalar-valued.
# NOTE(review): body is a placeholder — as written, calling this would return
# nothing and fail the Float64 conversion; fill in the implementation.
function likelihood(θ::Vector{Float64}, data::Matrix{Float64})::Float64
    # Implementation
end

# Parametrize structs so every field is concrete for a given T (enables specialization)
# Immutable parametric container: all three parameters share one concrete
# numeric type T, so instances are isbits and store/inline efficiently.
struct ModelParams{T<:Real}
    α::T
    β::T
    σ::T
end

Structural Estimation

Maximum Likelihood

"""
    log_likelihood(θ, data)

Negative Gaussian log-likelihood of a two-regressor linear model.
`θ = (α, β, σ)`; `data` holds `y` in column 1 and the regressors in the
remaining columns. Returns the *negative* log-likelihood so the caller
can minimize it directly.
"""
function log_likelihood(θ, data)
    α, β, σ = θ
    y = data[:, 1]
    X = data[:, 2:end]

    ε = y .- X * [α, β]
    n = length(y)
    # Gaussian log-likelihood: -n/2 · log(2πσ²) − Σε² / (2σ²)
    ll = -0.5 * n * log(2π * σ^2) - sum(abs2, ε) / (2σ^2)
    return -ll
end

# Optimize (minimizes the negative log-likelihood returned above)
θ0 = [0.0, 0.0, 1.0]
# NOTE(review): σ is unconstrained here and enters only through σ², so σ = 0
# is ill-behaved and the sign of σ is unidentified — consider optimizing log σ.
result = optimize(θ -> log_likelihood(θ, data), θ0, LBFGS(),
                  autodiff = :forward)  # gradients via forward-mode AD

θ_hat = Optim.minimizer(result)

Generalized Method of Moments (GMM)

"""
    gmm_moments(θ, data, Z)

Sample analogue of the moment conditions E[Z'ε] = 0 for a two-regressor
linear model. `data` holds `y` in column 1 and regressors after; `Z` is the
N×K instrument matrix. Returns the K-vector of averaged moments.
"""
function gmm_moments(θ, data, Z)
    α, β = θ
    y = data[:, 1]
    X = data[:, 2:end]
    ε = y .- X * [α, β]

    # Average Z'ε over the N observations
    return Z' * ε / size(Z, 1)
end

"""
    gmm_objective(θ, data, Z, W)

GMM criterion: the quadratic form g(θ)' W g(θ) in the stacked moments.
"""
function gmm_objective(θ, data, Z, W)
    g = gmm_moments(θ, data, Z)
    return dot(g, W * g)
end

# Two-step GMM
W1 = I(size(Z, 2))  # First step: identity weighting matrix
θ1 = optimize(θ -> gmm_objective(θ, data, Z, W1), θ0).minimizer

# Second step: efficient weighting = inverse covariance of the
# per-observation moments Z .* ε, evaluated at the first-step estimate
Ω = cov(Z .* (data[:, 1] .- data[:, 2:end] * θ1))
W2 = inv(Ω)
θ2 = optimize(θ -> gmm_objective(θ, data, Z, W2), θ1).minimizer

Simulation-Based Estimation (MSM/Indirect Inference)

"""
    simulate_model(θ, N, S; seed=12345)

Simulate `S` datasets of `N` observations each from the linear model
y = α + β·x + ε with x ~ N(0,1) and ε ~ N(0,σ²), where `θ = (α, β, σ)`.
Reseeds the global RNG with `seed`, so output is reproducible.
Returns an N×S matrix with one simulated dataset per column.
"""
function simulate_model(θ, N, S; seed=12345)
    Random.seed!(seed)
    α, β, σ = θ

    out = Matrix{Float64}(undef, N, S)
    for s in 1:S
        x = randn(N)          # regressor draws
        shock = σ * randn(N)  # scaled error draws
        @. out[:, s] = α + β * x + shock
    end
    return out
end

"""
    msm_objective(θ, data_moments, N, S)

Method-of-simulated-moments criterion: squared distance between the data
moments and moments averaged across `S` simulated datasets of size `N`.
"""
function msm_objective(θ, data_moments, N, S)
    sims = simulate_model(θ, N, S)
    # Average the moment vector over the S simulation replications
    sim_moments = mean(compute_moments(sims[:, s]) for s in 1:S)
    gap = data_moments - sim_moments
    return dot(gap, gap)
end

Numerical Methods

Root Finding (for equilibrium models)

using NLsolve

function excess_demand(p, params)
    # Market clearing: equilibrium prices make excess demand zero
    # NOTE(review): demand/supply are model-specific functions defined elsewhere
    return demand(p, params) - supply(p, params)
end

result = nlsolve(p -> excess_demand(p, params), p0)
p_eq = result.zero  # price vector at which excess demand ≈ 0

Integration (for expected values)

using QuadGK

# One-dimensional: E[X] for X ~ Normal(μ, σ) as ∫ x·f(x) dx
# quadgk returns (value, error-estimate)
expected_value, err = quadgk(x -> x * pdf(Normal(μ, σ), x), -Inf, Inf)

# Multi-dimensional (Monte Carlo or sparse grids)
using HCubature
integral, err = hcubature(f, lower_bounds, upper_bounds)

Parallel Computing

using Distributed
addprocs(4)

# Workers need every package the loop body uses (sample comes from StatsBase)
@everywhere using SharedArrays, StatsBase

# Parallel bootstrap.
# θ_boot must be a SharedArray: worker writes into an ordinary Array live in
# the workers' own memory and would never reach the master process.
θ_boot = SharedVector{Float64}(B)
# @sync is required — a bare @distributed loop returns immediately without
# waiting for the workers to finish.
@sync @distributed for b in 1:B
    bootstrap_sample = sample(1:N, N, replace=true)
    θ_boot[b] = estimate(data[bootstrap_sample, :])
end

# Or use Threads for shared memory (iterations must be independent)
Threads.@threads for i in 1:N
    result[i] = compute(i)
end

Standard Errors

Bootstrap

"""
    bootstrap_se(estimate_fn, data, B=1000)

Nonparametric bootstrap standard error: re-estimate on `B` with-replacement
row resamples of `data` and return the standard deviation of the estimates.
"""
function bootstrap_se(estimate_fn, data, B=1000)
    N = size(data, 1)
    θ_boot = map(1:B) do _
        idx = sample(1:N, N, replace=true)  # resample rows with replacement
        estimate_fn(data[idx, :])
    end
    return std(θ_boot)
end

Delta Method

using ForwardDiff

"""
    delta_method_se(g, θ, Σ)

Delta-method standard error of the scalar transformation `g(θ)`:
sqrt(∇g(θ)' Σ ∇g(θ)), with the gradient taken by forward-mode AD.
`Σ` is the variance-covariance matrix of the estimator of θ.
"""
function delta_method_se(g, θ, Σ)
    grad = ForwardDiff.gradient(g, θ)
    return sqrt(dot(grad, Σ * grad))
end

Best Practices

  • Use @time and @btime (from BenchmarkTools) to profile
  • Check type stability with @code_warntype
  • Use const for global constants
  • Prefer column-major iteration (Julia is column-major)

Related Skills

Xlsx

Comprehensive spreadsheet creation, editing, and analysis with support for formulas, formatting, data analysis, and visualization. When Claude needs to work with spreadsheets (.xlsx, .xlsm, .csv, .tsv, etc) for: (1) Creating new spreadsheets with formulas and formatting, (2) Reading or analyzing data, (3) Modify existing spreadsheets while preserving formulas, (4) Data analysis and visualization in spreadsheets, or (5) Recalculating formulas

data

Clickhouse Io

ClickHouse database patterns, query optimization, analytics, and data engineering best practices for high-performance analytical workloads.

datacli

Clickhouse Io

ClickHouse database patterns, query optimization, analytics, and data engineering best practices for high-performance analytical workloads.

datacli

Analyzing Financial Statements

This skill calculates key financial ratios and metrics from financial statement data for investment analysis

data

Data Storytelling

Transform data into compelling narratives using visualization, context, and persuasive structure. Use when presenting analytics to stakeholders, creating data reports, or building executive presentations.

data

Kpi Dashboard Design

Design effective KPI dashboards with metrics selection, visualization best practices, and real-time monitoring patterns. Use when building business dashboards, selecting metrics, or designing data visualization layouts.

designdata

Dbt Transformation Patterns

Master dbt (data build tool) for analytics engineering with model organization, testing, documentation, and incremental strategies. Use when building data transformations, creating data models, or implementing analytics engineering best practices.

testingdocumenttool

Sql Optimization Patterns

Master SQL query optimization, indexing strategies, and EXPLAIN analysis to dramatically improve database performance and eliminate slow queries. Use when debugging slow queries, designing database schemas, or optimizing application performance.

designdata

Anndata

This skill should be used when working with annotated data matrices in Python, particularly for single-cell genomics analysis, managing experimental measurements with metadata, or handling large-scale biological datasets. Use when tasks involve AnnData objects, h5ad files, single-cell RNA-seq data, or integration with scanpy/scverse tools.

arttooldata

Xlsx

Spreadsheet toolkit (.xlsx/.csv). Create/edit with formulas/formatting, analyze data, visualization, recalculate formulas, for spreadsheet processing and analysis.

tooldata

Skill Information

Category: Data
Last Updated: 1/29/2026