Bio Hi-C Analysis: Hi-C Visualization

by GPTomics

tool

Visualize Hi-C contact matrices, TADs, loops, and genomic features using matplotlib, cooltools, and HiCExplorer. Create triangle plots, virtual 4C, and multi-track figures. Use when visualizing contact matrices or genomic features.

Skill Details

Repository Files

3 files in this skill directory


---
name: bio-hi-c-analysis-hic-visualization
description: Visualize Hi-C contact matrices, TADs, loops, and genomic features using matplotlib, cooltools, and HiCExplorer. Create triangle plots, virtual 4C, and multi-track figures. Use when visualizing contact matrices or genomic features.
tool_type: python
primary_tool: cooltools
---

Hi-C Visualization

Visualize Hi-C contact matrices and genomic features.

Required Imports

import cooler
import cooltools
import cooltools.lib.plotting
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import bioframe

Basic Contact Matrix Plot

# Open the 'resolutions/10000' group of the multi-resolution cooler file.
clr = cooler.Cooler('matrix.mcool::resolutions/10000')

# Balanced contacts for the window of interest on chr1.
region_selector = clr.matrix(balance=True)
matrix = region_selector.fetch('chr1:50000000-60000000')

# Square heatmap drawn on a logarithmic colour scale.
log_scale = LogNorm(vmin=0.001, vmax=0.1)
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=log_scale)
ax.set_title('chr1:50-60Mb')
plt.colorbar(im, ax=ax, label='Balanced contacts')
plt.savefig('contact_matrix.png', dpi=150)

Triangle (Upper Triangle) Plot

def plot_triangle(matrix, ax, cmap='Reds', vmin=None, vmax=None):
    '''Plot a Hi-C matrix as a triangle (rotated 45 degrees).

    The upper triangle of ``matrix`` is re-gridded so that the row index is
    the genomic separation ``j - i`` and the column index is ``i + j``
    (half-bin units).  Only separations below ``n // 2`` are drawn.

    Parameters
    ----------
    matrix : array-like, shape (n, n)
        Square contact matrix.
    ax : matplotlib Axes
        Axes to draw on.
    cmap : str
        Colormap name passed to ``imshow``.
    vmin, vmax : float or None
        Colour limits.  A positive ``vmin`` selects a logarithmic scale
        (``LogNorm(vmin, vmax)``); otherwise the limits are applied on a
        linear scale instead of being silently discarded.

    Returns
    -------
    The image object returned by ``ax.imshow``.
    '''
    matrix = np.asarray(matrix)
    n = matrix.shape[0]

    # NaN background so cells outside the triangle are left blank on both
    # linear and log scales (a 0 background is painted on a linear scale).
    rotated = np.full((n, 2 * n), np.nan)

    # Vectorised replacement for the nested i/j loop over the upper triangle.
    rows, cols = np.triu_indices(n)
    separation = cols - rows
    position = rows + cols
    values = matrix[rows, cols]

    # Each contact covers two half-bin columns.  Writing only `position`
    # leaves every other pixel empty (a checkerboard), because `i + j`
    # always has the same parity as `j - i`.
    rotated[separation, position] = values
    rotated[separation, position + 1] = values

    # LogNorm needs a positive lower bound; in every other case pass the
    # limits straight to imshow so that vmax (or vmin=0) is still honoured.
    if vmin is not None and vmin > 0:
        scale = {'norm': LogNorm(vmin=vmin, vmax=vmax)}
    else:
        scale = {'vmin': vmin, 'vmax': vmax}

    im = ax.imshow(rotated[:n // 2, :], cmap=cmap, aspect='auto', **scale)
    ax.set_ylim(n // 2, 0)
    return im

# Draw the chr1 window as a triangle on a wide, short canvas.
balanced = clr.matrix(balance=True)
matrix = balanced.fetch('chr1:50000000-60000000')

colour_limits = dict(vmin=0.001, vmax=0.1)
fig, ax = plt.subplots(figsize=(12, 4))
im = plot_triangle(matrix, ax, **colour_limits)
plt.colorbar(im, ax=ax)
plt.savefig('triangle_plot.png', dpi=150)

Plot with TADs

import pandas as pd

# Window shown in the heatmap and the bin width used to convert bp -> pixels.
region_start = 50000000
region_end = 60000000
bin_size = clr.binsize

matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
tads = pd.read_csv('tads.bed', sep='\t', names=['chrom', 'start', 'end'])

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))

# Select chr1 TADs whose start lies inside the window, then draw one
# horizontal and one vertical line per TAD start.
on_chr1 = tads['chrom'] == 'chr1'
in_window = (tads['start'] >= region_start) & (tads['start'] < region_end)
line_style = dict(color='blue', linewidth=0.5, alpha=0.5)
for boundary in tads.loc[on_chr1 & in_window, 'start']:
    pos = (boundary - region_start) / bin_size
    ax.axhline(pos, **line_style)
    ax.axvline(pos, **line_style)

plt.colorbar(im, ax=ax)
plt.savefig('matrix_with_tads.png', dpi=150)

Plot with Loops

# Window shown in the heatmap and the bin width used to convert bp -> pixels.
region_start = 50000000
region_end = 60000000
bin_size = clr.binsize

matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
loops = pd.read_csv('loops.bedpe', sep='\t')

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))

# Circle every chr1 loop whose two anchor starts both fall inside the window.
chr1_loops = loops[loops['chrom1'] == 'chr1']
for anchor1, anchor2 in zip(chr1_loops['start1'], chr1_loops['start2']):
    if not (region_start <= anchor1 < region_end):
        continue
    if not (region_start <= anchor2 < region_end):
        continue
    row = (anchor1 - region_start) / bin_size
    col = (anchor2 - region_start) / bin_size
    # imshow puts columns on the x axis, so the circle centre is (col, row).
    marker = plt.Circle((col, row), 3, fill=False, color='blue', linewidth=1)
    ax.add_patch(marker)

plt.colorbar(im, ax=ax)
plt.savefig('matrix_with_loops.png', dpi=150)

Compare Two Matrices

clr1 = cooler.Cooler('sample1.mcool::resolutions/10000')
clr2 = cooler.Cooler('sample2.mcool::resolutions/10000')

# Same window from both samples.
region = 'chr1:50000000-60000000'
mat1 = clr1.matrix(balance=True).fetch(region)
mat2 = clr2.matrix(balance=True).fetch(region)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# First two panels: one log-scaled heatmap per sample.
sample_panels = zip(axes[:2], (mat1, mat2), ('Sample 1', 'Sample 2'))
for panel, contacts, label in sample_panels:
    image = panel.imshow(contacts, cmap='Reds',
                         norm=LogNorm(vmin=0.001, vmax=0.1))
    panel.set_title(label)
    plt.colorbar(image, ax=panel)

# Third panel: log2 fold change, with infinite values replaced by NaN.
ratio = mat2 / mat1
log2fc = np.log2(ratio)
log2fc[np.isinf(log2fc)] = np.nan
diff_panel = axes[2]
im3 = diff_panel.imshow(log2fc, cmap='coolwarm', vmin=-2, vmax=2)
diff_panel.set_title('Log2(Sample2/Sample1)')
plt.colorbar(im3, ax=diff_panel)

plt.tight_layout()
plt.savefig('comparison.png', dpi=150)

Split View (Upper/Lower Triangle)

mat1 = clr1.matrix(balance=True).fetch(region)
mat2 = clr2.matrix(balance=True).fetch(region)

# Diagonal and above come from sample 1; strictly below comes from sample 2.
upper_half = np.triu(mat1)
lower_half = np.tril(mat2, k=-1)
combined = upper_half + lower_half

fig, ax = plt.subplots(figsize=(8, 8))
log_scale = LogNorm(vmin=0.001, vmax=0.1)
im = ax.imshow(combined, cmap='Reds', norm=log_scale)
# Thin line through (0, 0) with slope 1 to separate the two halves.
ax.axline((0, 0), slope=1, color='black', linewidth=0.5)
ax.set_title('Sample1 (upper) vs Sample2 (lower)')
plt.colorbar(im, ax=ax)
plt.savefig('split_view.png', dpi=150)

Virtual 4C

def virtual_4c(clr, viewpoint_chrom, viewpoint_pos, resolution=None):
    '''Extract a virtual 4C profile (one matrix row) from a Hi-C cooler.

    Parameters
    ----------
    clr : cooler.Cooler
        Contact matrix to read from.
    viewpoint_chrom : str
        Chromosome holding the viewpoint; the profile spans this chromosome.
    viewpoint_pos : int
        Viewpoint position in bp.
    resolution : int, optional
        Bin size in bp.  Defaults to ``clr.binsize``.  An explicit value must
        agree with the cooler, otherwise the viewpoint would map to the wrong
        bin.

    Returns
    -------
    coords : numpy.ndarray
        Start coordinate of every bin on ``viewpoint_chrom``.
    v4c : numpy.ndarray
        Balanced contacts between the viewpoint bin and each of those bins.

    Raises
    ------
    ValueError
        If ``resolution`` is given and differs from ``clr.binsize``.
    IndexError
        If ``viewpoint_pos`` does not fall inside ``viewpoint_chrom``.
    '''
    bin_size = clr.binsize
    if resolution is not None:
        if bin_size is not None and resolution != bin_size:
            raise ValueError(
                f'resolution={resolution} does not match the cooler bin size '
                f'({bin_size})'
            )
        bin_size = resolution

    # Genome-wide bin ids [lo, hi) covered by the viewpoint chromosome.
    lo, hi = clr.extent(viewpoint_chrom)
    viewpoint_bin = lo + viewpoint_pos // bin_size
    if not lo <= viewpoint_bin < hi:
        # Reject positions that would wrap around or spill onto a neighbour.
        raise IndexError(
            f'viewpoint {viewpoint_chrom}:{viewpoint_pos} is outside the '
            'chromosome'
        )

    # Read only the viewpoint row; fetching the full chromosome would build
    # a dense n x n array just to keep one line of it.
    row = clr.matrix(balance=True)[viewpoint_bin:viewpoint_bin + 1, lo:hi]
    v4c = row[0]

    # Create coordinates
    bins = clr.bins().fetch(viewpoint_chrom)
    coords = bins['start'].values

    return coords, v4c

# Profile of contacts made by the chr1:55 Mb viewpoint.
coords, v4c = virtual_4c(clr, 'chr1', 55000000)

fig, ax = plt.subplots(figsize=(12, 3))
coords_mb = coords / 1e6
ax.fill_between(coords_mb, 0, v4c, alpha=0.5)
# Dashed marker at the viewpoint itself (x axis is in Mb).
ax.axvline(55, color='red', linestyle='--', label='Viewpoint')
ax.set(
    xlabel='Position (Mb)',
    ylabel='Contact frequency',
    title='Virtual 4C from chr1:55Mb',
)
ax.legend()
plt.savefig('virtual_4c.png', dpi=150)

Multi-Track Figure

fig = plt.figure(figsize=(12, 10))

# Track 1: Hi-C triangle in the tall top panel.
ax1 = fig.add_axes([0.1, 0.5, 0.8, 0.4])
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
plot_triangle(matrix, ax1, vmin=0.001, vmax=0.1)
ax1.set_ylabel('Hi-C')

# Track 2: insulation score for bins fully inside chr1:50-60 Mb.
ax2 = fig.add_axes([0.1, 0.35, 0.8, 0.1])
insulation = pd.read_csv('insulation.bedgraph', sep='\t',
                         names=['chrom', 'start', 'end', 'score'])
on_chr1 = insulation['chrom'] == 'chr1'
inside_window = ((insulation['start'] >= 50000000) &
                 (insulation['end'] <= 60000000))
ins_region = insulation[on_chr1 & inside_window]
ax2.plot(ins_region['start'] / 1e6, ins_region['score'])
ax2.set(ylabel='Insulation', xlim=(50, 60))

# Track 3: empty placeholder axes for a gene track.
ax3 = fig.add_axes([0.1, 0.2, 0.8, 0.1])
ax3.set(ylabel='Genes', xlim=(50, 60))

# Track 4: empty placeholder axes for CTCF ChIP-seq; carries the x label.
ax4 = fig.add_axes([0.1, 0.05, 0.8, 0.1])
ax4.set(xlabel='Position (Mb)', ylabel='CTCF', xlim=(50, 60))

plt.savefig('multi_track.png', dpi=150)

Using HiCExplorer Visualization

# Plot matrix with HiCExplorer
# Shell commands (not Python). Reads matrix.cool, restricts the plot to
# chr1:50-60 Mb, and passes --log1p with the Reds colour map; the image is
# named hic_plot.png.
hicPlotMatrix \
    -m matrix.cool \
    --region chr1:50000000-60000000 \
    --log1p \
    --colorMap Reds \
    -o hic_plot.png

# Plot with TADs
# The track layout comes from tracks.ini, which is not shown here and must
# exist beforehand. NOTE(review): presumably it lists the matrix and the TAD
# file to draw -- confirm against the HiCExplorer documentation.
hicPlotTADs \
    --tracks tracks.ini \
    --region chr1:50000000-60000000 \
    -o tad_plot.png

Cooltools Pileup Plot

import cooltools

# Pileup at features (e.g., loop anchors)
# `view_df` and `expected` are not created in this snippet; both must be
# defined before this call.
# cooltools.pileup takes the feature table as `features_df` (passed
# positionally here) and the expected table as `expected_df`; the keywords
# `features=` / `expected=` are not accepted.
pileup = cooltools.pileup(
    clr,
    loops[['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']],
    view_df=view_df,
    expected_df=expected,
    flank=100000,
)

# Average pileup
# NOTE(review): this averages over the last axis, i.e. it assumes the
# snippet index is axis 2. Depending on the installed cooltools version the
# snippets may be stacked along axis 0 instead -- check `pileup.shape` and
# switch to `axis=0` if the first dimension equals the number of loops.
avg_pileup = np.nanmean(pileup, axis=2)

fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(avg_pileup, cmap='Reds')
ax.set_title('Average pileup at loops')
plt.colorbar(im, ax=ax)
plt.savefig('pileup.png', dpi=150)

Related Skills

  • hic-data-io - Load contact matrices
  • tad-detection - Generate TADs to visualize
  • loop-calling - Generate loops to visualize
  • compartment-analysis - Visualize compartments

Related Skills

Dbt Transformation Patterns

Master dbt (data build tool) for analytics engineering with model organization, testing, documentation, and incremental strategies. Use when building data transformations, creating data models, or implementing analytics engineering best practices.

testing, document, tool

Anndata

This skill should be used when working with annotated data matrices in Python, particularly for single-cell genomics analysis, managing experimental measurements with metadata, or handling large-scale biological datasets. Use when tasks involve AnnData objects, h5ad files, single-cell RNA-seq data, or integration with scanpy/scverse tools.

art, tool, data

Xlsx

Spreadsheet toolkit (.xlsx/.csv). Create/edit with formulas/formatting, analyze data, visualization, recalculate formulas, for spreadsheet processing and analysis.

tool, data

Tensorboard

Visualize training metrics, debug models with histograms, compare experiments, visualize model graphs, and profile performance with TensorBoard - Google's ML visualization toolkit

tool

Deeptools

NGS analysis toolkit. BAM to bigWig conversion, QC (correlation, PCA, fingerprints), heatmaps/profiles (TSS, peaks), for ChIP-seq, RNA-seq, ATAC-seq visualization.

tool

Scvi Tools

This skill should be used when working with single-cell omics data analysis using scvi-tools, including scRNA-seq, scATAC-seq, CITE-seq, spatial transcriptomics, and other single-cell modalities. Use this skill for probabilistic modeling, batch correction, dimensionality reduction, differential expression, cell type annotation, multimodal integration, and spatial analysis tasks.

tool, data

Statsmodels

Statistical modeling toolkit. OLS, GLM, logistic, ARIMA, time series, hypothesis tests, diagnostics, AIC/BIC, for rigorous statistical inference and econometric analysis.

tool

Scikit Survival

Comprehensive toolkit for survival analysis and time-to-event modeling in Python using scikit-survival. Use this skill when working with censored survival data, performing time-to-event analysis, fitting Cox models, Random Survival Forests, Gradient Boosting models, or Survival SVMs, evaluating survival predictions with concordance index or Brier score, handling competing risks, or implementing any survival analysis workflow with the scikit-survival library.

workflow, tool, data

Neurokit2

Comprehensive biosignal processing toolkit for analyzing physiological data including ECG, EEG, EDA, RSP, PPG, EMG, and EOG signals. Use this skill when processing cardiovascular signals, brain activity, electrodermal responses, respiratory patterns, muscle activity, or eye movements. Applicable for heart rate variability analysis, event-related potentials, complexity measures, autonomic nervous system assessment, psychophysiology research, and multi-modal physiological signal integration.

art, tool, data

Statistical Analysis

Statistical analysis toolkit. Hypothesis tests (t-test, ANOVA, chi-square), regression, correlation, Bayesian stats, power analysis, assumption checks, APA reporting, for academic research.

tool

Skill Information

Category: Technical
Last Updated: 1/24/2026