Bio Hi C Analysis Hic Visualization
by GPTomics
Visualize Hi-C contact matrices, TADs, loops, and genomic features using matplotlib, cooltools, and HiCExplorer. Create triangle plots, virtual 4C, and multi-track figures. Use when visualizing contact matrices or genomic features.
Skill Details
Repository Files
3 files in this skill directory
name: bio-hi-c-analysis-hic-visualization description: Visualize Hi-C contact matrices, TADs, loops, and genomic features using matplotlib, cooltools, and HiCExplorer. Create triangle plots, virtual 4C, and multi-track figures. Use when visualizing contact matrices or genomic features. tool_type: python primary_tool: cooltools
Hi-C Visualization
Visualize Hi-C contact matrices and genomic features.
Required Imports
import cooler
import cooltools
import cooltools.lib.plotting
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import bioframe
Basic Contact Matrix Plot
clr = cooler.Cooler('matrix.mcool::resolutions/10000')
# Get matrix for a region
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
plt.colorbar(im, ax=ax, label='Balanced contacts')
ax.set_title('chr1:50-60Mb')
plt.savefig('contact_matrix.png', dpi=150)
Triangle (Upper Triangle) Plot
def plot_triangle(matrix, ax, cmap='Reds', vmin=None, vmax=None):
'''Plot Hi-C matrix as triangle (rotated 45 degrees)'''
n = matrix.shape[0]
# Create rotated matrix
rotated = np.zeros((n, 2*n))
for i in range(n):
for j in range(i, n):
y = j - i
x = i + j
rotated[y, x] = matrix[i, j]
# Plot
im = ax.imshow(rotated[:n//2, :], cmap=cmap, aspect='auto',
norm=LogNorm(vmin=vmin, vmax=vmax) if vmin else None)
ax.set_ylim(n//2, 0)
return im
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
fig, ax = plt.subplots(figsize=(12, 4))
im = plot_triangle(matrix, ax, vmin=0.001, vmax=0.1)
plt.colorbar(im, ax=ax)
plt.savefig('triangle_plot.png', dpi=150)
Plot with TADs
import pandas as pd
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
tads = pd.read_csv('tads.bed', sep='\t', names=['chrom', 'start', 'end'])
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
# Overlay TAD boundaries
region_start = 50000000
bin_size = clr.binsize
for _, tad in tads[tads['chrom'] == 'chr1'].iterrows():
if region_start <= tad['start'] < 60000000:
pos = (tad['start'] - region_start) / bin_size
ax.axhline(pos, color='blue', linewidth=0.5, alpha=0.5)
ax.axvline(pos, color='blue', linewidth=0.5, alpha=0.5)
plt.colorbar(im, ax=ax)
plt.savefig('matrix_with_tads.png', dpi=150)
Plot with Loops
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
loops = pd.read_csv('loops.bedpe', sep='\t')
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(matrix, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
# Mark loops
region_start = 50000000
bin_size = clr.binsize
for _, loop in loops[loops['chrom1'] == 'chr1'].iterrows():
if (region_start <= loop['start1'] < 60000000 and
region_start <= loop['start2'] < 60000000):
x = (loop['start1'] - region_start) / bin_size
y = (loop['start2'] - region_start) / bin_size
circle = plt.Circle((y, x), 3, fill=False, color='blue', linewidth=1)
ax.add_patch(circle)
plt.colorbar(im, ax=ax)
plt.savefig('matrix_with_loops.png', dpi=150)
Compare Two Matrices
clr1 = cooler.Cooler('sample1.mcool::resolutions/10000')
clr2 = cooler.Cooler('sample2.mcool::resolutions/10000')
region = 'chr1:50000000-60000000'
mat1 = clr1.matrix(balance=True).fetch(region)
mat2 = clr2.matrix(balance=True).fetch(region)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# Sample 1
im1 = axes[0].imshow(mat1, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
axes[0].set_title('Sample 1')
plt.colorbar(im1, ax=axes[0])
# Sample 2
im2 = axes[1].imshow(mat2, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
axes[1].set_title('Sample 2')
plt.colorbar(im2, ax=axes[1])
# Log2 fold change
log2fc = np.log2(mat2 / mat1)
log2fc[np.isinf(log2fc)] = np.nan
im3 = axes[2].imshow(log2fc, cmap='coolwarm', vmin=-2, vmax=2)
axes[2].set_title('Log2(Sample2/Sample1)')
plt.colorbar(im3, ax=axes[2])
plt.tight_layout()
plt.savefig('comparison.png', dpi=150)
Split View (Upper/Lower Triangle)
mat1 = clr1.matrix(balance=True).fetch(region)
mat2 = clr2.matrix(balance=True).fetch(region)
# Combine: upper triangle from mat1, lower from mat2
combined = np.triu(mat1) + np.tril(mat2, k=-1)
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(combined, cmap='Reds', norm=LogNorm(vmin=0.001, vmax=0.1))
ax.axline((0, 0), slope=1, color='black', linewidth=0.5)
ax.set_title('Sample1 (upper) vs Sample2 (lower)')
plt.colorbar(im, ax=ax)
plt.savefig('split_view.png', dpi=150)
Virtual 4C
def virtual_4c(clr, viewpoint_chrom, viewpoint_pos, resolution=10000):
'''Extract virtual 4C from Hi-C'''
# Get row of matrix at viewpoint
viewpoint_bin = viewpoint_pos // resolution
# Get contacts from this bin to all others on same chromosome
matrix = clr.matrix(balance=True).fetch(viewpoint_chrom)
v4c = matrix[viewpoint_bin, :]
# Create coordinates
bins = clr.bins().fetch(viewpoint_chrom)
coords = bins['start'].values
return coords, v4c
coords, v4c = virtual_4c(clr, 'chr1', 55000000)
fig, ax = plt.subplots(figsize=(12, 3))
ax.fill_between(coords / 1e6, 0, v4c, alpha=0.5)
ax.axvline(55, color='red', linestyle='--', label='Viewpoint')
ax.set_xlabel('Position (Mb)')
ax.set_ylabel('Contact frequency')
ax.set_title('Virtual 4C from chr1:55Mb')
ax.legend()
plt.savefig('virtual_4c.png', dpi=150)
Multi-Track Figure
fig = plt.figure(figsize=(12, 10))
# Hi-C matrix (triangle)
ax1 = fig.add_axes([0.1, 0.5, 0.8, 0.4])
matrix = clr.matrix(balance=True).fetch('chr1:50000000-60000000')
plot_triangle(matrix, ax1, vmin=0.001, vmax=0.1)
ax1.set_ylabel('Hi-C')
# Insulation score
ax2 = fig.add_axes([0.1, 0.35, 0.8, 0.1])
insulation = pd.read_csv('insulation.bedgraph', sep='\t',
names=['chrom', 'start', 'end', 'score'])
ins_region = insulation[(insulation['chrom'] == 'chr1') &
(insulation['start'] >= 50000000) &
(insulation['end'] <= 60000000)]
ax2.plot(ins_region['start'] / 1e6, ins_region['score'])
ax2.set_ylabel('Insulation')
ax2.set_xlim(50, 60)
# Gene track (placeholder)
ax3 = fig.add_axes([0.1, 0.2, 0.8, 0.1])
ax3.set_ylabel('Genes')
ax3.set_xlim(50, 60)
# CTCF ChIP-seq (placeholder)
ax4 = fig.add_axes([0.1, 0.05, 0.8, 0.1])
ax4.set_xlabel('Position (Mb)')
ax4.set_ylabel('CTCF')
ax4.set_xlim(50, 60)
plt.savefig('multi_track.png', dpi=150)
Using HiCExplorer Visualization
# Plot matrix with HiCExplorer
hicPlotMatrix \
-m matrix.cool \
--region chr1:50000000-60000000 \
--log1p \
--colorMap Reds \
-o hic_plot.png
# Plot with TADs
hicPlotTADs \
--tracks tracks.ini \
--region chr1:50000000-60000000 \
-o tad_plot.png
Cooltools Pileup Plot
import cooltools
# Pileup at features (e.g., loop anchors)
pileup = cooltools.pileup(
clr,
features=loops[['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']],
view_df=view_df,
expected=expected,
flank=100000,
)
# Average pileup
avg_pileup = np.nanmean(pileup, axis=2)
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(avg_pileup, cmap='Reds')
ax.set_title('Average pileup at loops')
plt.colorbar(im, ax=ax)
plt.savefig('pileup.png', dpi=150)
Related Skills
- hic-data-io - Load contact matrices
- tad-detection - Generate TADs to visualize
- loop-calling - Generate loops to visualize
- compartment-analysis - Visualize compartments
Related Skills
Dbt Transformation Patterns
Master dbt (data build tool) for analytics engineering with model organization, testing, documentation, and incremental strategies. Use when building data transformations, creating data models, or implementing analytics engineering best practices.
Anndata
This skill should be used when working with annotated data matrices in Python, particularly for single-cell genomics analysis, managing experimental measurements with metadata, or handling large-scale biological datasets. Use when tasks involve AnnData objects, h5ad files, single-cell RNA-seq data, or integration with scanpy/scverse tools.
Xlsx
Spreadsheet toolkit (.xlsx/.csv). Create/edit with formulas/formatting, analyze data, visualization, recalculate formulas, for spreadsheet processing and analysis.
Tensorboard
Visualize training metrics, debug models with histograms, compare experiments, visualize model graphs, and profile performance with TensorBoard - Google's ML visualization toolkit
Deeptools
NGS analysis toolkit. BAM to bigWig conversion, QC (correlation, PCA, fingerprints), heatmaps/profiles (TSS, peaks), for ChIP-seq, RNA-seq, ATAC-seq visualization.
Scvi Tools
This skill should be used when working with single-cell omics data analysis using scvi-tools, including scRNA-seq, scATAC-seq, CITE-seq, spatial transcriptomics, and other single-cell modalities. Use this skill for probabilistic modeling, batch correction, dimensionality reduction, differential expression, cell type annotation, multimodal integration, and spatial analysis tasks.
Statsmodels
Statistical modeling toolkit. OLS, GLM, logistic, ARIMA, time series, hypothesis tests, diagnostics, AIC/BIC, for rigorous statistical inference and econometric analysis.
Scikit Survival
Comprehensive toolkit for survival analysis and time-to-event modeling in Python using scikit-survival. Use this skill when working with censored survival data, performing time-to-event analysis, fitting Cox models, Random Survival Forests, Gradient Boosting models, or Survival SVMs, evaluating survival predictions with concordance index or Brier score, handling competing risks, or implementing any survival analysis workflow with the scikit-survival library.
Neurokit2
Comprehensive biosignal processing toolkit for analyzing physiological data including ECG, EEG, EDA, RSP, PPG, EMG, and EOG signals. Use this skill when processing cardiovascular signals, brain activity, electrodermal responses, respiratory patterns, muscle activity, or eye movements. Applicable for heart rate variability analysis, event-related potentials, complexity measures, autonomic nervous system assessment, psychophysiology research, and multi-modal physiological signal integration.
Statistical Analysis
Statistical analysis toolkit. Hypothesis tests (t-test, ANOVA, chi-square), regression, correlation, Bayesian stats, power analysis, assumption checks, APA reporting, for academic research.
