Dhis2 Validation
by BLSQ
Run data quality checks and validation rules in DHIS2. Use for validation rules, violations, or data quality analysis. Routed via dhis2 skill for general DHIS2 requests.
Skill Details
Repository Files
1 file in this skill directory
---
name: dhis2-validation
description: Run data quality checks and validation rules in DHIS2. Use for validation rules, violations, or data quality analysis. Routed via dhis2 skill for general DHIS2 requests.
---
DHIS2 Validation
Run data quality checks and validation rules in DHIS2.
Prerequisites: Complete the client setup from the dhis2 skill first. All examples below assume a client object named `dhis` is already initialized.
Overview
The validation endpoints provide:
- Validation Rules: Define data quality checks (e.g., "A < B", "Total = Sum of parts")
- Validation Analysis: Run rules against data
- Validation Results: Historical violations stored in the system
- Data Quality Reports: Aggregate validation status
Get Validation Rules
def get_validation_rules(dhis, fields: str = None) -> list:
    """Fetch every validation rule from the server in a single unpaged request.

    Args:
        dhis: Client object exposing ``api.get(endpoint, params=...)``.
        fields: Field filter sent to the API. When falsy, a default filter
            covering id, name, description, importance, operator and both
            expression sides is used.

    Returns:
        The list stored under ``"validationRules"`` in the response, or an
        empty list when that key is absent.
    """
    if not fields:
        fields = (
            "id,name,description,importance,operator,"
            "leftSide[expression,description],rightSide[expression,description]"
        )
    query = {"fields": fields, "paging": "false"}
    payload = dhis.api.get("validationRules", params=query)
    return payload.get("validationRules", [])
# Usage: list the total and preview the first five rules
rules = get_validation_rules(dhis)
print(f"Total validation rules: {len(rules)}")
for rule in rules[:5]:
    # Rules without an explicit importance are shown as MEDIUM
    print(
        f"- {rule['name']} ({rule.get('importance', 'MEDIUM')})"
    )
Validation Rule Structure
| Field | Description |
|---|---|
| `name` | Rule display name |
| `description` | What the rule checks |
| `importance` | HIGH, MEDIUM, LOW |
| `operator` | equal_to, not_equal_to, greater_than, less_than, etc. |
| `leftSide` | Left expression (data elements/indicators) |
| `rightSide` | Right expression (value/calculation) |
| `periodType` | Applicable period type |
Get Validation Rule Groups
def get_validation_rule_groups(dhis) -> list:
    """Fetch all validation rule groups together with their member rules.

    Args:
        dhis: Client object exposing ``api.get(endpoint, params=...)``.

    Returns:
        The list stored under ``"validationRuleGroups"`` in the response, or
        an empty list when that key is absent.
    """
    query = {
        "fields": "id,name,description,validationRules[id,name]",
        "paging": "false",
    }
    payload = dhis.api.get("validationRuleGroups", params=query)
    return payload.get("validationRuleGroups", [])
# Usage: print each group with the number of rules it contains
groups = get_validation_rule_groups(dhis)
for group in groups:
    # A group without a "validationRules" key is reported as 0 rules
    print(
        f"{group['name']}: {len(group.get('validationRules', []))} rules"
    )
Run Validation Analysis
def run_validation(
    dhis,
    start_date: str,
    end_date: str,
    org_unit: str,
    validation_rule_group: str = None,
    send_notifications: bool = False
) -> dict:
    """Run a validation analysis and return the server's response.

    Args:
        dhis: Client object exposing ``api.get(endpoint, params=...)``.
        start_date: Sent as the ``startDate`` query parameter.
        end_date: Sent as the ``endDate`` query parameter.
        org_unit: Sent as the ``ou`` query parameter.
        validation_rule_group: Optional group id, sent as ``vrg`` only when
            truthy.
        send_notifications: Sent as ``notification``, lower-cased
            (``"true"`` / ``"false"`` for booleans).

    Returns:
        Whatever ``dhis.api.get`` returns for ``validation/dataAnalysis``,
        unchanged.
    """
    query = dict(
        startDate=start_date,
        endDate=end_date,
        ou=org_unit,
        notification=str(send_notifications).lower(),
    )
    # The rule-group filter is optional; omit the key entirely when unset
    if validation_rule_group:
        query.update(vrg=validation_rule_group)
    return dhis.api.get("validation/dataAnalysis", params=query)
# Run validation for one quarter on a single organisation unit
violations = run_validation(
    dhis,
    start_date="2024-01-01",
    end_date="2024-03-31",
    org_unit="ImspTQPwCqd",
)
print(f"Found {len(violations)} validation violations")
Validate a Dataset
def validate_dataset(
    dhis,
    dataset_id: str,
    period: str,
    org_unit: str
) -> dict:
    """Validate one dataset for a single period and organisation unit.

    Args:
        dhis: Client object exposing ``api.get(endpoint, params=...)``.
        dataset_id: Dataset id, interpolated into the endpoint path.
        period: Sent as the ``pe`` query parameter.
        org_unit: Sent as the ``ou`` query parameter.

    Returns:
        Whatever ``dhis.api.get`` returns for the dataset validation
        endpoint, unchanged.
    """
    endpoint = f"validation/dataSet/{dataset_id}"
    query = {"pe": period, "ou": org_unit}
    return dhis.api.get(endpoint, params=query)
# Usage
result = validate_dataset(
    dhis,
    dataset_id="BfMAe6Itzgt",
    period="202401",
    org_unit="ImspTQPwCqd",
)
# Report each violated rule; a missing or empty list prints nothing
for v in result.get("validationRuleViolations") or []:
    print(f"Rule: {v.get('validationRule', {}).get('name')}")
    print(
        f" Left: {v.get('leftsideValue')} {v.get('operator')} Right: {v.get('rightsideValue')}"
    )
Get Historical Validation Results
def get_validation_results(
    dhis,
    org_unit: str = None,
    start_date: str = None,
    end_date: str = None,
    validation_rule: str = None
) -> list:
    """Fetch stored validation results (violations), optionally filtered.

    Args:
        dhis: Client object exposing ``api.get(endpoint, params=...)``.
        org_unit: Sent as ``ou`` when truthy.
        start_date: Sent as ``startDate`` when truthy.
        end_date: Sent as ``endDate`` when truthy.
        validation_rule: Sent as ``vr`` when truthy.

    Returns:
        The list stored under ``"validationResults"`` in the response, or an
        empty list when that key is absent.
    """
    query = {
        "fields": "validationRule[id,name,importance],organisationUnit[id,name],period[id,name],leftsideValue,rightsideValue,created",
        "paging": "false",
    }
    # Only filters that were actually supplied are added to the query
    optional_filters = (
        ("ou", org_unit),
        ("startDate", start_date),
        ("endDate", end_date),
        ("vr", validation_rule),
    )
    for key, value in optional_filters:
        if value:
            query[key] = value
    payload = dhis.api.get("validationResults", params=query)
    return payload.get("validationResults", [])
# Get stored violations for one organisation unit over the first half of 2024
results = get_validation_results(
    dhis,
    org_unit="ImspTQPwCqd",
    start_date="2024-01-01",
    end_date="2024-06-30",
)
Convert Violations to DataFrame
import pandas as pd
def violations_to_df(violations: list) -> pd.DataFrame:
    """Convert validation violations to a flat DataFrame.

    Each violation is expected to be a dict with optional nested
    ``validationRule``, ``organisationUnit`` and ``period`` entries plus
    ``leftsideValue``, ``rightsideValue`` and ``created``. Missing entries
    become nulls in the output.

    Args:
        violations: List of violation dicts.

    Returns:
        DataFrame with the columns rule_id, rule_name, importance,
        org_unit_id, org_unit_name, period, left_value, right_value, created.
        The columns are present even when ``violations`` is empty, so
        callers can group or filter on them without checking for emptiness.
    """
    columns = [
        "rule_id",
        "rule_name",
        "importance",
        "org_unit_id",
        "org_unit_name",
        "period",
        "left_value",
        "right_value",
        "created",
    ]
    rows = []
    for v in violations:
        # `or {}` also covers keys that are present but null
        rule = v.get("validationRule") or {}
        org_unit = v.get("organisationUnit") or {}
        # Accept either a nested period object or a plain period id string
        period = v.get("period")
        period_id = period.get("id") if isinstance(period, dict) else period
        rows.append({
            "rule_id": rule.get("id"),
            "rule_name": rule.get("name"),
            "importance": rule.get("importance"),
            "org_unit_id": org_unit.get("id"),
            "org_unit_name": org_unit.get("name"),
            "period": period_id,
            "left_value": v.get("leftsideValue"),
            "right_value": v.get("rightsideValue"),
            "created": v.get("created"),
        })
    # Explicit columns keep the schema stable for an empty input
    return pd.DataFrame(rows, columns=columns)
# Usage: flatten the stored results and summarise them
df = violations_to_df(results)
print(f"Total violations: {len(df)}")

# Number of violations per importance level
importance_counts = df.groupby("importance").size()
print(f"\nViolations by importance:")
print(importance_counts)

# Ten rules with the most violations, most frequent first
top_rules = df.groupby("rule_name").size().sort_values(ascending=False).head(10)
print(f"\nViolations by rule:")
print(top_rules)
Validation Summary Report
def generate_validation_summary(dhis, org_unit: str, periods: list) -> dict:
    """Generate a validation summary for multiple periods.

    Runs ``run_validation`` once per period, tags every returned violation
    with the period it was requested for, and aggregates the results.
    A failure for one period is printed and that period is skipped.

    NOTE(review): each period value is passed as both ``start_date`` and
    ``end_date``; confirm the endpoint accepts the period format used.

    Args:
        dhis: Client object forwarded to ``run_validation``.
        org_unit: Organisation unit id forwarded to ``run_validation``.
        periods: Period values to analyse, one request each.

    Returns:
        Dict with ``total_violations`` (int), ``by_importance``,
        ``by_period`` and ``by_rule`` (count dicts, empty when there are no
        violations) and ``details`` (the DataFrame of all violations).
    """
    all_violations = []
    for period in periods:
        try:
            violations = run_validation(
                dhis,
                start_date=period,
                end_date=period,
                org_unit=org_unit
            )
            for v in violations:
                # Tag with the requested period as a nested object: the
                # DataFrame conversion reads period["id"], so a bare string
                # here would fail. Copy rather than mutate the response.
                all_violations.append(dict(v, period={"id": period}))
        except Exception as e:
            # Best-effort: report the failing period and keep going
            print(f"Error for period {period}: {e}")

    df = violations_to_df(all_violations) if all_violations else pd.DataFrame()
    has_rows = len(df) > 0

    def _counts(column: str) -> dict:
        # An empty frame has no columns to group on
        return df.groupby(column).size().to_dict() if has_rows else {}

    return {
        "total_violations": len(df),
        "by_importance": _counts("importance"),
        "by_period": _counts("period"),
        "by_rule": _counts("rule_name"),
        "details": df
    }
Check Specific Validation Rule
def get_rule_violations(dhis, rule_id: str, org_unit: str, periods: list) -> pd.DataFrame:
    """Collect stored violations of one validation rule across several periods.

    Issues one ``get_validation_results`` request per period, using the
    period value as both start and end date, and concatenates the results
    in period order.

    Args:
        dhis: Client object forwarded to ``get_validation_results``.
        rule_id: Validation rule id to filter on.
        org_unit: Organisation unit id to filter on.
        periods: Period values to query, one request each.

    Returns:
        DataFrame produced by ``violations_to_df`` from all collected results.
    """
    collected = [
        violation
        for period in periods
        for violation in get_validation_results(
            dhis,
            org_unit=org_unit,
            validation_rule=rule_id,
            start_date=period,
            end_date=period,
        )
    ]
    return violations_to_df(collected)
# Usage: track one rule over the first three months of 2024
rule_violations = get_rule_violations(
    dhis,
    rule_id="kKXOVY4qPyK",
    org_unit="ImspTQPwCqd",
    periods=["202401", "202402", "202403"],
)
Validation Operators
| Operator | Description | Example |
|---|---|---|
| `equal_to` | Left = Right | Total = Sum of parts |
| `not_equal_to` | Left ≠ Right | A ≠ B |
| `greater_than` | Left > Right | Stock > 0 |
| `greater_than_or_equal_to` | Left ≥ Right | Births ≥ Live births |
| `less_than` | Left < Right | Deaths < Population |
| `less_than_or_equal_to` | Left ≤ Right | Attended ≤ Registered |
| `compulsory_pair` | If one exists, both must | ANC1 requires ANC visit date |
| `exclusive_pair` | Only one can exist | Male XOR Female |
Data Quality Dashboard Data
def get_data_quality_metrics(dhis, org_unit: str, period: str) -> dict:
    """Get data quality metrics for dashboard display.

    Fetches all validation rules, runs validation for ``period`` (used as
    both start and end date) on ``org_unit``, and summarises the outcome.

    Args:
        dhis: Client object forwarded to the validation helpers.
        org_unit: Organisation unit id to validate.
        period: Value passed as both ``start_date`` and ``end_date``.

    Returns:
        Dict with ``total_rules``, ``rules_violated`` (distinct rule ids
        with at least one violation), ``total_violations``, per-importance
        counts (``high_importance``, ``medium_importance``,
        ``low_importance``) and ``compliance_rate``: the percentage of rules
        without violations, or 100 when no rules are defined.
    """
    # Get all validation rules
    rules = get_validation_rules(dhis)
    # Run validation
    violations = run_validation(
        dhis,
        start_date=period,
        end_date=period,
        org_unit=org_unit
    )
    df = violations_to_df(violations) if violations else pd.DataFrame()
    has_rows = len(df) > 0

    # Compute once and reuse: an empty frame may have no "rule_id" column,
    # so every access to it must sit behind the emptiness guard.
    rules_violated = df["rule_id"].nunique() if has_rows else 0

    def _count_importance(level: str) -> int:
        return len(df[df["importance"] == level]) if has_rows else 0

    return {
        "total_rules": len(rules),
        "rules_violated": rules_violated,
        "total_violations": len(df),
        "high_importance": _count_importance("HIGH"),
        "medium_importance": _count_importance("MEDIUM"),
        "low_importance": _count_importance("LOW"),
        "compliance_rate": (len(rules) - rules_violated) / len(rules) * 100 if rules else 100
    }
Use Cases
| Scenario | Function |
|---|---|
| List all rules | get_validation_rules() |
| Run validation check | run_validation() |
| Validate specific dataset | validate_dataset() |
| Get historical violations | get_validation_results() |
| Track specific rule | get_rule_violations() |
| Data quality dashboard | get_data_quality_metrics() |
Related Skills
Xlsx
Comprehensive spreadsheet creation, editing, and analysis with support for formulas, formatting, data analysis, and visualization. When Claude needs to work with spreadsheets (.xlsx, .xlsm, .csv, .tsv, etc) for: (1) Creating new spreadsheets with formulas and formatting, (2) Reading or analyzing data, (3) Modify existing spreadsheets while preserving formulas, (4) Data analysis and visualization in spreadsheets, or (5) Recalculating formulas
Clickhouse Io
ClickHouse database patterns, query optimization, analytics, and data engineering best practices for high-performance analytical workloads.
Clickhouse Io
ClickHouse database patterns, query optimization, analytics, and data engineering best practices for high-performance analytical workloads.
Analyzing Financial Statements
This skill calculates key financial ratios and metrics from financial statement data for investment analysis
Data Storytelling
Transform data into compelling narratives using visualization, context, and persuasive structure. Use when presenting analytics to stakeholders, creating data reports, or building executive presentations.
Kpi Dashboard Design
Design effective KPI dashboards with metrics selection, visualization best practices, and real-time monitoring patterns. Use when building business dashboards, selecting metrics, or designing data visualization layouts.
Dbt Transformation Patterns
Master dbt (data build tool) for analytics engineering with model organization, testing, documentation, and incremental strategies. Use when building data transformations, creating data models, or implementing analytics engineering best practices.
Sql Optimization Patterns
Master SQL query optimization, indexing strategies, and EXPLAIN analysis to dramatically improve database performance and eliminate slow queries. Use when debugging slow queries, designing database schemas, or optimizing application performance.
Anndata
This skill should be used when working with annotated data matrices in Python, particularly for single-cell genomics analysis, managing experimental measurements with metadata, or handling large-scale biological datasets. Use when tasks involve AnnData objects, h5ad files, single-cell RNA-seq data, or integration with scanpy/scverse tools.
Xlsx
Spreadsheet toolkit (.xlsx/.csv). Create/edit with formulas/formatting, analyze data, visualization, recalculate formulas, for spreadsheet processing and analysis.
