Skip to main content

CMS 3M Methodology

The output table shows how CMS 3M reassigns severity levels of SDX codes. The rest of the page describes the data pipeline used to generate this table.

Output

Table: tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_summary

As an example, $69235.4 is the median charge across encounters where A000 was the only CC or MCC sdx. A000 is currently a CC but would be reassigned to MCC. For all 3 categories (no CC, CC, MCC), the expected median charge is lower than the actual median charge.

dx_codecurrent_severitynew_severityp50_no_ccp50_ccp50_mccexpected_p50_no_ccexpected_p50_ccexpected_p50_mcc
A000CCMCC69235.47763481619.121248.735188.356256.3
A0100CCCC36456.737214.942849.521248.735188.356256.3
A020CCCC39493.141498.650046.921248.735188.356256.3
A021MCCMCC66352.269475.59036721248.735188.356256.3
A0229CCMCC79437.210045916409121248.735188.356256.3

Pipeline

  1. Scrape and load CC and MCC codes from CMS website into cms_sdx_reassign_icd10_cc_codes and cms_sdx_reassign_icd10_mcc_codes tables.

  2. Create kh_inpatient_sdx, kh_inpatient_sdx_drg, and kh_inpatient_sdx_drg_dxposition tables with the following logic:

    • For each inpatient encounter in Komodo with Medicare or Commercial payer channel:
      • Get the assigned DRG from the IPPS Grouper DRG table.
      • Get the list of diagnosis codes (dx2 to dx26).
      • For each diagnosis code, check if it is in the CC or MCC reassignment lists.
      • Assign severity as 'MCC', 'CC', or 'No CC/MCC' based on the reassignment lists.
      • Aggregate:
        • for kh_inpatient_sdx: aggregate the data by payer channel, diagnosis code, and severity
        • for kh_inpatient_sdx_drg: aggregate the data by payer channel, DRG, diagnosis code, and severity
        • for kh_inpatient_sdx_drg_dxposition: aggregate the data by payer channel, DRG, diagnosis code, severity, and diagnosis position
      • Calculate encounter counts and claim charge amount percentiles.
SQL

# Define Jinja template for the SQL query
sql_template = Template("""
CREATE OR REPLACE TABLE {{ table_name }}
WITH (
PARTITIONING = ARRAY['{{ partition_column }}']
)
AS
WITH
base_data AS (
SELECT
mh.encounter_key,
CASE
WHEN mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel = 'Medicare'
)
THEN 'Medicare'
WHEN mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel = 'Commercial'
)
THEN 'Commercial'
ELSE NULL
END AS payer_channel,
ig.new_drg AS drg,
mh.total_claim_charge_amount,
im.weights,
ARRAY[
mh.d2, mh.d3, mh.d4, mh.d5, mh.d6, mh.d7, mh.d8, mh.d9, mh.d10,
mh.d11, mh.d12, mh.d13, mh.d14, mh.d15, mh.d16, mh.d17, mh.d18,
mh.d19, mh.d20, mh.d21, mh.d22, mh.d23, mh.d24, mh.d25, mh.d26
] AS dx_codes,
SEQUENCE(2, 26) AS dx_pos
FROM tq_intermediate.external_komodo.medical_headers mh
JOIN tq_intermediate.external_komodo.ipps_grouper_drgs ig
ON mh.encounter_key = ig.encounter_key
JOIN tq_production.reference_internal.ipps_msdrg im
ON ig.new_drg = im.msdrg
WHERE mh.claim_type_code = 'I'
AND mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel IN ('Medicare', 'Commercial')
)
AND year(claim_date) >= 2024
AND bill_type_code LIKE '11%'
),
encounter_max_dx_code AS (
SELECT
encounter_key,
CARDINALITY(FILTER(dx_codes, x -> x IS NOT NULL)) + 1 AS max_dx_position
FROM base_data
)
SELECT
payer_channel,
{% for col in select_columns %}{{ col }},
{% endfor %}dx_code,
CASE
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_mcc_codes
) THEN 'MCC'
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_cc_codes
) THEN 'CC'
ELSE 'No CC/MCC'
END AS severity,
{% for col in extra_columns %}{{ col }},
{% endfor %}COUNT(DISTINCT encounter_key) AS n_encounters,
MIN(total_claim_charge_amount) AS min_claim_charge_amount,
APPROX_PERCENTILE(total_claim_charge_amount, 0.05) AS p5_claim_charge_amount,
APPROX_PERCENTILE(total_claim_charge_amount, 0.25) AS p25_claim_charge_amount,
APPROX_PERCENTILE(total_claim_charge_amount, 0.50) AS p50_claim_charge_amount,
APPROX_PERCENTILE(total_claim_charge_amount, 0.75) AS p75_claim_charge_amount,
APPROX_PERCENTILE(total_claim_charge_amount, 0.95) AS p95_claim_charge_amount,
MAX(total_claim_charge_amount) AS max_claim_charge_amount
FROM base_data
LEFT JOIN encounter_max_dx_code USING (encounter_key)
CROSS JOIN UNNEST(dx_codes, dx_pos) AS t(dx_code, dx_position)
WHERE dx_code IS NOT NULL
GROUP BY {{ group_by_clause }}
""")

# Define configurations for each table
table_configs = [
{
'table_name': 'tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx',
'partition_column': 'payer_channel',
'select_columns': [],
'extra_columns': [],
'group_by_clause': '1, 2, 3'
},
{
'table_name': 'tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_drg',
'partition_column': 'drg',
'select_columns': ['drg', 'weights'],
'extra_columns': [],
'group_by_clause': '1, 2, 3, 4, 5'
},
{
'table_name': 'tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_drg_dxposition',
'partition_column': 'drg',
'select_columns': ['drg', 'weights'],
'extra_columns': ['dx_position', 'max_dx_position'],
'group_by_clause': '1, 2, 3, 4, 5, 6, 7'
}
]

# Execute queries for each table configuration
for config in table_configs:
sql_query = sql_template.render(**config)
query.execute_query(sql_query, con=trino_conn)

  1. Create tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_encounters
    • For each inpatient encounter in Komodo with Medicare or Commercial payer channel:
      • Get the assigned DRG from the IPPS Grouper DRG table.
      • Get the list of diagnosis codes (dx2 to dx26).
      • For each diagnosis code, check if it is in the CC or MCC reassignment lists.
      • Assign severity as 'MCC', 'CC', or 'No CC/MCC' based on the reassignment lists.
      • Aggregate the data by encounter_key and severity, collecting all relevant diagnosis codes and DRGs.
    • Create arrays:
      • all_dx_codes: all diagnosis codes for the encounter that are in the SDX list
      • all_drgs: all DRGs for the encounter that are in the SDX list
      • dx_codes: diagnosis codes for the encounter that correspond to the assigned severity
      • drgs: DRGs for the encounter that correspond to the assigned severity
Code
CREATE OR REPLACE TABLE tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_encounters
AS
WITH
base_data AS (
SELECT
mh.encounter_key,
CASE
WHEN mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel = 'Medicare'
)
THEN 'Medicare'
WHEN mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel = 'Commercial'
)
THEN 'Commercial'
ELSE NULL
END AS payer_channel,
ig.new_drg AS drg,
mh.total_claim_charge_amount,
im.weights,
ARRAY[
mh.d2, mh.d3, mh.d4, mh.d5, mh.d6, mh.d7, mh.d8, mh.d9, mh.d10,
mh.d11, mh.d12, mh.d13, mh.d14, mh.d15, mh.d16, mh.d17, mh.d18,
mh.d19, mh.d20, mh.d21, mh.d22, mh.d23, mh.d24, mh.d25, mh.d26
] AS dx_codes,
SEQUENCE(2, 26) AS dx_pos
FROM tq_intermediate.external_komodo.medical_headers mh
JOIN tq_intermediate.external_komodo.ipps_grouper_drgs ig
ON mh.encounter_key = ig.encounter_key
JOIN tq_production.reference_internal.ipps_msdrg im
ON ig.new_drg = im.msdrg
WHERE mh.claim_type_code = 'I'
AND mh.kh_plan_id IN (
SELECT kh_plan_id
FROM tq_dev.claims_benchmarks.payer_map
WHERE payer_channel IN ('Medicare', 'Commercial')
)
AND year(claim_date) >= 2024
AND bill_type_code LIKE '11%'
),
marginal AS (
SELECT
encounter_key,
total_claim_charge_amount,
MAX(
CASE
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_mcc_codes
) THEN 2
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_cc_codes
) THEN 1
ELSE 0
END
) AS claim_severity,
ARRAY_AGG(DISTINCT dx_code) AS all_dx_codes,
ARRAY_AGG(DISTINCT drg) AS all_drgs
FROM base_data
CROSS JOIN UNNEST(dx_codes, dx_pos) AS t(dx_code, dx_position)
WHERE dx_code IN (
SELECT dx_code
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx
WHERE n_encounters > 25
)
GROUP BY 1, 2
),
by_severity AS (
SELECT
encounter_key,
CASE
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_mcc_codes
) THEN 'MCC'
WHEN dx_code IN (
SELECT code
FROM tq_dev_hive.internal_dev_csong_imports.cms_sdx_reassign_icd10_cc_codes
) THEN 'CC'
ELSE 'No CC/MCC'
END AS severity,
ARRAY_AGG(DISTINCT dx_code) AS dx_codes,
ARRAY_AGG(DISTINCT drg) AS drgs
FROM base_data
CROSS JOIN UNNEST(dx_codes, dx_pos) AS t(dx_code, dx_position)
WHERE dx_code IN (
SELECT dx_code
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx
WHERE n_encounters > 25
)
GROUP BY 1, 2
)
SELECT
m.encounter_key,
m.total_claim_charge_amount,
CASE
WHEN m.claim_severity = 2 THEN 'MCC'
WHEN m.claim_severity = 1 THEN 'CC'
ELSE 'No CC/MCC'
END AS claim_severity,
s.severity,
s.dx_codes,
s.drgs,
m.all_dx_codes,
m.all_drgs
FROM marginal m
JOIN by_severity s
ON m.encounter_key = s.encounter_key
  1. Create tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_summary
  • For each SDX code with more than 25 encounters in kh_inpatient_sdx:
    • Join with kh_inpatient_sdx_actual_cost_encounters on diagnosis codes
    • Exclude encounters where the SDX code is the only diagnosis code or is present in the diagnosis codes for that encounter
    • Calculate the median (p50) and average claim charge amounts for the remaining encounters, grouped by SDX code and severity
Code
CREATE OR REPLACE TABLE tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_marginal_costs AS
WITH
exploded_encounters AS (
SELECT
DISTINCT
encounter_key,
claim_severity,
total_claim_charge_amount
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_encounters
)
SELECT
claim_severity as severity,
APPROX_PERCENTILE(total_claim_charge_amount, 0.50) AS p50_claim_charge_amount,
AVG(total_claim_charge_amount) AS avg_claim_charge_amount,
COUNT(DISTINCT encounter_key) AS n_encounters
FROM exploded_encounters
GROUP BY claim_severity;


CREATE OR REPLACE TABLE tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_summary AS
WITH
all_sdx AS (
SELECT DISTINCT dx_code
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx
WHERE n_encounters > 25
),
exploded_encounters AS (
SELECT
b.encounter_key,
b.severity,
b.total_claim_charge_amount,
b.dx_codes,
dx_code_exploded
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_actual_cost_encounters b
CROSS JOIN UNNEST(b.all_dx_codes) AS t(dx_code_exploded)
)
SELECT
a.dx_code,
b.severity AS current_severity,

-- p50 by severity
APPROX_PERCENTILE(e.total_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'No CC/MCC') AS p50_no_cc,
APPROX_PERCENTILE(e.total_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'CC') AS p50_cc,
APPROX_PERCENTILE(e.total_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'MCC') AS p50_mcc,

-- marginal avg by severity
APPROX_PERCENTILE(m.p50_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'No CC/MCC') AS expected_p50_no_cc,
APPROX_PERCENTILE(m.p50_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'CC') AS expected_p50_cc,
APPROX_PERCENTILE(m.p50_claim_charge_amount, 0.50)
FILTER (WHERE e.severity = 'MCC') AS expected_p50_mcc,

-- encounter counts by severity
COUNT(DISTINCT e.encounter_key)
FILTER (WHERE e.severity = 'No CC/MCC') AS n_no_cc_mcc,
COUNT(DISTINCT e.encounter_key)
FILTER (WHERE e.severity = 'CC') AS n_cc,
COUNT(DISTINCT e.encounter_key)
FILTER (WHERE e.severity = 'MCC') AS n_mcc

FROM all_sdx a
LEFT JOIN tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx b
ON a.dx_code = b.dx_code
JOIN exploded_encounters e
ON e.dx_code_exploded = a.dx_code
AND (
CARDINALITY(e.dx_codes) > 1
OR NOT CONTAINS(e.dx_codes, a.dx_code)
)
LEFT JOIN tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx_marginal_costs m
ON e.severity = m.severity
GROUP BY
a.dx_code,
b.severity

Exploratory Analysis

  1. Distribution of Number of Encounters by SDX code
n_encounters
p6028
p7065
p75118
p90864
p953385
Code
df = pd.read_sql(f"""
SELECT
APPROX_PERCENTILE(n_encounters, 0.60) AS p60,
APPROX_PERCENTILE(n_encounters, 0.70) AS p70,
APPROX_PERCENTILE(n_encounters, 0.75) AS p75,
APPROX_PERCENTILE(n_encounters, 0.90) AS p90,
APPROX_PERCENTILE(n_encounters, 0.95) AS p95
FROM tq_dev.internal_dev_csong_sandbox.kh_inpatient_sdx
""", con=trino_conn)
print(df.T.rename(columns={0: "n_encounters"}).to_markdown())