version 0
Due: 01/13/2026
Final Table: tq_dev.internal_dev_csong_sandbox.implant_rates_v0_2025_10
Goal:
Goal for v0 is to create a table with implant carveout percentages using payer MRF data.
Scope:
- Data Source:
- payer MRF (core rates)
- Providers:
- All Hospitals in Clear Rates
- Payers / Networks:
- All payers and networks in Clear Rates
- But if network for a payer isn’t available in Clear Rates, we should still include the non-Clear Rates network
- Codes:
- Revenue codes:
- 0275, 0276, 0277, 0278
- Revenue codes:
Methodology:
- Filter Core Rates:
- we can use either:
- core_rates: tq_production.public_YYYY_MM.core_rates
a temp table in Clear Rates pipeline that is a preprocessed version of core_rates: tq_dev.internal_dev_csong_cld_v2_3_1.tmp_raw_mrf_payer_rates_2025_10
- payers, networks, and providers that are in scope.
- Filter payers using payer_id
- Filter providers using provider_id
- Filter networks using product_network_label
- billing_class
- institutional
note that some payers report institutional ASC rates with billing_class = ‘professional’ (see logic here)only including Hospitals
- codes
- Filter to codes listed above in the “scope section” using biling_code
- billing_code = ‘RC’ for revenue codes, ‘HCPCS’ for others
- negotiated_type
- percentage
- negotiated_rate
- exclude where negotiated_rate > 100
- we can use either:
- Pre-Processing
- negotiated_rate
- if negotiated_rate < 1 then negotiated_rate * 100
- round to the nearest 100th place
- negotiated_rate
- Analysis
- For each payer, network, provider, compute the following summary statistics using the “percentage”:
- mode
- median
- min
- max
- average
- count(distinct percentages)
- Determine thresholds for handling multiple different rates:
- if source of multiple rates is within contract-revenue_code, select the max()
- if source of multiple rates is between revenue_codes within a contract, hierarchy to prefer
0278
- For each payer, network, provider, compute the following summary statistics using the “percentage”:
SQL:
Create a table from Core Rates where each row represents a unique payer, network, provider, and revenue_code. These are candidate implant carveout rates.
tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion}
Core Rates Table Code
cld_version = "v2_3_1"
cld_subversion = "2025_10"
core_rate_month = "2025_10"
query.execute_query(f"""
CREATE OR REPLACE TABLE tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion} AS
WITH
network_spine AS (
SELECT
payer_id,
network_id,
network_name,
t.payer_data_network
FROM
tq_dev.internal_dev_csong_cld_{cld_version}.tmp_ref_network_spine_{cld_subversion},
UNNEST(payer_data_network) AS t(payer_data_network)
),
payer_spine AS (
SELECT DISTINCT
payer_id,
payer_name
FROM tq_dev.internal_dev_csong_cld_{cld_version}.tmp_ref_payer_spine_{cld_subversion}
),
provider_spine AS (
SELECT
provider_id
FROM
tq_dev.internal_dev_csong_cld_{cld_version}.tmp_ref_provider_spine_{cld_subversion}
WHERE
provider_id IS NOT NULL
AND provider_type = 'Hospital'
),
core_rates AS (
SELECT
cr.payer_id,
cr.payer_name,
CASE
WHEN ns.network_id IS NOT NULL
THEN ns.network_id
ELSE from_big_endian_64(xxhash64(CAST(
cr.payer_id || cr.product_network_label AS varbinary
)))
END AS network_id,
CASE
WHEN ns.network_name IS NOT NULL
THEN ns.network_name
ELSE cr.product_network_label
END AS network_name,
CASE
WHEN ns.network_name IS NOT NULL
THEN True
ELSE False
END AS in_clear_rates,
cr.provider_id,
cr.billing_code,
cr.service_code,
cr.id,
CASE
WHEN negotiated_rate < 1 THEN negotiated_rate * 100
WHEN negotiated_rate > 100 THEN NULL
ELSE negotiated_rate
END AS percentage
FROM tq_production.public_{core_rate_month}.core_rates cr
LEFT JOIN network_spine ns
ON ns.payer_id = cr.payer_id
AND ns.payer_data_network = cr.product_network_label
LEFT JOIN payer_spine ps
ON ps.payer_id = cr.payer_id
JOIN provider_spine ps
ON ps.provider_id = cr.provider_id
WHERE
cr.billing_code_type = 'RC'
AND cr.billing_code IN (
'0275',
'0276',
'0277',
'0278'
)
AND cr.billing_class = 'institutional'
AND cr.negotiated_type = 'percentage'
),
mode_rates AS (
SELECT
payer_id,
network_id,
provider_id,
percentage,
ROW_NUMBER() OVER (
PARTITION BY payer_id, network_id, provider_id
ORDER BY COUNT DESC
) AS rn
FROM (
SELECT
payer_id,
network_id,
provider_id,
percentage,
COUNT(*) as count
FROM core_rates
GROUP BY 1, 2, 3, 4
)
)
SELECT
cr.payer_id || CAST(cr.network_id AS VARCHAR) || cr.provider_id || cr.billing_code AS unique_id,
cr.payer_id || CAST(cr.network_id AS VARCHAR) || cr.provider_id AS contract_id,
cr.payer_id,
cr.payer_name,
cr.network_id,
cr.network_name,
cr.in_clear_rates,
cr.provider_id,
cr.billing_code,
ARRAY_SORT(ARRAY_AGG(DISTINCT cr.service_code)) AS service_codes,
ARRAY_AGG(DISTINCT cr.id) AS core_rate_ids,
min(cr.percentage) as min_rate,
max(cr.percentage) as max_rate,
avg(cr.percentage) as avg_rate,
approx_percentile(cr.percentage, 0.5) as median_rate,
ANY_VALUE(mr.percentage) as mode_rate,
count(DISTINCT cr.percentage) as num_rates
FROM core_rates cr
LEFT JOIN (SELECT * FROM mode_rates WHERE rn = 1) mr
ON mr.payer_id = cr.payer_id
AND mr.network_id = cr.network_id
AND mr.provider_id = cr.provider_id
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
HAVING count(DISTINCT cr.percentage) >= 1
""", con=trino_conn)
Then, after identifying thresholds, apply the thresholds to generate the final implant rates table, where each row is a payer, network, and provider.
tq_dev.internal_dev_csong_sandbox.implant_rates_v0_{cld_subversion}
Implant Rates Table Code
query.execute_query(f"""
CREATE OR REPLACE TABLE tq_dev.internal_dev_csong_sandbox.implant_rates_v0_{cld_subversion} AS
WITH
df AS (
SELECT
unique_id,
contract_id,
payer_id,
payer_name,
network_id,
network_name,
in_clear_rates,
provider_id,
billing_code,
CAST(NULL AS VARCHAR) as setting,
max_rate as implant_percentage,
ROW_NUMBER() OVER (
PARTITION BY contract_id
ORDER BY
-- Prefer 0278 over others
CASE
WHEN billing_code = '0278' THEN 1
ELSE 2
END
ASC
) AS rn
FROM tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion}
)
SELECT
unique_id,
contract_id,
payer_id,
payer_name,
network_id,
network_name,
in_clear_rates,
provider_id,
billing_code,
setting,
implant_percentage
FROM df
WHERE rn = 1
""", con=trino_conn)
Analysis
- A contract-revenue_code pair has multiple different rates less than 5% of the time. We'll prefer the max rate.
- A contract has multiple implant rev codes with different rates about 5% of the time. We'll prefer the 0278 rate.
Q1: How often does a contract-revenue_code pair have multiple different rates?
There are 191324 payer, network, provider, revenue code (274-278) combinations. 95% of the time, this contract-revenue_code pair has 2 distinct percentages.
| num_rates | |
|---|---|
| count | 191324 |
| mean | 1.1824 |
| std | 0.64793 |
| min | 0 |
| 25% | 1 |
| 50% | 1 |
| 75% | 1 |
| 90% | 2 |
| 95% | 2 |
| 99% | 4 |
| max | 16 |
Code
# distribution
df = pd.read_sql(f"""
SELECT
COUNT(*) AS num_objects,
COUNT(DISTINCT contract_id) AS num_contracts,
COUNT(DISTINCT billing_code) AS num_rates,
APPROX_PERCENTILE(num_rates, 0.25) AS p25,
APPROX_PERCENTILE(num_rates, 0.5) AS p50,
APPROX_PERCENTILE(num_rates, 0.75) AS p75,
APPROX_PERCENTILE(num_rates, 0.9) AS p90,
APPROX_PERCENTILE(num_rates, 0.95) AS p95,
APPROX_PERCENTILE(num_rates, 0.99) AS p99,
MAX(num_rates) AS max_rates
FROM tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion}
""", con=trino_conn)
df.T
Evaluate Samples with Multiple Rates:
Code
df = pd.read_sql(f"""
WITH
samples AS (
SELECT
payer_id,
network_name,
provider_id,
billing_code,
core_rate_ids
FROM
tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion}
WHERE num_rates > 1
ORDER BY RANDOM()
LIMIT 10
),
expanded AS (
SELECT
payer_id,
network_name,
provider_id,
billing_code,
core_rate_id
FROM samples,
UNNEST(core_rate_ids) AS t(core_rate_id)
)
SELECT
cr.*
FROM tq_production.public_2025_10.core_rates cr
JOIN expanded e
ON cr.payer_id = e.payer_id
AND cr.id = e.core_rate_id
""", con=trino_conn)
df
# %%
df_sample = df[[
'payer_id',
'product_network_label',
'provider_id',
'provider_npi',
'provider_group_id',
'ein_name',
'billing_code',
'billing_class',
'service_code',
'negotiated_rate'
]].sort_values([
'payer_id',
'product_network_label',
'provider_id',
'billing_code',
'negotiated_rate'
])
print(df_sample.to_markdown(index=False))
One explanation for the within-contract-revenue_code percentage variability in these examples is that EINs with different rates are getting mapped to the same provider_id. A known "issue" with provider ID mapping.
Another appears to be payer simply reporting multiple rates.
Data Examples
| payer_id | product_network_label | provider_id | provider_npi | provider_group_id | ein_name | billing_code | billing_class | negotiated_rate |
|---|---|---|---|---|---|---|---|---|
| 522 | KFHP NORTHERN CALIFORNIA | 6557 | 1639143381 | 941397461 | Sierra View Medical Center | 0275 | institutional | 70 |
| 522 | KFHP NORTHERN CALIFORNIA | 6557 | 1982827358 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KFHP NORTHERN CALIFORNIA | 6557 | 1073736443 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KFHP SOUTHERN CALIFORNIA | 6557 | 1639143381 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KFHP SOUTHERN CALIFORNIA | 6557 | 1639143381 | 941397461 | Sierra View Medical Center | 0275 | institutional | 100 |
| 522 | KFHP SOUTHERN CALIFORNIA | 6557 | 1639143381 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KPIC NORTHERN CALIFORNIA | 6557 | 1639143381 | 941397461 | Sierra View Medical Center | 0275 | institutional | 70 |
| 522 | KPIC NORTHERN CALIFORNIA | 6557 | 1073736443 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KPIC NORTHERN CALIFORNIA | 6557 | 1982827358 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KPIC SOUTHERN CALIFORNIA | 6557 | 1639143381 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 522 | KPIC SOUTHERN CALIFORNIA | 6557 | 1639143381 | 941397461 | Sierra View Medical Center | 0275 | institutional | 100 |
| 522 | KPIC SOUTHERN CALIFORNIA | 6557 | 1639143381 | 680317191 | County Of Sacramento | 0275 | institutional | 100 |
| 624 | MT NAVIGATOR HSA | 5559 | 1003939190 | 475437700 | Billings Clinic Health System | 0278 | institutional | 55 |
| 624 | MT NAVIGATOR HSA | 5559 | 1003939190 | 475437700 | Billings Clinic Health System | 0278 | institutional | 60 |
| 624 | MT NAVIGATOR HSA | 5559 | 1003939190 | 475437700 | Billings Clinic Health System | 0278 | institutional | 80 |
| 7 | MD GROUP EPO | 1412 | 1992701825 | 520445374 | Civista Medical Center | 0278 | institutional | 97.75 |
| 7 | MD GROUP EPO | 1412 | 1992701825 | 520445374 | Civista Medical Center | 0278 | institutional | 98 |
| 7 | NATIONAL PPO | 1590 | 1144228305 | 550357045 | WVU Medicine | 0278 | institutional | 71 |
| 7 | NATIONAL PPO | 1590 | 1144228305 | 550357045 | WVU Medicine | 0278 | institutional | 94 |
| 7 | NATIONAL PPO | 1704 | 1275681264 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1437554037 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1619372216 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1558765529 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1790875953 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1093708711 | 560529945 | Atrium Health | 0278 | institutional | 59.1 |
| 7 | NATIONAL PPO | 1704 | 1558765529 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NATIONAL PPO | 1704 | 1437554037 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NATIONAL PPO | 1704 | 1275681264 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NATIONAL PPO | 1704 | 1093708711 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NATIONAL PPO | 1704 | 1619372216 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NATIONAL PPO | 1704 | 1790875953 | 560529945 | Atrium Health | 0278 | institutional | 75 |
| 7 | NC GROUP POS | 1666 | 1497708838 | 560554230 | Novant Health | 0278 | institutional | 49.5 |
| 7 | NC GROUP POS | 1666 | 1417360736 | 560554230 | Novant Health | 0278 | institutional | 49.5 |
| 7 | NC GROUP POS | 1666 | 1881647204 | 560554230 | Novant Health | 0278 | institutional | 49.5 |
| 7 | NC GROUP POS | 1666 | 1881647204 | 560554230 | Novant Health | 0278 | institutional | 79 |
| 7 | NC GROUP POS | 1666 | 1417360736 | 560554230 | Novant Health | 0278 | institutional | 79 |
| 7 | NC GROUP POS | 1666 | 1497708838 | 560554230 | Novant Health | 0278 | institutional | 79 |
| 76 | CARELINK NATIONAL OAP | 5368 | 1952723967 | 464389870 | Houston Methodist | 0278 | institutional | 50 |
| 76 | CARELINK NATIONAL OAP | 5368 | 1952723967 | 464389870 | Houston Methodist | 0278 | institutional | 53 |
| 76 | LOCALPLUS SAR | 7158 | 1104982917 | 132997301 | Mount Sinai Health System | 0276 | institutional | 44.13 |
| 76 | LOCALPLUS SAR | 7158 | 1104982917 | 132997301 | Mount Sinai Health System | 0276 | institutional | 53.16 |
| 76 | PRIORITY HEALTH ALLIANCE | 5348 | 1700801909 | 954537720 | The Hospitals of Providence | 0275 | institutional | 21.9 |
| 76 | PRIORITY HEALTH ALLIANCE | 5348 | 1700801909 | 954537720 | The Hospitals of Providence | 0275 | institutional | 22.8 |
| 861 | CHOICE POS II | 1287 | 1568526135 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 38 |
| 861 | CHOICE POS II | 1287 | 1639143084 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 38 |
| 861 | CHOICE POS II | 1287 | 1225004047 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 38 |
| 861 | CHOICE POS II | 1287 | 1881695146 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 38 |
| 861 | CHOICE POS II | 1287 | 1225004047 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 40 |
| 861 | CHOICE POS II | 1287 | 1568526135 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 40 |
| 861 | CHOICE POS II | 1287 | 1881695146 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 40 |
| 861 | CHOICE POS II | 1287 | 1639143084 | 250965429 | University of Pittsburgh Medical Center | 0278 | institutional | 40 |
Q2: How often do contracts contain multiple implant rev codes with different rates?
There is a difference 5% of the time. We'll prefer the 0278 rate.
Code
df = pd.read_sql(f"""
WITH
df AS (
SELECT
contract_id,
min(avg_rate) AS min_rate,
max(avg_rate) AS max_rate,
max(avg_rate) - min(avg_rate) AS rate_diff
FROM tq_dev.internal_dev_csong_sandbox.implant_rates_cr_v0_{cld_subversion}
GROUP BY 1
)
SELECT
CASE
WHEN rate_diff = 0 THEN 'no_diff'
WHEN rate_diff > 0 AND rate_diff <= 10 THEN '0-10'
WHEN rate_diff > 10 AND rate_diff <= 25 THEN '10-25'
WHEN rate_diff > 25 AND rate_diff <= 50 THEN '25-50'
WHEN rate_diff > 50 THEN '50+'
ELSE 'unknown'
END AS diff_bucket,
COUNT(*) AS num_contracts
FROM df
GROUP BY 1
ORDER BY
CASE
WHEN diff_bucket = 'no_diff' THEN 1
WHEN diff_bucket = '0-10' THEN 2
WHEN diff_bucket = '10-25' THEN 3
WHEN diff_bucket = '25-50' THEN 4
WHEN diff_bucket = '50+' THEN 5
ELSE 6
END
""", con=trino_conn)
df['pct_contracts'] = 100 * df['num_contracts'] / df['num_contracts'].sum()
print(df.to_markdown(index=False))
| diff_bucket | num_contracts | pct_contracts |
|---|---|---|
| no_diff | 64189 | 94.1353 |
| 0-10 | 2433 | 3.56808 |
| 10-25 | 1147 | 1.68211 |
| 25-50 | 172 | 0.252244 |
| 50+ | 11 | 0.0161319 |
| unknown | 236 | 0.346102 |
Q3: What is the distribution of percentages?
using tq_dev.internal_dev_csong_sandbox.implant_rates_v0_{cld_subversion} now
df = pd.read_sql(f"""
SELECT contract_id, avg(implant_percentage) as implant_percentage
FROM tq_dev.internal_dev_csong_sandbox.implant_rates_v0_{cld_subversion}
GROUP BY 1
""", con=trino_conn)
sns.histplot(data=df, x="implant_percentage", bins=50, kde=True)

# summary table with buckets
df = pd.read_sql(f"""
WITH buckets AS (
SELECT
CASE
WHEN implant_percentage < 25 THEN '<25%'
WHEN implant_percentage >= 25 AND implant_percentage < 50 THEN '25-50%'
WHEN implant_percentage >= 50 AND implant_percentage < 75 THEN '50-75%'
WHEN implant_percentage >=75 AND implant_percentage < 100 THEN '75-100%'
WHEN implant_percentage >= 100 THEN '100%'
ELSE NULL
END AS rate_bucket,
COUNT(DISTINCT contract_id) AS num_contracts
FROM tq_dev.internal_dev_csong_sandbox.implant_rates_v0_{cld_subversion}
GROUP BY 1
)
SELECT
rate_bucket,
num_contracts,
ROUND(100.0 * num_contracts / SUM(num_contracts) OVER (), 2) AS pct_contracts
FROM buckets
ORDER BY
CASE
WHEN rate_bucket = '<25%' THEN 1
WHEN rate_bucket = '25-50%' THEN 2
WHEN rate_bucket = '50-75%' THEN 3
WHEN rate_bucket = '75-100%' THEN 4
WHEN rate_bucket = '100%' THEN 5
ELSE 6
END
""", con=trino_conn)
print(df.to_markdown(index=False))
| rate_bucket | num_contracts | pct_contracts |
|---|---|---|
| <25% | 5783 | 8.48 |
| 25-50% | 20026 | 29.37 |
| 50-75% | 21042 | 30.86 |
| 75-100% | 14288 | 20.95 |
| 100% | 6813 | 9.99 |
| 236 | 0.35 |