PG Accuracy Scores
We would like to create procedure group (PG)-specific accuracy scoring methodology.
Similar to labs, we define a "consistent percentage of medicare rate" as having at least 30 rates for a given PG/network combination AND having the difference between % of Medicare rates' 5th and 95th percentiles be less than 0.25.
------------------------------------
-- PG HIERARCHY:
-- SCORE = 5: has_consistent_pct_of_medicare_rate AND percent between 0.9 and 3.5 (10th and 90th percentiles of consistent rates)
-- SCORE = 4: has_consistent_pct_of_medicare_rate AND percent between 0.5 and 5.5 (1st and 99th percentiles of consistent rates)
-- SCORE = 3: percent between 0.9 and 3.5
-- SCORE = 2: not an outlier 0.5 and 5.5 (captures ~98% of posted PG rates)
------------------------------------
Distributions
Validated Rates
| canonical_rate_percent_of_medicare | |
|---|---|
| count | 49996 |
| mean | 2.26526 |
| std | 1.78897 |
| min | 0.12 |
| 1% | 0.71 |
| 5% | 1 |
| 10% | 1.11 |
| 25% | 1.34 |
| 50% | 1.78 |
| 75% | 2.6 |
| 90% | 3.82 |
| 95% | 4.7 |
| 99% | 8.44 |
| max | 29.96 |
df = pd.read_sql(f"""
SELECT
canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score = 5
AND provider_type = 'Physician Group'
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)
print(df['canonical_rate_percent_of_medicare'].describe())
All Non-Outlier Rates
| canonical_rate_percent_of_medicare | |
|---|---|
| count | 49998 |
| mean | 1.86677 |
| std | 1.93384 |
| min | 0.1 |
| 1% | 0.35 |
| 5% | 0.42 |
| 10% | 0.6 |
| 25% | 0.92 |
| 50% | 1.37 |
| 75% | 2.22 |
| 90% | 3.42 |
| 95% | 4.5315 |
| 99% | 8.87 |
| max | 29.71 |
df = pd.read_sql(f"""
SELECT
canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score > 1
AND provider_type = 'Physician Group'
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)
print(
df['canonical_rate_percent_of_medicare']
.describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
.to_markdown()
)
Consistent Rates
| canonical_rate_percent_of_medicare | |
|---|---|
| count | 6394 |
| mean | 1.67522 |
| std | 1.44894 |
| min | 0 |
| 1% | 0.6793 |
| 5% | 0.8 |
| 10% | 0.93 |
| 25% | 1.04 |
| 50% | 1.23 |
| 75% | 1.65 |
| 90% | 3.48 |
| 95% | 4.2435 |
| 99% | 5.5 |
| max | 49.75 |
df = pd.read_sql(f"""
WITH
df AS (
SELECT
provider_id,
network_id,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.05) as p05,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.25) as p25,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.5) as p50,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.75) as p75,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.95) as p95,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE bill_type = 'Professional'
AND canonical_rate_score > 1
AND provider_type = 'Physician Group'
GROUP BY 1,2
),
consistent AS (
SELECT
*
FROM df
WHERE
p95 - p05 < 0.25
AND n_rates >= 30
)
SELECT canonical_rate_percent_of_medicare
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
JOIN consistent USING (provider_id, network_id)
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)
print(
df['canonical_rate_percent_of_medicare']
.describe(percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
.to_markdown()
)