Skip to main content

example


# %% [markdown]
"""
IQR Window Tuning
"""


# %%
df = pd.read_sql(f"""
SELECT billing_code, q1, q3, iqr, medicare_rate, lower_bound, upper_bound
FROM hive.cld_utils.outlier_bounds_negotiated_rate_2025_04_18
WHERE upper_bound_type = 'log_iqr'
ORDER BY RANDOM()
LIMIT 1000
""", con=trino_conn)

sns.set_theme(rc={"figure.figsize": (14, 12)})
ax = sns.scatterplot(
x=df['medicare_rate'],
y=np.exp(df['q3'] + df['iqr']*1.5),
label='1.5 IQR'
)
ax = sns.scatterplot(
x=df['medicare_rate'],
y=np.exp(df['q3'] + df['iqr']*2),
label='2 IQR'
)

sns.lineplot(
x=df['medicare_rate'],
y=df['medicare_rate']*5,
color='red',
label='5x Medicare Rate'
)
sns.lineplot(
x=df['medicare_rate'],
y=df['medicare_rate']*10,
color='red',
label='10x Medicare Rate'
)
ax.set_ylim(500, 80000)
ax.set_xlim(500, 10000)
# %% [markdown]
"""
Why not just use Medicare?
"""

# %%
samples = pd.read_sql(f"""
WITH
opps AS (
SELECT
distinct
apc,
cpt
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10 opps
JOIN tq_dev.internal_dev_csong_cld_v1_0.tmp_ref_provider_spine_2025_02 ps
ON CAST(opps.provider_id AS VARCHAR) = ps.provider_id
WHERE effective_date = (
SELECT max(effective_date)
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10
)
)
SELECT apc, count(distinct billing_code) as n
FROM tq_dev.internal_dev_csong_cld_v1_0.prod_combined_all a
JOIN opps
ON a.billing_code = opps.cpt
WHERE provider_type LIKE '%Hospital%'
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1000
""", con=trino_conn)

# %%
samples.head(30)

# %%
apc = '5113'

# %%
df = pd.read_sql(f"""
WITH
opps AS (
SELECT
distinct
apc,
cpt
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10 opps
JOIN tq_dev.internal_dev_csong_cld_v1_0.tmp_ref_provider_spine_2025_02 ps
ON CAST(opps.provider_id AS VARCHAR) = ps.provider_id
WHERE effective_date = (
SELECT max(effective_date)
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10
)
)
SELECT
provider_id,
billing_code,
apc,
medicare_rate,
canonical_rate
FROM tq_dev.internal_dev_csong_cld_v1_0.prod_combined_all
JOIN opps
ON billing_code = cpt
WHERE provider_type LIKE '%Hospital%'
AND canonical_rate IS NOT NULL
AND canonical_rate_score = 5
AND canonical_rate between lower_bound AND upper_bound
AND apc = '{apc}'
ORDER BY RANDOM()
LIMIT 20000
""", con=trino_conn)

df

# %%
# pick 30 random codes
codes = df['billing_code'].sample(40, random_state=1)
df_plot = df[df['billing_code'].isin(codes)].copy()

# melt to long form
df_long = df_plot.melt(
id_vars='billing_code',
value_vars=['canonical_rate', 'medicare_rate'],
var_name='rate_type',
value_name='rate'
)

order = (
df_plot
.groupby('billing_code')['canonical_rate']
.median()
.sort_values()
.index
)

sns.set_theme(rc={"figure.figsize": (14, 12)})
ax = sns.boxplot(
x='billing_code',
y='rate',
hue='rate_type',
data=df_long.sort_values('rate'),
palette=['skyblue','red'],
order=order,
showfliers=False
)

plt.title(f'Rates by Billing Code (APC {apc})')
plt.xlabel('Billing Code')
plt.ylabel('Rate')
plt.xticks(rotation=90)
# ax.set_ylim(0, df['canonical_rate'].quantile(0.95))

plt.legend(title='Rate Type', loc='upper right')
plt.tight_layout()
plt.show()