{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `IAI` is used below but never imported here; presumably the\n",
    "# \"Julia (IAI)\" kernel makes it available - confirm before running elsewhere.\n",
    "using CSV, DataFrames, Statistics, LinearAlgebra, Random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the cohort CSV into a DataFrame.\n",
    "df_anycat = CSV.read(\"crlm_data_cohort_after_matching.csv\", DataFrame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Covariates handed to the reward estimator.\n",
    "X_train = df_anycat[:, [\"age\", \"gender\", \"T\", \"N\", \"rightleft\", \"neochemo\",\n",
    "                        \"cea\", \"DFI\", \"size\",\n",
    "                        \"bilobar\", \"number\"]];\n",
    "\n",
    "# Treatment assignment, event indicator and time columns from the same table.\n",
    "train_treatments = df_anycat[:, :adjuvantchemo]\n",
    "\n",
    "train_recurred = convert(Vector{Bool}, df_anycat[:, :RCT_RFS_event])\n",
    "\n",
    "train_times = df_anycat[:, :RCT_TIME];"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reward estimator using the direct method; both sub-estimators are random forests.\n",
    "reward_lnr = IAI.CategoricalSurvivalRewardEstimator(\n",
    "    propensity_estimator=IAI.RandomForestClassifier(show_progress=false, missingdatamode=\"separate_class\"),\n",
    "    outcome_estimator=IAI.RandomForestSurvivalLearner(show_progress=false, missingdatamode=\"separate_class\", random_seed=123),\n",
    "    reward_estimator=:direct_method,\n",
    "    propensity_min_value=0.2,\n",
    "    random_seed=123,\n",
    "    evaluation_time=60  # 5 years (assumes RCT_TIME is in months - TODO confirm)\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assign weights to the group of interest to tweak the mean rewards\n",
    "# (currently uniform: the adjustments below are disabled).\n",
    "w = ones(size(X_train, 1));\n",
    "# w = w + 0.5 * ((df_anycat.RCT_RFS_event .== 0) .&& (df_anycat.adjuvantchemo .== true))\n",
    "# w = w + 0.3 * ((df_anycat.RCT_RFS_event .== 0) .&& (df_anycat.adjuvantchemo .== false))\n",
    "sum(w)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the reward estimator and keep the estimated rewards.\n",
    "train_predictions, train_reward_score = IAI.fit_predict!(reward_lnr,\n",
    "    X_train, train_treatments, train_recurred, train_times,\n",
    "    propensity_score_criterion = \"misclassification\",  # 'auc', 'misclassification'\n",
    "    outcome_score_criterion = \"harrell_c_statistic\",   # 'r_squared', 'harrell_c_statistic'\n",
    "    sample_weight = w);\n",
    "train_rewards = train_predictions[:reward];"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "describe(train_rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Constrained reward estimation\n",
    "# rows_to_adjust = findall(x -> (train_rewards[x, 1] > train_rewards[x, 2]) && (train_rewards[x, 2] >= 0.8 * train_rewards[x, 1]), 1:size(train_rewards, 1))\n",
    "\n",
    "# # Iterate over rows to update values\n",
    "# for row in rows_to_adjust\n",
    "#     train_rewards[row, 1] = 0.8 * train_rewards[row, 2]\n",
    "# end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "describe(train_rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebind X_train to the feature subset used to fit the policy tree below.\n",
    "X_train = df_anycat[:, [\"T\", \"N\", \"rightleft\", \"neochemo\", \"DFI\", \"size\",\n",
    "                        \"bilobar\", \"number\"]];"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Grid search over max_depth and cp for the policy tree, fit on the estimated rewards.\n",
    "grid_rw = IAI.GridSearch(\n",
    "    IAI.OptimalTreePolicyMaximizer(\n",
    "        random_seed=123,\n",
    "        max_categoric_levels_before_warning=20,\n",
    "        missingdatamode=\"separate_class\",\n",
    "    ),\n",
    "    max_depth=[4, 5, 6],\n",
    "    minbucket=[15],\n",
    "    # cp = [0.01, 0.05]\n",
    "    cp = [0.001, 0.00001]\n",
    ")\n",
    "IAI.fit!(grid_rw, X_train, train_rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia (IAI) 1.8.0",
   "language": "julia",
   "name": "julia-_iai_-1.8"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}