array(40) {
["project_status"]=>
string(7) "ongoing"
["project_assoc_trials"]=>
array(1) {
[0]=>
object(WP_Post)#3903 (24) {
["ID"]=>
int(1727)
["post_author"]=>
string(4) "1363"
["post_date"]=>
string(19) "2018-10-23 14:23:00"
["post_date_gmt"]=>
string(19) "2018-10-23 14:23:00"
["post_content"]=>
string(0) ""
["post_title"]=>
string(198) "NCT01106014 - A Multicenter, Double-blind, Placebo-controlled Phase 3 Study Assessing the Safety and Efficacy of Selexipag on Morbidity and Mortality in Patients With Pulmonary Arterial Hypertension"
["post_excerpt"]=>
string(0) ""
["post_status"]=>
string(7) "publish"
["comment_status"]=>
string(6) "closed"
["ping_status"]=>
string(6) "closed"
["post_password"]=>
string(0) ""
["post_name"]=>
string(194) "nct01106014-a-multicenter-double-blind-placebo-controlled-phase-3-study-assessing-the-safety-and-efficacy-of-selexipag-on-morbidity-and-mortality-in-patients-with-pulmonary-arterial-hypertension"
["to_ping"]=>
string(0) ""
["pinged"]=>
string(0) ""
["post_modified"]=>
string(19) "2025-04-18 12:58:50"
["post_modified_gmt"]=>
string(19) "2025-04-18 16:58:50"
["post_content_filtered"]=>
string(0) ""
["post_parent"]=>
int(0)
["guid"]=>
string(243) "https://dev-yoda.pantheonsite.io/clinical-trial/nct01106014-a-multicenter-double-blind-placebo-controlled-phase-3-study-assessing-the-safety-and-efficacy-of-selexipag-on-morbidity-and-mortality-in-patients-with-pulmonary-arterial-hypertension/"
["menu_order"]=>
int(0)
["post_type"]=>
string(14) "clinical_trial"
["post_mime_type"]=>
string(0) ""
["comment_count"]=>
string(1) "0"
["filter"]=>
string(3) "raw"
}
}
["project_title"]=>
string(47) "Defining a selexipag responder phenotype in PAH"
["project_narrative_summary"]=>
string(697) "Patients with PAH are initially prescribed two drugs as first-line therapy to treat their disease. Many patients will require a third drug as second-line therapy. However, there is no clear evidence-based framework to assist doctors in determining what the best third drug should be. There are three main options for a doctor to choose, including the drug selexipag. In this project, we will analyze data from patients enrolled in a clinical trial of selexipag. We will designate "responders" to selexipag as those who improved when given the drug. Then, we will examine the clinical features of those responders to determine what types of patients should receive selexipag as the best third drug."
["project_learn_source"]=>
string(9) "colleague"
["principal_investigator"]=>
array(7) {
["first_name"]=>
string(9) "Catherine"
["last_name"]=>
string(7) "Simpson"
["degree"]=>
string(7) "MD, MHS"
["primary_affiliation"]=>
string(24) "Johns Hopkins University"
["email"]=>
string(26) "catherine.simpson@jhmi.edu"
["state_or_province"]=>
string(2) "MD"
["country"]=>
string(13) "United States"
}
["project_key_personnel"]=>
array(1) {
[0]=>
array(6) {
["p_pers_f_name"]=>
string(4) "Paul"
["p_pers_l_name"]=>
string(7) "Hassoun"
["p_pers_degree"]=>
string(2) "MD"
["p_pers_pr_affil"]=>
string(24) "Johns Hopkins University"
["p_pers_scop_id"]=>
string(0) ""
["requires_data_access"]=>
string(2) "no"
}
}
["project_ext_grants"]=>
array(2) {
["value"]=>
string(2) "no"
["label"]=>
string(68) "No external grants or funds are being used to support this research."
}
["project_date_type"]=>
string(18) "full_crs_supp_docs"
["property_scientific_abstract"]=>
string(1249) "Background: Patients with PAH are prescribed a combination of two drugs at the time of diagnosis as first-line therapy. Many patients will require escalation to second-line therapy, however there is no evidence basis for selecting a particular second-line therapy in PAH from among the available options. Selexipag is one of the three main choices for second-line therapy available to PAH prescribers.
Objective: To define a responder phenotype for second-line PAH therapies in order to enable precision-guided treatment escalation. We hypothesize that specific clinical and phenotypic features will predict response to selexpag.
Study Design: Individual participant level trial analysis using machine learning to determine the predictors of multicomponent improvement with selexipag.
Participants: All GRIPHON subjects randomized to receive selexipag.
Primary and Secondary Outcome Measures: patients will be designated as selexipag responders if they demonstrate multicomponent improvement (MCI), an expert consensus-defined clinical endpoint used in more recent PAH trials, on selexipag.
Statistical Analysis: Predictors of MCI will be derived using ensemble machine learning via the SuperLearner package in R."
["project_brief_bg"]=>
string(2097) "While there is broad consensus and strong evidence that first-line therapy for most patients with PAH should consist of a phosphodiesterase-5 inhibitor (PDE5i) given in combination with an endothelin receptor antagonist (ERA), the approach to escalation of therapy for patients who fail to achieve low-risk status with initial therapy lacks a clear framework. Compounding this challenge, the available second-line options have expanded dramatically over the last decade, and now include the novel activin inhibitor sotatercept in addition to selexipag and oral, inhaled, or parenteral prostacyclins. The modern PAH treatment algorithm does not provide for an evidence-based approach for choosing among these potential second-line therapies for patients who require escalation of therapy. Defining "responder" phenotypes for the available second-line drugs in order to identify patients who stand to benefit the most could help to improve patient care.
For all PAH drugs, including selexipag, clinical trial data have demonstrated heterogeneity in treatment responses, with some patients responding very well, and many patients experiencing minimal to no response to therapy. The physiologic and phenotypic factors that determine robust vs. poor response are undefined. The ability to predict which patients will experience improvement with selexipag 1) would help to precisely integrate selexipag use into modern treatment algorithms in a more rational manner, and 2) could help to facilitate a more personalized approach to treatment selection for PAH patients in need of escalated therapy.
Hemnes et al. have identified a molecular signature of response to calcium channel blockers (CCBs), and Benza et al. have identified variants in endothelin pathway genes that predict response to ERAs. Though overall a negative study, Zamanian et al. also defined a molecular signature of response to rituximab given investigationally for scleroderma-associated PAH. These examples underscore the feasibility of the “responder” approach we propose for studying selexipag."
["project_specific_aims"]=>
string(1309) "The overall goal of the project is to define a responder phenotype for second-line PH therapies in order to enable precision-guided treatment escalation. We hypothesize that specific clinical and phenotypic features will predict response to selexpag.
Aim 1. Identify baseline predictors of response to selexipag to define a multi-domain responder phenotype.
We will use an expert-defined consensus definition of multi component improvement (MCI) to designate whether or not trial participants randomized to selexipag experienced MCI. We will train predictive models for MCI via ensemble modeling using the SuperLearner package in R. We will evaluate trained model performance using cross-validated AUC and accuracy. We will define and plot feature importance using SHAP values to fully delineate the “responder” phenotype. The responder phenotype can then be externally validated using real-world registry data.
Aim 2. Construct a data-driven response classifier for selexipag using unsupervised learning.
We will perform latent class analysis on quantitative trial data, which includes granular hemodynamic data and clinical events, to identify any natural response subgroups that may emerge from the data patterns, beyond expert-defined response criteria. "
["project_study_design"]=>
array(2) {
["value"]=>
string(14) "indiv_trial_an"
["label"]=>
string(25) "Individual trial analysis"
}
["project_purposes"]=>
array(1) {
[0]=>
array(2) {
["value"]=>
string(56) "new_research_question_to_examine_treatment_effectiveness"
["label"]=>
string(114) "New research question to examine treatment effectiveness on secondary endpoints and/or within subgroup populations"
}
}
["project_research_methods"]=>
string(155) "Inclusion criteria: we will consider all subjects randomized to selexipag in the GRIPHON trial.
Exclusion criteria: subjects randomized to placebo."
["project_main_outcome_measure"]=>
string(812) "The primary outcome measure is the 24-week multi-component clinical improvement (MCI) definition that was recently used as a secondary endpoint in the STELLAR trial. This is a composite endpoint measured as change from baseline and defined by: 1) improvement in 6-minute walk distance (6MWD) by ≥30 m, 2) reduction in N-terminal pro-B-type natriuretic peptide (NT-proBNP) levels by ≥30% or maintenance of NT-proBNP less than 300 pg/mL, and 3) improvement in or maintenance of World Health Organization functional class (WHO-FC) I or II. All three elements must be met for MCI to be present.
The secondary outcome measure will be an exploratory responder definition that we will derive via machine learning, as described in the analytic plan below.
There are no other secondary outcome measures."
["project_main_predictor_indep"]=>
string(147) "This is an unsupervised machine learning approach, and as such, the main predictor variables for MCI will be learned, rather than defined a priori."
["project_other_variables_interest"]=>
string(323) "Variables that will be made available for selection by the SuperLearner algorithm will include demographics, PH diagnosis class, comorbidities, NYHA/WHO functional classification, lab results including NTproBNP, right heart catheterization data (RAP, mPAP, PVR, CO, PCWP), 6MWD, lung function, PH drugs, and outcome events."
["project_stat_analysis_plan"]=>
string(2129) "Aim 1: We will extract patient-level baseline features (demographics, disease subtype, laboratory and PFT values, hemodynamics, walk distance, functional classification). We will perform pre-processing steps (standardize continuous variables, filter variables with greater than 20% missingness, impute remaining missing data using MICE) as appropriate. We will train predictive models using the SuperLearner package in R. SuperLearner is a meta-learning algorithm that constructs an optimal weighted combination of multiple base models (or "learners") to minimize prediction error. The SuperLearner framework allows the user to specify which modeling approaches will be incorporated (e.g., to define the list of learners). For this analysis, we will specify traditional logistic regression (using glm), random forest (using ranger, to account for potential non-linear interactions), gradient boosting (using xgboost, to account for complex decision boundaries).SuperLearner combines the strengths of these models, optimally weights each model’s predictions, then optimally blends models. We will use cross-validation to determine the best ensemble model.
Aim 2: We will perform latent class analysis (LCA) using the poLCA package in R to define discrete responder phenotypes based on multivariate response patterns in the pre-processed data. LCA is ideal for this analysis given this method handles mixed data types (continuous and categorical data) and provides interpretable, probabilistic class assignments. We still start by fitting 2-class, 3-class, 4-class, and 5-class models, using maximum likelihood estimation (MLE) to estimate class membership probabilities. We will use Bayesian Information Criteria (BIC), entropy scores, and likelihood ratio tests to determine the best-fitting model. We will exclude models with high class overlap (entropy less than 0.7). Once the classes are established, we will validate class stability using bootstrap resampling (minimum of 1,000 resamples to ensure stable classification). We’ll re-run LCA excluding one variable at a time to assess the sensitivity of the model. "
["project_software_used"]=>
array(2) {
["value"]=>
string(7) "rstudio"
["label"]=>
string(7) "RStudio"
}
["project_timeline"]=>
string(264) "Once the data is received, we anticipate data analysis to begin immediately. Analysis completion will take approximately 3-4 months, manuscript completion will take 2-3 months, and we expect submission of a publication and data report back to YODA within one year."
["project_dissemination_plan"]=>
string(89) "We would plan to submit the results to the European Respiratory Journal or CHEST journal."
["project_bibliography"]=>
string(952) "
- Chin, Kelly M., et al. “Treatment algorithm for pulmonary arterial hypertension.” European Respiratory Journal 64.4 (2024).
- Hemnes AR, Trammell AW, Archer SL, et al. Peripheral blood signature of vasodilator-responsive pulmonary arterial hypertension. Circulation 2015; 131(4): 401–409; discussion 409
- Benza RL, Gomberg-Maitland M, Demarco T, et al. Endothelin-1 pathway polymorphisms and outcomes in pulmonary arterial hypertension. Am J Respir Crit Care Med 2015; 192(11): 1345–1354.
- Humbert M, McLaughlin V, Gibbs JSR, Gomberg-Maitland M, Hoeper MM, Preston IR, et al. Sotatercept for the Treatment of Pulmonary Arterial Hypertension. N Engl J Med. 2021;384(13):1204-15.
- Hoeper MM, Badesch DB, Ghofrani HA, Gibbs JSR, Gomberg-Maitland M, McLaughlin VV, et al. Phase 3 Trial of Sotatercept for Treatment of Pulmonary Arterial Hypertension. N Engl J Med. 2023;388(16):1478-90.
"
["project_suppl_material"]=>
bool(false)
["project_coi"]=>
array(2) {
[0]=>
array(1) {
["file_coi"]=>
array(21) {
["ID"]=>
int(16876)
["id"]=>
int(16876)
["title"]=>
string(11) "COI FORM CS"
["filename"]=>
string(15) "COI-FORM-CS.pdf"
["filesize"]=>
int(20422)
["url"]=>
string(64) "https://yoda.yale.edu/wp-content/uploads/2025/03/COI-FORM-CS.pdf"
["link"]=>
string(57) "https://yoda.yale.edu/data-request/2025-0184/coi-form-cs/"
["alt"]=>
string(0) ""
["author"]=>
string(4) "1885"
["description"]=>
string(0) ""
["caption"]=>
string(0) ""
["name"]=>
string(11) "coi-form-cs"
["status"]=>
string(7) "inherit"
["uploaded_to"]=>
int(16867)
["date"]=>
string(19) "2025-03-17 17:02:13"
["modified"]=>
string(19) "2025-03-17 17:02:13"
["menu_order"]=>
int(0)
["mime_type"]=>
string(15) "application/pdf"
["type"]=>
string(11) "application"
["subtype"]=>
string(3) "pdf"
["icon"]=>
string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
}
}
[1]=>
array(1) {
["file_coi"]=>
array(21) {
["ID"]=>
int(16877)
["id"]=>
int(16877)
["title"]=>
string(11) "COI FORM PH"
["filename"]=>
string(15) "COI-FORM-PH.pdf"
["filesize"]=>
int(29008)
["url"]=>
string(64) "https://yoda.yale.edu/wp-content/uploads/2025/03/COI-FORM-PH.pdf"
["link"]=>
string(57) "https://yoda.yale.edu/data-request/2025-0184/coi-form-ph/"
["alt"]=>
string(0) ""
["author"]=>
string(4) "1885"
["description"]=>
string(0) ""
["caption"]=>
string(0) ""
["name"]=>
string(11) "coi-form-ph"
["status"]=>
string(7) "inherit"
["uploaded_to"]=>
int(16867)
["date"]=>
string(19) "2025-03-17 17:02:16"
["modified"]=>
string(19) "2025-03-17 17:02:16"
["menu_order"]=>
int(0)
["mime_type"]=>
string(15) "application/pdf"
["type"]=>
string(11) "application"
["subtype"]=>
string(3) "pdf"
["icon"]=>
string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
}
}
}
["data_use_agreement_training"]=>
bool(true)
["human_research_protection_training"]=>
bool(true)
["certification"]=>
bool(true)
["search_order"]=>
string(1) "0"
["project_send_email_updates"]=>
bool(false)
["project_publ_available"]=>
bool(true)
["project_year_access"]=>
string(4) "2025"
["project_rep_publ"]=>
bool(false)
["project_assoc_data"]=>
array(0) {
}
["project_due_dil_assessment"]=>
array(21) {
["ID"]=>
int(17156)
["id"]=>
int(17156)
["title"]=>
string(47) "YODA Project Due Diligence Assessment 2025-0184"
["filename"]=>
string(51) "YODA-Project-Due-Diligence-Assessment-2025-0184.pdf"
["filesize"]=>
int(135207)
["url"]=>
string(100) "https://yoda.yale.edu/wp-content/uploads/2025/03/YODA-Project-Due-Diligence-Assessment-2025-0184.pdf"
["link"]=>
string(93) "https://yoda.yale.edu/data-request/2025-0184/yoda-project-due-diligence-assessment-2025-0184/"
["alt"]=>
string(0) ""
["author"]=>
string(4) "1885"
["description"]=>
string(0) ""
["caption"]=>
string(0) ""
["name"]=>
string(47) "yoda-project-due-diligence-assessment-2025-0184"
["status"]=>
string(7) "inherit"
["uploaded_to"]=>
int(16867)
["date"]=>
string(19) "2025-04-29 18:35:39"
["modified"]=>
string(19) "2025-04-29 18:35:39"
["menu_order"]=>
int(0)
["mime_type"]=>
string(15) "application/pdf"
["type"]=>
string(11) "application"
["subtype"]=>
string(3) "pdf"
["icon"]=>
string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
}
["project_title_link"]=>
array(21) {
["ID"]=>
int(17155)
["id"]=>
int(17155)
["title"]=>
string(44) "YODA Project Protocol 2025-0184 - 2025-03-17"
["filename"]=>
string(46) "YODA-Project-Protocol-2025-0184-2025-03-17.pdf"
["filesize"]=>
int(123204)
["url"]=>
string(95) "https://yoda.yale.edu/wp-content/uploads/2025/03/YODA-Project-Protocol-2025-0184-2025-03-17.pdf"
["link"]=>
string(88) "https://yoda.yale.edu/data-request/2025-0184/yoda-project-protocol-2025-0184-2025-03-17/"
["alt"]=>
string(0) ""
["author"]=>
string(4) "1885"
["description"]=>
string(0) ""
["caption"]=>
string(0) ""
["name"]=>
string(42) "yoda-project-protocol-2025-0184-2025-03-17"
["status"]=>
string(7) "inherit"
["uploaded_to"]=>
int(16867)
["date"]=>
string(19) "2025-04-29 18:35:08"
["modified"]=>
string(19) "2025-04-29 18:35:08"
["menu_order"]=>
int(0)
["mime_type"]=>
string(15) "application/pdf"
["type"]=>
string(11) "application"
["subtype"]=>
string(3) "pdf"
["icon"]=>
string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
}
["project_review_link"]=>
array(21) {
["ID"]=>
int(17158)
["id"]=>
int(17158)
["title"]=>
string(36) "YODA Project Review - 2025-0184_site"
["filename"]=>
string(38) "YODA-Project-Review-2025-0184_site.pdf"
["filesize"]=>
int(1315627)
["url"]=>
string(87) "https://yoda.yale.edu/wp-content/uploads/2025/03/YODA-Project-Review-2025-0184_site.pdf"
["link"]=>
string(80) "https://yoda.yale.edu/data-request/2025-0184/yoda-project-review-2025-0184_site/"
["alt"]=>
string(0) ""
["author"]=>
string(4) "1885"
["description"]=>
string(0) ""
["caption"]=>
string(0) ""
["name"]=>
string(34) "yoda-project-review-2025-0184_site"
["status"]=>
string(7) "inherit"
["uploaded_to"]=>
int(16867)
["date"]=>
string(19) "2025-04-29 18:36:29"
["modified"]=>
string(19) "2025-04-29 18:36:29"
["menu_order"]=>
int(0)
["mime_type"]=>
string(15) "application/pdf"
["type"]=>
string(11) "application"
["subtype"]=>
string(3) "pdf"
["icon"]=>
string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
}
["project_highlight_button"]=>
string(0) ""
["request_overridden_res"]=>
string(1) "3"
["request_data_partner"]=>
string(15) "johnson-johnson"
}
data partner
array(1) {
[0]=>
string(15) "johnson-johnson"
}
pi country
array(0) {
}
pi affil
array(0) {
}
products
array(1) {
[0]=>
string(7) "uptravi"
}
num of trials
array(1) {
[0]=>
string(1) "1"
}
res
array(1) {
[0]=>
string(1) "3"
}
Research Proposal
Project Title:
Defining a selexipag responder phenotype in PAH
Scientific Abstract:
Background: Patients with PAH are prescribed a combination of two drugs at the time of diagnosis as first-line therapy. Many patients will require escalation to second-line therapy; however, there is no evidence basis for selecting a particular second-line therapy in PAH from among the available options. Selexipag is one of the three main choices for second-line therapy available to PAH prescribers.
Objective: To define a responder phenotype for second-line PAH therapies in order to enable precision-guided treatment escalation. We hypothesize that specific clinical and phenotypic features will predict response to selexipag.
Study Design: Individual participant level trial analysis using machine learning to determine the predictors of multicomponent improvement with selexipag.
Participants: All GRIPHON subjects randomized to receive selexipag.
Primary and Secondary Outcome Measures: patients will be designated as selexipag responders if they demonstrate multicomponent improvement (MCI), an expert consensus-defined clinical endpoint used in more recent PAH trials, on selexipag.
Statistical Analysis: Predictors of MCI will be derived using ensemble machine learning via the SuperLearner package in R.
Brief Project Background and Statement of Project Significance:
While there is broad consensus and strong evidence that first-line therapy for most patients with PAH should consist of a phosphodiesterase-5 inhibitor (PDE5i) given in combination with an endothelin receptor antagonist (ERA), the approach to escalation of therapy for patients who fail to achieve low-risk status with initial therapy lacks a clear framework. Compounding this challenge, the available second-line options have expanded dramatically over the last decade, and now include the novel activin inhibitor sotatercept in addition to selexipag and oral, inhaled, or parenteral prostacyclins. The modern PAH treatment algorithm does not provide for an evidence-based approach for choosing among these potential second-line therapies for patients who require escalation of therapy. Defining "responder" phenotypes for the available second-line drugs in order to identify patients who stand to benefit the most could help to improve patient care.
For all PAH drugs, including selexipag, clinical trial data have demonstrated heterogeneity in treatment responses, with some patients responding very well, and many patients experiencing minimal to no response to therapy. The physiologic and phenotypic factors that determine robust vs. poor response are undefined. The ability to predict which patients will experience improvement with selexipag 1) would help to precisely integrate selexipag use into modern treatment algorithms in a more rational manner, and 2) could help to facilitate a more personalized approach to treatment selection for PAH patients in need of escalated therapy.
Hemnes et al. have identified a molecular signature of response to calcium channel blockers (CCBs), and Benza et al. have identified variants in endothelin pathway genes that predict response to ERAs. Though overall a negative study, Zamanian et al. also defined a molecular signature of response to rituximab given investigationally for scleroderma-associated PAH. These examples underscore the feasibility of the "responder" approach we propose for studying selexipag.
Specific Aims of the Project:
The overall goal of the project is to define a responder phenotype for second-line PAH therapies in order to enable precision-guided treatment escalation. We hypothesize that specific clinical and phenotypic features will predict response to selexipag.
Aim 1. Identify baseline predictors of response to selexipag to define a multi-domain responder phenotype.
We will use an expert-defined consensus definition of multi component improvement (MCI) to designate whether or not trial participants randomized to selexipag experienced MCI. We will train predictive models for MCI via ensemble modeling using the SuperLearner package in R. We will evaluate trained model performance using cross-validated AUC and accuracy. We will define and plot feature importance using SHAP values to fully delineate the "responder" phenotype. The responder phenotype can then be externally validated using real-world registry data.
Aim 2. Construct a data-driven response classifier for selexipag using unsupervised learning.
We will perform latent class analysis on quantitative trial data, which includes granular hemodynamic data and clinical events, to identify any natural response subgroups that may emerge from the data patterns, beyond expert-defined response criteria.
Study Design:
Individual trial analysis
What is the purpose of the analysis being proposed? Please select all that apply.:
New research question to examine treatment effectiveness on secondary endpoints and/or within subgroup populations
Software Used:
RStudio
Data Source and Inclusion/Exclusion Criteria to be used to define the patient sample for your study:
Inclusion criteria: we will consider all subjects randomized to selexipag in the GRIPHON trial.
Exclusion criteria: subjects randomized to placebo.
Primary and Secondary Outcome Measure(s) and how they will be categorized/defined for your study:
The primary outcome measure is the 24-week multi-component clinical improvement (MCI) definition that was recently used as a secondary endpoint in the STELLAR trial. This is a composite endpoint measured as change from baseline and defined by: 1) improvement in 6-minute walk distance (6MWD) by >=30 m, 2) reduction in N-terminal pro-B-type natriuretic peptide (NT-proBNP) levels by >=30% or maintenance of NT-proBNP less than 300 pg/mL, and 3) improvement in or maintenance of World Health Organization functional class (WHO-FC) I or II. All three elements must be met for MCI to be present.
The secondary outcome measure will be an exploratory responder definition that we will derive via machine learning, as described in the analytic plan below.
There are no other secondary outcome measures.
Main Predictor/Independent Variable and how it will be categorized/defined for your study:
This is an unsupervised machine learning approach, and as such, the main predictor variables for MCI will be learned, rather than defined a priori.
Other Variables of Interest that will be used in your analysis and how they will be categorized/defined for your study:
Variables that will be made available for selection by the SuperLearner algorithm will include demographics, PH diagnosis class, comorbidities, NYHA/WHO functional classification, lab results including NTproBNP, right heart catheterization data (RAP, mPAP, PVR, CO, PCWP), 6MWD, lung function, PH drugs, and outcome events.
Statistical Analysis Plan:
Aim 1: We will extract patient-level baseline features (demographics, disease subtype, laboratory and PFT values, hemodynamics, walk distance, functional classification). We will perform pre-processing steps (standardize continuous variables, filter variables with greater than 20% missingness, impute remaining missing data using MICE) as appropriate. We will train predictive models using the SuperLearner package in R. SuperLearner is a meta-learning algorithm that constructs an optimal weighted combination of multiple base models (or "learners") to minimize prediction error. The SuperLearner framework allows the user to specify which modeling approaches will be incorporated (i.e., to define the list of learners). For this analysis, we will specify traditional logistic regression (using glm), random forest (using ranger, to account for potential non-linear interactions), and gradient boosting (using xgboost, to account for complex decision boundaries). SuperLearner combines the strengths of these models, optimally weights each model's predictions, then optimally blends models. We will use cross-validation to determine the best ensemble model.
Aim 2: We will perform latent class analysis (LCA) using the poLCA package in R to define discrete responder phenotypes based on multivariate response patterns in the pre-processed data. LCA is ideal for this analysis given that this method handles mixed data types (continuous and categorical data) and provides interpretable, probabilistic class assignments. We will start by fitting 2-class, 3-class, 4-class, and 5-class models, using maximum likelihood estimation (MLE) to estimate class membership probabilities. We will use the Bayesian Information Criterion (BIC), entropy scores, and likelihood ratio tests to determine the best-fitting model. We will exclude models with high class overlap (entropy less than 0.7). Once the classes are established, we will validate class stability using bootstrap resampling (minimum of 1,000 resamples to ensure stable classification). We'll re-run LCA excluding one variable at a time to assess the sensitivity of the model.
Narrative Summary:
Patients with PAH are initially prescribed two drugs as first-line therapy to treat their disease. Many patients will require a third drug as second-line therapy. However, there is no clear evidence-based framework to assist doctors in determining what the best third drug should be. There are three main options for a doctor to choose, including the drug selexipag. In this project, we will analyze data from patients enrolled in a clinical trial of selexipag. We will designate "responders" to selexipag as those who improved when given the drug. Then, we will examine the clinical features of those responders to determine what types of patients should receive selexipag as the best third drug.
Project Timeline:
Once the data is received, we anticipate data analysis to begin immediately. Analysis completion will take approximately 3-4 months, manuscript completion will take 2-3 months, and we expect submission of a publication and data report back to YODA within one year.
Dissemination Plan:
We would plan to submit the results to the European Respiratory Journal or CHEST journal.
Bibliography:
- Chin, Kelly M., et al. “Treatment algorithm for pulmonary arterial hypertension.” European Respiratory Journal 64.4 (2024).
- Hemnes AR, Trammell AW, Archer SL, et al. Peripheral blood signature of vasodilator-responsive pulmonary arterial hypertension. Circulation 2015; 131(4): 401–409; discussion 409.
- Benza RL, Gomberg-Maitland M, Demarco T, et al. Endothelin-1 pathway polymorphisms and outcomes in pulmonary arterial hypertension. Am J Respir Crit Care Med 2015; 192(11): 1345–1354.
- Humbert M, McLaughlin V, Gibbs JSR, Gomberg-Maitland M, Hoeper MM, Preston IR, et al. Sotatercept for the Treatment of Pulmonary Arterial Hypertension. N Engl J Med. 2021;384(13):1204-15.
- Hoeper MM, Badesch DB, Ghofrani HA, Gibbs JSR, Gomberg-Maitland M, McLaughlin VV, et al. Phase 3 Trial of Sotatercept for Treatment of Pulmonary Arterial Hypertension. N Engl J Med. 2023;388(16):1478-90.