array(39) {
  ["project_status"]=>
  string(30) "approved_pending_dua_signature"
  ["project_assoc_trials"]=>
  array(3) {
    [0]=>
    object(WP_Post)#4683 (24) {
      ["ID"]=>
      int(1249)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2014-10-20 14:57:00"
      ["post_date_gmt"]=>
      string(19) "2014-10-20 14:57:00"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(233) "NCT00638690 - A Phase 3, Randomized, Double-Blind, Placebo-Controlled Study of Abiraterone Acetate (CB7630) Plus Prednisone in Patients With Metastatic Castration-Resistant Prostate Cancer Who Have Failed Docetaxel-Based Chemotherapy"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(193) "nct00638690-a-phase-3-randomized-double-blind-placebo-controlled-study-of-abiraterone-acetate-cb7630-plus-prednisone-in-patients-with-metastatic-castration-resistant-prostate-cancer-who-have-fa"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-05-13 13:12:31"
      ["post_modified_gmt"]=>
      string(19) "2024-05-13 17:12:31"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(242) "https://dev-yoda.pantheonsite.io/clinical-trial/nct00638690-a-phase-3-randomized-double-blind-placebo-controlled-study-of-abiraterone-acetate-cb7630-plus-prednisone-in-patients-with-metastatic-castration-resistant-prostate-cancer-who-have-fa/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
    [1]=>
    object(WP_Post)#4681 (24) {
      ["ID"]=>
      int(8008)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2023-08-05 04:44:39"
      ["post_date_gmt"]=>
      string(19) "2023-08-05 04:44:39"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(210) "NCT02489318 - A Phase 3 Randomized, Placebo-controlled, Double-blind Study of Apalutamide Plus Androgen Deprivation Therapy (ADT) Versus ADT in Subjects With Metastatic Hormone-sensitive Prostate Cancer (mHSPC)"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(194) "nct02489318-a-phase-3-randomized-placebo-controlled-double-blind-study-of-apalutamide-plus-androgen-deprivation-therapy-adt-versus-adt-in-subjects-with-metastatic-hormone-sensitive-prostate-canc"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2023-11-30 11:40:10"
      ["post_modified_gmt"]=>
      string(19) "2023-11-30 16:40:10"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(243) "https://dev-yoda.pantheonsite.io/clinical-trial/nct02489318-a-phase-3-randomized-placebo-controlled-double-blind-study-of-apalutamide-plus-androgen-deprivation-therapy-adt-versus-adt-in-subjects-with-metastatic-hormone-sensitive-prostate-canc/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
    [2]=>
    object(WP_Post)#4682 (24) {
      ["ID"]=>
      int(1568)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2016-10-31 14:30:00"
      ["post_date_gmt"]=>
      string(19) "2016-10-31 14:30:00"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(223) "NCT00887198 - A Phase 3, Randomized, Double-blind, Placebo-Controlled Study of Abiraterone Acetate (CB7630) Plus Prednisone in Asymptomatic or Mildly Symptomatic Patients With Metastatic Castration-Resistant Prostate Cancer"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(193) "nct00887198-a-phase-3-randomized-double-blind-placebo-controlled-study-of-abiraterone-acetate-cb7630-plus-prednisone-in-asymptomatic-or-mildly-symptomatic-patients-with-metastatic-castration-re"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-03-26 10:20:09"
      ["post_modified_gmt"]=>
      string(19) "2024-03-26 14:20:09"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(242) "https://dev-yoda.pantheonsite.io/clinical-trial/nct00887198-a-phase-3-randomized-double-blind-placebo-controlled-study-of-abiraterone-acetate-cb7630-plus-prednisone-in-asymptomatic-or-mildly-symptomatic-patients-with-metastatic-castration-re/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
  }
  ["project_title"]=>
  string(97) "Missing data considerations for patient reported outcome measures in randomized controlled trials"
  ["project_narrative_summary"]=>
  string(3477) "Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment. PRO measures are questionnaires that usually consist of multiple items grouped into different domains, measuring patients' symptoms and quality of life. Generally, a total score or domain score is calculated and used as the endpoint in clinical trials according to the scoring rule developed by the authors of the questionnaire. However, due to the burden of completing the questionnaire, especially when the trial collects PRO data over repeated visits, missingness is very common in PRO data. There are two types of missing data in PRO: missing item and missing visit. Specifically, patients may fail to answer some of the questions in the domain of interest, known as 'missing item', or they leave the entire questionnaire unanswered due to drop out, known as 'missing visit'. 

Missing data can affect the calculation of the domain score and/or total score. Some scoring manuals allow for certain amounts of missing items. A common rule is to calculate the domain score by averaging over all available item scores if more than 50% of items are completed, also known as the 'half rule'. This method can also be referred to as 'item mean imputation'. In the meantime, many scoring manuals do not facilitate the calculation of domain/total scores in the presence of any missing items. Missing data can affect the calculation of the domain score and/or total score. Some scoring manuals allow for certain amounts of missing items. A common rule is to calculate the domain score by averaging over all available item scores if more than 50% of items are completed, also known as the 'half rule'. This method can also be referred to as 'item mean imputation'. In the meantime, many scoring manuals do not facilitate the calculation of domain/total scores in the presence of any missing items.

There are two approaches that deal with the missing PRO data. One approach is to directly impute the missing data on item level. Observed items in the same domain and other variables such as disease severity are usually related to the missing item and thus could be used to predict the missing item. The other approach is to impute the domain or total score without using the observed items in the domain. The imputation is repeated multiple times to reduce the uncertainty of a single imputation, known as multiple imputation(MI).

Missing data plays an important role in the analysis and interpretation of clinical trial results. The guidance by the FDA for missing data in PRO recommended specification of imputation methods in the analysis plan, but did not provide recommendations for specific imputation approaches. Rombach et al. investigated the performance of MI on item level versus domain score level for missing PRO data in a single visit[3]. To our knowledge, little progress has been made on the interpretation of PRO endpoint within a longitudinal framework in the presence of missing item and missing visit. And there is no consensus on how to select the appropriate statistical methodologies in these circumstances. The investigation of statistical methods can help further clarify the patterns of missing data accurately and therefore select the appropriate missing data handling strategies for interpreting PRO endpoints.
" ["project_learn_source"]=> string(9) "colleague" ["principal_investigator"]=> array(7) { ["first_name"]=> string(4) "Yang" ["last_name"]=> string(2) "Li" ["degree"]=> string(29) "Doctor's degree in statistics" ["primary_affiliation"]=> string(26) "Renmin University of China" ["email"]=> string(18) "yang.li@ruc.edu.cn" ["state_or_province"]=> string(7) "Beijing" ["country"]=> string(5) "China" } ["project_key_personnel"]=> array(2) { [0]=> array(6) { ["p_pers_f_name"]=> string(4) "Jing" ["p_pers_l_name"]=> string(3) "Sun" ["p_pers_degree"]=> string(39) "Bachelor of medicine, master of science" ["p_pers_pr_affil"]=> string(26) "Renmin University of China" ["p_pers_scop_id"]=> string(0) "" ["requires_data_access"]=> string(3) "yes" } [1]=> array(6) { ["p_pers_f_name"]=> string(7) "Shenran" ["p_pers_l_name"]=> string(4) "Deng" ["p_pers_degree"]=> string(20) "Bachelor of medicine" ["p_pers_pr_affil"]=> string(26) "Renmin University of China" ["p_pers_scop_id"]=> string(0) "" ["requires_data_access"]=> string(3) "yes" } } ["project_ext_grants"]=> array(2) { ["value"]=> string(2) "no" ["label"]=> string(68) "No external grants or funds are being used to support this research." } ["project_date_type"]=> string(18) "full_crs_supp_docs" ["property_scientific_abstract"]=> string(4084) "Background: Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment and the impact of medical conditions. Little progress has been made on the interpretation of PRO endpoint data within a longitudinal framework in the presence of missing data and how to select the appropriate statistical methodologies in these circumstances. The investigation of statistical methods can help further clarify the missing patterns accurately and therefore select the appropriate missing data handling strategies for interpreting PRO endpoints.

Objective: We conducted a simulation study to evaluate the performance of different statistical methods handling missing PRO data to provide scientific suggestions on the selection of appropriate statistical methods under various scenarios.

Study Design: Based on a thorough review of the questionnaire structures and scoring methods for over 100 PROs, hypothetical questionnaires in a hypothetical randomized trial were set up. A comprehensive simulation was conducted to evaluate the performance of statistical methods handling missingness in PRO data with various scenarios considering the following aspects: (1) sample size; (2) missing rate and proportion of missing visit; (3) missing mechanism and (4) number of missing items within the PRO measure. Under these scenarios, six statistical methods were evaluated, i.e., multiple imputation (MI) on item score, MI on domain score, MI on total score, item mean imputation, mixed model for repeated measurements (MMRM), and complete cases analysis. Bias, standard error (SE), root mean squared error (RMSE), power, and type 1 error were adopted to evaluate method performance.

Participants: In the numerical simulation, we use simulated data based on the PRO questionnaire review. In real data analysis, we will evaluate the methods' performance on the PRO endpoints collected in real-world clinical trials. Participants include the study population in each trial.

Primary and Secondary Outcome Measure(s): Outcome measures are PRO endpoints used in the trials, including Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, Prostate Cancer Subscale (PCS) score, FACT-P total score, and the EuroQoL five dimensions, five-levels questionnaire (EQ-5D-5L) score.

Statistical Analysis: Step 1: First, we will do a descriptive analysis of the trial data, giving a general picture of the missing patterns in the longitudinal observations. Specifically, we will summarize the dropout rate in each group by visits, and the percentage of subjects with different numbers of missing items at each visit. We will also examine missing data in baseline covariates, since missing covariates is not the focus of this study, we will only include those covariates with very few missing data for later analysis. Step 2: Conduct an exploratory regression analysis with baseline covariates as predictors, each PRO endpoint, and missingness as dependent variables respectively. Select the variables with significant correlations with the outcome or missingness for MI model construction. Step 3: Obtain the subset of patients with complete longitudinal PRO data. Save the treatment effect under this complete dataset as the 'true' value. Step 4: Impose missingness in the full dataset following the missing pattern in Step 1. Step 5: Apply MI on item score, MI on domain/total score, and item mean imputation respectively. Conduct MMRM analysis and complete case analysis(CCA). Obtain the treatment effect estimate under each method. Step 6: Repeat Steps 4 and 5 two hundred times. Step 7: Calculate the performance measures for each method, including standard error (SE), root mean squared error (RMSE), and power/type one error.

" ["project_brief_bg"]=> string(3198) "Background
Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment[1]. PRO measures are questionnaires that usually consist of multiple items grouped into different domains, measuring patients' symptoms and quality of life. Generally, a total score or domain score is calculated and used as the endpoint in clinical trials according to the scoring rule developed by the authors of the questionnaire. However, due to the burden of completing the questionnaire, especially when the trial collects PRO data over repeated visits, missingness is very common in PRO data. There are two types of missing data in PRO: missing item and missing visit. Specifically, patients may fail to answer some of the questions in the domain of interest, known as 'missing item', or they leave the entire questionnaire unanswered due to drop out, known as 'missing visit'.

Missing data can affect the calculation of the domain score and/or total score. Some scoring manuals allow for certain amounts of missing items. A common rule is to calculate the domain score by averaging over all available item scores if more than 50% of items are completed, also known as the 'half rule'. This method can also be referred to as 'item mean imputation'. In the meantime, many scoring manuals do not facilitate the calculation of domain/total scores in the presence of any missing items.

There are two approaches that deal with the missing PRO data. One approach is to directly impute the missing data on item level. Observed items in the same domain and other variables such as disease severity are usually related to the missing item and thus could be used to predict the missing item. The other approach is to impute the domain or total score without using the observed items in the domain. The imputation is repeated multiple times to reduce the uncertainty of a single imputation, known as multiple imputation(MI).

Necessity of research
Missing data plays an important role in the analysis and interpretation of clinical trial results. The guidance by the FDA for missing data in PRO recommended specification of imputation methods in the analysis plan, but did not provide recommendation for specific imputation approaches[2]. Rombach et al. investigated the performance of MI on item level versus domain score level for missing PRO data in a single visit[3]. To our knowledge, little progress has been made on the interpretation of PRO endpoint within a longitudinal framework in the presence of missing item and missing visit. And there is no consensus on how to select the appropriate statistical methodologies in these circumstances.

We conducted a simulation research to evaluate different imputation methods dealing with missing PRO data in a longitudinal framework, including MI on item score, MI on domain/total score, and item mean imputation. This request is made to test the performance of the above methods for missing PRO data on a real dataset of longitudinal PRO in a clinical trial.
" ["project_specific_aims"]=> string(981) "Specific hypotheses to be evaluated include:
1. MI on item score outperforms MI on domain/total score in terms of the precision for treatment effect estimation and the statistical power for hypothesis test.
2. The performance of item mean imputation depends on the percentage of missing item. Specifically, when more than 50% of items are missing for a subject, it is not accurate to impute missing items with the mean of available items.

In addition to the two hypotheses, we also aim to quantify the difference of MI methods against mixed model for repeated measurement(MMRM) and complete case analysis(CCA) under different sample sizes and missing rates to give a holistic picture of missing data handling strategies for PRO data.

We would like to emphasize that the aim of this study is not to examine the treatment effect, rather, we focus on the performance of different statistical methods dealing with missing PRO data.
" ["project_study_design"]=> array(2) { ["value"]=> string(8) "meth_res" ["label"]=> string(23) "Methodological research" } ["project_purposes"]=> array(2) { [0]=> array(2) { ["value"]=> string(37) "develop_or_refine_statistical_methods" ["label"]=> string(37) "Develop or refine statistical methods" } [1]=> array(2) { ["value"]=> string(34) "research_on_clinical_trial_methods" ["label"]=> string(34) "Research on clinical trial methods" } } ["project_software_used"]=> array(2) { ["value"]=> string(1) "r" ["label"]=> string(1) "R" } ["project_research_methods"]=> string(1020) "The study sample will remain the same as the clinical trial participants. There will be no further inclusion/exclusion criteria besides the original criteria that define the study population in each trial.

Trial COU-AA-301(NCT00638690): patients with metastatic castration-resistant prostate cancer who had progressed after docetaxel-based chemotherapy and had an Eastern Cooperative Oncology Group performance status (ECOG PS) less than 2;
Trial COU-AA-302(NCT00887198): patients with metastatic castration-resistant prostate cancer who had no prior chemotherapy, were asymptomatic [score, 0 or 1 on Brief Pain Inventory-Short Form (BPI-SF) item 3] or mildly symptomatic (score, 2 or 3 on BPI-SF item 3), had ECOG PS 0 or 1 and had no prior visceral metastases;
TITAN trial (NCT02489318): patients with metastatic castration-sensitive prostate cancer (defined as not receiving Androgen deprivation therapy(ADT) at the time of metastatic disease progression) who were receiving ADT.
" ["project_main_outcome_measure"]=> string(1263) "The proposed research does not examine the treatment effect in a specific population, rather, we focus on the performance of statistical methods dealing with missing PRO data. Therefore, there are no secondary outcome measures. All outcome measures are the PRO endpoints used in the clinical trial.

Trial COU-AA-301(NCT00638690): Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, and Prostate Cancer Subscale (PCS) score.
Trial COU-AA-302(NCT00887198): Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, and Prostate Cancer Subscale (PCS) score.
TITAN trial (NCT02489318): Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, Prostate Cancer Subscale (PCS) score, FACT-P total score, and the EuroQoL five dimensions, five-levels questionnaire (EQ-5D-5L) score.
" ["project_main_predictor_indep"]=> string(2458) "The methods under investigation could be divided into two types: (1) imputation on item score level(MI or item mean imputation); (2) imputation(MI) or direct analysis(MMRM) on domain/total score level.

For MMRM, the main predictors include treatment, time, treatment-by-time interaction, and baseline score of the PRO.

Since this is within a longitudinal framework, MI is conducted sequentially, i.e., the outcome variable at visit 1 is modeled using the information from the baseline, and the outcome variable at visit 2 is modeled using the information at visit 1. The imputation continues until the last visit is imputed. MI on item score and MI on domain/total score level will use different predictors to impute missing data.

For MI on item score, the main predictors include the treatment group and all items of the questionnaire at each visit.

Take CFB in EQ-5D as an example to illustrate the sequential imputation process. MI on item score will begin with imputing item 1 at visit 1, using the treatment group, 5 items measured at baseline, and other items(items 2-5) at visit 1 as predictors. Then for item 2 at visit 1, predictors are the treatment group, 5 items measured at baseline, and other items(items 1, 3, 4, 5) at visit 1. After all 5 items at visit 1 are imputed, we move on to visit 2. For item 1 at visit 2, predictors are the treatment group, 5 items measured at visit 1, and other items(items 2-5) at visit 2. The process ends when all 5 items are imputed at the last visit.

Here we want to emphasize that the MI on item score method will require information on each item score for each subject at each visit.
Specifically,
for EQ-5D: 5 items including mobility, self-care, usual activities, pain or discomfort, and anxiety or depression for each patient at each visit;
for FACT-P: 27 items in FACT-G and 12 items in the prostate-specific module for each patient at each visit;
for BFI: items 1 (present fatigue), 2(usual fatigue level in last 24 h), and 3 (worst fatigue in last 24 h) for each patient at each visit;
for BPI-SF: items 3 (worst pain in last 24 h), 4 (least pain in last 24 h), 5 (average pain), and 6 (present pain) for each patient at each visit.

For MI on domain/total score level, the main predictors include the treatment group and domain/total score at the previous visit.
" ["project_other_variables_interest"]=> string(317) "Baseline covariates collected in the trial, such as age, gender, ethnicity, disease severity, and others. The relationship of baseline covariates with the PRO endpoints and missingness will be explored and prognostic factors for the outcome or the missingness will be included in the regression model for MI.
" ["project_stat_analysis_plan"]=> string(2205) "Step 1: First, we will do a descriptive analysis of the trial data, giving a general picture of the missing patterns in the longitudinal observations. Specifically, we will summarize the dropout rate in each group by visits, and the percentage of subjects who have different numbers of missing items at each visit. We will also examine missing data in baseline covariates, since missing covariates is not the focus of this study, we will only include those covariates with very few missing data for later analysis.
Step 2: Conduct an exploratory regression analysis with baseline covariates as predictors, PRO endpoints, and missingness as dependent variables respectively. Select the variables that have significant correlations with the outcome or missingness for MI model construction.
Step 3: Obtain the subset of patients with complete longitudinal PRO data. Save the treatment effect under this complete dataset as the 'true' value.
Step 4: Impose missing data in the complete dataset following the missing pattern in Step 1.
Step 5: Apply MI on item score, MI on total score, and item mean imputation respectively. Conduct MMRM analysis and complete case analysis(CCA). Obtain the treatment effect estimate under each method.
Step 6: Repeat Steps 4 and 5 two hundred times.
Step 7: Calculate the performance measures for each method, including standard error (SE), root mean squared error (RMSE), and power.

In step 5, the details of the analysis methods are as follows.
For MMRM, the main predictors are treatment, time, treatment-by-time, and baseline score of the PRO.
For CCA, the analysis model is ANCOVA(analysis of covariance) which will use the baseline score of the PRO as the covariate.
For MI on item score, the main predictors include the treatment group and all items of the questionnaire at the previous visit and other items at the current visit.
For MI on domain/total score level, main predictors include the treatment group and domain/total score at the previous visit.
Ancillary variables identified in Step 2 will be added to MI models upon the team's discussion.

" ["project_timeline"]=> string(187) "Anticipated project start date: 2024-05-30
Analysis completion date: 2024-07-20
Publication submission date: 2024-09-20
Results reported back date: 2024-11-30
" ["project_dissemination_plan"]=> string(171) "The manuscript will be submitted to journals in the field of patient reported outcome research, such as Value in Health, and Journal of Patient-Reported Outcomes.
" ["project_bibliography"]=> string(819) "

1. Mercieca-Bebber, R., King, M. T., Calvert, M. J., Stockler, M. R., & Friedlander, M. (2018). The importance of patient-reported outcomes in clinical trials and strategies for future optimization. Patient related outcome measures, 353-367.
2. US Food and Drug Administration. (2009). Patient-reported outcome measures: use in medical product development to support labeling claims. Guidance for Industry.
3. Rombach, I., Gray, A. M., Jenkinson, C., Murray, D. W., & Rivero-Arias, O. (2018). Multiple imputation for patient reported outcome measures in randomised controlled trials: advantages and disadvantages of imputing at the item, subscale or composite score level. BMC medical research methodology, 18, 1-16.

" ["project_suppl_material"]=> bool(false) ["project_coi"]=> array(3) { [0]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(14679) ["id"]=> int(14679) ["title"]=> string(17) "COI_Yang-Li-1.pdf" ["filename"]=> string(17) "COI_Yang-Li-1.pdf" ["filesize"]=> int(20304) ["url"]=> string(66) "https://yoda.yale.edu/wp-content/uploads/2024/04/COI_Yang-Li-1.pdf" ["link"]=> string(63) "https://yoda.yale.edu/data-request/2024-0392/coi_yang-li-1-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1742" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(17) "coi_yang-li-1-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(14447) ["date"]=> string(19) "2024-04-20 08:47:14" ["modified"]=> string(19) "2024-04-20 08:47:18" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } [1]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(14681) ["id"]=> int(14681) ["title"]=> string(22) "COI_Shenran-Deng-1.pdf" ["filename"]=> string(22) "COI_Shenran-Deng-1.pdf" ["filesize"]=> int(20179) ["url"]=> string(71) "https://yoda.yale.edu/wp-content/uploads/2024/04/COI_Shenran-Deng-1.pdf" ["link"]=> string(68) "https://yoda.yale.edu/data-request/2024-0392/coi_shenran-deng-1-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1742" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(22) "coi_shenran-deng-1-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(14447) ["date"]=> string(19) "2024-04-20 08:47:16" ["modified"]=> string(19) "2024-04-20 08:47:19" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } [2]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(14682) ["id"]=> 
int(14682) ["title"]=> string(18) "COI_Jing-Sun-1.pdf" ["filename"]=> string(18) "COI_Jing-Sun-1.pdf" ["filesize"]=> int(20831) ["url"]=> string(67) "https://yoda.yale.edu/wp-content/uploads/2024/04/COI_Jing-Sun-1.pdf" ["link"]=> string(64) "https://yoda.yale.edu/data-request/2024-0392/coi_jing-sun-1-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1742" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(18) "coi_jing-sun-1-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(14447) ["date"]=> string(19) "2024-04-20 08:47:17" ["modified"]=> string(19) "2024-04-20 08:47:19" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } } ["data_use_agreement_training"]=> bool(true) ["certification"]=> bool(true) ["search_order"]=> string(1) "0" ["project_send_email_updates"]=> bool(false) ["project_publ_available"]=> bool(true) ["project_year_access"]=> string(0) "" ["project_rep_publ"]=> bool(false) ["project_assoc_data"]=> array(0) { } ["project_due_dil_assessment"]=> bool(false) ["project_title_link"]=> array(21) { ["ID"]=> int(14716) ["id"]=> int(14716) ["title"]=> string(28) "Data Request Approved Notice" ["filename"]=> string(32) "Data-Request-Approved-Notice.pdf" ["filesize"]=> int(195663) ["url"]=> string(81) "https://yoda.yale.edu/wp-content/uploads/2024/04/Data-Request-Approved-Notice.pdf" ["link"]=> string(77) "https://yoda.yale.edu/data-request/2024-0400/data-request-approved-notice-10/" ["alt"]=> string(0) "" ["author"]=> string(3) "190" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(31) "data-request-approved-notice-10" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(14453) ["date"]=> string(19) "2024-04-25 15:38:15" ["modified"]=> string(19) "2024-04-25 15:38:15" ["menu_order"]=> int(0) ["mime_type"]=> string(15) 
"application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } ["project_review_link"]=> bool(false) ["project_highlight_button"]=> string(0) "" ["request_overridden_res"]=> string(1) "3" ["request_data_partner"]=> string(15) "johnson-johnson" } data partner
array(1) { [0]=> string(15) "johnson-johnson" }

pi country
array(0) { }

pi affil
array(0) { }

products
array(2) { [0]=> string(6) "zytiga" [1]=> string(7) "erleada" }

num of trials
array(1) { [0]=> string(1) "3" }

res
array(1) { [0]=> string(1) "3" }

2024-0392

Research Proposal

Project Title: Missing data considerations for patient reported outcome measures in randomized controlled trials

Scientific Abstract: Background: Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment and the impact of medical conditions. Little progress has been made on the interpretation of PRO endpoint data within a longitudinal framework in the presence of missing data and how to select the appropriate statistical methodologies in these circumstances. The investigation of statistical methods can help further clarify the missing patterns accurately and therefore select the appropriate missing data handling strategies for interpreting PRO endpoints.

Objective: We conducted a simulation study to evaluate the performance of different statistical methods handling missing PRO data to provide scientific suggestions on the selection of appropriate statistical methods under various scenarios.

Study Design: Based on a thorough review of the questionnaire structures and scoring methods for over 100 PROs, hypothetical questionnaires in a hypothetical randomized trial were set up. A comprehensive simulation was conducted to evaluate the performance of statistical methods handling missingness in PRO data with various scenarios considering the following aspects: (1) sample size; (2) missing rate and proportion of missing visit; (3) missing mechanism and (4) number of missing items within the PRO measure. Under these scenarios, six statistical methods were evaluated, i.e., multiple imputation (MI) on item score, MI on domain score, MI on total score, item mean imputation, mixed model for repeated measurements (MMRM), and complete cases analysis. Bias, standard error (SE), root mean squared error (RMSE), power, and type 1 error were adopted to evaluate method performance.

Participants: In the numerical simulation, we use simulated data based on the PRO questionnaire review. In real data analysis, we will evaluate the methods' performance on the PRO endpoints collected in real-world clinical trials. Participants include the study population in each trial.

Primary and Secondary Outcome Measure(s): Outcome measures are PRO endpoints used in the trials, including Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, Prostate Cancer Subscale (PCS) score, FACT-P total score, and the EuroQoL five dimensions, five-levels questionnaire (EQ-5D-5L) score.

Statistical Analysis: Step 1: First, we will do a descriptive analysis of the trial data, giving a general picture of the missing patterns in the longitudinal observations. Specifically, we will summarize the dropout rate in each group by visits, and the percentage of subjects with different numbers of missing items at each visit. We will also examine missing data in baseline covariates; because missing covariates are not the focus of this study, we will only include those covariates with very few missing data for later analysis. Step 2: Conduct exploratory regression analyses with baseline covariates as predictors and with each PRO endpoint and missingness, respectively, as the dependent variable. Select the variables with significant correlations with the outcome or missingness for MI model construction. Step 3: Obtain the subset of patients with complete longitudinal PRO data. Save the treatment effect under this complete dataset as the 'true' value. Step 4: Impose missingness in the complete dataset following the missing pattern in Step 1. Step 5: Apply MI on item score, MI on domain/total score, and item mean imputation respectively. Conduct MMRM analysis and complete case analysis (CCA). Obtain the treatment effect estimate under each method. Step 6: Repeat Steps 4 and 5 two hundred times. Step 7: Calculate the performance measures for each method, including standard error (SE), root mean squared error (RMSE), and power/type one error.

Brief Project Background and Statement of Project Significance: Background
Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment[1]. PRO measures are questionnaires that usually consist of multiple items grouped into different domains, measuring patients' symptoms and quality of life. Generally, a total score or domain score is calculated and used as the endpoint in clinical trials according to the scoring rule developed by the authors of the questionnaire. However, due to the burden of completing the questionnaire, especially when the trial collects PRO data over repeated visits, missingness is very common in PRO data. There are two types of missing data in PRO: missing item and missing visit. Specifically, patients may fail to answer some of the questions in the domain of interest, known as 'missing item', or they leave the entire questionnaire unanswered due to drop out, known as 'missing visit'.

Missing data can affect the calculation of the domain score and/or total score. Some scoring manuals allow for certain amounts of missing items. A common rule is to calculate the domain score by averaging over all available item scores if more than 50% of items are completed, also known as the 'half rule'. This method can also be referred to as 'item mean imputation'. In the meantime, many scoring manuals do not facilitate the calculation of domain/total scores in the presence of any missing items.

There are two approaches that deal with the missing PRO data. One approach is to directly impute the missing data on item level. Observed items in the same domain and other variables such as disease severity are usually related to the missing item and thus could be used to predict the missing item. The other approach is to impute the domain or total score without using the observed items in the domain. The imputation is repeated multiple times to reduce the uncertainty of a single imputation, known as multiple imputation(MI).

Necessity of research
Missing data plays an important role in the analysis and interpretation of clinical trial results. The FDA guidance on missing data in PROs recommended specifying imputation methods in the analysis plan, but did not provide recommendations for specific imputation approaches[2]. Rombach et al. investigated the performance of MI at the item level versus the domain score level for missing PRO data at a single visit[3]. To our knowledge, little progress has been made on the interpretation of PRO endpoints within a longitudinal framework in the presence of missing items and missing visits. Moreover, there is no consensus on how to select the appropriate statistical methodologies in these circumstances.

We conducted a simulation study to evaluate different imputation methods for handling missing PRO data in a longitudinal framework, including MI on item score, MI on domain/total score, and item mean imputation. This request is made to test the performance of the above methods for missing PRO data on real longitudinal PRO data from clinical trials.

Specific Aims of the Project: Specific hypotheses to be evaluated include:
1. MI on item score outperforms MI on domain/total score in terms of the precision of treatment effect estimation and the statistical power of hypothesis testing.
2. The performance of item mean imputation depends on the percentage of missing items. Specifically, when more than 50% of items are missing for a subject, it is not accurate to impute missing items with the mean of available items.

In addition to the two hypotheses, we also aim to quantify the differences between the MI methods and both the mixed model for repeated measurements (MMRM) and complete case analysis (CCA) under different sample sizes and missing rates, to give a holistic picture of missing data handling strategies for PRO data.

We would like to emphasize that the aim of this study is not to examine the treatment effect, rather, we focus on the performance of different statistical methods dealing with missing PRO data.

Study Design: Methodological research

What is the purpose of the analysis being proposed? Please select all that apply.: Develop or refine statistical methods Research on clinical trial methods

Software Used: R

Data Source and Inclusion/Exclusion Criteria to be used to define the patient sample for your study: The study sample will remain the same as the clinical trial participants. There will be no further inclusion/exclusion criteria besides the original criteria that define the study population in each trial.

Trial COU-AA-301(NCT00638690): patients with metastatic castration-resistant prostate cancer who had progressed after docetaxel-based chemotherapy and had an Eastern Cooperative Oncology Group performance status (ECOG PS) less than 2;
Trial COU-AA-302(NCT00887198): patients with metastatic castration-resistant prostate cancer who had no prior chemotherapy, were asymptomatic [score, 0 or 1 on Brief Pain Inventory-Short Form (BPI-SF) item 3] or mildly symptomatic (score, 2 or 3 on BPI-SF item 3), had ECOG PS 0 or 1 and had no prior visceral metastases;
TITAN trial (NCT02489318): patients with metastatic castration-sensitive prostate cancer (defined as not receiving androgen deprivation therapy (ADT) at the time of metastatic disease progression) who were receiving ADT.

Primary and Secondary Outcome Measure(s) and how they will be categorized/defined for your study: The proposed research does not examine the treatment effect in a specific population, rather, we focus on the performance of statistical methods dealing with missing PRO data. Therefore, there are no secondary outcome measures. All outcome measures are the PRO endpoints used in the clinical trial.

Trial COU-AA-301(NCT00638690): Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, and Prostate Cancer Subscale (PCS) score.
Trial COU-AA-302(NCT00887198): Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, and Prostate Cancer Subscale (PCS) score.
TITAN trial (NCT02489318): Brief Fatigue Inventory (BFI) total score, Brief Pain Inventory-Short Form (BPI-SF) total score, functional well-being (FWB, subscale of FACT-G) domain score, physical well-being (PWB, subscale of FACT-G) domain score, Prostate Cancer Subscale (PCS) score, FACT-P total score, and the EuroQoL five dimensions, five-levels questionnaire (EQ-5D-5L) score.

Main Predictor/Independent Variable and how it will be categorized/defined for your study: The methods under investigation could be divided into two types: (1) imputation on item score level(MI or item mean imputation); (2) imputation(MI) or direct analysis(MMRM) on domain/total score level.

For MMRM, the main predictors include treatment, time, treatment-by-time interaction, and baseline score of the PRO.

Since this is within a longitudinal framework, MI is conducted sequentially, i.e., the outcome variable at visit 1 is modeled using the information from the baseline, and the outcome variable at visit 2 is modeled using the information at visit 1. The imputation continues until the last visit is imputed. MI on item score and MI on domain/total score level will use different predictors to impute missing data.

For MI on item score, the main predictors include the treatment group and all items of the questionnaire at each visit.

Take change from baseline (CFB) in EQ-5D as an example to illustrate the sequential imputation process. MI on item score will begin with imputing item 1 at visit 1, using the treatment group, the 5 items measured at baseline, and the other items (items 2-5) at visit 1 as predictors. Then for item 2 at visit 1, the predictors are the treatment group, the 5 items measured at baseline, and the other items (items 1, 3, 4, 5) at visit 1. After all 5 items at visit 1 are imputed, we move on to visit 2. For item 1 at visit 2, the predictors are the treatment group, the 5 items measured at visit 1, and the other items (items 2-5) at visit 2. The process ends when all 5 items are imputed at the last visit.

Here we want to emphasize that the MI on item score method will require information on each item score for each subject at each visit.
Specifically,
for EQ-5D: 5 items including mobility, self-care, usual activities, pain or discomfort, and anxiety or depression for each patient at each visit;
for FACT-P: 27 items in FACT-G and 12 items in the prostate-specific module for each patient at each visit;
for BFI: items 1 (present fatigue), 2 (usual fatigue level in last 24 h), and 3 (worst fatigue in last 24 h) for each patient at each visit;
for BPI-SF: items 3 (worst pain in last 24 h), 4 (least pain in last 24 h), 5 (average pain), and 6 (present pain) for each patient at each visit.

For MI on domain/total score level, the main predictors include the treatment group and domain/total score at the previous visit.

Other Variables of Interest that will be used in your analysis and how they will be categorized/defined for your study: Baseline covariates collected in the trial, such as age, gender, ethnicity, disease severity, and others. The relationship of baseline covariates with the PRO endpoints and missingness will be explored and prognostic factors for the outcome or the missingness will be included in the regression model for MI.

Statistical Analysis Plan: Step 1: First, we will do a descriptive analysis of the trial data, giving a general picture of the missing patterns in the longitudinal observations. Specifically, we will summarize the dropout rate in each group by visits, and the percentage of subjects who have different numbers of missing items at each visit. We will also examine missing data in baseline covariates; because missing covariates are not the focus of this study, we will only include those covariates with very few missing data for later analysis.
Step 2: Conduct an exploratory regression analysis with baseline covariates as predictors, PRO endpoints, and missingness as dependent variables respectively. Select the variables that have significant correlations with the outcome or missingness for MI model construction.
Step 3: Obtain the subset of patients with complete longitudinal PRO data. Save the treatment effect under this complete dataset as the 'true' value.
Step 4: Impose missing data in the complete dataset following the missing pattern in Step 1.
Step 5: Apply MI on item score, MI on total score, and item mean imputation respectively. Conduct MMRM analysis and complete case analysis(CCA). Obtain the treatment effect estimate under each method.
Step 6: Repeat Steps 4 and 5 two hundred times.
Step 7: Calculate the performance measures for each method, including standard error (SE), root mean squared error (RMSE), and power.

In step 5, the details of the analysis methods are as follows.
For MMRM, the main predictors are treatment, time, treatment-by-time, and baseline score of the PRO.
For CCA, the analysis model is ANCOVA (analysis of covariance), which will use the baseline score of the PRO as the covariate.
For MI on item score, the main predictors include the treatment group and all items of the questionnaire at the previous visit and other items at the current visit.
For MI on domain/total score level, main predictors include the treatment group and domain/total score at the previous visit.
Ancillary variables identified in Step 2 will be added to MI models upon the team's discussion.

Narrative Summary: Patient-reported outcome (PRO) measures are increasingly included in clinical trials to support primary, secondary, or exploratory endpoints, providing the patient's perspective of treatment. PRO measures are questionnaires that usually consist of multiple items grouped into different domains, measuring patients' symptoms and quality of life. Generally, a total score or domain score is calculated and used as the endpoint in clinical trials according to the scoring rule developed by the authors of the questionnaire. However, due to the burden of completing the questionnaire, especially when the trial collects PRO data over repeated visits, missingness is very common in PRO data. There are two types of missing data in PRO: missing item and missing visit. Specifically, patients may fail to answer some of the questions in the domain of interest, known as 'missing item', or they leave the entire questionnaire unanswered due to drop out, known as 'missing visit'.

Missing data can affect the calculation of the domain score and/or total score. Some scoring manuals allow for certain amounts of missing items. A common rule is to calculate the domain score by averaging over all available item scores if more than 50% of items are completed, also known as the 'half rule'. This method can also be referred to as 'item mean imputation'. In the meantime, many scoring manuals do not facilitate the calculation of domain/total scores in the presence of any missing items.

There are two approaches that deal with the missing PRO data. One approach is to directly impute the missing data on item level. Observed items in the same domain and other variables such as disease severity are usually related to the missing item and thus could be used to predict the missing item. The other approach is to impute the domain or total score without using the observed items in the domain. The imputation is repeated multiple times to reduce the uncertainty of a single imputation, known as multiple imputation(MI).

Missing data plays an important role in the analysis and interpretation of clinical trial results. The guidance by the FDA for missing data in PRO recommended specification of imputation methods in the analysis plan, but did not provide recommendations for specific imputation approaches. Rombach et al. investigated the performance of MI on item level versus domain score level for missing PRO data in a single visit[3]. To our knowledge, little progress has been made on the interpretation of PRO endpoint within a longitudinal framework in the presence of missing item and missing visit. And there is no consensus on how to select the appropriate statistical methodologies in these circumstances. The investigation of statistical methods can help further clarify the patterns of missing data accurately and therefore select the appropriate missing data handling strategies for interpreting PRO endpoints.

Project Timeline: Anticipated project start date: 2024-05-30
Analysis completion date: 2024-07-20
Publication submission date: 2024-09-20
Results reported back date: 2024-11-30

Dissemination Plan: The manuscript will be submitted to journals in the field of patient-reported outcome research, such as Value in Health and the Journal of Patient-Reported Outcomes.

Bibliography:

1. Mercieca-Bebber, R., King, M. T., Calvert, M. J., Stockler, M. R., & Friedlander, M. (2018). The importance of patient-reported outcomes in clinical trials and strategies for future optimization. Patient Related Outcome Measures, 9, 353-367.
2. US Food and Drug Administration. (2009). Patient-reported outcome measures: use in medical product development to support labeling claims. Guidance for Industry.
3. Rombach, I., Gray, A. M., Jenkinson, C., Murray, D. W., & Rivero-Arias, O. (2018). Multiple imputation for patient reported outcome measures in randomised controlled trials: advantages and disadvantages of imputing at the item, subscale or composite score level. BMC Medical Research Methodology, 18, 1-16.