array(41) {
  ["project_status"]=>
  string(7) "ongoing"
  ["project_assoc_trials"]=>
  array(3) {
    [0]=>
    object(WP_Post)#4874 (24) {
      ["ID"]=>
      int(1902)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2023-08-05 04:45:19"
      ["post_date_gmt"]=>
      string(19) "2023-08-05 04:45:19"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(229) "NCT02065791 - A Randomized, Double-blind, Event-driven, Placebo-controlled, Multicenter Study of the Effects of Canagliflozin on Renal and Cardiovascular Outcomes in Subjects With Type 2 Diabetes Mellitus and Diabetic Nephropathy"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(194) "nct02065791-a-randomized-double-blind-event-driven-placebo-controlled-multicenter-study-of-the-effects-of-canagliflozin-on-renal-and-cardiovascular-outcomes-in-subjects-with-type-2-diabetes-mell"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-09-27 17:15:43"
      ["post_modified_gmt"]=>
      string(19) "2024-09-27 21:15:43"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(243) "https://dev-yoda.pantheonsite.io/clinical-trial/nct02065791-a-randomized-double-blind-event-driven-placebo-controlled-multicenter-study-of-the-effects-of-canagliflozin-on-renal-and-cardiovascular-outcomes-in-subjects-with-type-2-diabetes-mell/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
    [1]=>
    object(WP_Post)#4873 (24) {
      ["ID"]=>
      int(1806)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2023-08-05 04:45:19"
      ["post_date_gmt"]=>
      string(19) "2023-08-05 04:45:19"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(195) "NCT01032629 - A Randomized, Multicenter, Double-Blind, Parallel, Placebo-Controlled Study of the Effects of JNJ-28431754 on Cardiovascular Outcomes in Adult Subjects With Type 2 Diabetes Mellitus"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(189) "nct01032629-a-randomized-multicenter-double-blind-parallel-placebo-controlled-study-of-the-effects-of-jnj-28431754-on-cardiovascular-outcomes-in-adult-subjects-with-type-2-diabetes-mellitus"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-09-27 17:13:51"
      ["post_modified_gmt"]=>
      string(19) "2024-09-27 21:13:51"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(238) "https://dev-yoda.pantheonsite.io/clinical-trial/nct01032629-a-randomized-multicenter-double-blind-parallel-placebo-controlled-study-of-the-effects-of-jnj-28431754-on-cardiovascular-outcomes-in-adult-subjects-with-type-2-diabetes-mellitus/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
    [2]=>
    object(WP_Post)#4875 (24) {
      ["ID"]=>
      int(1808)
      ["post_author"]=>
      string(4) "1363"
      ["post_date"]=>
      string(19) "2019-08-12 15:10:00"
      ["post_date_gmt"]=>
      string(19) "2019-08-12 15:10:00"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(188) "NCT01989754 - A Randomized, Multicenter, Double-Blind, Parallel, Placebo-Controlled Study of the Effects of Canagliflozin on Renal Endpoints in Adult Subjects With Type 2 Diabetes Mellitus"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(4) "open"
      ["ping_status"]=>
      string(4) "open"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(182) "nct01989754-a-randomized-multicenter-double-blind-parallel-placebo-controlled-study-of-the-effects-of-canagliflozin-on-renal-endpoints-in-adult-subjects-with-type-2-diabetes-mellitus"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2023-09-11 15:45:40"
      ["post_modified_gmt"]=>
      string(19) "2023-09-11 19:45:40"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(231) "https://dev-yoda.pantheonsite.io/clinical-trial/nct01989754-a-randomized-multicenter-double-blind-parallel-placebo-controlled-study-of-the-effects-of-canagliflozin-on-renal-endpoints-in-adult-subjects-with-type-2-diabetes-mellitus/"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
  }
  ["request_data_partner"]=>
  string(15) "johnson-johnson"
  ["project_title"]=>
  string(147) "Development of two predictive tools to estimate cardiovascular risk, renal risk and all-cause mortality risk in CKD patients using machine learning"
  ["project_narrative_summary"]=>
  string(643) "Chronic kidney disease (CKD) is the 10th leading cause of death worldwide. Among deaths due to CKD, cardiovascular (CV) disease is the leading cause. While some tools are available to predict CV risk in the general population, they are not adapted to CKD patients. The most widely known tool for predicting CKD progression "KFRE" needs to be improved, as it does not take into account CV and metabolic factors. The aim of this project is to develop specific tools for individually predicting CV and renal risk in patients with CKD. Knowledge of individual risk should enable the nephrologist to propose the most appropriate therapeutic option."
  ["project_learn_source"]=>
  string(11) "data_holder"
  ["principal_investigator"]=>
  array(7) {
    ["first_name"]=>
    string(11) "Jean-Pierre"
    ["last_name"]=>
    string(6) "Fauvel"
    ["degree"]=>
    string(22) "M.D., Ph.D., Professor"
    ["primary_affiliation"]=>
    string(23) "Hospices Civils de Lyon"
    ["email"]=>
    string(29) "nu-thuy-dung.tran@chu-lyon.fr"
    ["state_or_province"]=>
    string(7) "Rhône "
    ["country"]=>
    string(6) "France"
  }
  ["project_key_personnel"]=>
  array(2) {
    [0]=>
    array(6) {
      ["p_pers_f_name"]=>
      string(4) "Dung"
      ["p_pers_l_name"]=>
      string(4) "Tran"
      ["p_pers_degree"]=>
      string(23) "Master of Public Health"
      ["p_pers_pr_affil"]=>
      string(23) "Hospices Civils de Lyon"
      ["p_pers_scop_id"]=>
      string(0) ""
      ["requires_data_access"]=>
      string(3) "yes"
    }
    [1]=>
    array(6) {
      ["p_pers_f_name"]=>
      string(6) "Michel"
      ["p_pers_l_name"]=>
      string(6) "Ducher"
      ["p_pers_degree"]=>
      string(18) "Doctor of Pharmacy"
      ["p_pers_pr_affil"]=>
      string(23) "Hospices Civils de Lyon"
      ["p_pers_scop_id"]=>
      string(0) ""
      ["requires_data_access"]=>
      string(3) "yes"
    }
  }
  ["project_ext_grants"]=>
  array(2) {
    ["value"]=>
    string(3) "yes"
    ["label"]=>
    string(65) "External grants or funds are being used to support this research."
  }
  ["project_funding_source"]=>
  string(180) "French National Institute of Health and Medical Research (INSERM), Société Francophone de Néphrologie Dialyse et Transplantation, GlaxoSmithKline, AstraZeneca, Astellas Pharma. "
  ["project_date_type"]=>
  string(18) "full_crs_supp_docs"
  ["property_scientific_abstract"]=>
  string(1705) "Background:
Available tools for predicting CV risk, such as Framingham, Q-risk, PROCAM or SCORE, are not suitable for patients with CKD, as they present a higher risk [1]. The most widely used tool for predicting the progression of chronic kidney disease is the “KFRE”, but this risk calculator does not take into account important factors such as hypertension and diabetes. It therefore needs to be improved [2].
Objective:
This project aims to create and externally validate 3 prediction tools:
• ESRD progression
• CV event
• All-cause mortality
Study design:
The PhotoGraph 3 cohort, the ALICE-PROTECT cohort and data extracted from CKD patients consulted at the AURAL-Alsace nephrology center and the Hospices Civils de Lyon, France will be used to build the learning dataset. The prediction tools will be created using learning dataset and different machine learning models. The model which provides the best performance will be chosen to optimize the clinical prediction tool. The optimized prediction tools will be externally validated using a national database (CKD-REIN cohort), and international databases (CREDENCE, CANVAS, CANVAS-R studies and Singapore Renal Registry).
Participants:
Patients with stage 3-4 CKD (i) not on dialysis, (ii) not transplanted, (iii) aged ≥ 18 years, (iv) with results recorded at 2 and/or 5 years will be included.
Primary outcome measure and statistical analysis:
The predictive performance of the two prediction tools will be evaluated by :
• Discriminatory capacity: AUC-ROC, accuracy, sensitivity, specificity
• Calibration: calibration plot " ["project_brief_bg"]=> string(3090) "Although the amount of research into the development of prognostic tools in nephrology is increasing, a systematic review by Collins et al (2012) reported that "predictive models for CKD have often been developed using inappropriate methods and have generally been poorly reported" [3]. In a more recent systematic review, Ramspek et al (2020) stated that "future research efforts should focus on external validation and impact assessment on clinically relevant patient populations" [4]. In the context of improving the quality and quantity of medical data, machine learning can help improve predictive performance. Therefore, our research group aims to develop predictive tools for kidney disease patients using appropriate methods derived from machine learning.

The main objective of CKD management is to slow disease progression and preserve renal function for as long as possible [5]. Nevertheless, the management of CKD is challenging due to the complexity and heterogeneity of disease progression, particularly in patients with multiple comorbidities. In addition, the decision whether or not to proceed with renal replacement therapy (dialysis, hemodialysis or renal transplantation) is generally made on the basis of the experience and professional opinion of the nephrologist. Correct risk stratification can improve clinical outcomes through an appropriate prevention strategy. The use of individual patient factors to calculate CV and renal risk in CKD patients using predictive tools, combined with patient life expectancy and demographic and socioeconomic information, enables the nephrologist to provide more appropriate care decisions. An individualized and accurate treatment plan can help avoid unnecessary healthcare costs and the burden of disease on patients and healthcare systems. In the future, randomized controlled trials should be set up to study the impact of predictive tools in the management of patients with CKD.

Our research uses an innovative approach to develop predictive tools, with synthetic data and machine learning. In medical research, the inability to recruit a sufficient number of subjects into a trial is a long-standing problem. One study showed that only 31% of trials achieved their recruitment targets. Furthermore, 1/3 of trials had to be extended because they failed to meet initial recruitment targets [6]. This negatively affects the investigators' workload and financial aspect, and above all, it leads to possible erroneous results. A synthetic data can help to simplify the problem of patient recruitment in medical studies, without having to worry about the necessary number of patients included, or even about ethical issues. A synthetic data is an artificial dataset generated from an original dataset and a model trained to reproduce the characteristics and structure of the original dataset [7]. Our study will test the effect of synthetic data on the development process of predictive tools. Our study could be a pioneer in the application of synthetic data to the development of predictive tools." ["project_specific_aims"]=> string(1012) "Main objectives:
Create and externally validate clinical prediction tools to predict (i) progression to ESRD (ii) CV events and (iii) all-cause mortality at 2 and 5 years in patients with CKD.

Secondary objectives:
• To evaluate the effects of synthetic data on the predictive performance of the prediction tool.
• To compare the predictive performance of several machine learning models: (i) Bayesian network, (ii) deep learning, (iii) logistic regression, (iv) random forest and (v) XG-Boost.
Hypothesis:
• Machine learning can help create satisfactory internal and external validity of tools for predicting progression to ESRD, cardiovascular events and all-cause mortality at 2 and/or 5 years.
• Using synthetic data to augment the data and improve the event rate in the training dataset can help improve the predictive performance of tools for predicting progression to ESRD, cardiovascular events and all-cause mortality at 2 and/or 5 years." ["project_study_design"]=> array(2) { ["value"]=> string(8) "meth_res" ["label"]=> string(23) "Methodological research" } ["project_purposes"]=> array(1) { [0]=> array(2) { ["value"]=> string(50) "research_on_clinical_prediction_or_risk_prediction" ["label"]=> string(50) "Research on clinical prediction or risk prediction" } } ["project_software_used"]=> array(2) { ["value"]=> string(1) "r" ["label"]=> string(1) "R" } ["project_research_methods"]=> string(922) "Data sources:
The project will use data requested from YODA, data from the PhotoGraph 3 cohort, data from the ALICE-PROTECT cohort, the CKD-REIN cohort, data collected from the AURAL-Alsace nephrology center, data collected from Hospices Civils de Lyon, data from the Singapore Renal Association.

Inclusion criteria:
(i) Male or female
(ii) Age ≥ 18 years
(iii) CKD (for more than 3 months)
(iv) Have an eGFR <60 and ≥15 ml/min/1.73m² (CKD-EPI formula).

Exclusion criteria:
(i) Patient having started dialysis
(ii) Renal transplant patients
(iii) Patient protected by the French law mentioned in articles L.1121-5 to L.1121-8.
(iv) Absence of vital status, eGFR and recorded CV event (CV death and/or non-fatal myocardial infarction and/or non-fatal stroke and/or hospitalization for heart failure) at 2 and/or 5 years. " ["project_main_outcome_measure"]=> string(262) "The predictive performance of 2 tools to predict progression to ESRD and CV events at 2 and/or 5 years will be evaluated using:
• C-statistic (i.e. AUC-ROC)
• Accuracy
• Sensitivity,
• Specificity
• Calibration curve." ["project_main_predictor_indep"]=> string(1219) "The chi2 statistic is a nonparametric statistical technique used to determine if a distribution of observed frequencies differs from the theoretical expected frequencies. Chi2 statistics use nominal data, thus instead of using means and variances, this test uses frequencies. The value of chi2 for each variable will be calculated according to formula (1).

X2 = ∑〖(O -E)〗^2/E (1)

Where X2 is the chi2 statistic, O the observed frequency and E the expected frequency. In general, the chi2 statistic summarizes the deviations between the expected number of times each outcome occurs (assuming the model is true) and the observed number of times each outcome occurs, by summing the squares of the deviations, normalized by the expected numbers, over all categories.

The higher the value of X2, the more the variable is associated with the outcome. The variables most associated with the outcome that enable the model to achieve a satisfactory AUC-ROC will be included in the prediction tools.

The independent variables included in each of the project's prediction tools are those that are mentioned mandatory in the table 1 (supplementary material)." ["project_other_variables_interest"]=> string(330) "The other anticipated variables of interest are those with the lowest X2 value. The other variables of interest are those that are mentioned optional in the table 1 above. These variables can be included in the prediction tool, serve as complementary variables to obtain more precise practical results, or describe the population." ["project_stat_analysis_plan"]=> string(2808) "Step 1: Construction of a learning dataset using data from eligible patients in the PhotoGraph 3 study (N ≈ 530), the ALICE-PROTECT study (N ≈ 560), CKD patients followed in the AURAL Alsace Strasbourg nephrology center (N ≈ 200), and CKD patients followed in the HCL nephrology department (N ≈ 1500). The final learning dataset will include demographic, clinical, biological and therapeutic variables.

Step 2: Use of several machine learning models (Bayesian network, deep learning, logistic regression, random forest, XG boost) to create different prediction tools whose predictive performance will be evaluated by 10-fold cross-validations. The predictive performance of these tools will be compared in terms of AUC-ROC, accuracy, sensitivity, specificity and calibration. The model offering the best performance will be selected for optimization and finalization.

Step 3: Optimize the performance of the chosen tool by creating a synthetic dataset and reducing the number of predictors included. The synthetic learning dataset will be generated from the original learning dataset, in order to increase the number of patients in the learning dataset and balance the ratio patient with event/patient without event in the learning dataset using Synthetic Minority Over-sampling Technique as proposed by Chawla et al [8].

Step 4: The national CKD-REIN cohort (N ≈ 3000), data requested from YODA and the Singapore renal registry database will be used to externally validate prediction tools at different levels (national and international). Missing data values will be estimated by applying a learned random forest model for missing values. The performance of external validation of prediction tools will be evaluated in terms of AUC-ROC, accuracy, sensitivity, specificity and calibration curve. The entire clinical prediction tool development process will follow the recommendations of the "TRIPOD" guideline (Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis Initiative).

The minimum sample size for the validation data set was calculated using the method proposed by Riley and colleagues [9].

n= (1-Φ)/(Φ(SE(ln⁡(O/E) ))^( 2 ) ) (1)

Where SE is the standard error of the estimate of interest, O/E is the observed/expected ratio in the validation dataset, Φ is the proportion of observed events in the validation dataset and n is the population size.

Our prediction tool was developed using RapidMiner software, which we hope can be integrated into the YODA Project secure platform. Otherwise, we will reproduce our prediction tool in R or Rstudio and externally validate it separately with participant-level data from the YODA Project. " ["project_timeline"]=> string(110) "The anticipated timeline with key milestone dates for the study is shown in Figure 1 (supplementary material)." ["project_dissemination_plan"]=> string(609) "The research project will be disseminated to nephrologists and researchers through the publications of a scientific articles in a specialized nephrology journal and the submission of an abstract to a nephrology congress.

Anticipated target journal: Kidney International or Journal of America Society of Nephrology or Nephrology Dialysis and Transplantation or Journal of Nephrology.

Anticipated target congress: 2025 European Renal Association annual congress or 2025 Annual congress of Société Francophone de Néphrologie or 2025 America Society of Nephrology annual congress." ["project_bibliography"]=> string(1920) "
  1. Mahmood, S.S., et al., The Framingham Heart Study and the epidemiology of cardiovascular disease: a historical perspective. The Lancet, 2014. 383(9921): p. 999- 1008.
  2. Tangri N, Stevens LA, Griffith J, Tighiouart H, Djurdjev O, Naimark D, Levin A, Levey AS. A predictive model for progression of chronic kidney disease to kidney failure. JAMA. 2011 Apr 20;305(15):1553-9. doi: 10.1001/jama.2011.451.
  3. Collins, G.S., et al., A systematic review finds prediction models for chronic kidney disease were poorly reported and often developed using inappropriate methods. J Clin Epidemiol, 2013. 66(3): p. 268-77.
  4. Ramspek, C.L., et al., Towards the best kidney failure prediction tool: a systematic review and selection aid. Nephrol Dial Transplant, 2020. 35(9): p. 1527-1538.
  5. Elendu C, Elendu RC, Enyong JM,et al. Comprehensive review of current management guidelines of chronic kidney disease. Medicine (Baltimore). 2023 Jun 9;102(23):e33984. doi: 10.1097/MD.0000000000033984.
  6. Fogel DB. Factors associated with clinical trials that fail and opportunities for improving the likelihood of success: A review. Contemp Clin Trials Commun. 2018 Aug 7;11:156-164. doi: 10.1016/j.conctc.2018.08.001.
  7. Hradec, M. Craglia, M. Di Leo, S. De Nigris, N. Ostlaender, N. Nicholson, Multipurpose synthetic population for policy applications, EUR 31116 EN, Publications Office of the European Union, Luxembourg, ISBN 978-92-76-53478-5 (online), doi:10.2760/50072 (online), JRC128595, 2022.
  8. Chawla, Nitesh, Bowyer, et al. SMOTE: Synthetic Minority Over-sampling Technique. J Artif Intell Res. 2002;16: 321-367. DOI: https://doi.org/10.1613/jair.953.
  9. Riley, R.D., et al., Minimum sample size for external validation of a clinical prediction model with a binary outcome. Stat Med, 2021. 40(19): p. 4230-4251.
" ["project_suppl_material"]=> array(1) { [0]=> array(1) { ["suppl_file"]=> array(21) { ["ID"]=> int(16450) ["id"]=> int(16450) ["title"]=> string(45) "2025_01_YODA research proposal modified clean" ["filename"]=> string(50) "2025_01_YODA-research-proposal-modified-clean.docx" ["filesize"]=> int(71584) ["url"]=> string(99) "https://yoda.yale.edu/wp-content/uploads/2023/12/2025_01_YODA-research-proposal-modified-clean.docx" ["link"]=> string(91) "https://yoda.yale.edu/data-request/2023-5514/2025_01_yoda-research-proposal-modified-clean/" ["alt"]=> string(0) "" ["author"]=> string(2) "20" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(45) "2025_01_yoda-research-proposal-modified-clean" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2025-01-23 19:29:54" ["modified"]=> string(19) "2025-01-23 19:29:54" ["menu_order"]=> int(0) ["mime_type"]=> string(71) "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ["type"]=> string(11) "application" ["subtype"]=> string(59) "vnd.openxmlformats-officedocument.wordprocessingml.document" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } } ["project_coi"]=> array(3) { [0]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(14747) ["id"]=> int(14747) ["title"]=> string(21) "YODA_COI_M.Fauvel.pdf" ["filename"]=> string(21) "YODA_COI_M.Fauvel.pdf" ["filesize"]=> int(20281) ["url"]=> string(70) "https://yoda.yale.edu/wp-content/uploads/2024/04/YODA_COI_M.Fauvel.pdf" ["link"]=> string(67) "https://yoda.yale.edu/data-request/2023-5514/yoda_coi_m-fauvel-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1640" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(21) "yoda_coi_m-fauvel-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2024-04-26 10:09:56" ["modified"]=> string(19) "2024-04-26 10:10:00" ["menu_order"]=> int(0) ["mime_type"]=> 
string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } [1]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(15288) ["id"]=> int(15288) ["title"]=> string(14) "COI_Ducher.pdf" ["filename"]=> string(14) "COI_Ducher.pdf" ["filesize"]=> int(19725) ["url"]=> string(63) "https://yoda.yale.edu/wp-content/uploads/2024/07/COI_Ducher.pdf" ["link"]=> string(60) "https://yoda.yale.edu/data-request/2023-5514/coi_ducher-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1640" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(14) "coi_ducher-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2024-07-02 12:22:08" ["modified"]=> string(19) "2024-07-02 12:22:10" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } [2]=> array(1) { ["file_coi"]=> array(21) { ["ID"]=> int(15289) ["id"]=> int(15289) ["title"]=> string(12) "COI_Tran.pdf" ["filename"]=> string(12) "COI_Tran.pdf" ["filesize"]=> int(20508) ["url"]=> string(61) "https://yoda.yale.edu/wp-content/uploads/2024/07/COI_Tran.pdf" ["link"]=> string(58) "https://yoda.yale.edu/data-request/2023-5514/coi_tran-pdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1640" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(12) "coi_tran-pdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2024-07-02 12:22:09" ["modified"]=> string(19) "2024-07-02 12:22:11" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } } } 
["data_use_agreement_training"]=> bool(true) ["certification"]=> bool(true) ["search_order"]=> string(1) "0" ["project_send_email_updates"]=> bool(false) ["project_publ_available"]=> bool(true) ["project_year_access"]=> string(4) "2025" ["project_rep_publ"]=> bool(false) ["project_assoc_data"]=> array(0) { } ["project_due_dil_assessment"]=> array(21) { ["ID"]=> int(16598) ["id"]=> int(16598) ["title"]=> string(47) "YODA Project Due Diligence Assessment 2023-5514" ["filename"]=> string(51) "YODA-Project-Due-Diligence-Assessment-2023-5514.pdf" ["filesize"]=> int(111732) ["url"]=> string(100) "https://yoda.yale.edu/wp-content/uploads/2023/12/YODA-Project-Due-Diligence-Assessment-2023-5514.pdf" ["link"]=> string(93) "https://yoda.yale.edu/data-request/2023-5514/yoda-project-due-diligence-assessment-2023-5514/" ["alt"]=> string(0) "" ["author"]=> string(4) "1885" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(47) "yoda-project-due-diligence-assessment-2023-5514" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2025-02-07 22:32:29" ["modified"]=> string(19) "2025-02-07 22:32:29" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } ["project_title_link"]=> array(21) { ["ID"]=> int(16597) ["id"]=> int(16597) ["title"]=> string(42) "YODA Project Protocol 2023-5514 - 25-01-20" ["filename"]=> string(44) "YODA-Project-Protocol-2023-5514-25-01-20.pdf" ["filesize"]=> int(132459) ["url"]=> string(93) "https://yoda.yale.edu/wp-content/uploads/2023/12/YODA-Project-Protocol-2023-5514-25-01-20.pdf" ["link"]=> string(86) "https://yoda.yale.edu/data-request/2023-5514/yoda-project-protocol-2023-5514-25-01-20/" ["alt"]=> string(0) "" ["author"]=> string(4) "1885" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(40) 
"yoda-project-protocol-2023-5514-25-01-20" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2025-02-07 22:32:17" ["modified"]=> string(19) "2025-02-07 22:32:17" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } ["project_review_link"]=> array(21) { ["ID"]=> int(16599) ["id"]=> int(16599) ["title"]=> string(39) "YODA Project Review - 2023-5514_sitepdf" ["filename"]=> string(41) "YODA-Project-Review-2023-5514_sitepdf.pdf" ["filesize"]=> int(1315637) ["url"]=> string(90) "https://yoda.yale.edu/wp-content/uploads/2023/12/YODA-Project-Review-2023-5514_sitepdf.pdf" ["link"]=> string(83) "https://yoda.yale.edu/data-request/2023-5514/yoda-project-review-2023-5514_sitepdf/" ["alt"]=> string(0) "" ["author"]=> string(4) "1885" ["description"]=> string(0) "" ["caption"]=> string(0) "" ["name"]=> string(37) "yoda-project-review-2023-5514_sitepdf" ["status"]=> string(7) "inherit" ["uploaded_to"]=> int(13863) ["date"]=> string(19) "2025-02-07 22:36:39" ["modified"]=> string(19) "2025-02-07 22:36:39" ["menu_order"]=> int(0) ["mime_type"]=> string(15) "application/pdf" ["type"]=> string(11) "application" ["subtype"]=> string(3) "pdf" ["icon"]=> string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png" } ["project_highlight_button"]=> string(0) "" ["request_overridden_res"]=> string(1) "3" ["human_research_protection_training"]=> bool(false) } data partner
array(1) { [0]=> string(15) "johnson-johnson" }

pi country
array(0) { }

pi affil
array(0) { }

products
array(1) { [0]=> string(8) "invokana" }

num of trials
array(1) { [0]=> string(1) "3" }

res
array(1) { [0]=> string(1) "3" }

2023-5514

Research Proposal

Project Title: Development of two predictive tools to estimate cardiovascular risk, renal risk and all-cause mortality risk in CKD patients using machine learning

Scientific Abstract: Background:
Available tools for predicting CV risk, such as Framingham, Q-risk, PROCAM or SCORE, are not suitable for patients with CKD, as these patients present a higher risk [1]. The most widely used tool for predicting the progression of chronic kidney disease is the "KFRE", but this risk calculator does not take into account important factors such as hypertension and diabetes. It therefore needs to be improved [2].
Objective:
This project aims to create and externally validate 3 prediction tools:
- ESRD progression
- CV event
- All-cause mortality
Study design:
The PhotoGraph 3 cohort, the ALICE-PROTECT cohort and data extracted from CKD patients seen at the AURAL-Alsace nephrology center and the Hospices Civils de Lyon, France, will be used to build the learning dataset. The prediction tools will be created using the learning dataset and several machine learning models. The model that provides the best performance will be chosen to optimize the clinical prediction tool. The optimized prediction tools will be externally validated using a national database (CKD-REIN cohort) and international databases (CREDENCE, CANVAS, CANVAS-R studies and the Singapore Renal Registry).
Participants:
Patients with stage 3-4 CKD (i) not on dialysis, (ii) not transplanted, (iii) aged >= 18 years, (iv) with results recorded at 2 and/or 5 years will be included.
Primary outcome measure and statistical analysis:
The predictive performance of the prediction tools will be evaluated by:
- Discriminatory capacity: AUC-ROC, accuracy, sensitivity, specificity
- Calibration: calibration plot

Brief Project Background and Statement of Project Significance: Although the amount of research into the development of prognostic tools in nephrology is increasing, a systematic review by Collins et al (2012) reported that "predictive models for CKD have often been developed using inappropriate methods and have generally been poorly reported" [3]. In a more recent systematic review, Ramspek et al (2020) stated that "future research efforts should focus on external validation and impact assessment on clinically relevant patient populations" [4]. In the context of improving the quality and quantity of medical data, machine learning can help improve predictive performance. Therefore, our research group aims to develop predictive tools for kidney disease patients using appropriate methods derived from machine learning.

The main objective of CKD management is to slow disease progression and preserve renal function for as long as possible [5]. Nevertheless, the management of CKD is challenging due to the complexity and heterogeneity of disease progression, particularly in patients with multiple comorbidities. In addition, the decision whether or not to proceed with renal replacement therapy (dialysis, hemodialysis or renal transplantation) is generally made on the basis of the experience and professional opinion of the nephrologist. Correct risk stratification can improve clinical outcomes through an appropriate prevention strategy. The use of individual patient factors to calculate CV and renal risk in CKD patients using predictive tools, combined with patient life expectancy and demographic and socioeconomic information, enables the nephrologist to provide more appropriate care decisions. An individualized and accurate treatment plan can help avoid unnecessary healthcare costs and the burden of disease on patients and healthcare systems. In the future, randomized controlled trials should be set up to study the impact of predictive tools in the management of patients with CKD.

Our research uses an innovative approach to develop predictive tools, with synthetic data and machine learning. In medical research, the inability to recruit a sufficient number of subjects into a trial is a long-standing problem. One study showed that only 31% of trials achieved their recruitment targets. Furthermore, 1/3 of trials had to be extended because they failed to meet initial recruitment targets [6]. This negatively affects the investigators' workload and financial aspect, and above all, it leads to possible erroneous results. Synthetic data can help to simplify the problem of patient recruitment in medical studies, without having to worry about the necessary number of patients included, or even about ethical issues. A synthetic dataset is an artificial dataset generated from an original dataset and a model trained to reproduce the characteristics and structure of the original dataset [7]. Our study will test the effect of synthetic data on the development process of predictive tools. Our study could be a pioneer in the application of synthetic data to the development of predictive tools.

Specific Aims of the Project: Main objectives:
Create and externally validate clinical prediction tools to predict (i) progression to ESRD (ii) CV events and (iii) all-cause mortality at 2 and 5 years in patients with CKD.

Secondary objectives:
- To evaluate the effects of synthetic data on the predictive performance of the prediction tool.
- To compare the predictive performance of several machine learning models: (i) Bayesian network, (ii) deep learning, (iii) logistic regression, (iv) random forest and (v) XG-Boost.
Hypothesis:
- Machine learning can help create tools with satisfactory internal and external validity for predicting progression to ESRD, cardiovascular events and all-cause mortality at 2 and/or 5 years.
- Using synthetic data to augment the data and improve the event rate in the training dataset can help improve the predictive performance of tools for predicting progression to ESRD, cardiovascular events and all-cause mortality at 2 and/or 5 years.

Study Design: Methodological research

What is the purpose of the analysis being proposed? Please select all that apply.: Research on clinical prediction or risk prediction

Software Used: R

Data Source and Inclusion/Exclusion Criteria to be used to define the patient sample for your study: Data sources:
The project will use data requested from YODA, data from the PhotoGraph 3 cohort, data from the ALICE-PROTECT cohort, the CKD-REIN cohort, data collected from the AURAL-Alsace nephrology center, data collected from Hospices Civils de Lyon, data from the Singapore Renal Association.

Inclusion criteria:
(i) Male or female
(ii) Age >= 18 years
(iii) CKD (for more than 3 months)
(iv) Have an eGFR <60 and >=15 ml/min/1.73m^2 (CKD-EPI formula).

Exclusion criteria:
(i) Patient having started dialysis
(ii) Renal transplant patients
(iii) Patient protected by the French law mentioned in articles L.1121-5 to L.1121-8.
(iv) Absence of vital status, eGFR and recorded CV event (CV death and/or non-fatal myocardial infarction and/or non-fatal stroke and/or hospitalization for heart failure) at 2 and/or 5 years.

Primary and Secondary Outcome Measure(s) and how they will be categorized/defined for your study: The predictive performance of 2 tools to predict progression to ESRD and CV events at 2 and/or 5 years will be evaluated using:
- C-statistic (i.e. AUC-ROC)
- Accuracy
- Sensitivity,
- Specificity
- Calibration curve.

Main Predictor/Independent Variable and how it will be categorized/defined for your study: The chi2 statistic is a nonparametric statistical technique used to determine if a distribution of observed frequencies differs from the theoretical expected frequencies. Chi2 statistics use nominal data, thus instead of using means and variances, this test uses frequencies. The value of chi2 for each variable will be calculated according to formula (1).

$$X^2 = \sum \frac{(O - E)^2}{E} \tag{1}$$

Where X2 is the chi2 statistic, O the observed frequency and E the expected frequency. In general, the chi2 statistic summarizes the deviations between the expected number of times each outcome occurs (assuming the model is true) and the observed number of times each outcome occurs, by summing the squares of the deviations, normalized by the expected numbers, over all categories.

The higher the value of X2, the more the variable is associated with the outcome. The variables most associated with the outcome that enable the model to achieve a satisfactory AUC-ROC will be included in the prediction tools.

The independent variables included in each of the project's prediction tools are those marked as mandatory in Table 1 (supplementary material).

Other Variables of Interest that will be used in your analysis and how they will be categorized/defined for your study: The other anticipated variables of interest are those with the lowest X2 value. The other variables of interest are those marked as optional in Table 1 (supplementary material). These variables can be included in the prediction tool, serve as complementary variables to obtain more precise practical results, or describe the population.

Statistical Analysis Plan: Step 1: Construction of a learning dataset using data from eligible patients in the PhotoGraph 3 study (N ≈ 530), the ALICE-PROTECT study (N ≈ 560), CKD patients followed in the AURAL Alsace Strasbourg nephrology center (N ≈ 200), and CKD patients followed in the HCL nephrology department (N ≈ 1500). The final learning dataset will include demographic, clinical, biological and therapeutic variables.

Step 2: Use of several machine learning models (Bayesian network, deep learning, logistic regression, random forest, XG boost) to create different prediction tools whose predictive performance will be evaluated by 10-fold cross-validations. The predictive performance of these tools will be compared in terms of AUC-ROC, accuracy, sensitivity, specificity and calibration. The model offering the best performance will be selected for optimization and finalization.

Step 3: Optimize the performance of the chosen tool by creating a synthetic dataset and reducing the number of predictors included. The synthetic learning dataset will be generated from the original learning dataset, in order to increase the number of patients in the learning dataset and balance the ratio of patients with an event to patients without an event in the learning dataset, using the Synthetic Minority Over-sampling Technique (SMOTE) as proposed by Chawla et al [8].

Step 4: The national CKD-REIN cohort (N ≈ 3000), data requested from YODA and the Singapore renal registry database will be used to externally validate prediction tools at different levels (national and international). Missing data values will be estimated by applying a learned random forest model for missing values. The performance of external validation of prediction tools will be evaluated in terms of AUC-ROC, accuracy, sensitivity, specificity and calibration curve. The entire clinical prediction tool development process will follow the recommendations of the "TRIPOD" guideline (Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis Initiative).

The minimum sample size for the validation data set was calculated using the method proposed by Riley and colleagues [9].

$$n = \frac{1 - \Phi}{\Phi \left( SE\left( \ln(O/E) \right) \right)^{2}} \tag{2}$$

Where SE is the standard error of the estimate of interest, O/E is the observed/expected ratio in the validation dataset, Φ is the proportion of observed events in the validation dataset and n is the population size.

Our prediction tool was developed using RapidMiner software, which we hope can be integrated into the YODA Project secure platform. Otherwise, we will reproduce our prediction tool in R or Rstudio and externally validate it separately with participant-level data from the YODA Project.

Narrative Summary: Chronic kidney disease (CKD) is the 10th leading cause of death worldwide. Among deaths due to CKD, cardiovascular (CV) disease is the leading cause. While some tools are available to predict CV risk in the general population, they are not adapted to CKD patients. The most widely known tool for predicting CKD progression "KFRE" needs to be improved, as it does not take into account CV and metabolic factors. The aim of this project is to develop specific tools for individually predicting CV and renal risk in patients with CKD. Knowledge of individual risk should enable the nephrologist to propose the most appropriate therapeutic option.

Project Timeline: The anticipated timeline with key milestone dates for the study is shown in Figure 1 (supplementary material).

Dissemination Plan: The research project will be disseminated to nephrologists and researchers through the publication of a scientific article in a specialized nephrology journal and the submission of an abstract to a nephrology congress.

Anticipated target journal: Kidney International or Journal of the American Society of Nephrology or Nephrology Dialysis Transplantation or Journal of Nephrology.

Anticipated target congress: 2025 European Renal Association annual congress or 2025 Annual congress of Société Francophone de Néphrologie or 2025 American Society of Nephrology annual congress.

Bibliography:

  1. Mahmood, S.S., et al., The Framingham Heart Study and the epidemiology of cardiovascular disease: a historical perspective. The Lancet, 2014. 383(9921): p. 999-1008.
  2. Tangri N, Stevens LA, Griffith J, Tighiouart H, Djurdjev O, Naimark D, Levin A, Levey AS. A predictive model for progression of chronic kidney disease to kidney failure. JAMA. 2011 Apr 20;305(15):1553-9. doi: 10.1001/jama.2011.451.
  3. Collins, G.S., et al., A systematic review finds prediction models for chronic kidney disease were poorly reported and often developed using inappropriate methods. J Clin Epidemiol, 2013. 66(3): p. 268-77.
  4. Ramspek, C.L., et al., Towards the best kidney failure prediction tool: a systematic review and selection aid. Nephrol Dial Transplant, 2020. 35(9): p. 1527-1538.
  5. Elendu C, Elendu RC, Enyong JM, et al. Comprehensive review of current management guidelines of chronic kidney disease. Medicine (Baltimore). 2023 Jun 9;102(23):e33984. doi: 10.1097/MD.0000000000033984.
  6. Fogel DB. Factors associated with clinical trials that fail and opportunities for improving the likelihood of success: A review. Contemp Clin Trials Commun. 2018 Aug 7;11:156-164. doi: 10.1016/j.conctc.2018.08.001.
  7. J. Hradec, M. Craglia, M. Di Leo, S. De Nigris, N. Ostlaender, N. Nicholson, Multipurpose synthetic population for policy applications, EUR 31116 EN, Publications Office of the European Union, Luxembourg, ISBN 978-92-76-53478-5 (online), doi:10.2760/50072 (online), JRC128595, 2022.
  8. Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP. SMOTE: Synthetic Minority Over-sampling Technique. J Artif Intell Res. 2002;16:321-357. DOI: https://doi.org/10.1613/jair.953.
  9. Riley, R.D., et al., Minimum sample size for external validation of a clinical prediction model with a binary outcome. Stat Med, 2021. 40(19): p. 4230-4251.

Supplementary Material: 2025_01_YODA research proposal modified clean (docx)