2024-0248 - The YODA Project

                    array(39) {
  ["project_status"]=>
  string(7) "ongoing"
  ["project_assoc_trials"]=>
  array(2) {
    [0]=>
    object(WP_Post)#4776 (24) {
      ["ID"]=>
      int(14190)
      ["post_author"]=>
      string(4) "1638"
      ["post_date"]=>
      string(19) "2024-02-15 12:26:00"
      ["post_date_gmt"]=>
      string(19) "2024-02-15 17:26:00"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(338) "NCT03267576 - Canagliflozin Continuous Glucose Monitoring (CANA CGM) Trial: A Pilot Randomized, Double-Blind, Controlled, Crossover Study on the Effects of the SGLT-2 Inhibitor Canagliflozin (vs. the DPP-4 Inhibitor Sitagliptin) on Glucose Variability in Mexican Patients With Type 2 Diabetes Mellitus Inadequately Controlled on Metformin"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(6) "closed"
      ["ping_status"]=>
      string(6) "closed"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(189) "nct03267576-canagliflozin-continuous-glucose-monitoring-cana-cgm-trial-a-pilot-randomized-double-blind-controlled-crossover-study-on-the-effects-of-the-sglt-2-inhibitor-canagliflozin-vs-the"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-05-21 11:55:59"
      ["post_modified_gmt"]=>
      string(19) "2024-05-21 15:55:59"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(60) "https://yoda.yale.edu/?post_type=clinical_trial&p=14190"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
    [1]=>
    object(WP_Post)#4775 (24) {
      ["ID"]=>
      int(14841)
      ["post_author"]=>
      string(3) "190"
      ["post_date"]=>
      string(19) "2024-05-09 10:17:33"
      ["post_date_gmt"]=>
      string(19) "2024-05-09 14:17:33"
      ["post_content"]=>
      string(0) ""
      ["post_title"]=>
      string(267) "NCT02139943  - A Randomized Phase 2, Double-blind, Placebo-controlled, Treat-to-Target, Parallel-group, 3-arm, Multicenter Study to Assess the Efficacy and Safety of Canagliflozin as Add-on Therapy to Insulin in the Treatment of Subjects With Type 1 Diabetes Mellitus"
      ["post_excerpt"]=>
      string(0) ""
      ["post_status"]=>
      string(7) "publish"
      ["comment_status"]=>
      string(6) "closed"
      ["ping_status"]=>
      string(6) "closed"
      ["post_password"]=>
      string(0) ""
      ["post_name"]=>
      string(191) "nct02139943-a-randomized-phase-2-double-blind-placebo-controlled-treat-to-target-parallel-group-3-arm-multicenter-study-to-assess-the-efficacy-and-safety-of-canagliflozin-as-add-on-therapy-to"
      ["to_ping"]=>
      string(0) ""
      ["pinged"]=>
      string(0) ""
      ["post_modified"]=>
      string(19) "2024-06-13 12:30:31"
      ["post_modified_gmt"]=>
      string(19) "2024-06-13 16:30:31"
      ["post_content_filtered"]=>
      string(0) ""
      ["post_parent"]=>
      int(0)
      ["guid"]=>
      string(60) "https://yoda.yale.edu/?post_type=clinical_trial&p=14841"
      ["menu_order"]=>
      int(0)
      ["post_type"]=>
      string(14) "clinical_trial"
      ["post_mime_type"]=>
      string(0) ""
      ["comment_count"]=>
      string(1) "0"
      ["filter"]=>
      string(3) "raw"
    }
  }
  ["project_title"]=>
  string(116) "Time-series insights into diabetes treatment - using a fine-tuned CGM foundation model to improve treatment outcomes"
  ["project_narrative_summary"]=>
  string(683) "Diabetes is a pressing health issue globally. We aim to investigate the potential of advanced learning approaches to improve the results of clinical trials by predicting treatment responses for individual patients. We will utilize proprietary data to enhance time-series based learning techniques in order to identify nuanced patterns in CGM data that correlate with treatment efficacy. Our research will focus on developing a methodological framework, paving the way for a deeper understanding of diabetes management. The anticipated outcome of our study is to establish a robust model capable of guiding clinical decisions, ultimately improving patient-specific diabetes treatment."
  ["project_learn_source"]=>
  string(10) "web_search"
  ["principal_investigator"]=>
  array(7) {
    ["first_name"]=>
    string(4) "Eran"
    ["last_name"]=>
    string(5) "Segal"
    ["degree"]=>
    string(11) "Prof. (PhD)"
    ["primary_affiliation"]=>
    string(29) "Weizmann Institute of Science"
    ["email"]=>
    string(25) "Eran.Segal@weizmann.ac.il"
    ["state_or_province"]=>
    string(6) "Israel"
    ["country"]=>
    string(6) "Israel"
  }
  ["project_key_personnel"]=>
  array(3) {
    [0]=>
    array(6) {
      ["p_pers_f_name"]=>
      string(5) "Hagai"
      ["p_pers_l_name"]=>
      string(7) "Rossman"
      ["p_pers_degree"]=>
      string(5) "Ph.D."
      ["p_pers_pr_affil"]=>
      string(29) "Weizmann Institute of Science"
      ["p_pers_scop_id"]=>
      string(0) ""
      ["requires_data_access"]=>
      string(2) "no"
    }
    [1]=>
    array(6) {
      ["p_pers_f_name"]=>
      string(3) "Gal"
      ["p_pers_l_name"]=>
      string(5) "Sapir"
      ["p_pers_degree"]=>
      string(10) "M.D, Ph.D."
      ["p_pers_pr_affil"]=>
      string(29) "Weizmann Institute of Science"
      ["p_pers_scop_id"]=>
      string(0) ""
      ["requires_data_access"]=>
      string(3) "yes"
    }
    [2]=>
    array(6) {
      ["p_pers_f_name"]=>
      string(3) "Guy"
      ["p_pers_l_name"]=>
      string(7) "Lutsker"
      ["p_pers_degree"]=>
      string(3) "MSc"
      ["p_pers_pr_affil"]=>
      string(29) "Weizmann Institute of Science"
      ["p_pers_scop_id"]=>
      string(0) ""
      ["requires_data_access"]=>
      string(3) "yes"
    }
  }
  ["project_ext_grants"]=>
  array(2) {
    ["value"]=>
    string(2) "no"
    ["label"]=>
    string(68) "No external grants or funds are being used to support this research."
  }
  ["project_date_type"]=>
  string(18) "full_crs_supp_docs"
  ["property_scientific_abstract"]=>
  string(1468) "Background:

Diabetes, a rising health concern world-wide, demands improved treatment strategies. Advancements in Continuous Glucose Monitoring (CGM) and machine learning offer new avenues for personalized treatment approaches.

Objective:

To leverage advanced learning techniques, particularly time-series analysis, to predict individual treatment responses from CGM data in order to enhance diabetes treatment personalization and efficacy.

Study Design:

This research will employ proprietary CGM data from the Human Phenotype Project (HPP, previously called 10 K project (1). By integrating exploratory analysis tools like CGMap and IGLU and time-series based learning, we aim to refine predictive models for diabetes care.

Participants:

All participants who have more than 24 hours of CGM data available.

Primary and Secondary outcome measures:

Primary outcomes include the accuracy of treatment response predictions. Secondary outcomes are improved patient segmentation based on treatment efficacy and the identification of predictive markers for drug responsiveness.

Statistical analysis:

Our analysis will span descriptive, bivariate, and multivariable techniques, including machine learning models fine-tuned on the cohort data. We will evaluate model performance through cross-validation and other relevant metrics, aiming to establish a robust framework for clinical decision support."
  ["project_brief_bg"]=>
  string(1892) "Recent estimates suggest that around 6% of the world's population suffers from diabetes (~529 million people). This corresponds to approximately 38 million years of life lost due to roughly 1.7 million disease-related deaths (2). Additionally, it was recently estimated that ~9% of adults have impaired glucose tolerance, a number that is projected to increase significantly over the next two decades (3). Diabetes and related conditions are therefore major concerns for public health, worldwide.



Continuous glucose monitoring (CGM) devices measure the level of glucose in the interstitial fluid, which correlates well with blood glucose levels. The advantages of CGM usage in patients with diabetes was previously recognized (4), and recently stated to be an important component of future clinical trials concerning diabetic patients (5). 



The 10 K cohort is a large-scale, prospective, longitudinal cohort of 40-70 years old Israeli individuals (1), containing, among other things, a 2-week CGM for each participant.



It was demonstrated that advanced statistical methods can improve our ability to describe diabetic patients (e.g. glucodensities (6)), and there are several available tools for the effective analysis of this data (e.g. (7),(8)). Additionally, it was shown that treating CGM data as time-series data can assist with treatment response prediction (9). Foundational models are able to generalize insights on new datasets that were not presented during training. Recent advancements have enabled this type of model for time-series data, and it is possible that such models could be used to enhance our understanding of diabetes through CGM data (10). We propose to apply these kinds of models on CGM data from clinical trials after it has been fine-tuned on sufficient data, in order to predict response to anti-diabetic treatments."
  ["project_specific_aims"]=>
  string(1174) "Predict Individual Treatment Responses: Utilize time-series analysis to predict how individuals with diabetes respond to specific treatments (DDP4 inhibitor, SGLT2 inhibitor).

Methodological Advancement: Develop and refine a methodological framework by integrating time-series based learning techniques with proprietary data from the 10 k cohort. 

Patient Segmentation and Treatment Optimization: Identify patterns within CGM data that correlate with positive treatment outcomes, facilitating a nuanced segmentation of the patient population. This aims to optimize treatment strategies by aligning them more closely with individual patient profiles and needs.

Enhance Scientific and Medical Knowledge: By analyzing CGM data through the lens of advanced learning approaches, this project seeks to create new scientific knowledge that can be directly applied to enhance medical care for individuals with diabetes.



By focusing on individualized treatment response predictions and leveraging a rich dataset, we aim to advance the understanding and management of diabetes, with the potential to improve outcomes for millions worldwide.

"
  ["project_study_design"]=>
  array(2) {
    ["value"]=>
    string(8) "meth_res"
    ["label"]=>
    string(23) "Methodological research"
  }
  ["project_purposes"]=>
  array(2) {
    [0]=>
    array(2) {
      ["value"]=>
      string(37) "develop_or_refine_statistical_methods"
      ["label"]=>
      string(37) "Develop or refine statistical methods"
    }
    [1]=>
    array(2) {
      ["value"]=>
      string(50) "research_on_clinical_prediction_or_risk_prediction"
      ["label"]=>
      string(50) "Research on clinical prediction or risk prediction"
    }
  }
  ["project_software_used"]=>
  array(2) {
    ["value"]=>
    string(6) "python"
    ["label"]=>
    string(6) "Python"
  }
  ["project_research_methods"]=>
  string(232) "Proprietary dataset from the 10k cohort study, including CGM data and comprehensive demographic and clinical information on participants.

Inclusion Criteria: All participants who have more than 24 hours of CGM data available."
  ["project_main_outcome_measure"]=>
  string(429) "Primary Outcome: 

Patient Segmentation Efficiency: Evaluation of the model's ability to accurately segment patients based on their predicted response to treatment, assessed through clustering quality metrics.



Secondary Outcomes:



Predictive Accuracy: The precision and recall of treatment response predictions, determined through cross-validation and comparison with actual treatment outcomes."
  ["project_main_predictor_indep"]=>
  string(447) "The primary predictor in our study will be the individual patient's CGM data and other demographic and clinical information. These variables will undergo quantitative analysis and categorization based on established clinical thresholds for glycemic control (e.g., HbA1c levels). They will form the dataset input for the model, influencing its output and serving as crucial indicators for predicting response to various diabetes treatments.

"
  ["project_other_variables_interest"]=>
  string(402) "Other variables of interest include other demographic and clinical information. These variables will undergo quantitative analysis and categorization based on established clinical thresholds for glycemic control (e.g., HbA1c levels). They will form the dataset input for the model, influencing its output and serving as crucial indicators for predicting response to various diabetes treatments.

"
  ["project_stat_analysis_plan"]=>
  string(2753) "Data preparation and Exploratory Analysis: 

Data cleaning: standardize and apply transformation to skewed variables, address missing data. 

Exploratory data analysis (EDA): conduct initial analysis to understand distributions, patterns and outliers. Visualize CGM time-series data, identify preliminary patterns of response to treatment. Additionally, perform covariate analysis to assess the influence of other demographic and clinical variables on treatment response.

Basic Analysis using CGMap and IGLU: 

Utilize CGMap to calculate CGM metrics (mean glucose, glycemic variability etc). 

Utilize IGLU for metrics including glucose management indicator (GMI) and other metrics. 

Clustering for patient segmentation: Apply unsupervised learning techniques such as PCA, UMAP and HDBSCAN to segment patients into clusters based on similarities in CGM data patterns.

Utilize statistical tests to study inter-cluster differences in treatment response.

Treatment response prediction and Identification of Responders and Non-responders

Training a model for treatment response prediction by incorporating features extracted from CGMap and IGLU and a training set selected from the clinical trial data.

Using baseline measurements, predict the response to DDP4 inhibitor and SGLT2 inhibitor.

Using baseline measurements and one of the treatments, predict the response to the other treatment.

Using baseline measurements and the first X hours of a treatment, predict the final response to the treatment.

Advanced Time Series Characterization based on the TimeGPT foundation model: 

Fine-tune the foundation model on a subset of the preprocessed CGM data from the 10 K cohort in order to build a CGM time-series model. 

Feature extraction and time series analysis - extract relevant features such as time-series embeddings from CGM time-series data (possibly: complex temporal patterns, trends), then perform analysis on extracted features. For example: change in features due to treatment.

Train a model for treatment response prediction by incorporating the fine-tuned CGM time-series model and a training set selected from the clinical trial data, as described in (4a).

Model Evaluation and Validation

Compare the relation between metrics in feature space (3a, 5b) and treatment response. For example, distances between samples within a group who responded well to the treatment compared to the distance between groups of samples (responders and non-responders).

Utilize N-fold cross-validation to train a treatment response predictor (4a, 5c) on the clinical trial data. Evaluate based on ROCAUC, precision and recall.

"
  ["project_timeline"]=>
  string(867) "Months 1-2: Define goals and preprocess CGM data, perform EDA on trial data (basic CGM analysis - CGMap, IGLU).

Months 3-6: Fine-tune TimeGPT with CGM data.

Months 7-8: Extract features and enhance predictive models using TimeGPT.

Months 9-10: Perform advanced analysis and refine models.

Months 11-12: Finalize report and prepare manuscript for publication.



Key Milestones:



End of Month 2: Preprocessed dataset ready.

End of Month 6: TimeGPT fine-tuned.

End of Month 8: Predictive models enhanced.

End of Month 10: Model refinement and advanced insights complete.

Analysis Completion Date: End of Month 10.

Manuscript Drafted: Month 11.



First Submission for Publication: By Month 12.



Results Reported Back to YODA Project: By Month 12.

"
  ["project_dissemination_plan"]=>
  string(390) "We aim to publish our findings in top-tier journals such as Nature Medicine or NEJM, targeting a broad audience across diabetes care and medical research. Plans include presenting at international conferences like ADA and EASD. By choosing open-access options, we ensure wide accessibility, benefiting healthcare professionals and fostering advancements in personalized diabetes management."
  ["project_bibliography"]=>
  string(4047) "
Shilo S, Bar N, Keshet A, Talmor-Barkan Y, Rossman H, Godneva A, Aviv Y, Edlitz Y, Reicher L, Kolobkov D, Wolf BC, Lotan-Pompan M, Levi K, Cohen O, Saranga H, Weinberger A, Segal E. 10 K: a large-scale prospective longitudinal study in Israel. Eur J Epidemiol. 2021 Nov;36(11):1187-1194. doi: 10.1007/s10654-021-00753-5. Epub 2021 May 15. PMID: 33993378.
Watkins DA, Ali MK. Measuring the global burden of diabetes: implications for health policy, practice, and research. Lancet. 2023 Jul 15;402(10397):163-165. doi: 10.1016/S0140-6736(23)01287-4. Epub 2023 Jun 22. PMID: 37356449.
Mary R. Rooney, Michael Fang, Katherine Ogurtsova, Bige Ozkan, Justin B. Echouffo-Tcheugui, Edward J. Boyko, Dianna J. Magliano, Elizabeth Selvin; Global Prevalence of Prediabetes. Diabetes Care 1 July 2023; 46 (7): 1388–1394. https://doi.org/10.2337/dc22-2376
Danne T, Nimri R, Battelino T, Bergenstal RM, Close KL, DeVries JH, Garg S, Heinemann L, Hirsch I, Amiel SA, Beck R, Bosi E, Buckingham B, Cobelli C, Dassau E, Doyle FJ 3rd, Heller S, Hovorka R, Jia W, Jones T, Kordonouri O, Kovatchev B, Kowalski A, Laffel L, Maahs D, Murphy HR, Nørgaard K, Parkin CG, Renard E, Saboo B, Scharf M, Tamborlane WV, Weinzimer SA, Phillip M. International Consensus on Use of Continuous Glucose Monitoring. Diabetes Care. 2017 Dec;40(12):1631-1640. doi: 10.2337/dc17-1600. PMID: 29162583; PMCID: PMC6467165.
Battelino T, Alexander CM, Amiel SA, Arreaza-Rubin G, Beck RW, Bergenstal RM, Buckingham BA, Carroll J, Ceriello A, Chow E, Choudhary P, Close K, Danne T, Dutta S, Gabbay R, Garg S, Heverly J, Hirsch IB, Kader T, Kenney J, Kovatchev B, Laffel L, Maahs D, Mathieu C, Mauricio D, Nimri R, Nishimura R, Scharf M, Del Prato S, Renard E, Rosenstock J, Saboo B, Ueki K, Umpierrez GE, Weinzimer SA, Phillip M. Continuous glucose monitoring and metrics for clinical trials: an international consensus statement. Lancet Diabetes Endocrinol. 2023 Jan;11(1):42-57. doi: 10.1016/S2213-8587(22)00319-9. Epub 2022 Dec 6. Erratum in: Lancet Diabetes Endocrinol. 2024 Feb;12(2):e12. PMID: 36493795.
Matabuena M, Petersen A, Vidal JC, Gude F. Glucodensities: A new representation of glucose profiles using distributional data analysis. Statistical Methods in Medical Research. 2021;30(6):1445-1464. doi:10.1177/0962280221998064
Shao J, Liu Z, Li S, Wu B, Nie Z, Li Y, Zhou K. Continuous Glucose Monitoring Time Series Data Analysis: A Time Series Analysis Package for Continuous Glucose Monitoring Data. J Comput Biol. 2023 Jan;30(1):112-116. doi: 10.1089/cmb.2022.0100. Epub 2022 Aug 8. PMID: 35939283.
Broll S, Urbanek J, Buchanan D, Chun E, Muschelli J, et al. (2021) Interpreting blood GLUcose data with R package iglu. PLOS ONE 16(4): e0248560. https://doi.org/10.1371/journal.pone.0248560
Li L, Sun J, Ruan L, Song Q. Time-Series Analysis of Continuous Glucose Monitoring Data to Predict Treatment Efficacy in Patients with T2DM. J Clin Endocrinol Metab. 2021 Jul 13;106(8):2187-2197. doi: 10.1210/clinem/dgab356. PMID: 34010405.
Garza, Azul, and Max Mergenthaler-Canseco. “TimeGPT-1.” arXiv preprint arXiv:2310.03589 (2023).

"
  ["project_suppl_material"]=>
  bool(false)
  ["project_coi"]=>
  array(4) {
    [0]=>
    array(1) {
      ["file_coi"]=>
      array(21) {
        ["ID"]=>
        int(14197)
        ["id"]=>
        int(14197)
        ["title"]=>
        string(19) "Survey-Response.pdf"
        ["filename"]=>
        string(19) "Survey-Response.pdf"
        ["filesize"]=>
        int(20266)
        ["url"]=>
        string(68) "https://yoda.yale.edu/wp-content/uploads/2024/02/Survey-Response.pdf"
        ["link"]=>
        string(65) "https://yoda.yale.edu/data-request/2024-0248/survey-response-pdf/"
        ["alt"]=>
        string(0) ""
        ["author"]=>
        string(4) "1696"
        ["description"]=>
        string(0) ""
        ["caption"]=>
        string(0) ""
        ["name"]=>
        string(19) "survey-response-pdf"
        ["status"]=>
        string(7) "inherit"
        ["uploaded_to"]=>
        int(14196)
        ["date"]=>
        string(19) "2024-02-18 14:37:30"
        ["modified"]=>
        string(19) "2024-02-18 14:37:32"
        ["menu_order"]=>
        int(0)
        ["mime_type"]=>
        string(15) "application/pdf"
        ["type"]=>
        string(11) "application"
        ["subtype"]=>
        string(3) "pdf"
        ["icon"]=>
        string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
      }
    }
    [1]=>
    array(1) {
      ["file_coi"]=>
      array(21) {
        ["ID"]=>
        int(14318)
        ["id"]=>
        int(14318)
        ["title"]=>
        string(12) "eran-coi.pdf"
        ["filename"]=>
        string(12) "eran-coi.pdf"
        ["filesize"]=>
        int(20401)
        ["url"]=>
        string(61) "https://yoda.yale.edu/wp-content/uploads/2024/03/eran-coi.pdf"
        ["link"]=>
        string(58) "https://yoda.yale.edu/data-request/2024-0248/eran-coi-pdf/"
        ["alt"]=>
        string(0) ""
        ["author"]=>
        string(4) "1696"
        ["description"]=>
        string(0) ""
        ["caption"]=>
        string(0) ""
        ["name"]=>
        string(12) "eran-coi-pdf"
        ["status"]=>
        string(7) "inherit"
        ["uploaded_to"]=>
        int(14196)
        ["date"]=>
        string(19) "2024-03-10 09:14:50"
        ["modified"]=>
        string(19) "2024-03-10 09:14:53"
        ["menu_order"]=>
        int(0)
        ["mime_type"]=>
        string(15) "application/pdf"
        ["type"]=>
        string(11) "application"
        ["subtype"]=>
        string(3) "pdf"
        ["icon"]=>
        string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
      }
    }
    [2]=>
    array(1) {
      ["file_coi"]=>
      array(21) {
        ["ID"]=>
        int(14319)
        ["id"]=>
        int(14319)
        ["title"]=>
        string(13) "hagai-coi.pdf"
        ["filename"]=>
        string(13) "hagai-coi.pdf"
        ["filesize"]=>
        int(19836)
        ["url"]=>
        string(62) "https://yoda.yale.edu/wp-content/uploads/2024/03/hagai-coi.pdf"
        ["link"]=>
        string(59) "https://yoda.yale.edu/data-request/2024-0248/hagai-coi-pdf/"
        ["alt"]=>
        string(0) ""
        ["author"]=>
        string(4) "1696"
        ["description"]=>
        string(0) ""
        ["caption"]=>
        string(0) ""
        ["name"]=>
        string(13) "hagai-coi-pdf"
        ["status"]=>
        string(7) "inherit"
        ["uploaded_to"]=>
        int(14196)
        ["date"]=>
        string(19) "2024-03-10 09:14:52"
        ["modified"]=>
        string(19) "2024-03-10 09:14:53"
        ["menu_order"]=>
        int(0)
        ["mime_type"]=>
        string(15) "application/pdf"
        ["type"]=>
        string(11) "application"
        ["subtype"]=>
        string(3) "pdf"
        ["icon"]=>
        string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
      }
    }
    [3]=>
    array(1) {
      ["file_coi"]=>
      array(21) {
        ["ID"]=>
        int(14824)
        ["id"]=>
        int(14824)
        ["title"]=>
        string(11) "COI FORM GL"
        ["filename"]=>
        string(15) "COI-FORM-GL.pdf"
        ["filesize"]=>
        int(20308)
        ["url"]=>
        string(64) "https://yoda.yale.edu/wp-content/uploads/2024/02/COI-FORM-GL.pdf"
        ["link"]=>
        string(57) "https://yoda.yale.edu/data-request/2024-0248/coi-form-gl/"
        ["alt"]=>
        string(0) ""
        ["author"]=>
        string(3) "190"
        ["description"]=>
        string(0) ""
        ["caption"]=>
        string(0) ""
        ["name"]=>
        string(11) "coi-form-gl"
        ["status"]=>
        string(7) "inherit"
        ["uploaded_to"]=>
        int(14196)
        ["date"]=>
        string(19) "2024-05-06 17:10:17"
        ["modified"]=>
        string(19) "2024-05-06 17:10:17"
        ["menu_order"]=>
        int(0)
        ["mime_type"]=>
        string(15) "application/pdf"
        ["type"]=>
        string(11) "application"
        ["subtype"]=>
        string(3) "pdf"
        ["icon"]=>
        string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
      }
    }
  }
  ["data_use_agreement_training"]=>
  bool(true)
  ["certification"]=>
  bool(true)
  ["request_data_partner"]=>
  string(15) "johnson-johnson"
  ["search_order"]=>
  string(1) "0"
  ["project_send_email_updates"]=>
  bool(false)
  ["project_publ_available"]=>
  bool(true)
  ["project_year_access"]=>
  string(4) "2024"
  ["project_rep_publ"]=>
  bool(false)
  ["project_assoc_data"]=>
  array(0) {
  }
  ["project_due_dil_assessment"]=>
  array(21) {
    ["ID"]=>
    int(15252)
    ["id"]=>
    int(15252)
    ["title"]=>
    string(51) "YODA Project Due Diligence Assessment 2024-0248 (2)"
    ["filename"]=>
    string(53) "YODA-Project-Due-Diligence-Assessment-2024-0248-2.pdf"
    ["filesize"]=>
    int(150910)
    ["url"]=>
    string(102) "https://yoda.yale.edu/wp-content/uploads/2024/02/YODA-Project-Due-Diligence-Assessment-2024-0248-2.pdf"
    ["link"]=>
    string(95) "https://yoda.yale.edu/data-request/2024-0248/yoda-project-due-diligence-assessment-2024-0248-2/"
    ["alt"]=>
    string(0) ""
    ["author"]=>
    string(3) "190"
    ["description"]=>
    string(0) ""
    ["caption"]=>
    string(0) ""
    ["name"]=>
    string(49) "yoda-project-due-diligence-assessment-2024-0248-2"
    ["status"]=>
    string(7) "inherit"
    ["uploaded_to"]=>
    int(14196)
    ["date"]=>
    string(19) "2024-06-25 15:23:50"
    ["modified"]=>
    string(19) "2024-06-25 15:23:50"
    ["menu_order"]=>
    int(0)
    ["mime_type"]=>
    string(15) "application/pdf"
    ["type"]=>
    string(11) "application"
    ["subtype"]=>
    string(3) "pdf"
    ["icon"]=>
    string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
  }
  ["project_title_link"]=>
  array(21) {
    ["ID"]=>
    int(15253)
    ["id"]=>
    int(15253)
    ["title"]=>
    string(42) "YODA Project Protocol 2024-0248 - 24-05-24"
    ["filename"]=>
    string(44) "YODA-Project-Protocol-2024-0248-24-05-24.pdf"
    ["filesize"]=>
    int(131079)
    ["url"]=>
    string(93) "https://yoda.yale.edu/wp-content/uploads/2024/02/YODA-Project-Protocol-2024-0248-24-05-24.pdf"
    ["link"]=>
    string(86) "https://yoda.yale.edu/data-request/2024-0248/yoda-project-protocol-2024-0248-24-05-24/"
    ["alt"]=>
    string(0) ""
    ["author"]=>
    string(3) "190"
    ["description"]=>
    string(0) ""
    ["caption"]=>
    string(0) ""
    ["name"]=>
    string(40) "yoda-project-protocol-2024-0248-24-05-24"
    ["status"]=>
    string(7) "inherit"
    ["uploaded_to"]=>
    int(14196)
    ["date"]=>
    string(19) "2024-06-25 15:25:18"
    ["modified"]=>
    string(19) "2024-06-25 15:25:18"
    ["menu_order"]=>
    int(0)
    ["mime_type"]=>
    string(15) "application/pdf"
    ["type"]=>
    string(11) "application"
    ["subtype"]=>
    string(3) "pdf"
    ["icon"]=>
    string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
  }
  ["project_review_link"]=>
  array(21) {
    ["ID"]=>
    int(15254)
    ["id"]=>
    int(15254)
    ["title"]=>
    string(36) "YODA Project Review - 2024-0248_site"
    ["filename"]=>
    string(38) "YODA-Project-Review-2024-0248_site.pdf"
    ["filesize"]=>
    int(504047)
    ["url"]=>
    string(87) "https://yoda.yale.edu/wp-content/uploads/2024/02/YODA-Project-Review-2024-0248_site.pdf"
    ["link"]=>
    string(80) "https://yoda.yale.edu/data-request/2024-0248/yoda-project-review-2024-0248_site/"
    ["alt"]=>
    string(0) ""
    ["author"]=>
    string(3) "190"
    ["description"]=>
    string(0) ""
    ["caption"]=>
    string(0) ""
    ["name"]=>
    string(34) "yoda-project-review-2024-0248_site"
    ["status"]=>
    string(7) "inherit"
    ["uploaded_to"]=>
    int(14196)
    ["date"]=>
    string(19) "2024-06-25 15:30:10"
    ["modified"]=>
    string(19) "2024-06-25 15:30:10"
    ["menu_order"]=>
    int(0)
    ["mime_type"]=>
    string(15) "application/pdf"
    ["type"]=>
    string(11) "application"
    ["subtype"]=>
    string(3) "pdf"
    ["icon"]=>
    string(62) "https://yoda.yale.edu/wp/wp-includes/images/media/document.png"
  }
  ["project_highlight_button"]=>
  string(0) ""
  ["request_overridden_res"]=>
  string(1) "3"
}
data partner
array(1) {
  [0]=>
  string(15) "johnson-johnson"
}


pi country
array(0) {
}


pi affil
array(0) {
}


products
array(1) {
  [0]=>
  string(8) "invokana"
}


num of trials
array(1) {
  [0]=>
  string(1) "2"
}


res
array(1) {
  [0]=>
  string(1) "3"
}

General Information

How did you learn about the YODA Project?: Internet Search

Conflict of Interest

Request Clinical Trials

Associated Trial(s):

What type of data are you looking for?: Individual Participant-Level Data, which includes Full CSR and all supporting documentation

Request Clinical Trials

Data Request Status

Status: Ongoing

Research Proposal

Project Title: Time-series insights into diabetes treatment - using a fine-tuned CGM foundation model to improve treatment outcomes

Scientific Abstract: Background:
Diabetes, a rising health concern worldwide, demands improved treatment strategies. Advancements in Continuous Glucose Monitoring (CGM) and machine learning offer new avenues for personalized treatment approaches.
Objective:
To leverage advanced learning techniques, particularly time-series analysis, to predict individual treatment responses from CGM data in order to enhance diabetes treatment personalization and efficacy.
Study Design:
This research will employ proprietary CGM data from the Human Phenotype Project (HPP, previously called the 10 K project) (1). By integrating exploratory analysis tools like CGMap and IGLU with time-series based learning, we aim to refine predictive models for diabetes care.
Participants:
All participants who have more than 24 hours of CGM data available.
Primary and Secondary outcome measures:
Primary outcomes include the accuracy of treatment response predictions. Secondary outcomes are improved patient segmentation based on treatment efficacy and the identification of predictive markers for drug responsiveness.
Statistical analysis:
Our analysis will span descriptive, bivariate, and multivariable techniques, including machine learning models fine-tuned on the cohort data. We will evaluate model performance through cross-validation and other relevant metrics, aiming to establish a robust framework for clinical decision support.

Brief Project Background and Statement of Project Significance: Recent estimates suggest that around 6% of the world's population suffers from diabetes (~529 million people). This corresponds to approximately 38 million years of life lost due to roughly 1.7 million disease-related deaths (2). Additionally, it was recently estimated that ~9% of adults have impaired glucose tolerance, a number that is projected to increase significantly over the next two decades (3). Diabetes and related conditions are therefore major concerns for public health, worldwide.

Continuous glucose monitoring (CGM) devices measure the level of glucose in the interstitial fluid, which correlates well with blood glucose levels. The advantages of CGM usage in patients with diabetes were previously recognized (4), and CGM was recently stated to be an important component of future clinical trials concerning diabetic patients (5).

The 10 K cohort is a large-scale, prospective, longitudinal cohort of 40-70-year-old Israeli individuals (1), containing, among other things, a 2-week CGM recording for each participant.

It was demonstrated that advanced statistical methods can improve our ability to describe diabetic patients (e.g. glucodensities (6)), and there are several available tools for the effective analysis of this data (e.g. (7),(8)). Additionally, it was shown that treating CGM data as time-series data can assist with treatment response prediction (9). Foundational models are able to generalize insights on new datasets that were not presented during training. Recent advancements have enabled this type of model for time-series data, and it is possible that such models could be used to enhance our understanding of diabetes through CGM data (10). We propose to apply these kinds of models to CGM data from clinical trials, after they have been fine-tuned on sufficient data, in order to predict response to anti-diabetic treatments.

Specific Aims of the Project: Predict Individual Treatment Responses: Utilize time-series analysis to predict how individuals with diabetes respond to specific treatments (DPP-4 inhibitor, SGLT2 inhibitor).
Methodological Advancement: Develop and refine a methodological framework by integrating time-series based learning techniques with proprietary data from the 10 k cohort.
Patient Segmentation and Treatment Optimization: Identify patterns within CGM data that correlate with positive treatment outcomes, facilitating a nuanced segmentation of the patient population. This aims to optimize treatment strategies by aligning them more closely with individual patient profiles and needs.
Enhance Scientific and Medical Knowledge: By analyzing CGM data through the lens of advanced learning approaches, this project seeks to create new scientific knowledge that can be directly applied to enhance medical care for individuals with diabetes.

By focusing on individualized treatment response predictions and leveraging a rich dataset, we aim to advance the understanding and management of diabetes, with the potential to improve outcomes for millions worldwide.

Study Design: Methodological research

What is the purpose of the analysis being proposed? Please select all that apply.: Develop or refine statistical methods; Research on clinical prediction or risk prediction

Software Used: Python

Data Source and Inclusion/Exclusion Criteria to be used to define the patient sample for your study: Proprietary dataset from the 10k cohort study, including CGM data and comprehensive demographic and clinical information on participants.
Inclusion Criteria: All participants who have more than 24 hours of CGM data available.

Primary and Secondary Outcome Measure(s) and how they will be categorized/defined for your study: Primary Outcome:
Patient Segmentation Efficiency: Evaluation of the model's ability to accurately segment patients based on their predicted response to treatment, assessed through clustering quality metrics.

Secondary Outcomes:

Predictive Accuracy: The precision and recall of treatment response predictions, determined through cross-validation and comparison with actual treatment outcomes.

Main Predictor/Independent Variable and how it will be categorized/defined for your study: The primary predictor in our study will be the individual patient's CGM data and other demographic and clinical information. These variables will undergo quantitative analysis and categorization based on established clinical thresholds for glycemic control (e.g., HbA1c levels). They will form the dataset input for the model, influencing its output and serving as crucial indicators for predicting response to various diabetes treatments.

Other Variables of Interest that will be used in your analysis and how they will be categorized/defined for your study: Other variables of interest include other demographic and clinical information. These variables will undergo quantitative analysis and categorization based on established clinical thresholds for glycemic control (e.g., HbA1c levels). They will form the dataset input for the model, influencing its output and serving as crucial indicators for predicting response to various diabetes treatments.

Statistical Analysis Plan: Data preparation and Exploratory Analysis:
Data cleaning: standardize and apply transformation to skewed variables, address missing data.
Exploratory data analysis (EDA): conduct initial analysis to understand distributions, patterns and outliers. Visualize CGM time-series data, identify preliminary patterns of response to treatment. Additionally, perform covariate analysis to assess the influence of other demographic and clinical variables on treatment response.
Basic Analysis using CGMap and IGLU:
Utilize CGMap to calculate CGM metrics (mean glucose, glycemic variability etc).
Utilize IGLU for metrics including glucose management indicator (GMI) and other metrics.
Clustering for patient segmentation: Apply unsupervised learning techniques such as PCA, UMAP and HDBSCAN to segment patients into clusters based on similarities in CGM data patterns.
Utilize statistical tests to study inter-cluster differences in treatment response.
Treatment response prediction and Identification of Responders and Non-responders
Training a model for treatment response prediction by incorporating features extracted from CGMap and IGLU and a training set selected from the clinical trial data.
Using baseline measurements, predict the response to DPP-4 inhibitor and SGLT2 inhibitor.
Using baseline measurements and one of the treatments, predict the response to the other treatment.
Using baseline measurements and the first X hours of a treatment, predict the final response to the treatment.
Advanced Time Series Characterization based on the TimeGPT foundation model:
Fine-tune the foundation model on a subset of the preprocessed CGM data from the 10 K cohort in order to build a CGM time-series model.
Feature extraction and time series analysis - extract relevant features such as time-series embeddings from CGM time-series data (possibly: complex temporal patterns, trends), then perform analysis on extracted features. For example: change in features due to treatment.
Train a model for treatment response prediction by incorporating the fine-tuned CGM time-series model and a training set selected from the clinical trial data, as described in (4a).
Model Evaluation and Validation
Compare the relation between metrics in feature space (3a, 5b) and treatment response. For example, distances between samples within a group who responded well to the treatment compared to the distance between groups of samples (responders and non-responders).
Utilize N-fold cross-validation to train a treatment response predictor (4a, 5c) on the clinical trial data. Evaluate based on ROC AUC, precision and recall.

Narrative Summary: Diabetes is a pressing health issue globally. We aim to investigate the potential of advanced learning approaches to improve the results of clinical trials by predicting treatment responses for individual patients. We will utilize proprietary data to enhance time-series based learning techniques in order to identify nuanced patterns in CGM data that correlate with treatment efficacy. Our research will focus on developing a methodological framework, paving the way for a deeper understanding of diabetes management. The anticipated outcome of our study is to establish a robust model capable of guiding clinical decisions, ultimately improving patient-specific diabetes treatment.

Project Timeline: Months 1-2: Define goals and preprocess CGM data, perform EDA on trial data (basic CGM analysis - CGMap, IGLU).
Months 3-6: Fine-tune TimeGPT with CGM data.
Months 7-8: Extract features and enhance predictive models using TimeGPT.
Months 9-10: Perform advanced analysis and refine models.
Months 11-12: Finalize report and prepare manuscript for publication.

Key Milestones:

End of Month 2: Preprocessed dataset ready.
End of Month 6: TimeGPT fine-tuned.
End of Month 8: Predictive models enhanced.
End of Month 10: Model refinement and advanced insights complete.
Analysis Completion Date: End of Month 10.
Manuscript Drafted: Month 11.

First Submission for Publication: By Month 12.

Results Reported Back to YODA Project: By Month 12.

Dissemination Plan: We aim to publish our findings in top-tier journals such as Nature Medicine or NEJM, targeting a broad audience across diabetes care and medical research. Plans include presenting at international conferences like ADA and EASD. By choosing open-access options, we ensure wide accessibility, benefiting healthcare professionals and fostering advancements in personalized diabetes management.

Bibliography:

Shilo S, Bar N, Keshet A, Talmor-Barkan Y, Rossman H, Godneva A, Aviv Y, Edlitz Y, Reicher L, Kolobkov D, Wolf BC, Lotan-Pompan M, Levi K, Cohen O, Saranga H, Weinberger A, Segal E. 10 K: a large-scale prospective longitudinal study in Israel. Eur J Epidemiol. 2021 Nov;36(11):1187-1194. doi: 10.1007/s10654-021-00753-5. Epub 2021 May 15. PMID: 33993378.
Watkins DA, Ali MK. Measuring the global burden of diabetes: implications for health policy, practice, and research. Lancet. 2023 Jul 15;402(10397):163-165. doi: 10.1016/S0140-6736(23)01287-4. Epub 2023 Jun 22. PMID: 37356449.
Mary R. Rooney, Michael Fang, Katherine Ogurtsova, Bige Ozkan, Justin B. Echouffo-Tcheugui, Edward J. Boyko, Dianna J. Magliano, Elizabeth Selvin; Global Prevalence of Prediabetes. Diabetes Care 1 July 2023; 46 (7): 1388–1394. https://doi.org/10.2337/dc22-2376
Danne T, Nimri R, Battelino T, Bergenstal RM, Close KL, DeVries JH, Garg S, Heinemann L, Hirsch I, Amiel SA, Beck R, Bosi E, Buckingham B, Cobelli C, Dassau E, Doyle FJ 3rd, Heller S, Hovorka R, Jia W, Jones T, Kordonouri O, Kovatchev B, Kowalski A, Laffel L, Maahs D, Murphy HR, Nørgaard K, Parkin CG, Renard E, Saboo B, Scharf M, Tamborlane WV, Weinzimer SA, Phillip M. International Consensus on Use of Continuous Glucose Monitoring. Diabetes Care. 2017 Dec;40(12):1631-1640. doi: 10.2337/dc17-1600. PMID: 29162583; PMCID: PMC6467165.
Battelino T, Alexander CM, Amiel SA, Arreaza-Rubin G, Beck RW, Bergenstal RM, Buckingham BA, Carroll J, Ceriello A, Chow E, Choudhary P, Close K, Danne T, Dutta S, Gabbay R, Garg S, Heverly J, Hirsch IB, Kader T, Kenney J, Kovatchev B, Laffel L, Maahs D, Mathieu C, Mauricio D, Nimri R, Nishimura R, Scharf M, Del Prato S, Renard E, Rosenstock J, Saboo B, Ueki K, Umpierrez GE, Weinzimer SA, Phillip M. Continuous glucose monitoring and metrics for clinical trials: an international consensus statement. Lancet Diabetes Endocrinol. 2023 Jan;11(1):42-57. doi: 10.1016/S2213-8587(22)00319-9. Epub 2022 Dec 6. Erratum in: Lancet Diabetes Endocrinol. 2024 Feb;12(2):e12. PMID: 36493795.
Matabuena M, Petersen A, Vidal JC, Gude F. Glucodensities: A new representation of glucose profiles using distributional data analysis. Statistical Methods in Medical Research. 2021;30(6):1445-1464. doi:10.1177/0962280221998064
Shao J, Liu Z, Li S, Wu B, Nie Z, Li Y, Zhou K. Continuous Glucose Monitoring Time Series Data Analysis: A Time Series Analysis Package for Continuous Glucose Monitoring Data. J Comput Biol. 2023 Jan;30(1):112-116. doi: 10.1089/cmb.2022.0100. Epub 2022 Aug 8. PMID: 35939283.
Broll S, Urbanek J, Buchanan D, Chun E, Muschelli J, et al. (2021) Interpreting blood GLUcose data with R package iglu. PLOS ONE 16(4): e0248560. https://doi.org/10.1371/journal.pone.0248560
Li L, Sun J, Ruan L, Song Q. Time-Series Analysis of Continuous Glucose Monitoring Data to Predict Treatment Efficacy in Patients with T2DM. J Clin Endocrinol Metab. 2021 Jul 13;106(8):2187-2197. doi: 10.1210/clinem/dgab356. PMID: 34010405.
Garza, Azul, and Max Mergenthaler-Canseco. “TimeGPT-1.” arXiv preprint arXiv:2310.03589 (2023).