<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">Online J Public Health Inform</journal-id><journal-id journal-id-type="publisher-id">ojphi</journal-id><journal-id journal-id-type="index">45</journal-id><journal-title>Online Journal of Public Health Informatics</journal-title><abbrev-journal-title>Online J Public Health Inform</abbrev-journal-title><issn pub-type="epub">1947-2579</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v18i1e81119</article-id><article-id pub-id-type="doi">10.2196/81119</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Workflow&#x2011;Based Information Management Framework for Multicenter Research Studies: Design and Development</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Sulaeman</surname><given-names>Hasan</given-names></name><degrees>MSci</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Stone</surname><given-names>Mars</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bruhn</surname><given-names>Roberta</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zurita</surname><given-names>Karla</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>Anh</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chiang</surname><given-names>Vincent</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jones</surname><given-names>Jefferson Michael</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Deng</surname><given-names>Xutao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Custer</surname><given-names>Brian</given-names></name><degrees>MPH, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Busch</surname><given-names>Michael</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Grebe</surname><given-names>Eduard</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Vitalant Research Institute</institution><addr-line>360 Spear Street</addr-line><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Centers for Disease Control and Prevention, National Center for Immunization and Respiratory Diseases</institution><addr-line>Atlanta</addr-line><addr-line>GA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mensah</surname><given-names>Edward</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Zuidhof</surname><given-names>Niek</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Punzalan</surname><given-names>Jaime Kristoffer</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mekbib</surname><given-names>Michael Sileshi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Hasan Sulaeman, MSci, Vitalant Research Institute, 360 Spear Street, San Francisco, CA, 94115, United States, 1 (415) 923-5771; <email>hsulaeman@vitalant.org</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>20</day><month>4</month><year>2026</year></pub-date><volume>18</volume><elocation-id>e81119</elocation-id><history><date date-type="received"><day>22</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>23</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>18</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Hasan Sulaeman, Mars Stone, Roberta Bruhn, Karla Zurita, Anh Nguyen, Vincent Chiang, Jefferson Michael Jones, Xutao Deng, Brian Custer, Michael Busch, Eduard Grebe. Originally published in the Online Journal of Public Health Informatics (<ext-link ext-link-type="uri" xlink:href="https://ojphi.jmir.org/">https://ojphi.jmir.org/</ext-link>), 20.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Online Journal of Public Health Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://ojphi.jmir.org/">https://ojphi.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ojphi.jmir.org/2026/1/e81119"/><abstract><sec><title>Background</title><p>Biological and health research is increasingly data-driven, with commercial and academic institutions generating data at unprecedented rates. The rapid pace of data generation, together with lessons learned during the COVID-19 pandemic, underscores the need for nimble, transparent, and dependable data infrastructures that enable rapid study execution and timely insights to inform public health policy and practice.</p></sec><sec><title>Objective</title><p>This paper describes the workflow-based information management (WIM) framework, a flexible research information management system designed to support diverse epidemiologic workflows and data-intensive research projects.</p></sec><sec sec-type="methods"><title>Methods</title><p>WIM was developed as a modular, workflow-oriented framework built on the open-source R (R Foundation) programming language and its extensive ecosystem of community-developed packages. The framework emphasizes reproducibility, adaptability, and transparency, enabling users to design and manage research workflows tailored to specific study requirements. We describe the architecture and core components of WIM and illustrate its application through representative epidemiologic research scenarios.</p></sec><sec sec-type="results"><title>Results</title><p>The framework supported high-volume, multiorganizational research; managing &#x003E;3.7 million donation and testing records from 17 blood collection organizations across the United States. The WIM framework was readily adapted to a wide range of epidemiologic studies and research projects, demonstrating flexibility across varying data types, analytical needs, and operational contexts. By leveraging established R-based tools and workflows, WIM supported efficient data ingestion, processing, analysis, and reporting while promoting reproducible and collaborative research practices. The framework facilitated rapid iteration and reuse of workflows, addressing common challenges in managing complex and evolving research studies.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>WIM provides a flexible, open-source, and extensible approach to research information management for modern biological and health research. By integrating workflow-based design principles with the R ecosystem, the framework supports reproducible analysis, scalable research operations, and rapid study execution. WIM offers a practical solution for institutions seeking adaptable data infrastructure to support epidemiologic research and inform public health decision-making.</p></sec></abstract><kwd-group><kwd>multicenter studies</kwd><kwd>large studies</kwd><kwd>open source</kwd><kwd>data system</kwd><kwd>data management</kwd><kwd>data infrastructure</kwd><kwd>ETL</kwd><kwd>extract transform load</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>With technological advances in recent decades, bioinformatics and data management are becoming increasingly important to life sciences research [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Contemporary biological research is often dependent on the management, sharing, and analysis of large-scale, aggregated data, in particular for studies designed to tackle large scientific and societal issues such as the COVID-19 pandemic [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>During the COVID-19 pandemic, scientific interest in specific public health questions shifted as the pandemic progressed [<xref ref-type="bibr" rid="ref7">7</xref>]. For example, early seroprevalence studies were performed to determine the proportion of the population that had been infected with SARS-CoV-2. After the introduction of vaccines, these had to be modified to determine the proportion of the population that had been vaccinated, infected, or both [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. In May 2020, the US Centers for Disease Control and Prevention (CDC), in partnership with Vitalant Research Institute (VRI), established the Nationwide Blood Donor Seroprevalence study (NBDS). In July 2020, NBDS launched the first phase of the study in collaboration with 17 blood collection organizations across the United States and Puerto Rico. Multiple testing laboratories, including VRI and Creative Testing Solutions, captured, tested, and analyzed approximately 150,000 blood donation specimens monthly in a serial cross-sectional seroprevalence survey. As the pandemic evolved, a large proportion of the population had infection-induced antibodies or had been vaccinated. To determine if infections occurred before or after vaccination or to detect reinfection, longitudinal data is required. The need to detect reinfections and infections in vaccinated individuals led to the launch at the start of 2022 of the second phase of the program: the Nationwide Blood Donor Cohort study (NBDC). The NBDC program switched from a cross-sectional to a longitudinal study format to follow a cohort of blood donors from BCOs to address questions such as the incidence of infection (in vaccinated and unvaccinated individuals) and multiple sequential infections (reinfections) with SARS-CoV-2, waning antibody titers following vaccination or infections, and correlates of protection against SARS-CoV-2 infection [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Given the fluid nature of these large studies, a secure and nimble data infrastructure was needed to meet the studies&#x2019; needs, including management of large quantities of data on donors and donations from BCO records, serologic testing data, and responses to electronic donor surveys.</p><p>BCOs are primed for nationwide studies such as the NBDS and NBDC, with a physical infrastructure already in place to collect and test biospecimens from blood donations, including capturing residual specimens after routine blood screening for additional research testing and executing electronic surveys of participating donors. However, the data management framework and infrastructure to facilitate research programs built on the blood collection system would benefit from further development, a challenge many other organizations face and are attempting to tackle [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Due to the volume and complexity of the NBDS and NBDC data, traditional methods of data manipulation and management using software such as spreadsheet-based software were not an option. Though there are numerous commercially available data and project management software systems, their use would require a costly upfront license fee and/or a monthly cost incurred from services rendered for a software-as-a-service platform. Furthermore, off-the-shelf solutions typically require extensive customization to meet the needs of large and complex research programs.</p><p>Fortunato and Galassi (2021) [<xref ref-type="bibr" rid="ref15">15</xref>] defined open-source software as &#x201C;any computer program released under a license that grants users rights to run the program for any purpose, to study it, to modify it, and to redistribute it in original or modified form.&#x201D; R and Python (Python Software Foundation) are 2 popular data manipulation and analysis programming languages that fall into the category of open-source software. We chose to build our data management infrastructure in R due to its popularity among life science professionals, versatility, low barrier to entry, and a strong community of developers that has built an ecosystem of data-related packages [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Using the same principles as the framework we detail here, an analogous data system could easily be built in Python or any other open-source general programming language.</p><p>The design of our workflow-based information management (hereafter WIM) framework follows a few distinct design principles to achieve the goal of a nimble and reusable informatics management system that could reliably handle large amounts of data using open-source programming languages. First, our framework follows the FAIR (Findable, Accessible, Interoperable, Reusable) principles detailed in Wilkinson et al (2016) [<xref ref-type="bibr" rid="ref18">18</xref>] for data reusability, accessibility, and system interoperability. Second, our framework follows the design principle of loose coupling, wherein components are only weakly associated with each other or the system, and so changes in 1 component least affect the performance of other components [<xref ref-type="bibr" rid="ref19">19</xref>]. This principle allows the framework to be agnostic regarding operating systems and underlying platforms such as the relational database management system. Lastly, due to the fluidity of research studies, to prioritize reusability and adaptability of the framework, we opted not to have a formal component model [<xref ref-type="bibr" rid="ref20">20</xref>].</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Concept</title><p>The framework is built on modules, some automated, used in the same way functions from packages are used in R, and is publicly accessible on GitHub (GitHub, Inc) [<xref ref-type="bibr" rid="ref21">21</xref>]. The modules are called and managed using the <italic>box</italic> package, which negates the need to import each function or to publish the suite of modules as an R package [<xref ref-type="bibr" rid="ref22">22</xref>]. Additionally, all settings used by the modules are set using a single project-wide configuration file in the yet another markup language (YAML) format, which includes information on quality control (QC) processes, data dictionaries used by the study, credentials where required to run certain processes, database connection strings, and any other pertinent information used in data ingestion and reporting [<xref ref-type="bibr" rid="ref23">23</xref>]. The use of a configuration file streamlines the implementation of changes in data flow, formatting, and reporting and lessens the time required to apply any changes to the data system, while also allowing multiple data managers to execute the same functions and scripts with any individual configurations managed in a separate configuration file.</p><p>The modules are platform-agnostic and can be run on either a cloud platform or locally with little to no input, depending on how much of the process requires manual review. They could also be run on a set schedule as a <italic>cron job</italic>, a system for scheduling specific tasks on Unix-based operating systems (eg, Linux [DragonByte Technologies] and macOS [Apple Inc]), or automatically when required using a listener to respond to an external action. In our case, for pragmatic reasons, the modules are run locally on desktop machines by one or more administrators. Separately, though modules run processes through R, an external connection is required to run certain processes, such as performing data retrieval from an external source, monitoring a secure file transfer protocol (sFTP) server for new data deliveries, writing processed and quality-controlled data to a central database, and transferring data to other organizations. These connections are made in R using certain packages, including <italic>curl</italic>, <italic>RCurl</italic>, and <italic>sFTP</italic> [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>Lastly, a central goal in the implementation is for all relevant stakeholders to have access to both the managed central database and the version-controlled code repository, managed using Git. To lower the technical skill floor required for accessing this study&#x2019;s database, a custom front-end was built using Python and Flask (Pallets), which caters specifically to the organization and stakeholders&#x2019; data needs and is abstracted. The Git repository for the modules includes all the information and scripts required to install and run the modules, with users having only to install the required dependencies using a script included in the Git repository.</p></sec><sec id="s2-2"><title>Data Sources and Types</title><p>Data sources for the NBDS and NBDC included donor, donation, and blood specimen testing data (<xref ref-type="fig" rid="figure1">Figure 1</xref>). When the NBDC study launched, donor electronic survey data were added as a data source [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Donor and donation data for the program are extracted from BCO operational data systems. Donor data includes time-invariant donor characteristics such as birth date and blood group, while donation data includes information collected at each donation event, for example, the blood collection procedure, responses to routine questionnaires administered at the time of donation, as well as other data points that might differ for a given donor over time. Data points such as ZIP code of residence, sex, race, and ethnicity were reported by the donor at the time of donation. It is worth noting that reference tables, also known as lookup tables, were generated and used for tracking changes to donor identifiers and certain donation-level data where the original data point from the source should be kept, along with any study-specific interpretation we applied. For example, the way organizations record and group donor race and ethnicity data might differ from how a study groups these data. By implementing a lookup table that links the original data to the study-specific data grouping, stakeholders can revise groupings, if necessary. Testing data includes all valid serological test results from assays in the program, whether for routine study-directed testing or for assay validation substudies [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Specifications for which variables are captured, stored, and reported differ between assays and are dependent on the assay manufacturer&#x2019;s instructions for use and determinations of the most relevant measurements.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Simplified ERD for the repeat donor cohorts program. Levels are used to imply dependencies between the different tables. Purple, yellow, and white are used to denote survey data, donation data, and testing data, respectively. Standard cardinality notation is used. ERD: entity relationship diagram.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v18i1e81119_fig01.png"/></fig><p>Lastly, donor survey data for the NBDC can be split into 2 other data types: survey question information and survey response information&#x2014;both of which are equally important to properly manage. Survey questions for the program often change between different survey rounds to accommodate the changing needs of this study, so there is a need to manage question information while tracking question equivalency across survey rounds. Due to survey questions sometimes changing, survey responses, defined as a completed survey form, must be managed separately from the discrete answers to a question. This is so that answers to equivalent questions across survey rounds can be parsed and analyzed without requiring excessive processing time or manual recoding during analysis.</p></sec><sec id="s2-3"><title>Data Dictionary</title><p>The NBDS program accepted frequent data submissions from 17 BCOs and multiple testing laboratories across the United States and Puerto Rico. The NBDC program, on the other hand, while only accepting submissions from 2 organizations and 2 testing laboratories, expanded the scope and quantity of data collected, stored, and managed for its cohorts. Both studies required comprehensive data dictionaries, regularly updated to reflect changes made to study procedures and methods, and detailing how to format data for each type of data transfer between organizations, including what values or valid ranges are acceptable for each field. Data dictionaries for both the NBDS and NBDC programs are included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>.</p></sec><sec id="s2-4"><title>Data Flow</title><p>Though the data flow for the NBDS and NBDC programs differed slightly due to different data sources, types, and testing algorithms applied to biospecimens, the general flow of data remained the same (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Data from all sources first flowed to VRI, where it underwent QC and, if the submission passed quality control procedures, was imported to this study&#x2019;s database. The data submission was then reported to the data coordinating center (DCC), the contract research organization Westat, where it underwent a second QC step, where primary analysis took place, and was finally transferred to the sponsor (CDC).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Workflows defined by the framework. Workflows include an extract, transform, load process from each data source, a process to manage data while at rest, and a process for reporting to the data center and later the US Centers for Disease Control and Prevention. CDC: US Centers for Disease Control and Prevention; VRI: Vitalant Research Institute.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v18i1e81119_fig02.png"/></fig><p>For the NBDS, to accommodate varying testing and sample flows, participating BCOs were binned into groups, each of which had a distinct data transmission format and schedule [<xref ref-type="bibr" rid="ref12">12</xref>]. Groups evolved with the program and adapted to changes in the testing algorithm and the capacity of testing laboratories. For the NBDC, data flow differed by data source and organization. Donor and donation data for Vitalant flowed to VRI, while for ARC, the submission went directly to the data broker. Testing results are always routed through VRI before going to the DCC, while survey results were reported directly to the DCC for both Vitalant and ARC (<xref ref-type="fig" rid="figure2">Figure 2</xref>)</p></sec><sec id="s2-5"><title>Study-Specific Relational Database</title><p>The use of a study-specific database was critical in the day-to-day operations of both the framework and this study itself (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Having certain restrictions inherent in the structure of the relational database, which included length of the field, primary and foreign keys, data types, allowable potentially identifying information, and allowable codes for categorical fields (enumerated types), establishes this study&#x2019;s database both as the canonical source of study data and as a redundant QC process by passively ensuring data ingested is in accordance with this study&#x2019;s data dictionary as well as all relevant regulations surrounding human-participants research and privacy protections. In our case, with the hierarchical structure of donor, visit, testing, and survey data, a study database was instrumental in keeping data integrity across tables. Lastly, it is important to note that for organizations working with potentially identifying information, such as hospital records, granular data governance is crucial to augment privacy protections by allowing only relevant stakeholders to view certain parts of the database.</p></sec><sec id="s2-6"><title>Automated Secure Data Retrieval</title><p>This study relied on 2 data transfer methods that are encrypted both in transit and at rest: Microsoft OneDrive (Microsoft Corp) and an sFTP server. The module responsible for automated secure data retrieval performs several processes and uses Microsoft Workflows (previously Microsoft Power Automate; Microsoft Corp) as a method of notifying submitters and stakeholders of any new submissions. The workflow or module of the framework is also responsible for notifying stakeholders whether the submission passed QC and was accepted, or did not pass the QC checks and was rejected. Each participating organization is given a separate directory (also known as a bucket) on the sFTP server, along with credentials for their account. Within each bucket, 3 directories are made: upload, download, and archive. Users are only allowed to add or remove files in the upload and download directories, where they can upload data submissions or download data validation reports and rejected data submissions, respectively. The archive folder is where accepted submissions are moved and stored as a backup and for data audit purposes.</p><p>Automated listeners monitor each directory, with a process that is triggered when a file is altered or added in the directory. A change in the upload directory triggers a new submission email to all relevant stakeholders, while a change in the download and archive directories triggers rejection and acceptance emails, respectively. When a new submission is added to the upload folder, an R module downloads the file and removes it from the upload directory. The module then performs a QC process specified in the configuration file and, depending on the outcome of the quality control check, performs 1 of 2 actions: if the submission failed QC, the module uploads an itemized list of all quality control issues encountered to the download folder, which prompts a submission rejection email to all stakeholders with the itemized list of quality control issues encountered attached. Data type, length, date, and enumerator checks are examples of what the quality control process entails. If the submission passes quality control, the accepted submission is uploaded to the archive folder, the data is imported into this study&#x2019;s database, and a submission acceptance email is sent out to all stakeholders.</p></sec><sec id="s2-7"><title>Data Intake Workflows</title><sec id="s2-7-1"><title>Donor and Donation Data</title><p>The extract, transform, load (ETL) process for donor and donation data differed between the NBDS and NBDC programs, as well as by organization for both studies. For the NBDS, donation data was transferred from all participating organizations through the sFTP server, while for the NBDC, the data for Vitalant was transferred through Samba servers to VRI. Donor and donation data for ARC were sent directly to the DCC. For the NBDS, all submissions were required to adhere to the agreed-upon data dictionary and were submitted in accordance with a data submission guideline document shared with all participating organizations (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). For the NBDC, the only processes required were QC and data transformation for Vitalant donor and donation data. Once Vitalant&#x2019;s data submission passed quality control and was transformed per this study&#x2019;s data dictionary, the data were imported to the study database (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s2-7-2"><title>Laboratory Testing Data</title><p>Testing data for both the NBDS and NBDC programs were generated by testing and immunology laboratories, with each testing laboratory having a separate workflow and R module. Transmission of test results from the test laboratory was sent via a cloud service, and a QC check was performed to ensure adherence to this study&#x2019;s data dictionary. For testing data from the immunology laboratory, output from testing instruments was retrieved, reformatted to fit this study&#x2019;s data dictionary specifications, and imported into this study&#x2019;s database automatically by an R module developed for this purpose (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p></sec><sec id="s2-7-3"><title>Donor Survey Data</title><p>Donor survey data consisted of both survey questions and responses over time. As survey questions change to accommodate changes to this study over time, the data management approach must track how these questions shift and change over time&#x2014;maintaining equivalency of questions across survey rounds. The data management approach also must be able to record answers to each question without having to change the database table structure with every change in survey question (<xref ref-type="fig" rid="figure3">Figure 3</xref>). It was also important to capture analysis-ready data that integrated data captured across survey rounds in a consistent format.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Management of survey data in this study&#x2019;s database. Levels are used to imply the linkage dependency of tables. Level 1 tables do not need primary and foreign keys to link to another table, while levels 2, 3, and 4 rely on linkage with subsequent levels. Standard cardinality notation is used.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v18i1e81119_fig03.png"/></fig><p>The database structure for the donor survey data was generally categorized as either a raw data table or an analytic table. The raw data tables consist of survey questions, survey responses, and survey answers, while analytic tables include any tables using the survey data to derive information that might be beneficial for analysts without having to parse the raw answers. The survey questions table records every question ever given to donors across all survey rounds and assigns to each question a question ID. Equivalent questions across rounds share the same question ID for ease of analysis. The survey responses table records whether a respondent has a completed response for each survey round and whether the respondent consented for their answers to be used in research (ie, 1 row per respondent per survey round). For donors who did not consent for their answers to be used in research, none of their survey answers are recorded in the database, though they are still recorded as having responded to the survey and flagged as nonconsenting. While respondents generally did not answer any further questions if they failed to provide consent at the outset, consent could be withdrawn later in the process, and these technical safeguards ensured that no responses were recorded without consent to use the responses in research. The survey answers table consists of several fields, including a unique response ID that links to the responses table, a question ID that links to the survey questions table, and the answer given to the specific question by the respondent on the survey.</p><p>Two modules are responsible for managing the survey data, both of which rely on raw output from the survey platform. The first module is responsible for assigning equivalency to each question based on the question ID and for updating the survey questions table, while the other module parses survey responses and updates both the survey responses and survey answers table.</p></sec></sec><sec id="s2-8"><title>Reporting Workflows</title><p>Reporting is done through a module for each type of report defined by the program. For example, reporting for the NBDS program only required testing reports. In contrast, the NBDC program required donor, donation, survey, and testing reports. While each reporting module pulls information from the configuration file on which fields to extract from which tables in the database, and how to transform the data to fit the data dictionary&#x2019;s reporting specifications, extra code is generally needed for specific requirements that require more specific QC steps than the standard checks for data format and length. This necessitates more modular code to be included. For example, changes in how a laboratory test is configured might require changes in the quality control process, for example, to check for values in other fields, or to transform values based on one or more entries in other fields.</p></sec><sec id="s2-9"><title>Ethical Considerations</title><p>All blood donors consented to the use of deidentified, residual specimens for further research purposes. Consistent with the policies and guidance of the University of California, San Francisco Institutional Review Board, VRI self-certified the use of deidentified donations in this study as not meeting the criteria for human participant research. CDC investigators reviewed and relied on this determination as consistent with applicable federal law and CDC policy (45 C.F.R. part 46, 21 C.F.R. part 56; 42 U.S.C. &#x00A7; 241[d]; 5 U.S.C. &#x00A7; 552a; 44 U.S.C. &#x00A7; 3501). The study number is Pro00056783. The donor surveys conducted by Vitalant were conducted under a protocol supervised and approved by the Advarra Institutional Review Board (Pro00056783) and linked to biospecimens in deidentified form.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Customizability</title><p>The framework we developed resulted in a data system that is robust, reusable, and adaptable. Several factors contribute to the adaptability and reusability of the system. First, by separating generic code from the study-specific information, the system can adapt to changes in the study or be reused for another study with a greatly reduced investment of time to set up a study-specific data system (<xref ref-type="table" rid="table1">Table 1</xref>). The use of a configuration file that instructs the base code on how to access certain study-specific resources; what data types, variable lengths, and data types are allowed for specific entries; and how to generate multiple types of reporting required by the study or project allows for the study&#x2019;s data system to be quickly adapted to changes in the study or implementation of new substudies, without lengthy development time and making extensive changes to the code. An example would be how changes to a study&#x2019;s codebook are implemented. For systems where certain fields or information are hard-coded, implementing a change means changing the base code. In the case of our system, editing the configuration file is sufficient to implement the changes.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of the key design principles that guided the development of the WIM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> data management framework.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Principle</td><td align="left" valign="bottom">What the principle seeks</td><td align="left" valign="bottom">WIM design features</td><td align="left" valign="bottom">Examples from NBDS<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>/NBDC<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Findable</td><td align="left" valign="top">Data and metadata can be easily found via clear documentation and identifiers</td><td align="left" valign="top">Comprehensive data dictionaries and persistent identifiers are enforced across different studies</td><td align="left" valign="top">Shared IDs kept consistent across NBDS and NBDC to enable cross-study linkage and downstream analyses</td></tr><tr><td align="left" valign="top">Accessible</td><td align="left" valign="top">Users can retrieve data with appropriate authorization and governance</td><td align="left" valign="top">Abstracted database web frontend with authentication and granular access control, lowering the skill floor while supporting governance</td><td align="left" valign="top">Nontechnical collaborators access curated tables and reports through the web frontend, with role-based permissions</td></tr><tr><td align="left" valign="top">Interoperable</td><td align="left" valign="top">Systems, tools, and data can work across platforms and contexts</td><td align="left" valign="top">Platform-agnostic modules can connect to multiple software platforms; deployable on-prem or cloud</td><td align="left" valign="top">ETL<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>/QC<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup> modules run on Linux or Windows (Microsoft Corp) and interface with different database engines and file exchange endpoints,</td></tr><tr><td align="left" valign="top">Reusable</td><td align="left" valign="top">Data and tools are packaged to maximize reuse across studies and time</td><td align="left" valign="top">Single project-wide YAML<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> configuration externalizes study-specific logic (ie,
 dictionaries and QC rules) from core code. Omission of a formal component model to preserve flexibility</td><td align="left" valign="top">Reused ingestion, QC, and reporting modules between NBDS and NBDC with primarily configuration changes, minimizing code edits and reducing setup time</td></tr><tr><td align="left" valign="top">Loose coupling</td><td align="left" valign="top">Components change with minimal ripple effects on others</td><td align="left" valign="top">Strict modularization (ingestion, QC, ETL, and reporting separated). Configuration- driven behavior</td><td align="left" valign="top">Updating a QC rule or adding a new data source did not require edits to ingestion listeners or reporting modules</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>WIM: workflow-based information management.</p></fn><fn id="table1fn2"><p><sup>b</sup>NBDS: Nationwide Blood Donor Seroprevalence.</p></fn><fn id="table1fn3"><p><sup>c</sup>NBDC: Nationwide Blood Donor Cohort.</p></fn><fn id="table1fn4"><p><sup>d</sup>ETL: extract, transform, load. </p></fn><fn id="table1fn5"><p><sup>e</sup>QC: quality control. </p></fn><fn id="table1fn6"><p><sup>f</sup>YAML: yet another markup language.</p></fn></table-wrap-foot></table-wrap><p>Second, for data with significant structural variability over time, such as the survey data in our case, native functionality of database engines should be leveraged (<xref ref-type="fig" rid="figure3">Figure 3</xref>). For example, survey data for our study changed regularly between survey rounds, and keeping the data in wide format would mean having to manage a very wide table. As a wide table would mean having to change table configurations and column names as changes are implemented to the surveys, we elected to store survey data in a long format&#x2014;with discrete tables for survey questions, responses, and answers. The use of long format for both survey answers and questions negated the need for tables with a column for each question ever asked on the survey. Additionally, having both the survey questions and answers in a long-format table means that the general data structure can be preserved, even if changes are made to the survey questions, and that the survey questions table can be used as a reference table to check for equivalency of specific questions between different rounds of the survey.</p><p>The last factor to contribute to the framework&#x2019;s reusability and adaptability is being flexible with respect to connections with other systems, along with loose coupling. Interoperability, as defined by Wilkinson et al (2016) [<xref ref-type="bibr" rid="ref18">18</xref>], is the ability of data or tools from noncooperating resources to integrate or work together with minimal effort. By using and interoperating with other preexisting data systems and platforms, including proprietary software systems, the framework can fill in gaps in functionality and be implemented in parallel with both legacy and new systems.</p></sec><sec id="s3-2"><title>Evaluation of Use Case</title><p>We used deployment efficiency, adaptability, and update latency, data throughput, and reusability of core modules as key operational indicators used to evaluate the framework. These metrics reflect real-world performance under high-volume, multiorganizational research conditions. Taken together, these metrics highlight WIM&#x2019;s ability to support large, complex, and rapidly evolving research programs by providing a scalable, adaptable, and reusable data-management infrastructure.</p></sec><sec id="s3-3"><title>Data Throughput and QC Performance</title><p>For the NBDS study, our study data infrastructure handled 2,670,225 donation and testing records from all 50 states in the United States over the lifetime of this study (<xref ref-type="fig" rid="figure4">Figure 4</xref>). These records were generated by 17 BCOs and were submitted to VRI monthly. For the NBDC study, our study data infrastructure handled a total of 1,064,381 donation records from 65,524 donors who were longitudinally followed over the course of 2 or more years (<xref ref-type="fig" rid="figure4">Figure 4</xref>). Donation and donor data were generated and updated on a quarterly basis, while testing data were updated on a weekly basis.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Number of records collected and managed by the NBDS and NBDC programs. (A) Number of records collected by state, with 270,319 records excluded due to missing geographic information. (B) Number of records accumulated over the two programs&#x2019; lifetime. Colors yellow, blue, and red denote whether the record was collected for the repeat donor cohorts 2023, Vitalant repeat donor cohorts 2022, or the National Blood Donor Serosurveillance study, respectively. NBDC: Nationwide Blood Donor Cohort; NBDS: Nationwide Blood Donor Seroprevalence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v18i1e81119_fig04.png"/></fig></sec><sec id="s3-4"><title>Deployment Efficiency</title><p>For deployment efficiency, initial system deployment required approximately 6 months for the NBDS program, whereas deployment for the more complex NBDC program required just over 2 months. This reduction in setup time reflects the reusability of core modules and the ability to externalize all study-specific logic in the YAML configuration file, allowing new implementations to leverage an existing codebase with minimal modification. For other studies with different data needs, only study-specific logic and QC checks would have to be developed or adapted before implementation.</p></sec><sec id="s3-5"><title>Adaptability and Update Latency</title><p>Operational and methodological changes to this study, such as updates to data dictionaries, QC criteria, reporting formats, or survey structures, can be implemented typically within 1 day for minor revisions and within a few days for more complex changes involving changes to multiple workflows or a drastic overhaul of study design. As WIM isolates study-specific rules from core modules and uses an external configuration file, updates could be integrated rapidly into ETL, QC, or reporting pipelines with little to no change to the underlying software.</p><p>The framework successfully supported very high data volumes, which include 2.67 million donation and testing records in NBDS and over 1 million longitudinal donation records in NBDC. Data ingestion and reporting vary in size and complexity, but the framework handled data ingestion and reporting tasks of up to 500,000 rows seamlessly. Automated QC workflows processed each data submission immediately upon arrival, applying rule-based checks for field formats, enumerated values, date validity, and cross-table integrity. The use of automated listeners and rule-based validation eliminated the need for manual prescreening and ensured rapid feedback to submitting organizations.</p></sec><sec id="s3-6"><title>Reusability of Core Modules</title><p>For the NBDS and NBDC studies highlighted here, processes such as ETL routines, QC, secure data retrieval, and reporting were reused with no modification. Study-specific customization was largely restricted to configuration files, data dictionaries, and survey-specific logic. This high degree of functional reuse demonstrates the portability of the framework and supports efficient deployment in new research settings.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Overview</title><p>Modern data generation techniques in life science research increasingly result in large quantities of data. With the increasing rate of data generation comes an increasing need for reliable, stable, and sophisticated information management systems to manage, store, and share data [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. When large datasets from multiple, often disjointed, data sources can be accommodated, our ability to efficiently study large societal and scientific challenges is greatly increased (eg, studying population-wide effects of the COVID-19 pandemic) [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. In this paper, we describe how our approach of developing a modular, flexible, and interoperable data infrastructure using open-source tools allowed us to nimbly manage data in rapidly evolving nationwide COVID-19 studies during the pandemic, and how we can scale, reuse, and adapt the same framework to manage other large multicenter studies with minimal changes to the core software.</p><p>Despite the availability of both commercial and open-source data-management platforms, the WIM framework provides several contributions that differentiate it from existing systems and were essential to supporting the rapidly evolving NBDS and NBDC research programs. First, it uses a single project-wide YAML configuration file to control all modules responsible for data ingestion, transformation, QC, reporting, and database interactions. This design places study-specific logic entirely outside the core codebase, allowing substantial changes to data dictionaries, quality control rules, or reporting specifications to be implemented without modifying the underlying software. This approach reduces development time, lowers operational complexity, and enables modules to be reused with minimal reconfiguration across multiple studies. Second, WIM achieves a high level of modularization and loose coupling, in which each workflow module operates independently. This stands in contrast to many proprietary systems that require vendor-managed customization or open-source pipelines that integrate ingestion, processing, and reporting in a rigid or monolithic structure. By separating generic functions from study-specific rules, WIM supports rapid iteration as research needs evolve and minimizes unintended downstream effects when updating a single module.</p><p>WIM&#x2019;s architecture is designed for portability and reusability beyond the described COVID-19 serosurveillance programs described. Core modules for secure data retrieval, ETL processes, QC, configuration management, and reporting can be repurposed for new studies with only changes to configuration files and study-specific dictionaries. This level of reuse allowed implementation time for NBDC to be reduced from approximately 6 months for NBDS to just over 2 months, even as the complexity and data sources increased. The combination of configuration-driven adaptability, rigorous modularization, automated QC workflows, and open-source extensibility makes the framework a flexible and scalable solution for large, complex, and multiorganizational research environments.</p><p>In building the framework and the data system implemented using the framework, several design principles were followed with the goal of a nimble and reusable system that could be sustainably used in a myriad of different studies and implementation environments with minimal modification. The FAIR principles are detailed in Wilkinson et al (2016) [<xref ref-type="bibr" rid="ref18">18</xref>], the principle of loose coupling, and the omission of a formal component model are the principles that allowed us to develop our framework with sufficient flexibility [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. These design principles allow for a sustainable implementation, as the only changes and maintenance required on the system are dictated by updates to the open-source components used by the system.</p><p>The FAIR principles for data management suggest that contemporary data resources, tools, vocabularies, and infrastructures should exhibit the qualities required to encourage discovery and reuse [<xref ref-type="bibr" rid="ref18">18</xref>]. Our framework follows these design principles in multiple ways. First, for the data to be findable, we keep documentation in the form of data dictionaries and enforce a persistent identifier across multiple studies and data systems for similar data types (eg, primary identifier for a blood donation is the same across studies). Second, for the data to be accessible, we built and manage an abstracted database frontend web application as part of the framework to lower the skill floor of stakeholder access to the data. Through user authentication and granular access control, the use of a web-based front-end application also contributes to data governance [<xref ref-type="bibr" rid="ref13">13</xref>]. Third, for the data system to be interoperable, we have designed our data system to be agnostic with respect to operating systems and database engines, and to be able to connect to multiple other software platforms. Lastly, for the data to be reusable, we prioritized bidirectional compatibility and stability, wherein the data collected and managed in any 1 study should be linkable to and available for use in analyses of future data, as appropriate [<xref ref-type="bibr" rid="ref20">20</xref>], while complying with applicable ethical and IRB requirements.</p><p>Two further guiding design principles we followed were those of loose coupling and the omission of a formal component model from the framework [<xref ref-type="bibr" rid="ref19">19</xref>]. The omission of a formal component model was motivated by the fact that research is often not a linear endeavor, with changes to the process happening regularly as priorities shift. This is especially true for our work on the NBDS and NBDC programs. For this reason, modules were separated through workflows: data ingestion, management, and reporting. Each of these workflows can have multiple components, depending on the complexity of the study, but each workflow maintains its function, and modular code can be added or removed depending on the need.</p><p>On loose coupling, we took inspiration from ethology and decided that our framework should be agnostic with respect to operating system, relational database system of choice, and deployment method [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. By choosing to develop our data system in R, and by omitting any functions that are dependent on the operating system, we allowed for the code to be run on any operating system that could run R. This also means that the system could be deployed on on-premises or cloud infrastructure. In our case, on-premises deployment was chosen, but in future studies, we plan to make use of cloud deployments, depending on cost, data security, and cross-organization access requirements.</p><p>While adaptability and data throughput capacity are important in any large-scale research data infrastructure, for human participants&#x2019; research we consider data integrity and the protection of sensitive personal information of participants to be of equal importance. Data integrity for the system is maintained through appropriate QC steps at every stage of data processing, along with the appropriate redundancies. For example, a data submission might be put through 2 QC steps, in parallel or sequentially. Automated or semiautomated handling of data submissions further supports data integrity, since rejected submissions that failed QC checks are sent back to submitters, along with a report providing detailed information on quality problems, for review, correction, and resubmission. For the protection of sensitive personal information, steps are taken to minimize the risk of a data breach by encrypting data in transit and at rest, making use of appropriately managed infrastructure (including vulnerability monitoring and an appropriate patching schedule), while also minimizing the collection and storage of personally identifying information. This ensures that even in the event of a data breach, the risk to donors or participants is minimized. An example of a practice we used to minimize the identifiability of research participants was maintaining dates of birth only at the month level and avoiding storage of multiple indirect identifiers. Further examples may include storing geographic information in formats that encode larger areas, such as 3-digit rather than 5-digit ZIP codes.</p></sec><sec id="s4-2"><title>Conclusions</title><p>With life sciences research becoming increasingly reliant on large, interconnected datasets and databases, the need for adaptable, reusable, and scalable methods to manage and curate data at an organizational or multiorganizational level will also increase [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Our work shows that open-source software, along with community-led software ecosystems, can be used to meet this need and provide superior functionality and flexibility to costly proprietary data management platforms.</p><p>With the rapid advancements in technology, bioinformatics and data management have become crucial to life sciences research. This is especially true for large-scale studies addressing significant scientific and societal issues, such as the COVID-19 pandemic. The described framework was built on the need for a nimble and abstracted data management system for multicenter studies. This was achieved by using modular components, and a single project-wide configuration file in YAML format sets all module settings, including QC processes, data dictionaries, credentials, and database connection strings. These features streamline data quality control, data flow, and data formatting, while lowering the effort required to implement changes and lowering the skill floor required to manage a system that usually requires a skilled data manager.</p></sec></sec></body><back><ack><p>We would like to express our deepest gratitude to our colleagues at Creative Testing Solutions, Westat, the Centers for Disease Control and Prevention (CDC), and Vitalant Research Institute for their encouragement and constructive feedback. We also extend our appreciation to the CDC for their financial support, which made this research possible. Generative artificial intelligence was not used in the preparation of this paper.</p></ack><notes><sec><title>Funding</title><p>This study was funded by the CDC. The funder had no involvement in this study's design, data collection, analysis, interpretation, or writing of this paper.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CDC</term><def><p>Centers for Disease Control and Prevention</p></def></def-item><def-item><term id="abb2">DCC</term><def><p>data coordinating center</p></def></def-item><def-item><term id="abb3">ETL</term><def><p>extract, transform, load</p></def></def-item><def-item><term id="abb4">FAIR</term><def><p>Findable, Accessible, Interoperable, Reusable</p></def></def-item><def-item><term id="abb5">NBDC</term><def><p>Nationwide Blood Donor Cohort</p></def></def-item><def-item><term id="abb6">NBDS</term><def><p>Nationwide Blood Donor Seroprevalence</p></def></def-item><def-item><term id="abb7">QC</term><def><p>quality control</p></def></def-item><def-item><term id="abb8">sFTP</term><def><p>secure file transfer protocol</p></def></def-item><def-item><term id="abb9">VRI</term><def><p>Vitalant Research Institute</p></def></def-item><def-item><term id="abb10">WIM</term><def><p>workflow-based information management</p></def></def-item><def-item><term id="abb11">YAML</term><def><p>yet another markup language</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Das</surname><given-names>G</given-names> </name><name name-style="western"><surname>Khatua</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>Z</given-names> </name></person-group><article-title>Big data in biology: the hope and present-day challenges in it</article-title><source>Gene Rep</source><year>2020</year><month>12</month><volume>21</volume><fpage>100869</fpage><pub-id pub-id-type="doi">10.1016/j.genrep.2020.100869</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iqbal</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>P</given-names> </name></person-group><article-title>From data science to bioscience: emerging era of bioinformatics applications, tools and challenges</article-title><source>Procedia Comput Sci</source><year>2023</year><volume>218</volume><fpage>1516</fpage><lpage>1528</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2023.01.130</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Han</surname><given-names>L</given-names> </name></person-group><article-title>Bioinformatics: advancing biomedical discovery and innovation in the era of big data and artificial intelligence</article-title><source>TIME</source><year>2023</year><volume>1</volume><issue>1</issue><fpage>100012</fpage><pub-id pub-id-type="doi">10.59717/j.xinn-med.2023.100012</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boles</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bergeron</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kiehl</surname><given-names>TR</given-names> </name></person-group><article-title>Big data access and infrastructure for modern biology: case studies in data repository utility</article-title><source>Ann N Y Acad Sci</source><year>2017</year><month>01</month><volume>1387</volume><issue>1</issue><fpage>112</fpage><lpage>123</lpage><pub-id pub-id-type="doi">10.1111/nyas.13281</pub-id><pub-id pub-id-type="medline">27801987</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Attwood</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Blackford</surname><given-names>S</given-names> </name><name name-style="western"><surname>Brazas</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Davies</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schneider</surname><given-names>MV</given-names> </name></person-group><article-title>A global perspective on evolving bioinformatics and data science training needs</article-title><source>Brief Bioinformatics</source><year>2019</year><month>03</month><day>25</day><volume>20</volume><issue>2</issue><fpage>398</fpage><lpage>404</lpage><pub-id pub-id-type="doi">10.1093/bib/bbx100</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dash</surname><given-names>S</given-names> </name><name name-style="western"><surname>Shakyawar</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kaushik</surname><given-names>S</given-names> </name></person-group><article-title>Big data in healthcare: management, analysis and future prospects</article-title><source>J Big Data</source><year>2019</year><month>12</month><volume>6</volume><issue>1</issue><fpage>54</fpage><pub-id pub-id-type="doi">10.1186/s40537-019-0217-0</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stone</surname><given-names>M</given-names> </name><name name-style="western"><surname>Di Germanio</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wright</surname><given-names>DJ</given-names> </name><etal/></person-group><article-title>Use of US blood donors for national serosurveillance of severe acute respiratory syndrome coronavirus 2 antibodies: basis for an expanded national donor serosurveillance program</article-title><source>Clin Infect Dis</source><year>2022</year><month>03</month><day>9</day><volume>74</volume><issue>5</issue><fpage>871</fpage><lpage>881</lpage><pub-id pub-id-type="doi">10.1093/cid/ciab537</pub-id><pub-id pub-id-type="medline">34111244</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adalja</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Toner</surname><given-names>E</given-names> </name><name name-style="western"><surname>Inglesby</surname><given-names>TV</given-names> </name></person-group><article-title>Priorities for the US health community responding to COVID-19</article-title><source>JAMA</source><year>2020</year><month>04</month><day>14</day><volume>323</volume><issue>14</issue><fpage>1343</fpage><lpage>1344</lpage><pub-id pub-id-type="doi">10.1001/jama.2020.3413</pub-id><pub-id pub-id-type="medline">32125355</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rahman</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Miah</surname><given-names>M</given-names> </name><etal/></person-group><article-title>COVID-19 reinfections among naturally infected and vaccinated individuals</article-title><source>Sci Rep</source><year>2022</year><month>01</month><day>26</day><volume>12</volume><issue>1</issue><fpage>1438</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-05325-5</pub-id><pub-id pub-id-type="medline">35082344</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Pulliam</surname><given-names>J</given-names> </name></person-group><article-title>COVID-19 infection, reinfection, and the transition to endemicity</article-title><source>Lancet</source><year>2023</year><month>03</month><volume>401</volume><issue>10379</issue><fpage>798</fpage><lpage>800</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(22)02634-4</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jones</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Manrique</surname><given-names>IM</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>MS</given-names> </name><etal/></person-group><article-title>Estimates of SARS-CoV-2 seroprevalence and incidence of primary SARS-CoV-2 infections among blood donors, by COVID-19 vaccination status - United States, April 2021-September 2022</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2023</year><month>06</month><day>2</day><volume>72</volume><issue>22</issue><fpage>601</fpage><lpage>605</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm7222a3</pub-id><pub-id pub-id-type="medline">37262007</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fink</surname><given-names>RV</given-names> </name><name name-style="western"><surname>Fisher</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sulaeman</surname><given-names>H</given-names> </name><etal/></person-group><article-title>How do we&#x2026;form and coordinate a national serosurvey of SARS-CoV-2 within the blood collection industry?</article-title><source>Transfusion</source><year>2022</year><month>07</month><volume>62</volume><issue>7</issue><fpage>1321</fpage><lpage>1333</lpage><pub-id pub-id-type="doi">10.1111/trf.16943</pub-id><pub-id pub-id-type="medline">35607854</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leonelli</surname><given-names>S</given-names> </name></person-group><article-title>Global data for local science: assessing the scale of data infrastructures in biological and biomedical research</article-title><source>Biosocieties</source><year>2013</year><month>12</month><volume>8</volume><issue>4</issue><fpage>449</fpage><lpage>465</lpage><pub-id pub-id-type="doi">10.1057/biosoc.2013.23</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harrow</surname><given-names>J</given-names> </name><name name-style="western"><surname>Drysdale</surname><given-names>R</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>A</given-names> </name><name name-style="western"><surname>Repo</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lanfear</surname><given-names>J</given-names> </name><name name-style="western"><surname>Blomberg</surname><given-names>N</given-names> </name></person-group><article-title>ELIXIR: providing a sustainable infrastructure for life science data at European scale</article-title><source>Bioinformatics</source><year>2021</year><month>08</month><day>25</day><volume>37</volume><issue>16</issue><fpage>2506</fpage><lpage>2511</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btab481</pub-id><pub-id pub-id-type="medline">34175941</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fortunato</surname><given-names>L</given-names> </name><name name-style="western"><surname>Galassi</surname><given-names>M</given-names> </name></person-group><article-title>The case for free and open source software in research and scholarship</article-title><source>Phil Trans R Soc A</source><year>2021</year><month>05</month><day>17</day><volume>379</volume><issue>2197</issue><pub-id pub-id-type="doi">10.1098/rsta.2020.0079</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="web"><source>R: a language and environment for statistical computing</source><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.R-project.org">https://www.R-project.org</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><source>Python</source><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://python.org">https://python.org</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilkinson</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Dumontier</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aalbersberg</surname><given-names>IJJ</given-names> </name><etal/></person-group><article-title>The FAIR guiding principles for scientific data management and stewardship</article-title><source>Sci Data</source><year>2016</year><month>03</month><day>15</day><volume>3</volume><issue>1</issue><fpage>160018</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id><pub-id pub-id-type="medline">26978244</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Pressman</surname><given-names>RS</given-names> </name></person-group><source>Software Engineering: A Practitioner&#x2019;s Approach</source><year>2010</year><access-date>2026-03-21</access-date><edition>7</edition><publisher-name>McGraw-Hill/Higher Education</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.mlsu.ac.in/econtents/16_EBOOK-7th_ed_software_engineering_a_practitioners_approach_by_roger_s._pressman_.pdf">https://www.mlsu.ac.in/econtents/16_EBOOK-7th_ed_software_engineering_a_practitioners_approach_by_roger_s._pressman_.pdf</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bauch</surname><given-names>A</given-names> </name><name name-style="western"><surname>Adamczyk</surname><given-names>I</given-names> </name><name name-style="western"><surname>Buczek</surname><given-names>P</given-names> </name><etal/></person-group><article-title>openBIS: a flexible framework for managing and analyzing complex data in biology research</article-title><source>BMC Bioinformatics</source><year>2011</year><month>12</month><day>8</day><volume>12</volume><issue>1</issue><fpage>468</fpage><pub-id pub-id-type="doi">10.1186/1471-2105-12-468</pub-id><pub-id pub-id-type="medline">22151573</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><article-title>hasanregius/workflow-based-information-management</article-title><source>GitHub</source><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/hasanregius/workflow-based-information-management">https://github.com/hasanregius/workflow-based-information-management</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Rudolph</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schubert</surname><given-names>M</given-names> </name></person-group><article-title>Box: write reusable, composable and modular R code</article-title><source>CRAN: Package box</source><year>2025</year><month>11</month><day>28</day><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/box/index.html">https://cran.r-project.org/web/packages/box/index.html</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ben-Kiki</surname><given-names>O</given-names> </name><name name-style="western"><surname>Evans</surname><given-names>C</given-names> </name><name name-style="western"><surname>d&#x00F6;t Net</surname><given-names>I</given-names> </name></person-group><source>YAML Ain&#x2019;t Markup Language (YAMLTM) Version 12</source><year>2009</year><month>10</month><day>1</day><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://yaml.org/spec/1.2.1/">https://yaml.org/spec/1.2.1/</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>Curl: a modern and flexible web client for R</article-title><source>CRAN: Package curl</source><year>2025</year><month>08</month><day>19</day><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/curl/index.html">https://cran.r-project.org/web/packages/curl/index.html</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>stenevang/sftp</article-title><source>GitHub</source><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/stenevang/sftp">https://github.com/stenevang/sftp</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>RCurl: general network (HTTP/FTP/...) client interface for R</article-title><source>CRAN: Package RCurl</source><year>2025</year><month>03</month><day>22</day><access-date>2026-03-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/RCurl/index.html">https://cran.r-project.org/web/packages/RCurl/index.html</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Di Germanio</surname><given-names>C</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Balasko</surname><given-names>B</given-names> </name><etal/></person-group><article-title>P&#x2010;CB&#x2010;3 | anti&#x2010;spike and nucleocapsid antibody dynamics following SARS&#x2010;CoV&#x2010;2 infection and vaccination: implications for sourcing COVID&#x2010;19 convalescent plasma</article-title><source>Transfusion</source><year>2023</year><month>10</month><volume>63</volume><issue>S5</issue><fpage>148A</fpage><pub-id pub-id-type="doi">10.1111/trf.186_17554</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grebe</surname><given-names>E</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>M</given-names> </name><name name-style="western"><surname>Spencer</surname><given-names>BR</given-names> </name><etal/></person-group><article-title>Detection of nucleocapsid antibodies associated with primary SARS-CoV-2 infection in unvaccinated and vaccinated blood donors</article-title><source>Emerg Infect Dis</source><year>2024</year><month>08</month><volume>30</volume><issue>8</issue><fpage>1621</fpage><lpage>1630</lpage><pub-id pub-id-type="doi">10.3201/eid3008.240659</pub-id><pub-id pub-id-type="medline">38981189</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stone</surname><given-names>M</given-names> </name><name name-style="western"><surname>Grebe</surname><given-names>E</given-names> </name><name name-style="western"><surname>Sulaeman</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Evaluation of commercially available high-throughput SARS-CoV-2 serologic assays for serosurveillance and related applications</article-title><source>Emerg Infect Dis</source><year>2022</year><month>03</month><volume>28</volume><issue>3</issue><fpage>672</fpage><lpage>683</lpage><pub-id pub-id-type="doi">10.3201/eid2803.211885</pub-id><pub-id pub-id-type="medline">35202525</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sulaeman</surname><given-names>H</given-names> </name><name name-style="western"><surname>Grebe</surname><given-names>E</given-names> </name><name name-style="western"><surname>Dave</surname><given-names>H</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Chao</surname><given-names>DY</given-names> </name></person-group><article-title>Evaluation of Ortho VITROS and Roche Elecsys S and NC immunoassays for SARS-CoV-2 serosurveillance applications</article-title><source>Microbiol Spectr</source><year>2023</year><month>08</month><day>17</day><volume>11</volume><issue>4</issue><fpage>e0323422</fpage><pub-id pub-id-type="doi">10.1128/spectrum.03234-22</pub-id><pub-id pub-id-type="medline">37347180</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Naeem</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jamal</surname><given-names>T</given-names> </name><name name-style="western"><surname>Diaz-Martinez</surname><given-names>J</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Pan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Balas</surname><given-names>VE</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>CM</given-names> </name></person-group><article-title>Trends and future perspective challenges in big data</article-title><source>Advances in Intelligent Data Analysis and Applications</source><year>2022</year><publisher-name>Springer, Singapore</publisher-name><fpage>309</fpage><lpage>325</lpage><pub-id pub-id-type="doi">10.1007/978-981-16-5036-9_30</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Mandal</surname><given-names>S</given-names> </name></person-group><article-title>Big data in bioinformatics and computational biology: basic insights</article-title><source>Methods Mol Biol</source><year>2024</year><volume>2719</volume><fpage>153</fpage><lpage>166</lpage><pub-id pub-id-type="doi">10.1007/978-1-0716-3461-5_9</pub-id><pub-id pub-id-type="medline">37803117</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xia</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Niu</surname><given-names>S</given-names> </name></person-group><article-title>Research challenges and opportunities for using big data in global change biology</article-title><source>Glob Chang Biol</source><year>2020</year><month>11</month><volume>26</volume><issue>11</issue><fpage>6040</fpage><lpage>6061</lpage><pub-id pub-id-type="doi">10.1111/gcb.15317</pub-id><pub-id pub-id-type="medline">32799353</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Awotunde</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Oluwabukonla</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chakraborty</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bhoi</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Ajamu</surname><given-names>GJ</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Hassan</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Mohamed</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Alnowibet</surname><given-names>KA</given-names> </name></person-group><article-title>Application of artificial intelligence and big data for fighting COVID-19 pandemic</article-title><source>Decision Sciences for COVID-19: Learning Through Case Studies</source><year>2022</year><publisher-name>Springer International Publishing</publisher-name><fpage>3</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-87019-5_1</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nathan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Monk</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Arlinghaus</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Big-data approaches lead to an increased understanding of the ecology of animal movement</article-title><source>Science</source><year>2022</year><month>02</month><day>18</day><volume>375</volume><issue>6582</issue><fpage>eabg1780</fpage><pub-id pub-id-type="doi">10.1126/science.abg1780</pub-id><pub-id pub-id-type="medline">35175823</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Glassman</surname><given-names>RB</given-names> </name></person-group><article-title>Persistence and loose coupling in living systems</article-title><source>Syst Res</source><year>1973</year><month>03</month><volume>18</volume><issue>2</issue><fpage>83</fpage><lpage>98</lpage><pub-id pub-id-type="doi">10.1002/bs.3830180202</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kasprzyk</surname><given-names>A</given-names> </name></person-group><article-title>BioMart: driving a paradigm change in biological data management</article-title><source>Database (Oxford)</source><year>2011</year><volume>2011</volume><fpage>bar049</fpage><pub-id pub-id-type="doi">10.1093/database/bar049</pub-id><pub-id pub-id-type="medline">22083790</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Data dictionary used for the first iteration of this study.</p><media xlink:href="ojphi_v18i1e81119_app1.xlsx" xlink:title="XLSX File, 31 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Data dictionary used for the repeat donor cohort iteration of the serosurveillance study.</p><media xlink:href="ojphi_v18i1e81119_app2.xlsx" xlink:title="XLSX File, 31 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Data submission guidelines used to train collaborators during the data submission process.</p><media xlink:href="ojphi_v18i1e81119_app3.xlsx" xlink:title="XLSX File, 141 KB"/></supplementary-material></app-group></back></article>