<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">Online J Public Health Inform</journal-id><journal-id journal-id-type="publisher-id">ojphi</journal-id><journal-id journal-id-type="index">45</journal-id><journal-title>Online Journal of Public Health Informatics</journal-title><abbrev-journal-title>Online J Public Health Inform</abbrev-journal-title><issn pub-type="epub">1947-2579</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v17i1e68013</article-id><article-id pub-id-type="doi">10.2196/68013</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Identifying Substance Use and High-Risk Sexual Behavior Among Sexual and Gender Minority Youth by Using Mobile Phone Data: Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Beikzadeh</surname><given-names>Mehrab</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Holloway</surname><given-names>Ian W</given-names></name><degrees>MPH, MSW, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>K&#x00E4;rkk&#x00E4;inen</surname><given-names>Kimmo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Hong</surname><given-names>Chenglin</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cascalheira</surname><given-names>Cory</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wu</surname><given-names>Elizabeth S C</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Boka</surname><given-names>Callisto</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Avenda&#x00F1;o</surname><given-names>Alexandra C</given-names></name><degrees>MA</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yonko</surname><given-names>Elizabeth A</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sarrafzadeh</surname><given-names>Majid</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Science, UCLA Samueli School Of Engineering, University of California, Los Angeles</institution><addr-line>7400 Boelter Hall</addr-line><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Social Welfare, University of California, Los Angeles</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Optum</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United 
States</country></aff><aff id="aff4"><institution>School of Social Work, University of Connecticut</institution><addr-line>Hartford</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff5"><institution>Addiction Treatment Center, VA Puget Sound Health Care System</institution><addr-line>Seattle</addr-line><addr-line>WA</addr-line><country>United States</country></aff><aff id="aff6"><institution>Department of Epidemiology, UCLA Fielding School of Public Health, University of California, Los Angeles</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mensah</surname><given-names>Edward</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Hsu</surname><given-names>Hsun-Ta</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Cook</surname><given-names>Stephanie</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to  Mehrab Beikzadeh, MS, Department of Computer Science, UCLA Samueli School Of Engineering, University of California, Los Angeles, 7400 Boelter Hall, Los Angeles, CA, 90034, United States, 1 4245664464; <email>mehrabbeikzadeh@cs.ucla.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>12</day><month>8</month><year>2025</year></pub-date><volume>17</volume><elocation-id>e68013</elocation-id><history><date date-type="received"><day>25</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>23</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>20</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Mehrab Beikzadeh, Ian W Holloway, Kimmo K&#x00E4;rkk&#x00E4;inen, Chenglin Hong, Cory 
Cascalheira, Elizabeth S C Wu, Callisto Boka, Alexandra C Avenda&#x00F1;o, Elizabeth Ann Yonko, Majid Sarrafzadeh. Originally published in the Online Journal of Public Health Informatics (<ext-link ext-link-type="uri" xlink:href="https://ojphi.jmir.org/">https://ojphi.jmir.org/</ext-link>), 12.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Online Journal of Public Health Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://ojphi.jmir.org/">https://ojphi.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ojphi.jmir.org/2025/1/e68013"/><abstract><sec><title>Background</title><p>Sexual and gender minority (SGM) individuals are at higher risk for substance use and sexually transmitted infections than their non-SGM peers. 
Collecting mobile phone usage data passively may open new opportunities for personalizing interventions, as behavioral risks could be identified without user input.</p></sec><sec><title>Objective</title><p>This study aimed to determine (1) whether passively sensed mobile phone data can be used to identify substance use and sexual risk behaviors for sexually transmitted infection (STI) and HIV transmission among young SGM who have sex with men, (2) which outcomes can be predicted with a high level of accuracy, and (3) which passive data sources are most predictive of these outcomes.</p></sec><sec sec-type="methods"><title>Methods</title><p>We developed a mobile phone app to collect participants&#x2019; messaging, location, and app use data and trained a machine learning model to predict risk behaviors for STI and HIV transmission. We used Scikit-learn to train logistic regression and gradient boosting classification models with simple linear model specification to predict participants' substance use and sexual behaviors (ie, condomless anal sex, number of sexual partners, and methamphetamine use), which were validated using self-report questionnaires. <italic>F</italic><sub>1</sub>-scores were used to quantify prediction accuracy of the model using different data sources (and combinations of these sources) for prediction. Differences between text, location, app use, and Linguistic Inquiry and Word Count (LIWC) domains by outcome were investigated using independent <italic>t</italic> tests where associations were considered significant at <italic>P</italic>&#x003C;.05.</p></sec><sec sec-type="results"><title>Results</title><p>Among participants (n=82) who identified as SGM, were sexually active, and reported recent substance use, our model was highly predictive of methamphetamine use and having &#x2265;6 sexual partners (<italic>F</italic><sub>1</sub>-scores as high as 0.83 and 0.69, respectively). 
The model was less predictive of condomless anal sex (highest <italic>F</italic><sub>1</sub>-score 0.38). Overall, text-based features were found to be most predictive, but app use and location data improved predictive accuracy, particularly for detecting &#x2265;6 sexual partners. Methamphetamine use was significantly associated with dating app use (<italic>P</italic>=.01) and use of sex-related words (<italic>P</italic>=.002). Having &#x2265;6 sex partners was associated with dating app use (<italic>P</italic>=.02), use of sex-related words (<italic>P</italic>=.001), and traveling a further distance from home (<italic>P</italic>=.03), on average, compared to participants with fewer sex partners. Methamphetamine users were more likely to use social (<italic>P</italic>=.002) and affect words (<italic>P</italic>=.003) and less likely to use drive-related words (<italic>P</italic>=.02). People having 6 or more partners were more likely to use social, affect words, and cognitive process-related words (<italic>P</italic>=.003 and .004 respectively).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our results show that passively collected mobile phone data may be useful in detecting sexual risk behaviors. Expanding data collection may improve the results further, as certain behaviors, such as injection drug use, were quite rare in the study sample. 
These models may be used to personalize STI and HIV prevention as well as substance use harm reduction interventions.</p></sec><sec sec-type="registered-report"><title>International Registered Report Identifier (IRRID)</title><p>RR2-10.2196/58448</p></sec></abstract><kwd-group><kwd>substance use</kwd><kwd>HIV risk</kwd><kwd>sexual and gender minoritized</kwd><kwd>mobile app</kwd><kwd>eHealth</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Sexual and gender minoritized (SGM) individuals are at higher risk for substance use and sexually transmitted infections (STIs) than the general United States population. Among SGM populations, men who have sex with men (MSM), for example, are twice as likely to use illicit drugs [<xref ref-type="bibr" rid="ref1">1</xref>], which may be used to cope with negative life events and thoughts, or to enhance pleasure during sex [<xref ref-type="bibr" rid="ref2">2</xref>]. Over half of new HIV infections occur among SGM, which can be attributed to sexual risk behaviors and intravenous drug use (IDU) [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Between 2018 and 2022, the Centers for Disease Control and Prevention reported HIV diagnoses increased significantly among transgender and gender nonbinary populations, more so than among cisgender men or women [<xref ref-type="bibr" rid="ref5">5</xref>]. Research suggests that these health disparities in substance use and HIV are generated by unjust social conditions [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] and increased exposure to minority stressors [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. 
SGM are also at higher odds of mental distress and depression [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], which in turn may increase substance use as a coping mechanism [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Systematic reviews of studies in SGM populations, largely thus far tailored for MSM, have shown that interventions can be effective on methamphetamine- and sexual health-related outcomes, such as condomless anal sex or substance use during sex [<xref ref-type="bibr" rid="ref13">13</xref>], and participants find these interventions useful for gaining new knowledge and skills [<xref ref-type="bibr" rid="ref14">14</xref>]. In addition, participants find interventions useful for self-reflection [<xref ref-type="bibr" rid="ref14">14</xref>], which may lead to behavior change. However, results from a global survey among MSM who use substances found that only 11% of respondents had access to substance use treatment programs and only 5% participated in such a program [<xref ref-type="bibr" rid="ref15">15</xref>]. In the United States, only 6.5% of people who needed substance use treatment received it in 2020 [<xref ref-type="bibr" rid="ref16">16</xref>]. The majority of those who want substance use treatment but do not receive it experience significant access barriers such as affordability due to the lack of health care coverage, not finding an appropriate program, fear of others having a negative opinion of them, and the absence of culturally informed treatment tailored to the unique needs of SGM [<xref ref-type="bibr" rid="ref17">17</xref>]. 
Therefore, efforts to address these disparities should prioritize improving access to health services for SGM, particularly strategies to deliver health services and identify those at the highest risk.</p><p>Mobile- and eHealth-based interventions could improve the accessibility of interventions, as they have potential to overcome many of these treatment barriers (eg, overcoming the stigma of receiving substance use treatment by using the eHealth intervention from the privacy of one&#x2019;s home). Mobile and eHealth interventions may also open new opportunities for personalization through increased availability of data about participants. Prior studies have shown success in providing personalized HIV interventions to MSM and people using substances [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. However, this personalization typically depends on participants reporting behaviors manually, which increases participant burden. For example, one study asked participants to respond to either daily or biweekly surveys, which many participants reported to be too repetitive [<xref ref-type="bibr" rid="ref21">21</xref>]. Although burdensome, the group receiving daily surveys found them to be more useful than the group receiving biweekly surveys. This indicates that behavioral health monitoring should not depend on receiving frequent input from the participant. Therefore, being able to automate some or all the behavior monitoring could reduce participant burden.</p><p>In this study, we investigate how machine learning techniques can help identify sexual risk behaviors among SGM from passively sensed mobile phone data. Prior studies have predicted HIV risk using, for example, Twitter [<xref ref-type="bibr" rid="ref22">22</xref>], electronic health records [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], or smartphone survey data [<xref ref-type="bibr" rid="ref25">25</xref>]. 
Similarly, substance use risk has been detected using survey data [<xref ref-type="bibr" rid="ref26">26</xref>], cognitive test results [<xref ref-type="bibr" rid="ref27">27</xref>], Instagram (Meta) profile data [<xref ref-type="bibr" rid="ref28">28</xref>], and social media posts [<xref ref-type="bibr" rid="ref29">29</xref>]. To the best of our knowledge, this is the first study to correlate substance use and sexual risk behaviors among SGM using passively collected mobile phone data, which allows for frequent data collection with minimal effort required from the participant.</p><p>We first developed a mobile sensing app that tracks participants&#x2019; daily actions, such as their location, messaging, and app use. We then trained machine learning models to detect substance use and sexual risk behaviors from these data and evaluated their performance in predicting different behaviors. Finally, we analyzed how different risky behaviors manifest in mobile phone data.</p><p>The main contributions of this study are (1) demonstrating how passively collected mobile phone data can be used for behavioral risk prediction and identifying limitations of this approach; (2) evaluating which types of data should be collected to identify substance use and sexual risk behaviors by training machine learning models using different subsets of the data, as well as analyzing differences between participants&#x2019; data; and (3) determining how accurately different behaviors can be identified from mobile phone data.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Design and Eligibility</title><p>Data for this analysis were derived from a National Institutes of Health&#x2013;funded randomized comparison trial &#x2013; uTECH (ClinicalTrials.gov identifier: NCT04710901). 
To be eligible for the study, participants had to meet the following criteria: (1) be 18 to 29 years old, (2) be able to speak in English, (3) identify as a sexual or gender minority, (4) have had anal or oral sex with a man in the past 3 months, (5) have used substances (such as alcohol, marijuana, poppers [amyl nitrite], methamphetamines, heroin, cocaine, and ecstasy) in the past 3 months, (6) have had sex while using substances in the past 3 months, (7) be HIV negative or of unknown HIV status, (8) have used a dating app to meet sexual and substance use partners in the past 3 months, (9) own a smartphone, (10) reside in the United States, (11) be willing to participate in a 12-month study, and (12) be able to provide informed consent. Eligibility criteria, recruitment procedures, and overall study design are detailed comprehensively in the protocol paper [<xref ref-type="bibr" rid="ref30">30</xref>].</p></sec><sec id="s2-2"><title>Screening</title><p>All participants completed an initial screener survey that was hosted on Qualtrics, a web-based survey platform [<xref ref-type="bibr" rid="ref31">31</xref>]. The screener provided information about the study and included questions to determine eligibility. If an individual met eligibility criteria, the survey used branching logic to show additional screens to ask for contact information, including the phone number of their smartphone [<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>Research staff took precautions against fraudulent screeners by using survey metadata to identify noncellular phone numbers, virtual private network software, and high-risk IP addresses [<xref ref-type="bibr" rid="ref30">30</xref>]. Screeners that were suspected to be illegitimate were removed before enrollment in the study. 
SGM who completed the screener, met the eligibility criteria, and passed fraud detection checks were contacted to schedule a consent and onboarding session over the Zoom conferencing platform (Zoom Video Communications) [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec><sec id="s2-3"><title>Ethical Considerations</title><p>All study procedures and protocols were approved by the South Campus institutional review board of the University of California, Los Angeles (IRB#22&#x2010;000009). Informed consent was obtained during the onboarding process. During the informed consent process, the interviewer shared the consent document which provided details of what types of data (eg, keylogged data and Global Positioning System [GPS] data) were collected by the data collection app. The consent document also provided details on what kinds of data would not be collected (ie, photos and video). Benefits and risks of participation, monetary incentives, and data privacy protections were likewise detailed in the consent document. Participants were informed all data collected were protected from use as evidence in legal processes by a Certificate of Confidentiality granted by the National Institutes of Health (number: CC-OD-22&#x2010;3555). If the participant consented to participate, their agreement was recorded by the interviewer, and they were emailed a copy of the consent to keep for their records. Participants were compensated up to US $450 for completion of study-related activities (see <xref ref-type="fig" rid="figure1">Figure 1C</xref> for details).</p><p>All research staff were required to complete HIPAA (Health Insurance Portability and Accountability Act) and human subjects research training to gain access to participant materials. 
Any personally identifiable information or media collected unintentionally through data collection was redacted or removed entirely prior to storage in the study&#x2019;s database.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Screenshots of the eWellness app.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v17i1e68013_fig01.png"/></fig></sec><sec id="s2-4"><title>Data Collection App</title><p>Android [<xref ref-type="bibr" rid="ref33">33</xref>] users who completed the consent and enrollment process would then install the data collection app used in the study, eWellness. The app was based on the Aware Framework [<xref ref-type="bibr" rid="ref34">34</xref>], which has been used in numerous earlier eHealth studies, for example, to predict depression and anxiety [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], progression of Parkinson disease [<xref ref-type="bibr" rid="ref37">37</xref>], or alcohol use events [<xref ref-type="bibr" rid="ref38">38</xref>]. We adapted eWellness for Android phones to collect data on participants&#x2019; mobile phone use activities. We asked participants to give the app all the necessary permissions to passively collect keyboard and location data during participation. In addition to collecting keyboard data when participants typed text, the app collected information on which app they were typing the text in or, if they were using a browser, which website they were on.</p><p>The app also contained a substance use and sexual behavior survey, referred to as the &#x201C;wellness survey,&#x201D; (shown in <xref ref-type="fig" rid="figure1">Figure 1A</xref>) which the participants were asked to complete when they joined the study and once every 3 months after that. 
The survey was adapted from the US Centers for Disease Control and Prevention&#x2019;s HIV and pre-exposure prophylaxis (PrEP) clinical practice guidelines [<xref ref-type="bibr" rid="ref39">39</xref>]. The wellness survey contained questions on individual participants&#x2019; substance use and sexual risk behaviors, which yielded a total score indicating one&#x2019;s risk for HIV infection and PrEP eligibility. The questions and answer options are shown in Table S1 in the <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. To keep participants engaged, the app also provided other useful information, such as a map of nearby resources, such as pharmacies, HIV testing locations, and substance use harm reduction resources (<xref ref-type="fig" rid="figure1">Figure 1B</xref>), as well as the study timeline and incentives (<xref ref-type="fig" rid="figure1">Figure 1C</xref>).</p><p>The app sent the collected data to our secure server every 30 minutes whenever internet connection was available. Highly sensitive information, such as passwords, was filtered out and the research team had no access to them. The server-stored data did not contain other identifying information; only a randomly assigned participant identifier was included in the data. A document linking personal information necessary for participant follow-up was hosted on a Federal Information Processing Standards 140&#x2010;2 certified cloud-based file management platform, Box. The Box directory was accessible only through university-based single sign-on log-in with multifactor authentication, and only to institutional review board&#x2013;approved researchers with participant follow-up duties who had completed HIPAA and Good Clinical Practices trainings. 
All files containing participant information were protected with AES 256-bit encryption and data leak prevention and threat detection algorithms integrated into Box.</p></sec><sec id="s2-5"><title>Data Preprocessing</title><sec id="s2-5-1"><title>Keyboard Data</title><p>The eWellness app collected information about the currently active text field&#x2019;s contents on every keystroke. As a result, our database contained multiple rows of data for every full line of text that the participant typed. For example, typing &#x201C;Hello&#x201D; might have been stored in the database as rows containing text values: &#x201C;H,&#x201D; &#x201C;He,&#x201D; &#x201C;Hel,&#x201D; &#x201C;Hell,&#x201D; and &#x201C;Hello.&#x201D; In addition, the participant could have changed earlier parts of the text or used autocorrection, which means that this same text could have appeared as database rows: &#x201C;H,&#x201D; &#x201C;He,&#x201D; &#x201C;Hel,&#x201D; &#x201C;Helo,&#x201D; &#x201C;Hello.&#x201D; As a result, the earlier row was not always a substring of the next one.</p><p>To remove duplicate rows, we repeated the following steps for each individual participant&#x2019;s text data until there were no more rows to remove: (1) compare each row to the next row and if the first row is a substring of the second one, remove the first row, and (2) calculate the Levenshtein similarity between each row and the following row, and if the similarity is larger than 0.6, remove the first row.</p><p>Levenshtein similarity between two strings a and b is defined as:</p><disp-formula id="E1"> <label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">m</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where dist(a,b) is the Levenshtein distance [<xref ref-type="bibr" rid="ref40">40</xref>] which counts the minimum number of single-character modifications (insert, delete, substitute) that are necessary to make the strings identical:</p><disp-formula id="E2"> <label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" 
displaystyle="false"><mml:mtr><mml:mtd><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>s</mml:mi><mml:mi>a</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>a</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mn>0</mml:mn><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>b</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mn>0</mml:mn><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi>
<mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>b</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>a</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>b</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>a[0]</italic> refers to the first character of string <italic>a</italic>, and <italic>tail(a)</italic> refers to a substring of <italic>a</italic> which contains everything except the first character. We calculated the similarity score using the TextDistance Python library [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec><sec id="s2-5-2"><title>Application Data</title><p>For every row of text data, we had a package name of the app where the text was entered as well as the Uniform Resource Locator (URL) of the website if the participant was using a web browser. 
In many cases, online services can be accessed both through an app and through a website, so we combined these data sources by extracting the domain name from the URL and mapping the commonly appearing URLs to the corresponding package names using a manually curated list of domain name or app pairs. If a domain name was not in this list, the domain name itself was used as the package name. Apps and websites were treated in the same manner in analysis and model training.</p></sec><sec id="s2-5-3"><title>Location Data</title><p>The eWellness app saved GPS coordinates periodically whenever the phone moved to a different location. The app avoided unnecessary data collection to reduce battery consumption by only collecting location data when the phone was moving. This meant that if the participant remained in the same location, we did not receive location data until the participant started moving again. As we were only interested in locations in which the participant spent time rather than locations that the participant moved by, we removed data points where the participant was moving and only retained the last location once the movement ended.</p></sec></sec><sec id="s2-6"><title>Feature Extraction</title><p>After cleaning the dataset using the previously shown steps, we manually extracted various features (ie, computed independent variables) from each of the data sources to be used with the machine learning algorithms. These feature extraction techniques are described in the following subsections.</p><sec id="s2-6-1"><title>Text Data</title><p>In our dataset, participants were active for different numbers of days, and for individual participants, different days had sometimes vastly different amounts of text data. This made the direct application of traditional text processing techniques challenging. 
In addition, the words and phrases used by participants sometimes differed from the ones used by the general public, so for optimal results, the techniques had to be tailored for the study population. We only considered text data collected from social media, dating, or messaging apps/websites, as text data from other sources was found to contain more noise than useful information (for example, product names in shopping apps or location names in navigation apps).</p><p>The first set of features extracted from the text data was the frequencies of individual words used. Participants&#x2019; text was first lemmatized, which means that inflected word forms were transformed to their base forms (eg, &#x201C;walking&#x201D; &#x2192; &#x201C;walk,&#x201D; &#x201C;better&#x201D; &#x2192; &#x201C;good&#x201D;) to avoid having the same word appear in multiple forms in our set of features. Lemmatization was performed using WordNet Lemmatizer from the Natural Language Toolkit (NLTK) [<xref ref-type="bibr" rid="ref42">42</xref>]. Then, we removed words defined in NLTK&#x2019;s stop word list, commonly appearing placeholder texts (eg, &#x201C;Enter message,&#x201D; &#x201C;Say something&#x201D;), as well as words used by fewer than five people. 
For each remaining word, we calculated the frequency of word use:</p><disp-formula id="E3"> <label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>f</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>q</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>w</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">#</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>y</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>d</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">#</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>y</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>t</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The second set of text features only considered words and phrases associated with drug use or sexual behaviors. 
We used a phrase list, which had been found effective for identifying HIV risk behavior as well as substance use by an earlier study [<xref ref-type="bibr" rid="ref29">29</xref>], and we used our previously defined frequency formula to determine frequencies for both individual phrases and higher-level phrase categories (ie, different types of substance use or sexual behavior).</p><p>A third set of features was computed by Linguistic Inquiry and Word Count (LIWC) software [<xref ref-type="bibr" rid="ref43">43</xref>], which uses built-in dictionaries to capture social and psychological states. It computes features describing how much an individual talks about a variety of topics, such as money, physical intimacy, or leisure activities, and it also computes higher-level descriptive features to measure factors, such as analytical thinking, authenticity, and emotional tone. It has been used in numerous studies to, for example, analyze fake news [<xref ref-type="bibr" rid="ref44">44</xref>], social media posts [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], online reviews [<xref ref-type="bibr" rid="ref47">47</xref>], and college admission essays [<xref ref-type="bibr" rid="ref48">48</xref>]. We used it to generate features for each individual day, and we calculated the average across all days for each individual participant.</p><p>Our last set of text features was generated using the Bidirectional Encoder Representations from Transformers (BERT) language model [<xref ref-type="bibr" rid="ref49">49</xref>]. We used a model that was pretrained for sentiment analysis using Twitter data [<xref ref-type="bibr" rid="ref50">50</xref>], as we expected Twitter data to use language similar to that of other social media and messaging platforms. We removed the last fully connected layer of the model so that it could be used to generate text embeddings, and we applied it to each individual day of data. 
These text embeddings were then averaged across all days of data for individual participants.</p></sec><sec id="s2-6-2"><title>App Data</title><p>We captured app usage by looking at apps where the user wrote text. We generated one set of features by calculating how frequently each app was used:</p><disp-formula id="E4"><label> (4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>f</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>q</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mi>p</mml:mi><mml:mi>p</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">#</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>y</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>a</mml:mi><mml:mi>p</mml:mi><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">#</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>y</mml:mi><mml:mi>s</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>a</mml:mi><mml:mi>p</mml:mi><mml:mi>p</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>We also considered a subset of these frequency features that only contained social media, dating, and messaging apps, as we expected other types of apps to be less relevant to our prediction task. 
Other apps (eg, maps, music, or shopping) were expected to be noisy and therefore to have a negative effect on the model&#x2019;s predictive performance.</p></sec><sec id="s2-6-3"><title>Location Data</title><p>Before extracting features from location data, we clustered GPS coordinates for each individual participant by using the Mean-Shift algorithm [<xref ref-type="bibr" rid="ref51">51</xref>]. This algorithm moves all points repeatedly towards the mean value of their neighborhood (determined by window radius r) until all points have converged. Points that converge to the same coordinates are defined to belong in the same cluster, thus allowing the algorithm to find the appropriate number of clusters. As the generated clusters depend on the window size, we determined the appropriate size by visually inspecting the clustering results. We also assumed that the most visited location was the participant&#x2019;s home.</p><p>We then computed the features describing individuals&#x2019; mobility, such as how far from home they traveled, how many locations they visited per day, and how many of these locations were unique. These features were selected such that they were potentially related to participants' behavioral health outcomes either directly or indirectly. For example, several unique locations may be associated with having many partners, while having very few unique locations could be related to methamphetamine use due to the limited number of locations where the participant could safely use the drug. 
The full list of location-based features is shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec></sec><sec id="s2-7"><title>Model Training</title><p>We used Scikit-learn [<xref ref-type="bibr" rid="ref52">52</xref>] to train logistic regression [<xref ref-type="bibr" rid="ref53">53</xref>] and gradient boosting [<xref ref-type="bibr" rid="ref54">54</xref>] classification models to predict participants&#x2019; answers to each survey question. These models were chosen to represent a simple linear model as well as a more advanced nonlinear model. To determine which types of data could be useful for the prediction task, we trained separate models using individual data categories, such as location data, app use, and risky word use. We then evaluated combinations of these features, focusing on feature combinations that we believed would give a comprehensive view of the participant&#x2019;s activities without including redundant data (eg, not including social media apps and all apps in the same model). Models were evaluated using leave-one-out cross-validation due to the relatively small number of participants.</p><p>To address class imbalance, we calculated <italic>F</italic><sub>1</sub>-scores for the minority class and used gradient boosting which can better handle imbalanced datasets. After initial exploration of model hyperparameters, we selected values that provided stable performance. For logistic regression, we used default hyperparameters with max_iter=1000 to ensure convergence. For gradient boosting, we used a GradientBoostingClassifier with n_estimators=80 and default values for other hyperparameters.</p><p>We chose to focus on <italic>F</italic><sub>1</sub>-scores for model evaluation, as they balance precision and recall considerations. 
In the context of behavioral risk prediction for potential interventions, both types of misclassification errors have important implications: false positives might lead to unnecessary interventions, while false negatives could miss individuals who might benefit from support. The <italic>F</italic><sub>1</sub>-score helps balance these considerations for our exploratory analysis, though future applications may need to adjust classification thresholds based on specific intervention contexts.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Population</title><p>Sociodemographic, sexual risk, and substance use characteristics reported by participants (n=82) at baseline are summarized in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Self-reported participant characteristics at baseline.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristic</td><td align="left" valign="bottom">Total</td></tr></thead><tbody><tr><td align="left" valign="top">Participants, n (%)</td><td align="left" valign="top">82 (100)</td></tr><tr><td align="left" valign="top">Age, mean (SD)</td><td align="left" valign="top">25.2 (3.9)</td></tr><tr><td align="left" valign="top">Race and ethnicity, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>American Indian or Native Alaskan</td><td align="left" valign="top">2 (2.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Asian</td><td align="left" valign="top">13 (15.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black or African American</td><td align="left" valign="top">7 
(8.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hispanic or Latino</td><td align="left" valign="top">10 (12.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Middle Eastern and North African</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Two or more races</td><td align="left" valign="top">11 (13.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Non-Hispanic White</td><td align="left" valign="top">38 (46.3)</td></tr><tr><td align="left" valign="top">Gender identity, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cisgender man</td><td align="left" valign="top">54 (65.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transgender man</td><td align="left" valign="top">14 (17.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nonbinary</td><td align="left" valign="top">11 (13.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transgender woman</td><td align="left" valign="top">3 (3.7)</td></tr><tr><td align="left" valign="top">Sexual orientation, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gay</td><td align="left" valign="top">55 (67.1)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bisexual or Pansexual</td><td align="left" valign="top">20 (24.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Queer</td><td align="left" valign="top">5 (6.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Straight or heterosexual</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refuse to answer</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top">Education, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Less than college degree</td><td align="left" valign="top">33 (40.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>College degree or higher</td><td align="left" valign="top">48 (58.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refuse to answer</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top">US region, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>West</td><td align="left" valign="top">26 (31.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Northeast</td><td align="left" valign="top">25 (30.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>South</td><td align="left" valign="top">17 
(20.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Midwest</td><td align="left" valign="top">14 (17.1)</td></tr><tr><td align="left" valign="top">Sexual behavior, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Condomless receptive sex</td><td align="left" valign="top">61 (74.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Condomless insertive sex with HIV+ partner<break/>5+ times</td><td align="left" valign="top">3/74 (4.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HIV+ partners</td><td align="left" valign="top">8/74 (10.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6+ partners</td><td align="left" valign="top">41 (50)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>11+ partners</td><td align="left" valign="top">26 (32)</td></tr><tr><td align="left" valign="top">Substance use, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Methamphetamine use</td><td align="left" valign="top">15 (18.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Injection drug use</td><td align="left" valign="top">3 (3.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Injects cocaine</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Injects in group</td><td align="left" valign="top">2 (2.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Shares injection equipment</td><td align="left" valign="top">1 (1.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Injects methamphetamine</td><td align="left" valign="top">2 (2.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>In substance use treatment program</td><td align="left" valign="top">0 (0)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Data Statistics</title><p>We collected data from participants between November 10, 2021, and April 15, 2024. Dataset statistics are shown in <xref ref-type="table" rid="table2">Table 2</xref>. 
Among all the apps, we manually identified 68 social media, dating, and messaging apps which were later used to analyze participants&#x2019; messaging data. It should be noted that apps included unique websites as well (grouped by domain name).</p><p>We used data for all participants who had at least 30 days of data available. If a participant&#x2019;s answer to a survey question was &#x201C;Decline to answer&#x201D; or &#x201C;I don&#x2019;t know,&#x201D; this answer was not included in the model training or evaluation. This did not, however, exclude their other survey answers from being used. Statistics for survey responses are shown in <xref ref-type="table" rid="table1">Table 1</xref> (full questions and answer options are shown in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Text, location, and app use summary statistics.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Total</td><td align="left" valign="bottom">Mean</td><td align="left" valign="bottom">Median</td><td align="left" valign="bottom">SD</td><td align="left" valign="bottom">Min</td><td align="left" valign="bottom">Max</td></tr></thead><tbody><tr><td align="left" valign="top">Lines of text</td><td align="left" valign="top">4,848,639</td><td align="left" valign="top">59,129.7</td><td align="left" valign="top">45,758</td><td align="left" valign="top">43,832.2</td><td align="left" valign="top">2529</td><td align="left" valign="top">195,797</td></tr><tr><td align="left" valign="top">Locations</td><td align="left" valign="top">1,607,149</td><td align="left" valign="top">16,917.4</td><td align="left" valign="top">6071</td><td align="left" valign="top">26,793</td><td align="left" valign="top">509</td><td align="left" valign="top">169,334</td></tr><tr><td align="left" valign="top">Unique 
apps</td><td align="left" valign="top">2248</td><td align="left" valign="top">92.1</td><td align="left" valign="top">89.5</td><td align="left" valign="top">46.4</td><td align="left" valign="top">20</td><td align="left" valign="top">289</td></tr></tbody></table></table-wrap></sec><sec id="s3-3"><title>Model Performance</title><p>We trained classification models to predict answers to each question. As some questions had partially overlapping answer options, we split them into multiple distinct questions. For example, the answer options for substance use included methamphetamine use, injection drug use, and both, so we split it into two questions: methamphetamine use and injection drug use. In addition, some questions had a very low number of positive responses (for example, only 2 participants were in a substance use treatment program). As a result, the focus of our discussion will be on the three questions which we determined to be the most informative: methamphetamine use, having 6 or more male sexual partners, and receptive anal sex without a condom.</p><p>Results for predicting survey responses using both individual feature types as well as combinations of them are shown in <xref ref-type="table" rid="table3">Table 3</xref>. Feature combinations were selected both based on their individual results and based on whether they were presumed to provide nonoverlapping information. For example, we avoided combining highly correlated feature groups, such as social media apps and all apps, in the same model.</p><p>As the results show, methamphetamine use could be predicted well using just the text data. The word frequency feature with the gradient boosting model worked best. Predicting having many partners worked reasonably well when combining all feature types. 
Predictive models were only moderately successful in determining whether the participant had receptive condomless anal sex.</p><p>Combining multiple feature types rarely improved the performance by a noticeable amount. This could be because in many cases, the feature groups might provide redundant information, so using only one highly informative feature group was enough. In addition, increasing the number of features could lead to overfitting, as the number of features can become much larger than the number of participants.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p><italic>F</italic><sub>1</sub>-scores for predicting answers to survey questions. <italic>F</italic><sub>1</sub>-score was calculated for the less frequent response, which in most cases was the &#x201C;positive&#x201D; answer (answer frequencies are shown in <xref ref-type="table" rid="table1">Table 1</xref>). The first value shows the score using logistic regression and the second value shows the score using a gradient-boosting classifier.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2"/><td align="left" valign="bottom" rowspan="2">Methamphetamine use</td><td align="left" valign="bottom" colspan="2">Sexual behavior</td></tr><tr><td align="left" valign="top">6+ partners</td><td align="left" valign="top">Condomless receptive sex</td></tr></thead><tbody><tr><td align="left" valign="top">Social apps</td><td align="left" valign="top">0.32/0.40</td><td align="left" valign="top">0.49/0.53</td><td align="left" valign="top">0.35/0.16</td></tr><tr><td align="left" valign="top">All apps</td><td align="left" valign="top">0.31/0.08</td><td align="left" valign="top">0.56/0.60</td><td align="left" valign="top">0.29/0.33</td></tr><tr><td align="left" valign="top">Location</td><td align="left" valign="top">0.00/0.25</td><td align="left" valign="top">0.46/0.39</td><td align="left" 
valign="top">0.00/0.00</td></tr><tr><td align="left" valign="top">Risky words</td><td align="left" valign="top">0.48/0.52</td><td align="left" valign="top">0.62/0.65</td><td align="left" valign="top">0.29/0.12</td></tr><tr><td align="left" valign="top">All words</td><td align="left" valign="top">0.29/0.83<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">0.57/0.23</td><td align="left" valign="top">0.22/0.11</td></tr><tr><td align="left" valign="top">LIWC<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">0.52/0.47</td><td align="left" valign="top">0.61/0.63</td><td align="left" valign="top">0.31/0.26</td></tr><tr><td align="left" valign="top">BERT</td><td align="left" valign="top">0.34/0.34</td><td align="left" valign="top">0.67/0.64</td><td align="left" valign="top">0.30/0.38<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Risky words, LIWC</td><td align="left" valign="top">0.50/0.67</td><td align="left" valign="top">0.62/0.61</td><td align="left" valign="top">0.32/0.25</td></tr><tr><td align="left" valign="top">Social apps, Risky words</td><td align="left" valign="top">0.48/0.38</td><td align="left" valign="top">0.58/0.65</td><td align="left" valign="top">0.28/0.15</td></tr><tr><td align="left" valign="top">Social apps, Risky words, LIWC</td><td align="left" valign="top">0.48/0.67</td><td align="left" valign="top">0.60/0.55</td><td align="left" valign="top">0.33/0.31</td></tr><tr><td align="left" valign="top">Social apps, Risky words, Location</td><td align="left" valign="top">0.52/0.48</td><td align="left" valign="top">0.61/0.69<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">0.24/0.17</td></tr><tr><td align="left" valign="top">Social apps, Risky words, Location, LIWC</td><td align="left" valign="top">0.52/0.67</td><td align="left" valign="top">0.61/0.64</td><td 
align="left" valign="top">0.19/0.07</td></tr><tr><td align="left" valign="top">All</td><td align="left" valign="top">0.42/0.58</td><td align="left" valign="top">0.62/0.49</td><td align="left" valign="top">0.10/0.21</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Highest predictive result for the corresponding outcome.</p></fn><fn id="table3fn2"><p><sup>b</sup>LIWC: Linguistic Inquiry and Word Count.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Feature Analysis</title><p>Next, we analyzed how the participant data differed depending on the survey responses. We show the differences based on independent <italic>t</italic> tests for the most predictive tasks (<xref ref-type="fig" rid="figure2">Figures 2</xref><xref ref-type="fig" rid="figure3"/>-<xref ref-type="fig" rid="figure4">4</xref>), which were methamphetamine use and having &#x2265;6 partners.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Differences in app use among different groups. The x-axis represents the percentage of days when the participant communicated using an app from a certain category. The blue bar corresponds to &#x201C;Yes&#x201D; methamphetamine use or &#x201C;Yes&#x201D; had &#x2265;6 partners, respectively.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v17i1e68013_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Differences in risky word use among different groups. The x-axis represents the percentage of days when the participant used words or phrases from a certain category. 
The blue bar corresponds to &#x201C;Yes&#x201D; methamphetamine use or &#x201C;Yes&#x201D; had &#x2265;6 partners, respectively.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v17i1e68013_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Differences in location data among different groups. Values have been scaled such that the largest individual value for each feature becomes 1 to be able to show all values in the same figure. The blue bar corresponds to &#x201C;Yes&#x201D; methamphetamine use or &#x201C;Yes&#x201D; had &#x2265;6 partners, respectively.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v17i1e68013_fig04.png"/></fig><sec id="s3-4-1"><title>App Use</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows how frequently participants used apps from different categories. We considered any apps that are used for communicating with other people and divided them into 3 categories: messaging apps (eg, Messages, WhatsApp, and Telegram), social media apps (eg, Facebook, Instagram, Twitter [currently known as X]), and dating apps (eg, Grindr, Tinder, and Adam4Adam).</p><p>Methamphetamine users or participants who had &#x2265;6 partners were more likely to use dating apps than nonusers (<italic>P</italic>=.01 and <italic>P</italic>=.02, respectively). However, differences in the use of social media and messaging apps were not statistically significant for these groups.</p></sec><sec id="s3-4-2"><title>Risky Words</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows differences in risky word use. We divided the list of risky words into sex-related and drug-related words.</p><p>Methamphetamine users and individuals with 6+ partners were both more likely to use sex-related words, with significance levels of <italic>P</italic>=.002 and <italic>P</italic>=.001, respectively. 
However, the difference in drug-related word usage was not statistically significant for methamphetamine users and individuals with 6+ partners (<italic>P</italic>=.12 and .21, respectively).</p></sec><sec id="s3-4-3"><title>Location</title><p><xref ref-type="fig" rid="figure4">Figure 4</xref> shows how location data differed for different groups. Due to the large number of location-based features, we chose a smaller subset of features that contained less overlapping information.</p><p>Methamphetamine users were less likely to spend time over 50 miles from home, although the difference was not statistically significant (<italic>P</italic>=.14). People with 6+ partners were found to travel further than those with 5 or fewer partners (<italic>P</italic>=.03).</p></sec><sec id="s3-4-4"><title>LIWC</title><p>Lastly, we compared LIWC features among different groups (<xref ref-type="fig" rid="figure5">Figure 5</xref>). Again, due to the large number of distinct features, we show results only for some of the super-categories which we expected to show differences.</p><p>Methamphetamine users were more likely to use social (<italic>P</italic>=.002) and affect words (<italic>P</italic>=.003) and less likely to use drive-related words (<italic>P</italic>=.02). People having 6 or more partners were more likely to use social, affect, and cognitive process-related words (<italic>P</italic>=.003 and .004, respectively).</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Differences in Linguistic Inquiry and Word Count features among different groups. 
Original values have been scaled to fit in the same figure.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ojphi_v17i1e68013_fig05.png"/></fig></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>In this paper, we have shown that mobile sensing data can be used to identify multiple risk behaviors of SGM in our study sample. More specifically, our participants&#x2019; text and location data were highly informative of methamphetamine use and having 6 or more sexual partners in three months.</p><p>In addition to determining which behaviors can be predicted, our second goal was to determine what data is useful for these predictions. We have shown that text-based features like all words, risky words, and BERT were the most informative for most behaviors, which was an expected result because participants might, for example, look for partners on dating apps or discuss substance use in private messages with other people. This is aligned with previous research [<xref ref-type="bibr" rid="ref29">29</xref>] showing that certain types of substance use may be predicted from social media messaging data.</p><p>In addition, we have shown that more recent language modeling techniques, such as BERT, can often provide results similar to those of the traditional techniques based on predetermined word lists and word frequencies. However, the more abstract nature of these representations may complicate the interpretation of the results, as individual values do not have a human-interpretable meaning. This lack of human interpretability might not be a limitation in digital health applications, where the emphasis is on achieving high precision and recall for effective intervention delivery rather than on understanding the underlying phenomena. 
On the other hand, BERT representations can also help improve privacy, as they do not reveal which exact words the participants used. Due to the small number of participants, training a language model using our dataset was not feasible, so we relied on a model that had been trained on Twitter (currently known as X) data. While we expect the language use to be mostly similar across datasets, training a language model using data from the target population could improve the results if enough data were available, as people might use different words and phrases on Twitter compared to dating apps or private messages.</p><p>We were also able to detect behavioral differences between groups of people with different survey responses. For example, methamphetamine users were more likely to use sex-related words in their messages. Earlier research has shown that methamphetamine users have more sexual partners [<xref ref-type="bibr" rid="ref55">55</xref>] and may be engaged in more risky sexual behavior, such as condomless anal sex, although it is not clear whether the relationship between methamphetamine use and condomless anal sex is causal [<xref ref-type="bibr" rid="ref56">56</xref>]. Methamphetamine users were also less likely to travel far from home. This may be related to lower household income level [<xref ref-type="bibr" rid="ref57">57</xref>], which could make traveling far unaffordable, or paranoia induced by methamphetamine use. They also used more affective and social words and fewer drive-related words, which may be partly related to the higher prevalence of co-occurring mental health problems [<xref ref-type="bibr" rid="ref57">57</xref>]. 
These insights could be valuable in personalizing methamphetamine use harm reduction and treatment by crafting prevention messages that frame behavioral modification as an affective or social process, instead of, for example, focusing on motivation.</p><p>We also found that participants with 6 or more sex partners were more active users of all types of social apps (messaging, social media, and dating). Earlier studies have shown that users of geosocial networking apps, such as Grindr, have more sexual partners in general [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>], and SGM with more partners have larger social networks [<xref ref-type="bibr" rid="ref60">60</xref>], which may explain the more frequent use of social apps. Being more social may similarly explain more time spent away from home and in many different locations. People with 6 or more sexual partners also used more sex- and drug-related words. Substance use has previously been found to be associated with a larger number of sexual partners [<xref ref-type="bibr" rid="ref61">61</xref>]. These findings point toward personalized prevention that leverages both social media platforms and geolocation data. Partnerships between public health and dating apps to date have been limited to advertising and the addition of profile features (eg, fields for PrEP use). Researchers and public health practitioners might explore partnerships that allow users to opt into health promotion campaigns that are personalized by app use. Sexual health and substance use harm reduction should be pushed in relation to users&#x2019; geolocation.</p></sec><sec id="s4-2"><title>Comparison With Previous Work</title><p>The closest work to ours identified HIV as well as amphetamine, methamphetamine, and tetrahydrocannabinol use from social media messaging data [<xref ref-type="bibr" rid="ref29">29</xref>]. 
Our work differs from this by using a wider range of data sources collected through participants&#x2019; mobile devices. For example, we used text typed in any mobile app and website, which allowed us to identify sexual risk behaviors in traditional messaging apps and less common dating apps in addition to the most popular social media apps. In addition, we used location data, which allows us to analyze participants&#x2019; daily movement patterns and their relation to risk behaviors. We also attempted to identify a wider range of risk behaviors, especially related to sexual health. The only shared prediction target between these 2 studies was methamphetamine use; the earlier paper was able to predict with an <italic>F</italic><sub>1</sub>-score of 0.85, which was very close to our result (0.83). In summary, our work is aligned with and expands on previous research on this topic.</p><p>Another similar study predicted alcohol, tobacco, prescription drug, and illegal drug use from Instagram data [<xref ref-type="bibr" rid="ref28">28</xref>]. They were able to detect alcohol use with statistical significance, but they had less success in predicting other types of substance use. Our better prediction results may be attributed to having access to more personal messaging data, as many people may avoid discussing substance use on public platforms. This shows that choosing the appropriate data collection methods is very important for accurate results.</p><p>Other studies have implemented personalized MSM interventions using survey data [<xref ref-type="bibr" rid="ref62">62</xref>], identified the efficacy of MSM-targeted mobile app interventions [<xref ref-type="bibr" rid="ref63">63</xref>], or evaluated the feasibility and acceptability of mobile sensing among MSM [<xref ref-type="bibr" rid="ref64">64</xref>-<xref ref-type="bibr" rid="ref66">66</xref>]. 
However, these studies have not evaluated whether mobile sensing data can be used to inform and personalize interventions, nor have they incorporated a broader SGM population inclusive of transgender and gender-diverse individuals, which were the goals of our study.</p></sec><sec id="s4-3"><title>Limitations</title><p>A limitation of our study was that we only included participants who had an Android smartphone. We chose to only include Android users because iPhones have more restrictions on what data can be collected, and therefore collecting text data would have been unfeasible based on the budget for this project. The demographic differences between Android and iPhone users have been previously described, with iPhone users more likely to be female, younger, more concerned about their smartphone as a &#x201C;status object,&#x201D; and displaying lower levels of honesty and humility and higher levels of emotionality [<xref ref-type="bibr" rid="ref67">67</xref>]. The restriction of our study population to Android users only may limit the generalizability of results from this formative study. Potential participants had to be excluded from this study either because of the challenges with iOS adaptation (n=324) or due to missing data from Android users (n=11), which may skew the demographics to some extent, again impacting generalizability. In addition, as the data was collected using personal devices, there were some interruptions in data collection. For example, some participants turned off or deleted the app during the study while others upgraded to a new phone without reinstalling the app. Some Android phones were found to have aggressive battery-saving functionality which occasionally turned off the data collection. 
To avoid data collection issues, we kept track of when each participant&#x2019;s device had last sent us data and contacted participants after three missing days to make sure data collection could be resumed.</p><p>Our machine-learning model was trained on English-language text data only, which limits its ability to accurately interpret text written in other languages or in culturally specific dialects and vernaculars. While the ability to speak and understand English was one of our eligibility criteria, we did not exclude participants who are multilingual (ie, speak one or more languages other than English), and the model may not accurately process multilingual input. This limitation highlights the need for our future work to incorporate multilingual natural language processing approaches to better reflect diversity of language use.</p><p>Another limitation was that the text data only included what the participants typed on their phones. This approach may miss the context of some messages, as the responses are not collected. It could be informative to know what content participants consumed online or what messages they received from others. In addition, participants might have messaged with people using multiple devices (eg, computer or tablet in addition to their phone), so our data collection approach might not have been able to track all social media usage and messaging for some participants.</p><p>Finally, some of the outcomes that we set out to predict were very infrequent, which made the task impossible. For example, only two of our participants were in a substance use treatment program, which was not enough for training and evaluating a machine learning model. 
Therefore, we had to focus on questions that had a reasonable number of both positive and negative responses.</p></sec><sec id="s4-4"><title>Conclusions</title><p>In this study, we have shown that certain types of substance use and sexual risk behaviors can be determined from data that is collected from smartphones passively. Next, we demonstrated these data to be highly predictive of self-reported methamphetamine use and having 6 or more sexual partners. If integrated into downstream eHealth/mHealth interventions, passive mobile-sensing could be used to personalize interventions for SGM, which may reduce the burden of participating in intervention programs, as the daily behaviors can be tracked with minimal effort from the participant. However, further work is still needed to evaluate the efficacy of interventions based on automatic behavior tracking.</p><p>Our future work will explore providing personalized interventions using predictive models to determine which types of interventions may be appropriate. We will, for example, investigate sending participants messages and resources that are delivered &#x201C;just in time,&#x201D; such as providing information about PrEP to individuals who may be at elevated HIV risk based on their substance use or sexual behavior but who are not yet taking it. This work will include developing interpretation guidelines for both automated systems and health care providers who may use these predictions in clinical settings.</p></sec></sec></body><back><ack><p>This research was supported through a grant from the National Institute on Drug Abuse (DP2DA049296; Holloway). CC is supported as a RISE Fellow by the National Institutes of Health (R25GM061222). 
Generative artificial intelligence was not used in any portion of the current manuscript.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to the formative nature of the research, small sample size, and plethora of private text-based information that could increase risk of identifiability of study participants, but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>MB: Conceptualization, Methodology, Software, Formal analysis, Data curation, Visualization, Writing &#x2013; original draft. KK: Conceptualization, Methodology, Software, Formal analysis, Data curation, Visualization, Writing &#x2013; original draft. CH: Investigation, Data curation, Project administration, Writing &#x2013; review &#x0026; editing. CC: Investigation, Data curation, Formal analysis, Project administration, Writing &#x2013; review &#x0026; editing. ESCW: Investigation, Data curation, Writing &#x2013; review &#x0026; editing. CB: Investigation, Data curation, Writing &#x2013; review &#x0026; editing. AA: Investigation, Data curation, Writing &#x2013; review &#x0026; editing. EY: Writing &#x2013; review &#x0026; editing. IWH: Conceptualization, Funding acquisition, Supervision, Project administration, Writing &#x2013; review &#x0026; editing. 
MS: Conceptualization, Methodology, Supervision, Writing &#x2013; review &#x0026; editing.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb2">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb3">IDU</term><def><p>Intravenous Drug Use</p></def></def-item><def-item><term id="abb4">LIWC</term><def><p>Linguistic Inquiry and Word Count</p></def></def-item><def-item><term id="abb5">MSM</term><def><p>men who have sex with men</p></def></def-item><def-item><term id="abb6">NLTK</term><def><p>Natural Language Toolkit</p></def></def-item><def-item><term id="abb7">PrEP</term><def><p>pre-exposure prophylaxis</p></def></def-item><def-item><term id="abb8">SGM</term><def><p>sexual and gender minority</p></def></def-item><def-item><term id="abb9">STI</term><def><p>sexually transmitted infection</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Medley</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lipari</surname><given-names>RN</given-names> </name><name name-style="western"><surname>Bose</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cribb</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Kroutil</surname><given-names>LA</given-names> </name><name name-style="western"><surname>McHenry</surname><given-names>G</given-names> </name></person-group><article-title>Sexual orientation and estimates of adult substance use and mental health: Results from the 2015 National Survey on Drug Use and Health</article-title><source>NSDUH Data 
Review</source><year>2016</year><access-date>2025-08-08</access-date><volume>10</volume><fpage>1</fpage><lpage>54</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.samhsa.gov/data/sites/default/files/NSDUH-SexualOrientation-2015/NSDUH-SexualOrientation-2015/NSDUH-SexualOrientation-2015.htm">https://www.samhsa.gov/data/sites/default/files/NSDUH-SexualOrientation-2015/NSDUH-SexualOrientation-2015/NSDUH-SexualOrientation-2015.htm</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bourne</surname><given-names>A</given-names> </name><name name-style="western"><surname>Weatherburn</surname><given-names>P</given-names> </name></person-group><article-title>Substance use among men who have sex with men: patterns, motivations, impacts and intervention development need</article-title><source>Sex Transm Infect</source><year>2017</year><month>08</month><volume>93</volume><issue>5</issue><fpage>342</fpage><lpage>346</lpage><pub-id pub-id-type="doi">10.1136/sextrans-2016-052674</pub-id><pub-id pub-id-type="medline">28400466</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="report"><article-title>HIV surveillance report</article-title><year>2017</year><access-date>2025-08-08</access-date><volume>29</volume><publisher-name>Centers for Disease Control and Prevention</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://stacks.cdc.gov/view/cdc/60911">https://stacks.cdc.gov/view/cdc/60911</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>HIV.gov</collab></person-group><source>Who Is at Risk for HIV?</source><access-date>2023-02-14</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.hiv.gov/hiv-basics/overview/about-hiv-and-aids/who-is-at-risk-for-hiv">https://www.hiv.gov/hiv-basics/overview/about-hiv-and-aids/who-is-at-risk-for-hiv</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>Centers for Disease Control and Prevention</collab></person-group><article-title>Diagnoses, deaths, and prevalence of HIV in the United States and 6 territories and freely associated states, 2022</article-title><year>2024</year><access-date>2024-10-15</access-date><volume>35</volume><publisher-name>HIV Surveillance Report</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="http://www.cdc.gov/hiv-data/nhss/hiv-diagnoses-deaths-prevalence.html">http://www.cdc.gov/hiv-data/nhss/hiv-diagnoses-deaths-prevalence.html</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Parsons</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Rendina</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Moody</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Ventuneac</surname><given-names>A</given-names> </name><name name-style="western"><surname>Grov</surname><given-names>C</given-names> </name></person-group><article-title>Syndemic production and sexual compulsivity/hypersexuality in highly sexually active gay and bisexual men: further evidence for a three group conceptualization</article-title><source>Arch Sex Behav</source><year>2015</year><month>10</month><volume>44</volume><issue>7</issue><fpage>1903</fpage><lpage>1913</lpage><pub-id pub-id-type="doi">10.1007/s10508-015-0574-5</pub-id><pub-id pub-id-type="medline">26081246</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Clair</surname><given-names>S</given-names> </name></person-group><article-title>Syndemics and public health: reconceptualizing disease in bio-social context</article-title><source>Med Anthropol Q</source><year>2003</year><month>12</month><volume>17</volume><issue>4</issue><fpage>423</fpage><lpage>441</lpage><pub-id pub-id-type="doi">10.1525/maq.2003.17.4.423</pub-id><pub-id pub-id-type="medline">14716917</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Brooks</surname><given-names>VR</given-names> </name></person-group><source>Minority Stress and Lesbian Women</source><year>1981</year><publisher-name>Lexington Books</publisher-name><pub-id pub-id-type="other">0669045004</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meyer</surname><given-names>IH</given-names> </name></person-group><article-title>Prejudice, social stress, and mental health in lesbian, gay, and bisexual populations: conceptual issues and research evidence</article-title><source>Psychol Bull</source><year>2003</year><month>09</month><volume>129</volume><issue>5</issue><fpage>674</fpage><lpage>697</lpage><pub-id pub-id-type="doi">10.1037/0033-2909.129.5.674</pub-id><pub-id pub-id-type="medline">12956539</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gonzales</surname><given-names>G</given-names> </name><name name-style="western"><surname>Henning-Smith</surname><given-names>C</given-names> </name></person-group><article-title>Health 
disparities by sexual orientation: Results and implications from the behavioral risk factor surveillance system</article-title><source>J Community Health</source><year>2017</year><month>12</month><volume>42</volume><issue>6</issue><fpage>1163</fpage><lpage>1172</lpage><pub-id pub-id-type="doi">10.1007/s10900-017-0366-z</pub-id><pub-id pub-id-type="medline">28466199</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Budge</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Adelson</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Howard</surname><given-names>KAS</given-names> </name></person-group><article-title>Anxiety and depression in transgender individuals: the roles of transition status, loss, social support, and coping</article-title><source>J Consult Clin Psychol</source><year>2013</year><month>06</month><volume>81</volume><issue>3</issue><fpage>545</fpage><lpage>557</lpage><pub-id pub-id-type="doi">10.1037/a0031774</pub-id><pub-id pub-id-type="medline">23398495</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Felner</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Wisdom</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Stress, coping, and context: Examining substance use among LGBTQ young adults with probable substance use disorders</article-title><source>Psychiatr Serv</source><year>2020</year><month>02</month><day>1</day><volume>71</volume><issue>2</issue><fpage>112</fpage><lpage>120</lpage><pub-id pub-id-type="doi">10.1176/appi.ps.201900029</pub-id><pub-id 
pub-id-type="medline">31640522</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Knight</surname><given-names>R</given-names> </name><name name-style="western"><surname>Karamouzian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Carson</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Interventions to address substance use and sexual risk among gay, bisexual and other men who have sex with men who use methamphetamine: A systematic review</article-title><source>Drug Alcohol Depend</source><year>2019</year><month>01</month><day>1</day><volume>194</volume><fpage>410</fpage><lpage>429</lpage><pub-id pub-id-type="doi">10.1016/j.drugalcdep.2018.09.023</pub-id><pub-id pub-id-type="medline">30502543</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meiksin</surname><given-names>R</given-names> </name><name name-style="western"><surname>Melendez-Torres</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Falconer</surname><given-names>J</given-names> </name><name name-style="western"><surname>Witzel</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Weatherburn</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bonell</surname><given-names>C</given-names> </name></person-group><article-title>eHealth interventions to address sexual health, substance use, and mental health among men who have sex with men: Systematic review and synthesis of process evaluations</article-title><source>J Med Internet Res</source><year>2021</year><month>04</month><day>23</day><volume>23</volume><issue>4</issue><fpage>e22477</fpage><pub-id 
pub-id-type="doi">10.2196/22477</pub-id><pub-id pub-id-type="medline">33890855</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flores</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Santos</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Makofane</surname><given-names>K</given-names> </name><name name-style="western"><surname>Arreola</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ayala</surname><given-names>G</given-names> </name></person-group><article-title>Availability and use of substance abuse treatment programs among substance-using men who have sex with men worldwide</article-title><source>Subst Use Misuse</source><year>2017</year><month>04</month><day>16</day><volume>52</volume><issue>5</issue><fpage>666</fpage><lpage>673</lpage><pub-id pub-id-type="doi">10.1080/10826084.2016.1253744</pub-id><pub-id pub-id-type="medline">28139146</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>Substance Abuse and Mental Health Services Administration</collab></person-group><article-title>Key substance use and mental health indicators in the United States: Results from the 2020 National Survey on Drug Use and Health</article-title><year>2021</year><access-date>2025-04-29</access-date><publisher-name>Behavioral Health Statistics and Quality, Substance Abuse and Mental Health Services Administration</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.samhsa.gov/data/sites/default/files/reports/rpt35325/NSDUHFFRPDFWHTMLFiles2020/2020NSDUHFFR1PDFW102121.pdf">https://www.samhsa.gov/data/sites/default/files/reports/rpt35325/NSDUHFFRPDFWHTMLFiles2020/2020NSDUHFFR1PDFW102121.pdf</ext-link></comment></nlm-citation></ref><ref 
id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cascalheira</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Helminen</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Shaw</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Scheer</surname><given-names>JR</given-names> </name></person-group><article-title>Structural determinants of tailored behavioral health services for sexual and gender minorities in the United States, 2010 to 2020: a panel analysis</article-title><source>BMC Public Health</source><year>2022</year><month>10</month><day>12</day><volume>22</volume><issue>1</issue><fpage>1908</fpage><pub-id pub-id-type="doi">10.1186/s12889-022-14315-1</pub-id><pub-id pub-id-type="medline">36224564</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sullivan</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Driggers</surname><given-names>R</given-names> </name><name name-style="western"><surname>Stekler</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>Usability and acceptability of a mobile comprehensive HIV prevention app for men who have sex with men: A pilot study</article-title><source>JMIR Mhealth Uhealth</source><year>2017</year><month>03</month><day>9</day><volume>5</volume><issue>3</issue><fpage>e26</fpage><pub-id pub-id-type="doi">10.2196/mhealth.7199</pub-id><pub-id pub-id-type="medline">28279949</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dillingham</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Ingersoll</surname><given-names>K</given-names> </name><name name-style="western"><surname>Flickinger</surname><given-names>TE</given-names> </name><etal/></person-group><article-title>PositiveLinks: A mobile health intervention for retention in HIV care and clinical outcomes with 12-month follow-up</article-title><source>AIDS Patient Care STDS</source><year>2018</year><month>06</month><volume>32</volume><issue>6</issue><fpage>241</fpage><lpage>250</lpage><pub-id pub-id-type="doi">10.1089/apc.2017.0303</pub-id><pub-id pub-id-type="medline">29851504</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ingersoll</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Dillingham</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Hettema</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Pilot RCT of bidirectional text messaging for ART adherence among nonurban substance users with HIV</article-title><source>Health Psychol</source><year>2015</year><month>12</month><volume>34S</volume><issue>0</issue><fpage>1305</fpage><lpage>1315</lpage><pub-id pub-id-type="doi">10.1037/hea0000295</pub-id><pub-id pub-id-type="medline">26651472</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Swendeman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ramanathan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Baetscher</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Smartphone self-monitoring to support self-management among people living with HIV: perceived benefits and theory of change from a mixed-methods randomized pilot 
study</article-title><source>J Acquir Immune Defic Syndr</source><year>2015</year><month>05</month><day>1</day><volume>69</volume><issue>Suppl 1</issue><fpage>S80</fpage><lpage>S91</lpage><pub-id pub-id-type="doi">10.1097/QAI.0000000000000570</pub-id><pub-id pub-id-type="medline">25867783</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name></person-group><article-title>Toward automating HIV identification: Machine learning for rapid identification of HIV-related social media data</article-title><source>J Acquir Immune Defic Syndr</source><year>2017</year><month>02</month><day>1</day><volume>74</volume><issue>Suppl 2</issue><fpage>S128</fpage><lpage>S131</lpage><pub-id pub-id-type="doi">10.1097/QAI.0000000000001240</pub-id><pub-id pub-id-type="medline">28079723</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krakower</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Gruber</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Development and validation of an automated HIV prediction algorithm to identify candidates for pre-exposure prophylaxis: a modelling study</article-title><source>Lancet HIV</source><year>2019</year><month>10</month><volume>6</volume><issue>10</issue><fpage>e696</fpage><lpage>e704</lpage><pub-id pub-id-type="doi">10.1016/S2352-3018(19)30139-0</pub-id><pub-id pub-id-type="medline">31285182</pub-id></nlm-citation></ref><ref 
id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marcus</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Hurley</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Krakower</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Alexeeff</surname><given-names>S</given-names> </name><name name-style="western"><surname>Silverberg</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Volk</surname><given-names>JE</given-names> </name></person-group><article-title>Use of electronic health record data and machine learning to identify candidates for HIV pre-exposure prophylaxis: a modelling study</article-title><source>Lancet HIV</source><year>2019</year><month>10</month><volume>6</volume><issue>10</issue><fpage>e688</fpage><lpage>e695</lpage><pub-id pub-id-type="doi">10.1016/S2352-3018(19)30137-7</pub-id><pub-id pub-id-type="medline">31285183</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wray</surname><given-names>TB</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ke</surname><given-names>J</given-names> </name><name name-style="western"><surname>P&#x00E9;rez</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Carr</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Monti</surname><given-names>PM</given-names> </name></person-group><article-title>Using smartphone survey data and machine learning to identify situational and contextual risk factors for HIV risk behavior among men who have sex with men who are not on PrEP</article-title><source>Prev 
Sci</source><year>2019</year><month>08</month><volume>20</volume><issue>6</issue><fpage>904</fpage><lpage>913</lpage><pub-id pub-id-type="doi">10.1007/s11121-019-01019-z</pub-id><pub-id pub-id-type="medline">31073817</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jing</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Analysis of substance use and its outcomes by machine learning I. Childhood evaluation of liability to substance use disorder</article-title><source>Drug Alcohol Depend</source><year>2020</year><month>01</month><day>1</day><volume>206</volume><fpage>107605</fpage><pub-id pub-id-type="doi">10.1016/j.drugalcdep.2019.107605</pub-id><pub-id pub-id-type="medline">31839402</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahn</surname><given-names>WY</given-names> </name><name name-style="western"><surname>Ramesh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Moeller</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Vassileva</surname><given-names>J</given-names> </name></person-group><article-title>Utility of machine-learning approaches to identify behavioral markers for substance use disorders: Impulsivity dimensions as predictors of current cocaine dependence</article-title><source>Front Psychiatry</source><year>2016</year><volume>7</volume><fpage>34</fpage><pub-id pub-id-type="doi">10.3389/fpsyt.2016.00034</pub-id><pub-id pub-id-type="medline">27014100</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hassanpour</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tomita</surname><given-names>N</given-names> </name><name name-style="western"><surname>DeLise</surname><given-names>T</given-names> </name><name name-style="western"><surname>Crosier</surname><given-names>B</given-names> </name><name name-style="western"><surname>Marsch</surname><given-names>LA</given-names> </name></person-group><article-title>Identifying substance use risk based on deep neural networks and Instagram social media data</article-title><source>Neuropsychopharmacology</source><year>2019</year><month>02</month><volume>44</volume><issue>3</issue><fpage>487</fpage><lpage>494</lpage><pub-id pub-id-type="doi">10.1038/s41386-018-0247-x</pub-id><pub-id pub-id-type="medline">30356094</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ovalle</surname><given-names>A</given-names> </name><name name-style="western"><surname>Goldstein</surname><given-names>O</given-names> </name><name name-style="western"><surname>Kachuee</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Leveraging social media activity and machine learning for HIV and substance abuse risk assessment: Development and validation study</article-title><source>J Med Internet Res</source><year>2021</year><month>04</month><day>26</day><volume>23</volume><issue>4</issue><fpage>e22042</fpage><pub-id pub-id-type="doi">10.2196/22042</pub-id><pub-id pub-id-type="medline">33900200</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holloway</surname><given-names>IW</given-names> </name><name 
name-style="western"><surname>Wu</surname><given-names>ESC</given-names> </name><name name-style="western"><surname>Boka</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Novel machine learning HIV intervention for sexual and gender minority young people who have sex with men (uTECH): Protocol for a randomized comparison trial</article-title><source>JMIR Res Protoc</source><year>2024</year><month>08</month><day>20</day><volume>13</volume><fpage>e58448</fpage><pub-id pub-id-type="doi">10.2196/58448</pub-id><pub-id pub-id-type="medline">39163591</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><source>Qualtrics</source><access-date>2023-07-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.qualtrics.com/">https://www.qualtrics.com/</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><source>Zoom Video Communications, Inc</source><access-date>2023-07-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://zoom.us/">https://zoom.us/</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><article-title>The Platform Pushing What&#x2019;s Possible</article-title><source>Android</source><access-date>2022-10-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.android.com/">https://www.android.com/</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferreira</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kostakos</surname><given-names>V</given-names> </name><name name-style="western"><surname>Dey</surname><given-names>AK</given-names> </name></person-group><article-title>AWARE: Mobile context instrumentation framework</article-title><source>Front 
ICT</source><year>2015</year><volume>2</volume><issue>6</issue><pub-id pub-id-type="doi">10.3389/fict.2015.00006</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moshe</surname><given-names>I</given-names> </name><name name-style="western"><surname>Terhorst</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Opoku Asare</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Predicting symptoms of depression and anxiety using smartphone and wearable data</article-title><source>Front Psychiatry</source><year>2021</year><volume>12</volume><fpage>625247</fpage><pub-id pub-id-type="doi">10.3389/fpsyt.2021.625247</pub-id><pub-id pub-id-type="medline">33584388</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Opoku Asare</surname><given-names>K</given-names> </name><name name-style="western"><surname>Terhorst</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Vega</surname><given-names>J</given-names> </name><name name-style="western"><surname>Peltonen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lagerspetz</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ferreira</surname><given-names>D</given-names> </name></person-group><article-title>Predicting depression from smartphone behavioral markers using machine learning methods, hyperparameter optimization, and feature importance analysis: Exploratory study</article-title><source>JMIR Mhealth Uhealth</source><year>2021</year><month>07</month><day>12</day><volume>9</volume><issue>7</issue><fpage>e26540</fpage><pub-id pub-id-type="doi">10.2196/26540</pub-id><pub-id 
pub-id-type="medline">34255713</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Vega</surname><given-names>J</given-names> </name></person-group><article-title>Monitoring Parkinson&#x2019;s disease progression using behavioural inferences, mobile devices and web technologies</article-title><conf-name>Proceedings of the 25th International Conference Companion on World Wide Web (WWW &#x2019;16 Companion)</conf-name><conf-date>Apr 11-15, 2016</conf-date><conf-loc>Montr&#x00E9;al, Qu&#x00E9;bec, Canada</conf-loc><fpage>323</fpage><lpage>327</lpage><pub-id pub-id-type="doi">10.1145/2872518.2888598</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bae</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ferreira</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dey</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Suffoletto</surname><given-names>B</given-names> </name></person-group><article-title>Mobile phone sensors and supervised machine learning to identify alcohol use events in young adults: Implications for just-in-time adaptive interventions</article-title><source>Addict Behav</source><year>2018</year><month>08</month><volume>83</volume><fpage>42</fpage><lpage>47</lpage><pub-id pub-id-type="doi">10.1016/j.addbeh.2017.11.039</pub-id><pub-id pub-id-type="medline">29217132</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><article-title>Preexposure prophylaxis for the prevention of HIV infection in the United States&#x2014;2017 Update: a clinical practice guideline</article-title><source>Centers 
for Disease Control and Prevention</source><year>2018</year><access-date>2024-07-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/hiv/pdf/risk/prep/cdc-hiv-prep-guidelines-2017.pdf">https://www.cdc.gov/hiv/pdf/risk/prep/cdc-hiv-prep-guidelines-2017.pdf</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Levenshtein</surname><given-names>VI</given-names> </name></person-group><article-title>Binary codes capable of correcting deletions, insertions, and reversals</article-title><source>Soviet Physics Doklady</source><year>1966</year><month>02</month><access-date>2022-05-01</access-date><volume>10</volume><issue>8</issue><fpage>707</fpage><lpage>710</lpage></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>Life4</collab></person-group><source>Life4/Textdistance: Compute distance between sequences 30+ algorithms, pure Python implementation, common interface, optional external libs usage</source><access-date>2022-10-05</access-date><publisher-name>GitHub</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/life4/textdistance">https://github.com/life4/textdistance</ext-link></comment></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bird</surname><given-names>S</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>E</given-names> </name><name name-style="western"><surname>Loper</surname><given-names>E</given-names> </name></person-group><source>Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit</source><year>2009</year><publisher-name>O&#x2019;Reilly Media, Inc</publisher-name><pub-id 
pub-id-type="other">0596516495</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>Welcome to LIWC-22</article-title><source>LIWC</source><access-date>2022-10-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.liwc.app/">https://www.liwc.app/</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ghorbani</surname><given-names>AA</given-names> </name></person-group><article-title>An overview of online fake news: Characterization, detection, and discussion</article-title><source>Inf Process Manag</source><year>2020</year><month>03</month><volume>57</volume><issue>2</issue><fpage>102025</fpage><pub-id pub-id-type="doi">10.1016/j.ipm.2019.03.004</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Characterizing the propagation of situational information in social media during COVID-19 epidemic: A case study on Weibo</article-title><source>IEEE Trans Comput Soc Syst</source><year>2020</year><month>03</month><day>20</day><volume>7</volume><issue>2</issue><fpage>556</fpage><lpage>562</lpage><pub-id pub-id-type="doi">10.1109/TCSS.2020.2980007</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rathje</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Van Bavel</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>van der Linden</surname><given-names>S</given-names> </name></person-group><article-title>Out-group animosity drives engagement on social media</article-title><source>Proc Natl Acad Sci U S A</source><year>2021</year><month>06</month><day>29</day><volume>118</volume><issue>26</issue><fpage>e2024292118</fpage><pub-id pub-id-type="doi">10.1073/pnas.2024292118</pub-id><pub-id pub-id-type="medline">34162706</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name></person-group><article-title>Anxious or angry? Effects of discrete emotions on the perceived helpfulness of online reviews</article-title><source>MIS Q</source><year>2014</year><month>06</month><day>1</day><volume>38</volume><issue>2</issue><fpage>539</fpage><lpage>560</lpage><pub-id pub-id-type="doi">10.2307/26634939</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alvero</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Giebel</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gebre-Medhin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Antonio</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Stevens</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Domingue</surname><given-names>BW</given-names> </name></person-group><article-title>Essay content and style are strongly 
related to household income and SAT scores: Evidence from 60,000 undergraduate applications</article-title><source>Sci Adv</source><year>2021</year><month>10</month><day>15</day><volume>7</volume><issue>42</issue><fpage>eabi9031</fpage><pub-id pub-id-type="doi">10.1126/sciadv.abi9031</pub-id><pub-id pub-id-type="medline">34644119</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><conf-name>Proceedings of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)</conf-name><conf-date>2019</conf-date><fpage>4171</fpage><lpage>4186</lpage><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Barbieri</surname><given-names>F</given-names> </name><name name-style="western"><surname>Camacho-Collados</surname><given-names>J</given-names> </name><name name-style="western"><surname>Espinosa Anke</surname><given-names>L</given-names> </name><name name-style="western"><surname>Neves</surname><given-names>L</given-names> </name></person-group><article-title>TweetEval: unified benchmark and comparative evaluation for tweet classification</article-title><year>2020</year><access-date>2025-08-08</access-date><conf-name>Findings of the Association for Computational 
Linguistics</conf-name><conf-loc>Online</conf-loc><fpage>1644</fpage><lpage>1650</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2020.findings-emnlp.148/">https://aclanthology.org/2020.findings-emnlp.148/</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2020.findings-emnlp.148</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheng</surname><given-names>Y</given-names> </name></person-group><article-title>Mean shift, mode seeking, and clustering</article-title><source>IEEE Trans Pattern Anal Machine Intell</source><year>1995</year><volume>17</volume><issue>8</issue><fpage>790</fpage><lpage>799</lpage><pub-id pub-id-type="doi">10.1109/34.400568</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pedregosa</surname><given-names>F</given-names> </name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gramfort</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Scikit-learn: machine learning in Python</article-title><source>J Mach Learn Res</source><year>2011</year><month>11</month><day>1</day><volume>12</volume><fpage>2825</fpage><lpage>2830</lpage><pub-id pub-id-type="doi">10.5555/1953048.2078195</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cox</surname><given-names>DR</given-names> </name></person-group><article-title>The regression analysis of binary sequences</article-title><source>J R Stat Soc Ser B</source><year>1958</year><month>07</month><day>1</day><volume>20</volume><issue>2</issue><fpage>215</fpage><lpage>232</lpage><pub-id 
pub-id-type="doi">10.1111/j.2517-6161.1958.tb00292.x</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>JH</given-names> </name></person-group><article-title>Greedy function approximation: A gradient boosting machine</article-title><source>Ann Statist</source><year>2001</year><month>10</month><volume>29</volume><issue>5</issue><fpage>1189</fpage><lpage>1232</lpage><pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gibson</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Leamon</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Flynn</surname><given-names>N</given-names> </name></person-group><article-title>Epidemiology and public health consequences of methamphetamine use in California&#x2019;s Central Valley</article-title><source>J Psychoactive Drugs</source><year>2002</year><volume>34</volume><issue>3</issue><fpage>313</fpage><lpage>319</lpage><pub-id pub-id-type="doi">10.1080/02791072.2002.10399969</pub-id><pub-id pub-id-type="medline">12422943</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bryant</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hopwood</surname><given-names>M</given-names> </name><name name-style="western"><surname>Dowsett</surname><given-names>GW</given-names> </name><etal/></person-group><article-title>The rush to risk when interrogating the relationship between methamphetamine use and sexual practice among gay and bisexual men</article-title><source>Int J Drug 
Policy</source><year>2018</year><month>05</month><volume>55</volume><fpage>242</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1016/j.drugpo.2017.12.010</pub-id><pub-id pub-id-type="medline">29279253</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jones</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Compton</surname><given-names>WM</given-names> </name><name name-style="western"><surname>Mustaquim</surname><given-names>D</given-names> </name></person-group><article-title>Patterns and characteristics of methamphetamine use among adults - United States, 2015-2018</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2020</year><month>03</month><day>27</day><access-date>2025-08-08</access-date><volume>69</volume><issue>12</issue><fpage>317</fpage><lpage>323</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/mmwr/volumes/69/wr/mm6912a1.htm">https://www.cdc.gov/mmwr/volumes/69/wr/mm6912a1.htm</ext-link></comment><pub-id pub-id-type="doi">10.15585/mmwr.mm6912a1</pub-id><pub-id pub-id-type="medline">32214077</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goedel</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Duncan</surname><given-names>DT</given-names> </name></person-group><article-title>Geosocial-networking app usage patterns of gay, bisexual, and other men who have sex with men: Survey among users of Grindr, a mobile dating app</article-title><source>JMIR Public Health Surveill</source><year>2015</year><volume>1</volume><issue>1</issue><fpage>e4</fpage><pub-id pub-id-type="doi">10.2196/publichealth.4353</pub-id><pub-id pub-id-type="medline">27227127</pub-id></nlm-citation></ref><ref 
id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoenigl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Little</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Grelotti</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Grindr users take more risks, but are more open to human immunodeficiency virus (HIV) pre-exposure prophylaxis: Could this dating app provide a platform for HIV prevention outreach?</article-title><source>Clin Infect Dis</source><year>2020</year><month>10</month><day>23</day><volume>71</volume><issue>7</issue><fpage>e135</fpage><lpage>e140</lpage><pub-id pub-id-type="doi">10.1093/cid/ciz1093</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>AMA</given-names> </name><name name-style="western"><surname>Grierson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wain</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pitts</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pattison</surname><given-names>P</given-names> </name></person-group><article-title>Associations between the sexual behaviour of men who have sex with men and the structure and composition of their social networks</article-title><source>Sex Transm Infect</source><year>2004</year><month>12</month><volume>80</volume><issue>6</issue><fpage>455</fpage><lpage>458</lpage><pub-id pub-id-type="doi">10.1136/sti.2004.010355</pub-id><pub-id pub-id-type="medline">15572613</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Cavazos-Rehg</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Krauss</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Spitznagel</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Schootman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cottler</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Bierut</surname><given-names>LJ</given-names> </name></person-group><article-title>Number of sexual partners and associations with initiation and intensity of substance use</article-title><source>AIDS Behav</source><year>2011</year><month>05</month><volume>15</volume><issue>4</issue><fpage>869</fpage><lpage>874</lpage><pub-id pub-id-type="doi">10.1007/s10461-010-9669-0</pub-id><pub-id pub-id-type="medline">20107887</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bauermeister</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Pingel</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Jadwin-Cakmak</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Acceptability and preliminary efficacy of a tailored online HIV/STI testing intervention for young men who have sex with men: the Get Connected! 
program</article-title><source>AIDS Behav</source><year>2015</year><month>10</month><volume>19</volume><issue>10</issue><fpage>1860</fpage><lpage>1874</lpage><pub-id pub-id-type="doi">10.1007/s10461-015-1009-y</pub-id><pub-id pub-id-type="medline">25638038</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>L</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> </name></person-group><article-title>Development and effectiveness of a mobile phone application conducting health behavioral intervention among men who have sex with men, a randomized controlled trial: study protocol</article-title><source>BMC Public Health</source><year>2017</year><month>04</month><day>24</day><volume>17</volume><issue>1</issue><fpage>355</fpage><pub-id pub-id-type="doi">10.1186/s12889-017-4235-6</pub-id><pub-id pub-id-type="medline">28438144</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Le</surname><given-names>LX</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>CA</given-names> </name><etal/></person-group><article-title>Feasibility, acceptability, and design of a mobile ecological momentary assessment for high-risk men who have sex with men in Hanoi, Vietnam: qualitative 
study</article-title><source>JMIR Mhealth Uhealth</source><year>2022</year><volume>10</volume><issue>1</issue><fpage>e30360</fpage><pub-id pub-id-type="doi">10.2196/30360</pub-id><pub-id pub-id-type="medline">35084340</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Duncan</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Kapadia</surname><given-names>F</given-names> </name><name name-style="western"><surname>Regan</surname><given-names>SD</given-names> </name><etal/></person-group><article-title>Feasibility and acceptability of global positioning system (GPS) methods to study the spatial contexts of substance use and sexual risk behaviors among young men who have sex with men in New York City: A P18 cohort sub-study</article-title><source>PLoS ONE</source><year>2016</year><volume>11</volume><issue>2</issue><fpage>e0147520</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0147520</pub-id><pub-id pub-id-type="medline">26918766</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Duncan</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Chaix</surname><given-names>B</given-names> </name><name name-style="western"><surname>Regan</surname><given-names>SD</given-names> </name><etal/></person-group><article-title>Collecting mobility data with GPS methods to understand the HIV environmental riskscape among young Black men who have sex with men: A multi-city feasibility study in the deep south</article-title><source>AIDS Behav</source><year>2018</year><month>09</month><volume>22</volume><issue>9</issue><fpage>3057</fpage><lpage>3070</lpage><pub-id pub-id-type="doi">10.1007/s10461-018-2163-9</pub-id><pub-id pub-id-type="medline">29797163</pub-id></nlm-citation></ref><ref 
id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shaw</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ellis</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Kendrick</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Ziegler</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wiseman</surname><given-names>R</given-names> </name></person-group><article-title>Predicting smartphone operating system from personality and individual differences</article-title><source>Cyberpsychol Behav Soc Netw</source><year>2016</year><month>12</month><volume>19</volume><issue>12</issue><fpage>727</fpage><lpage>732</lpage><pub-id pub-id-type="doi">10.1089/cyber.2016.0324</pub-id><pub-id pub-id-type="medline">27849366</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Risk assessment survey and location-based features.</p><media xlink:href="ojphi_v17i1e68013_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material></app-group></back></article>