% paper.bib — bibliography entries for the paper.
% (Scraped GitHub page chrome and line-number gutter removed; BibTeX ignores
%  text outside entries, but the junk served no purpose and obscured the file.)
@misc{WorldHealthOrganization,
author = {{World Health Organization}},
title = {{WHO characterizes COVID-19 as a pandemic}},
year = {2020},
url = {https://www.who.int/emergencies/diseases/novel-coronavirus-2019/events-as-they-happen},
}
@article{Earle:2010,
author = {Earle, Paul},
title = {{Earthquake Twitter}},
journal = {Nature Geoscience},
year = {2010},
volume = {3},
number = {4},
pages = {221--222},
doi = {10.1038/ngeo832},
url = {https://www.nature.com/articles/ngeo832},
abstract = {Twitter messages offer first-hand accounts of earthquakes within minutes. Analyses of their content and geographic distribution can be a useful supplement to instrument-based estimates of quake location and magnitude.},
}
@article{Zou:2018,
author = {Zou, Lei and Lam, Nina S. N. and Cai, Heng and Qiang, Yi},
title = {{Mining Twitter Data for Improved Understanding of Disaster Resilience}},
journal = {Annals of the Association of American Geographers. Association of American Geographers},
year = {2018},
volume = {108},
number = {5},
pages = {1422--1441},
doi = {10.1080/24694452.2017.1421897},
abstract = {Coastal communities faced with multiple hazards have shown uneven responses and behaviors. These responses and behaviors could be better understood by analyzing real-time social media data through categorizing them into the three phases of the emergency management: preparedness, response, and recovery. This study analyzes the spatial-temporal patterns of Twitter activities during Hurricane Sandy, which struck the U.S. Northeast on 29 October 2012. The study area includes 126 counties affected by Hurricane Sandy. The objectives are threefold: (1) to derive a set of common indexes from Twitter data so that they can be used for emergency management and resilience analysis; (2) to examine whether there are significant geographical and social disparities in disaster-related Twitter use; and (3) to test whether Twitter data can improve postdisaster damage estimation. Three corresponding hypotheses were tested. Results show that common indexes derived from Twitter data, including ratio, normalized ratio, and sentiment, could enable comparison across regions and events and should be documented. Social and geographical disparities in Twitter use existed in the Hurricane Sandy event, with higher disaster-related Twitter use communities generally being communities of higher socioeconomic status. Finally, adding Twitter indexes into a damage estimation model improved the adjusted R2 from 0.46 to 0.56, indicating that social media data could help improve postdisaster damage estimation, but other environmental and socioeconomic variables influencing the capacity to reducing damage might need to be included. The knowledge gained from this study could provide valuable insights into strategies for utilizing social media data to increase resilience to disasters.},
}
@article{Alam:2018,
author = {Alam, Firoj and Ofli, Ferda and Imran, Muhammad and Aupetit, Michael},
title = {{A Twitter Tale of Three Hurricanes: Harvey, Irma, and Maria}},
journal = {arXiv},
year = {2018},
eprint = {1805.05144},
archiveprefix = {arXiv},
url = {http://arxiv.org/abs/1805.05144},
abstract = {People increasingly use microblogging platforms such as Twitter during natural disasters and emergencies. Research studies have revealed the usefulness of the data available on Twitter for several disaster response tasks. However, making sense of social media data is a challenging task due to several reasons such as limitations of available tools to analyze high-volume and high-velocity data streams. This work presents an extensive multidimensional analysis of textual and multimedia content from millions of tweets shared on Twitter during the three disaster events. Specifically, we employ various Artificial Intelligence techniques from Natural Language Processing and Computer Vision fields, which exploit different machine learning algorithms to process the data generated during the disaster events. Our study reveals the distributions of various types of useful information that can inform crisis managers and responders as well as facilitate the development of future automated systems for disaster management.},
}
@article{Gao:2020,
author = {Gao, Jianjun and Tian, Zhenxue and Yang, Xu},
title = {{Breakthrough: Chloroquine phosphate has shown apparent efficacy in treatment of COVID-19 associated pneumonia in clinical studies}},
journal = {Bioscience trends},
year = {2020},
volume = {14},
number = {1},
pages = {72--73},
doi = {10.5582/bst.2020.01047},
url = {https://www.ncbi.nlm.nih.gov/pubmed/32074550},
abstract = {The coronavirus disease 2019 (COVID-19) virus is spreading rapidly, and scientists are endeavoring to discover drugs for its efficacious treatment in China. Chloroquine phosphate, an old drug for treatment of malaria, is shown to have apparent efficacy and acceptable safety against COVID-19 associated pneumonia in multicenter clinical trials conducted in China. The drug is recommended to be included in the next version of the Guidelines for the Prevention, Diagnosis, and Treatment of Pneumonia Caused by COVID-19 issued by the National Health Commission of the People's Republic of China for treatment of COVID-19 infection in larger populations in the future.},
}
@article{Lu:2020,
author = {Lu, Hongzhou},
title = {{Drug treatment options for the 2019-new coronavirus (2019-nCoV)}},
journal = {Bioscience trends},
year = {2020},
volume = {14},
number = {1},
pages = {69--71},
doi = {10.5582/bst.2020.01020},
url = {https://www.ncbi.nlm.nih.gov/pubmed/31996494},
abstract = {As of January 22, 2020, a total of 571 cases of the 2019-new coronavirus (2019-nCoV) have been reported in 25 provinces (districts and cities) in China. At present, there is no vaccine or antiviral treatment for human and animal coronavirus, so that identifying the drug treatment options as soon as possible is critical for the response to the 2019-nCoV outbreak. Three general methods, which include existing broad-spectrum antiviral drugs using standard assays, screening of a chemical library containing many existing compounds or databases, and the redevelopment of new specific drugs based on the genome and biophysical understanding of individual coronaviruses, are used to discover the potential antiviral treatment of human pathogen coronavirus. Lopinavir /Ritonavir, Nucleoside analogues, Neuraminidase inhibitors, Remdesivir, peptide (EK1), abidol, RNA synthesis inhibitors (such as TDF, 3TC), anti-inflammatory drugs (such as hormones and other molecules), Chinese traditional medicine, such ShuFengJieDu Capsules and Lianhuaqingwen Capsule, could be the drug treatment options for 2019-nCoV. However, the efficacy and safety of these drugs for 2019- nCoV still need to be further confirmed by clinical experiments.},
}
@article{Sanders:2020,
author = {Sanders, James M. and Monogue, Marguerite L. and Jodlowski, Tomasz Z. and Cutrell, James B.},
title = {{Pharmacologic Treatments for Coronavirus Disease 2019 (COVID-19): A Review}},
journal = {JAMA: the journal of the American Medical Association},
year = {2020},
doi = {10.1001/jama.2020.6019},
url = {https://www.ncbi.nlm.nih.gov/pubmed/32282022},
abstract = {Importance: The pandemic of coronavirus disease 2019 (COVID-19) caused by the novel severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) presents an unprecedented challenge to identify effective drugs for prevention and treatment. Given the rapid pace of scientific discovery and clinical data generated by the large number of people rapidly infected by SARS-CoV-2, clinicians need accurate evidence regarding effective medical treatments for this infection. Observations: No proven effective therapies for this virus currently exist. The rapidly expanding knowledge regarding SARS-CoV-2 virology provides a significant number of potential drug targets. The most promising therapy is remdesivir. Remdesivir has potent in vitro activity against SARS-CoV-2, but it is not US Food and Drug Administration approved and currently is being tested in ongoing randomized trials. Oseltamivir has not been shown to have efficacy, and corticosteroids are currently not recommended. Current clinical evidence does not support stopping angiotensin-converting enzyme inhibitors or angiotensin receptor blockers in patients with COVID-19. Conclusions and Relevance: The COVID-19 pandemic represents the greatest global public health crisis of this generation and, potentially, since the pandemic influenza outbreak of 1918. The speed and volume of clinical trials launched to investigate potential therapies for COVID-19 highlight both the need and capability to produce high-quality evidence even in the middle of a pandemic. No therapies have been shown effective to date.},
}
@article{Banda:2020,
author = {Banda, Juan M. and Tekumalla, Ramya and Wang, Guanyu and Yu, Jingyuan and Liu, Tuo and Ding, Yuning and Chowell, Gerardo},
title = {{A large-scale COVID-19 Twitter chatter dataset for open scientific research -- an international collaboration}},
journal = {arXiv},
year = {2020},
doi = {10.5281/zenodo.3723940},
url = {https://zenodo.org/record/3723940},
abstract = {As the COVID-19 pandemic continues its march around the world, an unprecedented amount of open data is being generated for genetics and epidemiological research. The unparalleled rate at which many research groups around the world are releasing data and publications on the ongoing pandemic is allowing other scientists to learn from local experiences and data generated in the front lines of the COVID-19 pandemic. However, there is a need to integrate additional data sources that map and measure the role of social dynamics of such a unique world-wide event into biomedical, biological, and epidemiological analyses. For this purpose, we present a large-scale curated dataset of over 152 million tweets, growing daily, related to COVID-19 chatter generated from January 1st to April 4th at the time of writing. This open dataset will allow researchers to conduct a number of research projects relating to the emotional and mental responses to social distancing measures, the identification of sources of misinformation, and the stratified measurement of sentiment towards the pandemic in near real time.},
}
@article{Tekumalla:2019,
author = {Tekumalla, Ramya and Asl, Javad Rafiei and Banda, Juan M.},
title = {{Mining Archive.org's Twitter Stream Grab for Pharmacovigilance Research Gold}},
year = {2019},
journal = {bioRxiv},
url = {http://biorxiv.org/content/early/2019/12/03/859611.abstract},
abstract = {In the last few years Twitter has become an important resource for the identification of Adverse Drug Reactions (ADRs), monitoring flu trends, and other pharmacovigilance and general research applications. Most researchers spend their time crawling Twitter, buying expensive pre-mined datasets, or tediously and slowly building datasets using the limited Twitter API. However, there are a large number of datasets that are publicly available to researchers which are underutilized or unused. In this work, we demonstrate how we mined over 9.4 billion Tweets from archive.org's Twitter stream grab using a drug-term dictionary and plenty of computing power. Knowing that not everything that shines is gold, we used pre-existing drug-related datasets to build machine learning models to filter our findings for relevance. In this work we present our methodology and the 3,346,758 identified tweets for public use in future research.},
}
@article{Tekumalla:2020,
author = {Tekumalla, Ramya and Banda, Juan M.},
title = {{Social Media Mining Toolkit (SMMT)}},
year = {2020},
journal = {arXiv},
url = {http://arxiv.org/abs/2003.13894},
abstract = {There has been a dramatic increase in the popularity of utilizing social media data for research purposes within the biomedical community. In PubMed alone, there have been nearly 2,500 publication entries since 2014 that deal with analyzing social media data from Twitter and Reddit. However, the vast majority of those works do not share their code or data for replicating their studies. With minimal exceptions, the few that do, place the burden on the researcher to figure out how to fetch the data, how to best format their data, and how to create automatic and manual annotations on the acquired data. In order to address this pressing issue, we introduce the Social Media Mining Toolkit (SMMT), a suite of tools aimed to encapsulate the cumbersome details of acquiring, preprocessing, annotating and standardizing social media data. The purpose of our toolkit is for researchers to focus on answering research questions, and not the technical aspects of using social media data. By using a standard toolkit, researchers will be able to acquire, use, and release data in a consistent way that is transparent for everybody using the toolkit, hence, simplifying research reproducibility and accessibility in the social media domain.},
}
@misc{NationalLibraryofMedicine:2008,
author = {{National Library of Medicine}},
title = {{RxNorm [Internet]}},
year = {2008},
url = {http://www.nlm.nih.gov/research/umls/rxnorm/},
}
@article{TekumallaandBanda:2020,
author = {Tekumalla, Ramya and Banda, Juan},
title = {{A large-scale Twitter dataset for drug safety applications mined from publicly existing resources}},
journal = {arXiv},
year = {2020},
doi = {10.5281/zenodo.3606863},
url = {https://zenodo.org/record/3606863},
abstract = {With the increase in popularity of deep learning models for natural language processing (NLP) tasks, in the field of Pharmacovigilance, more specifically for the identification of Adverse Drug Reactions (ADRs), there is an inherent need for large-scale social-media datasets aimed at such tasks. With most researchers allocating large amounts of time to crawl Twitter or buying expensive pre-curated datasets, then manually annotating by humans, these approaches do not scale well as more and more data keeps flowing in Twitter. In this work we re-purpose a publicly available archived dataset of more than 9.4 billion Tweets with the objective of creating a very large dataset of drug usage-related tweets. Using existing manually curated datasets from the literature, we then validate our filtered tweets for relevance using machine learning methods, with the end result of a publicly available dataset of 1,181,993 million tweets for public use. We provide all code and detailed procedure on how to extract this dataset and the selected tweet ids for researchers to use.},
}
@article{Karimi:2015,
author = {Karimi, Sarvnaz and Metke-Jimenez, Alejandro and Kemp, Madonna and Wang, Chen},
title = {{Cadec: A corpus of adverse drug event annotations}},
journal = {Journal of biomedical informatics},
year = {2015},
volume = {55},
pages = {73--81},
doi = {10.1016/j.jbi.2015.03.010},
url = {https://www.ncbi.nlm.nih.gov/pubmed/25817970},
abstract = {CSIRO Adverse Drug Event Corpus (Cadec) is a new rich annotated corpus of medical forum posts on patient-reported Adverse Drug Events (ADEs). The corpus is sourced from posts on social media, and contains text that is largely written in colloquial language and often deviates from formal English grammar and punctuation rules. Annotations contain mentions of concepts such as drugs, adverse effects, symptoms, and diseases linked to their corresponding concepts in controlled vocabularies, i.e., SNOMED Clinical Terms and MedDRA. The quality of the annotations is ensured by annotation guidelines, multi-stage annotations, measuring inter-annotator agreement, and final review of the annotations by a clinical terminologist. This corpus is useful for studies in the area of information extraction, or more generally text mining, from social media to detect possible adverse drug reactions from direct patient reports. The corpus is publicly available at https://data.csiro.au.(1).},
}
@article{Sarker:2018,
author = {Sarker, Abeed and Gonzalez-Hernandez, Graciela},
title = {{An unsupervised and customizable misspelling generator for mining noisy health-related text sources}},
journal = {Journal of biomedical informatics},
year = {2018},
volume = {88},
pages = {98--107},
doi = {10.1016/j.jbi.2018.11.007},
url = {https://www.ncbi.nlm.nih.gov/pubmed/30445220},
}
@article{Lavertu:2019,
author = {Lavertu, Adam and Altman, Russ B.},
title = {{RedMed: Extending drug lexicons for social media applications}},
journal = {Journal of biomedical informatics},
year = {2019},
volume = {99},
pages = {103307},
doi = {10.1016/j.jbi.2019.103307},
url = {https://www.ncbi.nlm.nih.gov/pubmed/31627020},
abstract = {Social media has been identified as a promising potential source of information for pharmacovigilance. The adoption of social media data has been hindered by the massive and noisy nature of the data. Initial attempts to use social media data have relied on exact text matches to drugs of interest, and therefore suffer from the gap between formal drug lexicons and the informal nature of social media. The Reddit comment archive represents an ideal corpus for bridging this gap. We trained a word embedding model, RedMed, to facilitate the identification and retrieval of health entities from Reddit data. We compare the performance of our model trained on a consumer-generated corpus against publicly available models trained on expert-generated corpora. Our automated classification pipeline achieves an accuracy of 0.88 and a specificity of >0.9 across four different term classes. Of all drug mentions, an average of 79% (±0.5%) were exact matches to a generic or trademark drug name, 14% (±0.5%) were misspellings, 6.4% (±0.3%) were synonyms, and 0.13% (±0.05%) were pill marks. We find that our system captures an additional 20% of mentions; these would have been missed by approaches that rely solely on exact string matches. We provide a lexicon of misspellings and synonyms for 2978 drugs and a word embedding model trained on a health-oriented subset of Reddit.},
}