-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.toc
More file actions
209 lines (209 loc) · 16.9 KB
/
main.toc
File metadata and controls
209 lines (209 loc) · 16.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
\babel@toc {english}{}\relax
\gdef \the@ipfilectr {@-1}
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-2}
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-3}
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-4}
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-5}
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-6}
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}%
\contentsline {section}{\numberline {1.1}Single cell analysis}{1}{section.1.1}%
\contentsline {subsection}{\numberline {1.1.1}Biological background}{1}{subsection.1.1.1}%
\contentsline {subsection}{\numberline {1.1.2}What do we measure and how?}{2}{subsection.1.1.2}%
\contentsline {subsection}{\numberline {1.1.3}High-throughput profiling methods}{3}{subsection.1.1.3}%
\contentsline {subsection}{\numberline {1.1.4}Data characteristics}{3}{subsection.1.1.4}%
\contentsline {subsection}{\numberline {1.1.5}Typical analysis workflow}{4}{subsection.1.1.5}%
\contentsline {subsubsection}{\nonumberline Exploratory data analysis}{4}{subsubsection*.13}%
\contentsline {subsubsection}{\nonumberline Feature selection and dimensionality reduction}{6}{subsubsection*.15}%
\contentsline {paragraph}{\nonumberline Highly variable gene selection}{7}{paragraph*.17}%
\contentsline {paragraph}{\nonumberline Dimensionality reduction}{7}{paragraph*.19}%
\contentsline {paragraph}{\nonumberline 2D cell-level visualisation}{8}{paragraph*.21}%
\contentsline {subsubsection}{\nonumberline Clustering}{8}{subsubsection*.23}%
\contentsline {subsubsection}{\nonumberline Batch effect correction}{9}{subsubsection*.25}%
\contentsline {subsubsection}{\nonumberline Trajectory inference}{10}{subsubsection*.27}%
\contentsline {paragraph}{\nonumberline Cluster-based TI approaches}{10}{paragraph*.29}%
\contentsline {paragraph}{\nonumberline Dynamic time warping}{10}{paragraph*.31}%
\contentsline {paragraph}{\nonumberline Minimum weight bipartite matching}{11}{paragraph*.33}%
\contentsline {section}{\numberline {1.2}Challenges in single-cell omics}{12}{section.1.2}%
\contentsline {subsection}{\numberline {1.2.1}Trajectory inference and dynamical systems}{12}{subsection.1.2.1}%
\contentsline {subsection}{\numberline {1.2.2}R/Python interoperability}{13}{subsection.1.2.2}%
\contentsline {subsection}{\numberline {1.2.3}Standardizing benchmarks}{14}{subsection.1.2.3}%
\contentsline {section}{\numberline {1.3}Research context and objectives}{16}{section.1.3}%
\contentsline {paragraph}{\nonumberline Trajectory inference and dynamical systems}{16}{paragraph*.39}%
\contentsline {paragraph}{\nonumberline Interoperability}{17}{paragraph*.41}%
\contentsline {paragraph}{\nonumberline Benchmarking visualization}{17}{paragraph*.43}%
\gdef \the@ipfilectr {}
\contentsline {part}{\numberline {I}Trajectory inference}{25}{part.1}%
\gdef \the@ipfilectr {@-7}
\contentsline {chapter}{\numberline {2}Recent advances in trajectory inference from single-cell omics data}{27}{chapter.2}%
\contentsline {section}{\numberline {2.1}Introduction}{28}{section.2.1}%
\contentsline {section}{\numberline {2.2}Trajectory topologies}{30}{section.2.2}%
\contentsline {section}{\numberline {2.3}Computational approaches for trajectory inference}{33}{section.2.3}%
\contentsline {subsection}{\numberline {2.3.1}Current approaches}{33}{subsection.2.3.1}%
\contentsline {subsection}{\numberline {2.3.2}Novel probabilistic approaches}{34}{subsection.2.3.2}%
\contentsline {section}{\numberline {2.4}Extensions of trajectory inference}{36}{section.2.4}%
\contentsline {subsection}{\numberline {2.4.1}Integration of other data sources}{36}{subsection.2.4.1}%
\contentsline {subsection}{\numberline {2.4.2}Quality control and benchmarking}{37}{subsection.2.4.2}%
\contentsline {section}{\numberline {2.5}Downstream analysis of inferred trajectories}{37}{section.2.5}%
\contentsline {subsection}{\numberline {2.5.1}Visualisation}{38}{subsection.2.5.1}%
\contentsline {subsection}{\numberline {2.5.2}Trajectory differential expression}{38}{subsection.2.5.2}%
\contentsline {subsection}{\numberline {2.5.3}Trajectory alignment and comparison}{39}{subsection.2.5.3}%
\contentsline {subsection}{\numberline {2.5.4}Dynamic gene regulatory network inference}{39}{subsection.2.5.4}%
\contentsline {section}{\numberline {2.6}A future outlook for trajectory inference methods}{40}{section.2.6}%
\contentsline {section}{\numberline {2.7}Acknowledgements}{42}{section.2.7}%
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-8}
\contentsline {chapter}{\numberline {3}dynchro: exploratory analysis and alignment of inferred trajectories}{51}{chapter.3}%
\contentsline {section}{\numberline {3.1}Introduction}{52}{section.3.1}%
\contentsline {section}{\numberline {3.2}Results}{54}{section.3.2}%
\contentsline {subsection}{\numberline {3.2.1}dynchro workflow}{54}{subsection.3.2.1}%
\contentsline {subsection}{\numberline {3.2.2}dynchro shows the effect of batch removal on trajectories and identifies premature stops}{56}{subsection.3.2.2}%
\contentsline {subsection}{\numberline {3.2.3}dynchro identifies correspondences between branching trajectories}{59}{subsection.3.2.3}%
\contentsline {subsection}{\numberline {3.2.4}dynchro clusters similar and dissimilar samples based on trajectories}{61}{subsection.3.2.4}%
\contentsline {subsection}{\numberline {3.2.5}dynchro identifies and tracks similarity and dissimilarity between trajectory pseudotimes}{63}{subsection.3.2.5}%
\contentsline {subsection}{\numberline {3.2.6}dynchro scales well to large datasets using representative pseudocells}{65}{subsection.3.2.6}%
\contentsline {section}{\numberline {3.3}Conclusion}{66}{section.3.3}%
\contentsline {section}{\numberline {3.4}Discussion}{68}{section.3.4}%
\contentsline {section}{\numberline {3.5}Methods}{69}{section.3.5}%
\contentsline {subsection}{\numberline {3.5.1}Lineages}{69}{subsection.3.5.1}%
\contentsline {subsubsection}{\nonumberline Construction}{69}{subsubsection*.62}%
\contentsline {subsubsection}{\nonumberline Comparison}{70}{subsubsection*.64}%
\contentsline {subsection}{\numberline {3.5.2}Dynamic time warping}{70}{subsection.3.5.2}%
\contentsline {subsection}{\numberline {3.5.3}Calculating average cost on path}{72}{subsection.3.5.3}%
\contentsline {subsection}{\numberline {3.5.4}Comparing multiple lineages}{72}{subsection.3.5.4}%
\contentsline {subsection}{\numberline {3.5.5}Interpolating cells}{72}{subsection.3.5.5}%
\contentsline {subsubsection}{\nonumberline Construction of pseudocells}{72}{subsubsection*.66}%
\contentsline {subsubsection}{\nonumberline Smoothing}{73}{subsubsection*.68}%
\contentsline {subsubsection}{\nonumberline Mapping back results}{73}{subsubsection*.70}%
\gdef \the@ipfilectr {}
\contentsline {part}{\numberline {II}Interoperability}{77}{part.2}%
\gdef \the@ipfilectr {@-9}
\contentsline {chapter}{\numberline {4}Polyglot programming for single-cell analysis}{81}{chapter.4}%
\contentsline {section}{\numberline {4.1}Introduction}{82}{section.4.1}%
\contentsline {subsection}{\numberline {4.1.1}Programming languages and software ecosystems}{84}{subsection.4.1.1}%
\contentsline {subsection}{\numberline {4.1.2}Interoperability strategies}{84}{subsection.4.1.2}%
\contentsline {paragraph}{\nonumberline Code porting}{85}{paragraph*.74}%
\contentsline {paragraph}{\nonumberline In-memory interoperability}{85}{paragraph*.76}%
\contentsline {paragraph}{\nonumberline Disk-based interoperability}{85}{paragraph*.78}%
\contentsline {paragraph}{\nonumberline Workflow frameworks}{85}{paragraph*.80}%
\contentsline {section}{\numberline {4.2}In-memory interoperability}{86}{section.4.2}%
\contentsline {paragraph}{\nonumberline Approach}{86}{paragraph*.82}%
\contentsline {subsection}{\numberline {4.2.1}Differences in programming language design}{87}{subsection.4.2.1}%
\contentsline {subsubsection}{\nonumberline Indexing: 0-based or 1-based}{87}{subsubsection*.84}%
\contentsline {subsubsection}{\nonumberline Column major vs row major matrices}{88}{subsubsection*.87}%
\contentsline {subsubsection}{\nonumberline Built-in data types: integers}{89}{subsubsection*.90}%
\contentsline {subsubsection}{\nonumberline Dots in variable names}{90}{subsubsection*.93}%
\contentsline {subsection}{\numberline {4.2.2}\software {rpy2}: a Python foreign function interface to R}{91}{subsection.4.2.2}%
\contentsline {subsubsection}{\nonumberline General usage}{91}{subsubsection*.95}%
\contentsline {subsubsection}{\nonumberline rpy2 extensions for single-cell analysis}{92}{subsubsection*.98}%
\contentsline {subsubsection}{\nonumberline Use case: running DESeq2 in Python}{95}{subsubsection*.103}%
\contentsline {subsection}{\numberline {4.2.3}\software {reticulate}: an R foreign function interface to Python}{96}{subsection.4.2.3}%
\contentsline {subsubsection}{\nonumberline General usage}{96}{subsubsection*.105}%
\contentsline {subsubsection}{\nonumberline \software {reticulate} for single-cell analysis}{98}{subsubsection*.110}%
\contentsline {section}{\numberline {4.3}Disk-based interoperability}{101}{section.4.3}%
\contentsline {subsection}{\numberline {4.3.1}Interoperable file formats}{101}{subsection.4.3.1}%
\contentsline {subsubsection}{\nonumberline File format features}{102}{subsubsection*.113}%
\contentsline {paragraph}{\nonumberline Sparse matrix support}{102}{paragraph*.115}%
\contentsline {paragraph}{\nonumberline Large image support}{103}{paragraph*.117}%
\contentsline {paragraph}{\nonumberline Lazy chunk loading}{103}{paragraph*.119}%
\contentsline {paragraph}{\nonumberline Remote storage}{103}{paragraph*.121}%
\contentsline {subsection}{\numberline {4.3.2}Disk-based pipelines}{104}{subsection.4.3.2}%
\contentsline {subsubsection}{\nonumberline Pipelines with different environments}{105}{subsubsection*.125}%
\contentsline {subsubsection}{\nonumberline Containerized pipelines}{105}{subsubsection*.127}%
\contentsline {section}{\numberline {4.4}Production-ready workflows}{109}{section.4.4}%
\contentsline {subsection}{\numberline {4.4.1}Key qualities for workflows}{110}{subsection.4.4.1}%
\contentsline {subsection}{\numberline {4.4.2}Enabling technologies}{111}{subsection.4.4.2}%
\contentsline {subsection}{\numberline {4.4.3}Example workflow}{112}{subsection.4.4.3}%
\contentsline {section}{\numberline {4.5}Conclusion}{112}{section.4.5}%
\contentsline {paragraph}{\nonumberline One-off exploratory data analysis}{112}{paragraph*.131}%
\contentsline {paragraph}{\nonumberline Standard publication-focused analysis}{112}{paragraph*.133}%
\contentsline {paragraph}{\nonumberline Cohort-scale data with periodically added data}{113}{paragraph*.135}%
\contentsline {paragraph}{\nonumberline Benchmarking study}{113}{paragraph*.137}%
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-10}
\contentsline {chapter}{\numberline {5}anndataR improves interoperability between R and Python in single-cell transcriptomics}{117}{chapter.5}%
\contentsline {section}{\numberline {5.1}Introduction}{119}{section.5.1}%
\contentsline {subsection}{\numberline {5.1.1}Differences in structure}{119}{subsection.5.1.1}%
\contentsline {subsection}{\numberline {5.1.2}Different programming languages}{121}{subsection.5.1.2}%
\contentsline {section}{\numberline {5.2}Results}{124}{section.5.2}%
\contentsline {section}{\numberline {5.3}Software Design}{125}{section.5.3}%
\contentsline {section}{\numberline {5.4}Conclusion}{126}{section.5.4}%
\contentsline {section}{\numberline {5.5}Acknowledgments}{126}{section.5.5}%
\gdef \the@ipfilectr {}
\contentsline {part}{\numberline {III}Visualization}{135}{part.3}%
\gdef \the@ipfilectr {@-11}
\contentsline {chapter}{\numberline {6}funkyheatmap: Visualising data frames with mixed data types}{137}{chapter.6}%
\contentsline {section}{\numberline {6.1}Summary}{138}{section.6.1}%
\contentsline {section}{\numberline {6.2}Statement of need}{138}{section.6.2}%
\contentsline {section}{\numberline {6.3}Functionality}{139}{section.6.3}%
\contentsline {subsection}{\numberline {6.3.1}Example usage}{140}{subsection.6.3.1}%
\contentsline {section}{\numberline {6.4}Conclusion}{141}{section.6.4}%
\contentsline {section}{\numberline {6.5}Acknowledgements}{143}{section.6.5}%
\contentsline {section}{\numberline {6.6}Author Contributions}{143}{section.6.6}%
\contentsline {paragraph}{\nonumberline Common core}{144}{paragraph*.153}%
\contentsline {paragraph}{\nonumberline Code style}{144}{paragraph*.155}%
\contentsline {paragraph}{\nonumberline Testing}{145}{paragraph*.157}%
\contentsline {paragraph}{\nonumberline Diverging features}{145}{paragraph*.159}%
\gdef \the@ipfilectr {}
\contentsline {part}{\numberline {IV}Reflections}{149}{part.4}%
\gdef \the@ipfilectr {@-12}
\contentsline {chapter}{\numberline {7}Perspectives}{151}{chapter.7}%
\contentsline {section}{\numberline {7.1}Too many TI tools, too little guidance}{151}{section.7.1}%
\contentsline {paragraph}{\nonumberline dynchro aids in exploring trajectories}{153}{paragraph*.163}%
\contentsline {section}{\numberline {7.2}Interoperability is a community project}{153}{section.7.2}%
\contentsline {paragraph}{\nonumberline Community buy-in}{153}{paragraph*.165}%
\contentsline {paragraph}{\nonumberline Community needs}{154}{paragraph*.167}%
\contentsline {paragraph}{\nonumberline Community coordination}{155}{paragraph*.171}%
\contentsline {paragraph}{\nonumberline The future of anndataR}{156}{paragraph*.173}%
\contentsline {section}{\numberline {7.3}We must not overstate the importance of benchmarks}{156}{section.7.3}%
\contentsline {subsubsection}{\nonumberline Datasets}{156}{subsubsection*.175}%
\contentsline {subsubsection}{\nonumberline Ground truth}{157}{subsubsection*.177}%
\contentsline {subsubsection}{\nonumberline Metric selection}{157}{subsubsection*.179}%
\contentsline {subsubsection}{\nonumberline Other factors influence method selection}{158}{subsubsection*.181}%
\contentsline {subsubsection}{\nonumberline Changing best practices}{158}{subsubsection*.183}%
\gdef \the@ipfilectr {}
\gdef \the@ipfilectr {@-13}
\contentsline {chapter}{\numberline {8}Discussion}{163}{chapter.8}%
\contentsline {section}{\numberline {8.1}What is bioinformatics?}{163}{section.8.1}%
\contentsline {subsection}{\numberline {8.1.1}On bioinformatics}{163}{subsection.8.1.1}%
\contentsline {subsection}{\numberline {8.1.2}On data science}{165}{subsection.8.1.2}%
\contentsline {subsection}{\numberline {8.1.3}On science}{167}{subsection.8.1.3}%
\contentsline {subsection}{\numberline {8.1.4}To conclude}{169}{subsection.8.1.4}%
\contentsline {section}{\numberline {8.2}In practice}{169}{section.8.2}%
\contentsline {subsection}{\numberline {8.2.1}Code reproducibility}{170}{subsection.8.2.1}%
\contentsline {paragraph}{\nonumberline Replication difficulties}{171}{paragraph*.186}%
\contentsline {subsubsection}{\nonumberline Organise your code}{171}{subsubsection*.188}%
\contentsline {subsubsection}{\nonumberline Provide intermediary results}{171}{subsubsection*.190}%
\contentsline {subsubsection}{\nonumberline Keep track of dependency versions}{172}{subsubsection*.192}%
\contentsline {subsubsection}{\nonumberline Publish the code on a public repository}{172}{subsubsection*.194}%
\contentsline {subsection}{\numberline {8.2.2}Software maintainability}{172}{subsection.8.2.2}%
\contentsline {subsubsection}{\nonumberline Plan for maintenance}{173}{subsubsection*.196}%
\contentsline {subsubsection}{\nonumberline Carefully consider data formats}{173}{subsubsection*.198}%
\contentsline {subsubsection}{\nonumberline Limit dependencies}{174}{subsubsection*.200}%
\contentsline {subsection}{\numberline {8.2.3}Software maintenance is a shared responsibility}{174}{subsection.8.2.3}%
\contentsline {subsubsection}{\nonumberline Research labs and institutes}{174}{subsubsection*.202}%
\contentsline {subsubsection}{\nonumberline Consortia and communities}{175}{subsubsection*.204}%
\contentsline {subsubsection}{\nonumberline Funding agencies}{176}{subsubsection*.206}%
\contentsline {subsubsection}{\nonumberline Journals}{177}{subsubsection*.208}%
\contentsline {subsubsection}{\nonumberline Companies}{177}{subsubsection*.210}%
\contentsline {section}{\numberline {8.3}Standardization}{179}{section.8.3}%
\contentsline {subsection}{\numberline {8.3.1}What is a standard?}{179}{subsection.8.3.1}%
\contentsline {subsection}{\numberline {8.3.2}Open source organization}{180}{subsection.8.3.2}%
\contentsline {subsection}{\numberline {8.3.3}Open standards}{181}{subsection.8.3.3}%
\gdef \the@ipfilectr {}
\contentsline {part}{\numberline {V}Appendices}{191}{part.5}%
\gdef \the@ipfilectr {@-14}
\contentsline {chapter}{\numberline {A}Example workflow}{193}{appendix.A}%
\contentsline {section}{\numberline {A.1}Building the pipeline}{193}{section.A.1}%
\contentsline {subsection}{\numberline {A.1.1}Component: loading data (in Bash)}{193}{subsection.A.1.1}%
\contentsline {subsection}{\numberline {A.1.2}Component: select data (in Python)}{195}{subsection.A.1.2}%
\contentsline {subsection}{\numberline {A.1.3}Component: differential expression (in R)}{198}{subsection.A.1.3}%
\contentsline {subsection}{\numberline {A.1.4}Nextflow workflow}{202}{subsection.A.1.4}%
\contentsline {section}{\numberline {A.2}Running the workflow}{204}{section.A.2}%
\gdef \the@ipfilectr {}
\contentsline {chapter}{\numberline {B}Getting started with funkyheatmap}{207}{appendix.B}%
\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file