# config/experiments/alphafold2_full_reference.yaml
# Repository: pablo-reyes8/alphafold2-from-scratch (branch: main)
---
# Identification and provenance for this configuration document.
metadata:
  name: alphafold2_full_reference
  description: Reference AlphaFold2/OpenFold hyperparameters for future scaling.
  # Marks the document as a reference; see also
  # current_repo_alignment.consumed_directly_by_code.
  status: reference_only
  # Upstream files cited as the origin of the values in this document.
  sources:
    - https://github.com/google-deepmind/alphafold/blob/main/alphafold/model/config.py
    - https://github.com/aqlaboratory/openfold/blob/main/openfold/config.py
    - https://github.com/aqlaboratory/openfold/blob/main/openfold/utils/lr_schedulers.py
  # Caveats for readers: some settings listed here are not yet used by the repo.
  notes:
    - Includes template, extra MSA and auxiliary loss settings that the current repo does not fully consume yet.
    - Training defaults below reflect OpenFold reference settings when explicitly defined.

# Relates this nested reference document to the repo's flat runnable configs
# and records which model components the repo currently supports.
current_repo_alignment:
  purpose: Map the nested AlphaFold/OpenFold reference schema to the flat runnable configs under config/experiments/af2_*.yaml.
  consumed_directly_by_code: false
  # Each value is a dotted path that exists in this document. Each key is,
  # per `purpose`, presumably the matching dotted name in the flat configs
  # (TODO confirm against config/experiments/af2_*.yaml).
  flat_config_equivalents:
    data.max_msa_seqs: data.initial_training.max_msa_clusters
    data.max_extra_msa_seqs: data.common.max_extra_msa
    data.max_templates: globals.max_templates
    data.crop_size: data.initial_training.crop_size
    model.c_m: globals.c_m
    model.c_z: globals.c_z
    model.c_s: globals.c_s
    model.max_relpos: globals.max_relative_feature
    model.num_evoformer_blocks: model.evoformer.no_blocks
    model.num_structure_blocks: model.structure_module.no_blocks
    model.recycle_min_bin: model.recycling_embedder.min_bin
    model.recycle_max_bin: model.recycling_embedder.max_bin
    model.recycle_dist_bins: model.recycling_embedder.num_bins
    model.extra_msa_stack_enabled: model.extra_msa.enabled
    model.extra_msa_dim: model.extra_msa.c_in
    model.extra_msa_c_e: model.extra_msa.c_out
    model.extra_msa_num_blocks: model.extra_msa.no_blocks
    model.template_stack_enabled: model.template.enabled
    model.template_c_t: globals.c_t
    model.template_num_blocks: model.template.pair_stack.no_blocks
    # The model.* and loss.* bin-count keys below point at the same
    # heads.* entries (distogram and pLDDT), so two flat keys share one value.
    model.dist_bins: heads.distogram.num_bins
    model.plddt_bins: heads.plddt.num_bins
    loss.dist_num_bins: heads.distogram.num_bins
    loss.dist_min_bin: heads.distogram.min_bin
    loss.dist_max_bin: heads.distogram.max_bin
    loss.plddt_num_bins: heads.plddt.num_bins
    loss.plddt_inclusion_radius: heads.plddt.cutoff
  # Feature inventory, split into three lists by implementation state.
  current_support:
    implemented:
      - Evoformer trunk
      - extra MSA stack
      - template conditioning
      - recycling embedder
      - IPA-based structure module
      - distogram, pLDDT, and torsion heads/losses
    partial:
      - input feature pipeline
      - template retrieval pipeline
      - structure-module hyperparameter surface
    not_yet_implemented:
      - masked MSA objective
      - experimentally resolved head
      - violation loss
      - TM head
      - all-atom and side-chain reconstruction

# Values shared across sections. c_m, c_z, c_s, c_t, max_relative_feature and
# max_templates are targets of current_repo_alignment.flat_config_equivalents.
globals:
  c_m: 256
  c_z: 128
  c_t: 64
  # Same value as model.extra_msa.c_out (64); presumably the same quantity,
  # so keep the two in sync (TODO confirm).
  c_e: 64
  c_s: 384
  num_recycle: 3
  # Same value as model.input_embedder.relpos_k (32); presumably the same
  # quantity, so keep the two in sync (TODO confirm).
  max_relative_feature: 32
  max_templates: 4
  precision: bf16
  eps: 1.0e-8

# Data settings in three groups: `common`, `initial_training`, `finetuning`.
data:
  common:
    # Also set under initial_training (1024) and finetuning (5120).
    max_extra_msa: 1024
    resample_msa_in_recycling: true
    reduce_msa_clusters_by_max_templates: false
    # false here, matching model.template.enabled (also false).
    use_templates: false
    use_template_torsion_angles: false
    masked_msa:
      profile_prob: 0.1
      same_prob: 0.1
      uniform_prob: 0.1
      replace_fraction: 0.15
  # crop_size and max_msa_clusters here are the values that
  # current_repo_alignment.flat_config_equivalents points at.
  initial_training:
    crop_size: 256
    max_msa_clusters: 128
    max_extra_msa: 1024
    subsample_templates: true
    block_delete_msa: true
    clamp_prob: 0.9
    max_distillation_msa_clusters: 1000
    distillation_prob: 0.75
  # Larger crop and MSA sizes than initial_training (384 / 512 / 5120).
  finetuning:
    crop_size: 384
    max_msa_clusters: 512
    max_extra_msa: 5120
    subsample_templates: true
    block_delete_msa: true
    # The two weights below are non-zero, whereas loss.violation.weight and
    # loss.experimentally_resolved.weight are 0.0. Presumably these are the
    # fine-tuning overrides for those loss weights (TODO confirm).
    violation_weight: 1.0
    experimentally_resolved_weight: 0.01

# Per-module architecture settings. Several entries are targets of
# current_repo_alignment.flat_config_equivalents.
model:
  input_embedder:
    tf_dim: 22
    msa_dim: 49
    # Same value as globals.max_relative_feature (32).
    relpos_k: 32
  # Binning used by the recycling embedder (15 bins from 3.25 to 20.75);
  # distinct from heads.distogram and model.template.distogram.
  recycling_embedder:
    min_bin: 3.25
    max_bin: 20.75
    num_bins: 15
  # Shares its hyperparameter keys with `evoformer` below; the values differ
  # only in no_blocks (4 vs 48) and c_hidden_msa_att (8 vs 32).
  extra_msa:
    enabled: true
    c_in: 25
    # Same value as globals.c_e (64).
    c_out: 64
    no_blocks: 4
    c_hidden_msa_att: 8
    c_hidden_opm: 32
    c_hidden_mul: 128
    c_hidden_pair_att: 32
    no_heads_msa: 8
    no_heads_pair: 4
    transition_n: 4
    msa_dropout: 0.15
    pair_dropout: 0.25
  evoformer:
    no_blocks: 48
    c_hidden_msa_att: 32
    c_hidden_opm: 32
    c_hidden_mul: 128
    c_hidden_pair_att: 32
    no_heads_msa: 8
    no_heads_pair: 4
    transition_n: 4
    msa_dropout: 0.15
    pair_dropout: 0.25
  # Every boolean switch in this section is false in this document,
  # consistent with data.common.use_templates being false.
  template:
    enabled: false
    embed_angles: false
    use_unit_vector: false
    average_templates: false
    offload_templates: false
    # Template-specific binning (39 bins from 3.25 to 50.75).
    distogram:
      min_bin: 3.25
      max_bin: 50.75
      num_bins: 39
    pair_stack:
      no_blocks: 2
      c_hidden_tri_att: 16
      c_hidden_tri_mul: 64
      no_heads: 4
      pair_transition_n: 2
      dropout_rate: 0.25
  # current_repo_alignment.current_support lists the structure-module
  # hyperparameter surface as only partially supported by the repo.
  structure_module:
    no_blocks: 8
    c_ipa: 16
    c_resnet: 128
    no_heads_ipa: 12
    no_qk_points: 4
    no_v_points: 8
    no_transition_layers: 1
    no_resnet_blocks: 2
    no_angles: 7
    dropout_rate: 0.1
    trans_scale_factor: 10.0

# Output-head settings. Several values are repeated under `loss`; the
# duplicated entries carry identical values and should be edited together.
heads:
  # Same num_bins / min_bin / max_bin / weight as loss.distogram.
  distogram:
    num_bins: 64
    weight: 0.3
    min_bin: 2.3125
    max_bin: 21.6875
  # Same num_bins / cutoff / weight as loss.plddt_loss.
  plddt:
    num_bins: 50
    hidden_channels: 128
    cutoff: 15.0
    weight: 0.01
  # weight 0.0 here; data.finetuning.experimentally_resolved_weight is 0.01.
  experimentally_resolved:
    output_channels: 37
    weight: 0.0
  # output_channels equals loss.masked_msa.num_classes (23); same weight.
  masked_msa:
    output_channels: 23
    weight: 2.0
  # Disabled with weight 0.0; there is no `tm` entry under `loss`.
  tm:
    enabled: false
    num_bins: 64
    max_bin: 31
    weight: 0.0

# Loss-term settings. Each term carries its own `weight`; terms with
# weight 0.0 (experimentally_resolved, violation) are present but zeroed.
loss:
  fape:
    # backbone uses `loss_unit_distance` where sidechain uses `length_scale`;
    # both are 10.0. Presumably the same scaling role (TODO confirm).
    backbone:
      clamp_distance: 10.0
      loss_unit_distance: 10.0
      weight: 0.5
    sidechain:
      clamp_distance: 10.0
      length_scale: 10.0
      weight: 0.5
    weight: 1.0
    eps: 1.0e-4
  # Same values as heads.distogram.
  distogram:
    min_bin: 2.3125
    max_bin: 21.6875
    num_bins: 64
    weight: 0.3
  # Same cutoff / num_bins / weight as heads.plddt.
  plddt_loss:
    cutoff: 15.0
    num_bins: 50
    weight: 0.01
  supervised_chi:
    chi_weight: 0.5
    angle_norm_weight: 0.01
    weight: 1.0
  # num_classes equals heads.masked_msa.output_channels (23); same weight.
  masked_msa:
    num_classes: 23
    weight: 2.0
  # weight 0.0 here; data.finetuning.experimentally_resolved_weight is 0.01.
  experimentally_resolved:
    min_resolution: 0.1
    max_resolution: 3.0
    weight: 0.0
  # weight 0.0 here; data.finetuning.violation_weight is 1.0.
  violation:
    violation_tolerance_factor: 12.0
    clash_overlap_tolerance: 1.5
    weight: 0.0

# Optimizer selection and its scalar settings.
optimizer:
  name: Adam
  # Same value as scheduler.max_lr (1.0e-3).
  learning_rate: 1.0e-3
  eps: 1.0e-5

# Learning-rate schedule settings. metadata.sources cites OpenFold's
# lr_schedulers.py; presumably the rate ramps from base_lr to max_lr over
# warmup_no_steps and is then scaled by decay_factor every
# decay_every_n_steps once start_decay_after_n_steps is reached
# (TODO confirm against the scheduler implementation).
scheduler:
  name: alphafold_plateau_exponential
  base_lr: 0.0
  # Same value as optimizer.learning_rate (1.0e-3).
  max_lr: 1.0e-3
  warmup_no_steps: 1000
  start_decay_after_n_steps: 50000
  decay_every_n_steps: 50000
  decay_factor: 0.95

# Presumably the exponential-moving-average setting for model weights
# (TODO confirm where this is consumed).
ema:
  decay: 0.999