Commit d61a76a

Implement training and evaluation orchestration for AlphaFold2
- Added test for training and evaluation metrics preference when both loaders are present.
- Introduced training utilities for optimization, metrics, checkpointing, and runtime in `training/__init__.py`.
- Developed checkpoint save and restore utilities in `training/checkpoints.py`, including model serialization and state tracking.
- Created efficient metrics computation for RMSD, TM-score, and GDT-TS in `training/efficient_metrics.py`.
- Implemented evaluation utilities for AlphaFold2-like runs in `training/eval_one_epoch.py`.
- Added parallel training helpers for data, model, and hybrid execution modes in `training/train_parallel/__init__.py`.
- Developed distributed and data-parallel helpers for multi-GPU training in `training/train_parallel/data_parallel.py`.
- Created two-stage model-parallel wrappers for the AlphaFold2 model in `training/train_parallel/model_parallel.py`.
1 parent 9c8c9c3 commit d61a76a
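The commit message above mentions efficient RMSD computation in `training/efficient_metrics.py`. Structure-comparison RMSD is conventionally computed after optimal superposition via the Kabsch algorithm; the following is a minimal NumPy sketch of that idea, not the repository's actual implementation (the function name and API here are illustrative):

```python
import numpy as np

def kabsch_rmsd(pred: np.ndarray, target: np.ndarray) -> float:
    """Superimpose pred onto target with the Kabsch algorithm, then return RMSD.

    Both inputs are (N, 3) coordinate arrays (e.g. CA atoms). Translation and
    rotation are removed before measuring deviation, as is standard for
    structure metrics. Illustrative sketch, not the repo's API.
    """
    # Center both coordinate sets at the origin (removes translation).
    p = pred - pred.mean(axis=0)
    q = target - target.mean(axis=0)
    # Optimal rotation from the SVD of the 3x3 covariance matrix.
    u, _, vt = np.linalg.svd(p.T @ q)
    # Correct for a possible reflection so the result is a proper rotation.
    d = np.sign(np.linalg.det(vt.T @ u.T))
    rot = vt.T @ np.diag([1.0, 1.0, d]) @ u.T
    aligned = p @ rot.T
    return float(np.sqrt(((aligned - q) ** 2).sum(axis=1).mean()))
```

TM-score and GDT-TS follow the same pattern (superpose first, then score per-residue distances against thresholds), which is why a shared alignment step is the natural core of an "efficient metrics" module.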

57 files changed: +1723 −151 lines

.github/release.yml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+changelog:
+  exclude:
+    labels:
+      - skip-changelog
+  categories:
+    - title: Breaking Changes
+      labels:
+        - breaking-change
+    - title: Architecture
+      labels:
+        - architecture
+        - model
+    - title: Training and Evaluation
+      labels:
+        - training
+        - evaluation
+    - title: Data Pipeline
+      labels:
+        - data
+    - title: Tests and CI
+      labels:
+        - testing
+        - ci
+    - title: Documentation
+      labels:
+        - documentation
+    - title: Other Changes
+      labels:
+        - '*'

.github/release_template.md

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+## Summary
+-
+
+## Highlights
+-
+
+## Validation
+-
+
+## Notes
+-
+
+## Known Limitations
+-

.github/workflows/ci.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+      - master
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11"]
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+
+      - name: Install package and test dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e '.[dev,data]'
+
+      - name: Run CPU-safe module and data tests
+        run: |
+          python -m pytest -q \
+            tests/test_module_integrity.py \
+            tests/test_ipa.py \
+            tests/test_row_column_attention.py \
+            tests/test_triangle_attention.py \
+            tests/test_triangle_multiplication.py \
+            tests/test_opm.py \
+            tests/test_extra_msa_stack.py \
+            tests/test_template_stack.py \
+            tests/test_metrics.py \
+            tests/test_loader_wrappers.py \
+            tests/test_showcase_loader.py \
+            tests/test_train_eval_orchestration.py \
+            tests/test_eval_one_epoch.py

.gitignore

Lines changed: 9 additions & 2 deletions
@@ -3,12 +3,19 @@ __pycache__/
 .venv/
 *.pyc
 *.pyo
+*.pyd
+*.egg-info/
+build/
+dist/
+.coverage
+htmlcov/
+.mypy_cache/
+.ruff_cache/
 data/af_subset/
 checkpoints*/
+artifacts/
 .venv_tmp_data
 .venv_tmp_crop
 .agents
 .skills-lock
 
-
-

CONTRIBUTING.md

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,9 @@ Create an isolated environment and install dependencies:
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
+
+# Optional editable install for local CLI entry points
+pip install -e '.[dev,data]'
 ```
 
 If you work from Conda, use the equivalent environment setup and install the same requirements.

Dockerfile

Lines changed: 3 additions & 4 deletions
@@ -12,10 +12,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     && rm -rf /var/lib/apt/lists/*
 
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r /tmp/requirements.txt
-
 COPY . /app
 
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -e ".[data]"
+
 CMD ["bash"]

README.md

Lines changed: 17 additions & 9 deletions
@@ -10,6 +10,7 @@
 
 [![Python](https://img.shields.io/badge/Python-3.10%2B-blue.svg)](#installation)
 [![PyTorch](https://img.shields.io/badge/PyTorch-2.x-ee4c2c.svg)](#installation)
+[![CI](https://github.com/pablo-reyes8/alpha-fold2/actions/workflows/ci.yml/badge.svg)](https://github.com/pablo-reyes8/alpha-fold2/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](#license)
 [![Status](https://img.shields.io/badge/status-Research%20Prototype-orange)](#project-status)
 
@@ -70,7 +71,7 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 - **Config-Driven Experiments:** Main settings such as model size, depth, learning rate, and EMA can be adjusted through YAML files.
 - **Feature-Rich Loader:** The current dataloader returns sequence/MSA tensors plus `extra_msa_feat`, `extra_msa_mask`, `template_angle_feat`, `template_pair_feat`, and `template_mask` when those artifacts are present in the Foldbench assets.
 - **Data Inspection Utilities:** Provides simple CLI tools to inspect manifests, preview A3M files, and visualize CA distance maps before training.
-- **Notebook-Friendly Workflow:** The main walkthrough notebook is [Alpha_Fold_English.ipynb](notebooks/Alpha_Fold_English.ipynb), and a local training-focused version is available in [notebooks\train_model_setup_examples.ipynb](notebooks/train_model_local.ipynb).
+- **Notebook-Friendly Workflow:** The main walkthrough notebook is [Alpha_Fold_English.ipynb](notebooks/Alpha_Fold_English.ipynb), and a local training-focused walkthrough is available in [train_model_setup_examples.ipynb](notebooks/train_model_setup_examples.ipynb).
 
 ---
 
@@ -84,7 +85,8 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 ├── data/            # manifest-based data pipeline plus a tiny bundled showcase subset
 │   ├── download_data.sh
 │   ├── foldbench.py
-│   ├── preproces_data.py
+│   ├── preprocess_data.py
+│   ├── loader_wrappers.py
 │   ├── dataloaders.py
 │   ├── collate_proteins.py
 │   ├── visualize_data.py
@@ -94,7 +96,7 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 │   └── losses/
 ├── training/        # single-device training loop, ablation registry, AMP, EMA, checkpoints, and metrics
 │   ├── ablations/   # predefined architecture and loss ablation presets
-│   └── train_paralel/   # DDP and model-parallel helpers
+│   └── train_parallel/  # DDP and model-parallel helpers
 ├── scripts/         # operational CLIs for data prep, validation, and training
 │   ├── prepare_data.py
 │   ├── inspect_data.py
@@ -108,6 +110,7 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 ├── notebooks/       # interactive experiments for Colab or local exploration
 ├── paper/           # reference material from the AlphaFold paper and notes
 ├── assets/          # README visuals and showcase media
+├── pyproject.toml
 ├── requirements.txt
 ├── Dockerfile
 └── README.md
@@ -116,7 +119,8 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 ### Key files
 
 - [data/download_data.sh](data/download_data.sh) — downloads the Foldbench subset from a target list or CSV input.
-- [data/preproces_data.py](data/preproces_data.py) — rebuilds manifests, normalizes local paths, and emits YAML summaries.
+- [data/preprocess_data.py](data/preprocess_data.py) — rebuilds manifests, normalizes local paths, and emits YAML summaries.
+- [data/loader_wrappers.py](data/loader_wrappers.py) — convenience builders for plain dataloaders and deterministic train/eval splits over one dataset.
 - [data/dataloaders.py](data/dataloaders.py) — dataset layer that maps manifests, mmCIF structures, MSA files, and torsion targets into tensors.
 - [scripts/prepare_data.py](scripts/prepare_data.py) — high-level CLI for downloading data, refreshing manifests, and smoke-testing loaders.
 - [model/alphafold2.py](model/alphafold2.py) — top-level AlphaFold2-like model that wires embeddings, Evoformer, structure, recycling, and heads.
@@ -125,11 +129,12 @@ To make experimentation easier to reproduce, the repository follows a **manifest
 - [model/alphafold2_full_loss.py](model/alphafold2_full_loss.py) — full training loss orchestrator combining FAPE, distogram, pLDDT, and torsion supervision.
 - [model/losses/](model/losses/) — component losses and helpers for geometry-aware supervision.
 - [training/train_one_epoch.py](training/train_one_epoch.py) — per-epoch optimization routine with AMP, recycling, logging, and metric collection.
+- [training/eval_one_epoch.py](training/eval_one_epoch.py) — evaluation loop that mirrors training-time logging without optimizer steps.
 - [training/train_alphafold2.py](training/train_alphafold2.py) — full training orchestrator for checkpointing, resume, monitoring, and epoch scheduling.
 - [training/ablations/catalog.py](training/ablations/catalog.py) — registry of prebuilt architecture and loss ablations resolved on top of a base experiment config.
 - [training/ablations/runtime.py](training/ablations/runtime.py) — resolves baseline or named ablations into a safe config variant without changing the default training path.
-- [training/train_paralel/data_parallel.py](training/train_paralel/data_parallel.py) — DDP utilities, distributed samplers, and rank synchronization helpers.
-- [training/train_paralel/model_parallel.py](training/train_paralel/model_parallel.py) — two-stage model-parallel wrapper for splitting AlphaFold2 across GPUs.
+- [training/train_parallel/data_parallel.py](training/train_parallel/data_parallel.py) — DDP utilities, distributed samplers, and rank synchronization helpers.
+- [training/train_parallel/model_parallel.py](training/train_parallel/model_parallel.py) — two-stage model-parallel wrapper for splitting AlphaFold2 across GPUs.
 - [scripts/train_model.py](scripts/train_model.py) — standard config-driven single-device training launcher.
 - [scripts/train_parallel.py](scripts/train_parallel.py) — multi-GPU launcher for DDP, model parallelism, and hybrid setups.
 - [scripts/train_ablation.py](scripts/train_ablation.py) — single-device launcher for named architecture and loss ablations.
@@ -150,6 +155,9 @@ The repository includes a tiny downloaded test subset under [data/af_subset_show
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
+
+# Editable install with package metadata and CLI entry points
+pip install -e '.[dev,data]'
 ```
 
 ### 2) Download the subset
@@ -167,7 +175,7 @@ python3 scripts/prepare_data.py download --targets-csv data/Proteinas_secuencias
 ### 3) Rebuild the manifest with local paths
 
 ```bash
-python3 -m data.preproces_data \
+python3 -m data.preprocess_data \
   --config config/data/foldbench_subset.yaml \
   --json-path data/af_subset/jsons/fb_protein.json \
   --msa-root data/af_subset/foldbench_msas \
@@ -203,7 +211,7 @@ dataset = FoldbenchProteinDataset(manifest_csv="data/Proteinas_secuencias.csv")
 
 ### Minimal Python setup
 
-The full notebook [notebooks/train_model_local.ipynb](notebooks/train_model_local.ipynb) exposes many knobs, but the smallest useful training setup looks like this:
+The full notebook [notebooks/train_model_setup_examples.ipynb](notebooks/train_model_setup_examples.ipynb) exposes many knobs, but the smallest useful training setup looks like this:
 
 ```python
 import torch
@@ -443,7 +451,7 @@ Low-VRAM preset for Colab-class GPUs in the `15-20 GB` range, using a reduced tr
 
 This file is a **reference document**, not a statement that the current code already consumes every field end-to-end.
 
-Its role is to provide a structured target for future extension and to document the broader AlphaFold/OpenFold design space.
+Its role is to provide a structured target for future extension and to document the broader AlphaFold/OpenFold design space. It also includes a `current_repo_alignment` section that maps the nested reference schema to the flat config fields consumed by the current codebase.
 
 ---
 
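The README's key-files list describes `training/train_alphafold2.py` as an orchestrator for checkpointing and resume. A framework-agnostic sketch of one core checkpointing technique, atomic writes so a crash mid-save never corrupts the latest checkpoint, is shown below; the repository presumably serializes real model/optimizer state (e.g. via `torch.save`), and all names here are illustrative:

```python
import os
import pickle
import tempfile

def save_checkpoint(path: str, step: int, model_state: dict, optim_state: dict) -> None:
    """Atomically write a checkpoint: dump to a temp file, then rename.

    os.replace is atomic on POSIX, so readers always see either the old
    checkpoint or the complete new one, never a half-written file.
    """
    payload = {"step": step, "model": model_state, "optim": optim_state}
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    with os.fdopen(fd, "wb") as f:
        pickle.dump(payload, f)
    os.replace(tmp, path)

def load_checkpoint(path: str) -> dict:
    """Restore the saved step and state dicts for resume."""
    with open(path, "rb") as f:
        return pickle.load(f)
```

Resume then amounts to calling `load_checkpoint` before the epoch loop and restoring the step counter along with model and optimizer state.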
config/data/foldbench_subset.yaml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ metadata:
   provenance:
     source_manifest_csv: data/Proteinas_secuencias.csv
     source_note: The checked-in CSV was generated in Colab and stores /content paths.
-    refresh_note: Re-run data.preproces_data locally to rewrite paths for your machine.
+    refresh_note: Re-run data.preprocess_data locally to rewrite paths for your machine.
 
 paths:
   dataset_root: data/af_subset

config/experiments/af2_low_vram.yaml

Lines changed: 4 additions & 0 deletions
@@ -20,6 +20,9 @@ data:
   loader:
     batch_size: 1
     shuffle: true
+    eval_size: 1
+    eval_shuffle: false
+    split_seed: 42
     num_workers: 0
     pin_memory: false
@@ -112,6 +115,7 @@ trainer:
   run_name: af2_low_vram
   save_every: 1
   save_last: true
+  eval_every: 1
 
 geometry:
   ideal_backbone_local:
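The new loader keys above (`eval_size`, `eval_shuffle`, `split_seed`) imply a seeded, reproducible train/eval split over one dataset. A minimal sketch of that idea, making no assumptions about the actual `loader_wrappers` API (names here are illustrative):

```python
import random

def split_indices(n_items: int, eval_size: int, split_seed: int = 42):
    """Deterministically partition dataset indices into (train, eval).

    Seeding a private Random instance makes the split reproducible across
    runs and independent of the global random state, so a resumed run
    evaluates on exactly the same held-out samples.
    """
    indices = list(range(n_items))
    random.Random(split_seed).shuffle(indices)
    return indices[eval_size:], indices[:eval_size]

# Mirroring the config above: one held-out sample, fixed seed 42.
train_idx, eval_idx = split_indices(n_items=8, eval_size=1, split_seed=42)
```

The two index lists would then back separate dataloaders, with `eval_shuffle: false` keeping evaluation order stable between epochs.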

config/experiments/alphafold2_full_reference.yaml

Lines changed: 50 additions & 0 deletions
@@ -10,6 +10,56 @@ metadata:
   - Includes template, extra MSA and auxiliary loss settings that the current repo does not fully consume yet.
   - Training defaults below reflect OpenFold reference settings when explicitly defined.
 
+current_repo_alignment:
+  purpose: Map the nested AlphaFold/OpenFold reference schema to the flat runnable configs under config/experiments/af2_*.yaml.
+  consumed_directly_by_code: false
+  flat_config_equivalents:
+    data.max_msa_seqs: data.initial_training.max_msa_clusters
+    data.max_extra_msa_seqs: data.common.max_extra_msa
+    data.max_templates: globals.max_templates
+    data.crop_size: data.initial_training.crop_size
+    model.c_m: globals.c_m
+    model.c_z: globals.c_z
+    model.c_s: globals.c_s
+    model.max_relpos: globals.max_relative_feature
+    model.num_evoformer_blocks: model.evoformer.no_blocks
+    model.num_structure_blocks: model.structure_module.no_blocks
+    model.recycle_min_bin: model.recycling_embedder.min_bin
+    model.recycle_max_bin: model.recycling_embedder.max_bin
+    model.recycle_dist_bins: model.recycling_embedder.num_bins
+    model.extra_msa_stack_enabled: model.extra_msa.enabled
+    model.extra_msa_dim: model.extra_msa.c_in
+    model.extra_msa_c_e: model.extra_msa.c_out
+    model.extra_msa_num_blocks: model.extra_msa.no_blocks
+    model.template_stack_enabled: model.template.enabled
+    model.template_c_t: globals.c_t
+    model.template_num_blocks: model.template.pair_stack.no_blocks
+    model.dist_bins: heads.distogram.num_bins
+    model.plddt_bins: heads.plddt.num_bins
+    loss.dist_num_bins: heads.distogram.num_bins
+    loss.dist_min_bin: heads.distogram.min_bin
+    loss.dist_max_bin: heads.distogram.max_bin
+    loss.plddt_num_bins: heads.plddt.num_bins
+    loss.plddt_inclusion_radius: heads.plddt.cutoff
+  current_support:
+    implemented:
+      - Evoformer trunk
+      - extra MSA stack
+      - template conditioning
+      - recycling embedder
+      - IPA-based structure module
+      - distogram, pLDDT, and torsion heads/losses
+    partial:
+      - input feature pipeline
+      - template retrieval pipeline
+      - structure-module hyperparameter surface
+    not_yet_implemented:
+      - masked MSA objective
+      - experimentally resolved head
+      - violation loss
+      - TM head
+      - all-atom and side-chain reconstruction
+
 globals:
   c_m: 256
   c_z: 128
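The `flat_config_equivalents` table above maps flat config keys to dotted paths into the nested reference schema. A generic way to consume such a table is a dotted-path lookup over the parsed YAML dict; the helper below is a hypothetical sketch (the repo is not stated to ship one), shown on a tiny hand-built fragment of the reference config:

```python
def get_dotted(cfg: dict, dotted: str, default=None):
    """Resolve a dotted path like 'globals.c_m' against a nested dict."""
    node = cfg
    for key in dotted.split("."):
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node

def flatten_with_alignment(nested_cfg: dict, alignment: dict) -> dict:
    """Build a flat config from the nested reference, using a mapping of
    flat key -> dotted path (the shape of flat_config_equivalents above)."""
    return {flat: get_dotted(nested_cfg, path) for flat, path in alignment.items()}

# Hand-built fragment of the nested reference, mirroring the globals above.
reference = {"globals": {"c_m": 256, "c_z": 128}}
mapping = {"model.c_m": "globals.c_m", "model.c_z": "globals.c_z"}
flat = flatten_with_alignment(reference, mapping)
```

Missing paths fall back to `default`, which matches the file's stated role as a reference that the code does not yet fully consume.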
