Training script

Kaweees · Kaweees · commit bd041635e2cb · 2025-12-30T09:13:02.000-08:00
diff --git a/examples/train.zig b/examples/train.zig
@@ -29,39 +29,59 @@ pub fn main() !void {
     var sizes = [_]usize{ 3, 2, 1 };
 
     // Initialize the neural network
-    const mlp = MLPType.new(sizes.len - 1, sizes[0..]);
+    const model = MLPType.new(sizes.len - 1, sizes[0..]);
 
-    const inputs = [_][3]*ValueType{
-        [_]*ValueType{ ValueType.new(2), ValueType.new(3), ValueType.new(-1) },
-        [_]*ValueType{ ValueType.new(3), ValueType.new(-1), ValueType.new(0.5) },
-        [_]*ValueType{ ValueType.new(0.5), ValueType.new(1), ValueType.new(1) },
-        [_]*ValueType{ ValueType.new(1), ValueType.new(2), ValueType.new(3) },
+    // dataset
+    const X = [_][3]f64{
+        .{ 2.0, 3.0, -1.0 },
+        .{ 3.0, -1.0, 0.5 },
+        .{ 0.5, 1.0, 1.0 },
+        .{ 1.0, 1.0, -1.0 },
     };
+    const y = [_]f64{ 1.0, -1.0, -1.0, 1.0 };
 
-    mlp.draw_graph("assets/img/mlp");
+    const lr = 1e-2;
+    const epochs: usize = 100;
 
-    var output: []*ValueType = undefined;
-    for (inputs) |in| {
-        // Forward pass through the layer
-        output = mlp.forward(@constCast(&in));
-        std.debug.print("Layer output: {d:7.4}\n", .{output[0].data});
-        for (output) |o| {
-            _ = o.draw_graph("assets/img/perceptron");
-        }
-    }
+    // training loop
+    for (0..epochs) |epoch| {
+        // Zero out the gradients
+        model.zero_grad();
+
+        // Accumulate loss across all samples (like the reference implementation)
+        var loss: ?*ValueType = null;
+        var first = true;
+
+        var i: usize = 0;
+        while (i < X.len) : (i += 1) {
+            var inputs: [X[i].len]*ValueType = undefined;
+            for (&inputs, 0..) |*input, j| {
+                input.* = ValueType.new(X[i][j]);
+            }
 
-    const t1 = TensorType.new(&[_]f64{ 1, 2, 3, 4 });
-    std.debug.print("t1: {d:.4}\n", .{t1.data[0].data});
+            const z = model.forward(&inputs);
+            const ypred = z[0];
 
-    // outputs now contains 1 ValueType pointer (final layer has 1 neuron)
-    const final_output = output[0];
-    std.debug.print("Layer output: {d:.4}\n", .{final_output.data});
+            const ygt = ValueType.new(y[i]);
+            const diff = ypred.sub(ygt);
+            const sq = diff.mul(diff);
 
-    std.debug.print("output.data: {d:.4}\n", .{final_output.data});
-    std.debug.print("output.grad: {d:.4}\n", .{final_output.grad});
+            if (first) {
+                loss = sq;
+                first = false;
+            } else {
+                loss = loss.?.add(sq);
+            }
+        }
+
+        const total_loss = loss.?.data;
 
-    final_output.backwardPass(alloc);
+        // Single backward pass on accumulated loss
+        loss.?.backwardPass(alloc);
 
-    std.debug.print("output.data: {d:.4}\n", .{final_output.data});
-    std.debug.print("output.grad: {d:.4}\n", .{final_output.grad});
+        // Update parameters with SGD
+        model.update_parameters(lr);
+
+        std.debug.print("Epoch={d:4} loss={d:.6}\n", .{ epoch, total_loss });
+    }
 }