// Worked example for Linear (matrix-vector multiply):
//
//   input:   [1, 2, 3]
//   weights: [[0.1, 0.2, 0.3],    row 0: 0.1*1 + 0.2*2 + 0.3*3 = 1.4
//             [0.4, 0.5, 0.6]]    row 1: 0.4*1 + 0.5*2 + 0.6*3 = 3.2
//   output:  [1.4, 3.2]

// --- Value.cs (add inside the Value class) ---

/// <summary>
/// Dot product of two equal-length vectors: the sum of a[i] * b[i].
/// </summary>
/// <param name="a">First vector.</param>
/// <param name="b">Second vector; must have the same length as <paramref name="a"/>.</param>
/// <returns>A single <see cref="Value"/> accumulating the products.</returns>
/// <exception cref="ArgumentException">Thrown when the vectors differ in length.</exception>
public static Value Dot(List<Value> a, List<Value> b)
{
    // Guard explicitly: iterating only over a.Count would otherwise silently
    // truncate when b is longer, or throw an unhelpful out-of-range when shorter.
    if (a.Count != b.Count)
    {
        throw new ArgumentException(
            $"Vector lengths must match: {a.Count} vs {b.Count}.",
            nameof(b));
    }

    var result = new Value(0);
    for (int i = 0; i < a.Count; i++)
    {
        result += a[i] * b[i];
    }
    return result;
}

// --- Helpers.cs ---
namespace MicroGPT;

public static class Helpers
{
    /// <summary>
    /// Matrix-vector multiply. Each row of weights is multiplied element-by-element
    /// with input and summed into a single value.
    /// </summary>
    public static List<Value> Linear(List<Value> input, List<List<Value>> weights) =>
        [.. weights.Select(row => Value.Dot(row, input))];

    /// <summary>
    /// Converts raw scores (logits) into a probability distribution.
    /// </summary>
    public static List<Value> Softmax(List<Value> logits)
    {
        // Subtract the max logit before exponentiating for numerical stability;
        // the constant shift cancels out and leaves the probabilities unchanged.
        double maxVal = logits.Max(v => v.Data);
        var exponentials = logits.Select(v => (v - maxVal).Exp()).ToList();

        var total = new Value(0);
        foreach (Value e in exponentials)
        {
            total += e;
        }

        return [.. exponentials.Select(e => e / total)];
    }
}

// --- Chapter5Exercise.cs ---
using static MicroGPT.Helpers;

namespace MicroGPT;

public static class Chapter5Exercise
{
    /// <summary>
    /// Smoke-tests Linear and Softmax against hand-computed expected values,
    /// printing expected vs. actual to the console.
    /// </summary>
    public static void Run()
    {
        // Test Linear: a 2x3 weight matrix times a length-3 input vector
        var input = new List<Value> { new(1.0), new(2.0), new(3.0) };
        var weights = new List<List<Value>>
        {
            new() { new(0.1), new(0.2), new(0.3) }, // row 0: 0.1*1 + 0.2*2 + 0.3*3 = 1.4
            new() { new(0.4), new(0.5), new(0.6) }, // row 1: 0.4*1 + 0.5*2 + 0.6*3 = 3.2
        };

        List<Value> output = Linear(input, weights);
        Console.WriteLine("--- Linear ---");
        Console.WriteLine("Expected: 1.4 3.2");
        Console.Write("Got: ");
        foreach (Value v in output)
        {
            Console.Write($"{v.Data:F1} ");
        }
        Console.WriteLine();

        // Test Softmax: converts raw logits into probabilities that sum to 1
        var logits = new List<Value> { new(2.0), new(1.0), new(0.1) };
        List<Value> probabilities = Softmax(logits);
        Console.WriteLine("--- Softmax ---");
        Console.WriteLine(
            "Expected: 0.659 0.242 0.099 (sum to 1.0, largest logit gets highest prob)"
        );
        Console.Write("Got: ");
        foreach (Value p in probabilities)
        {
            Console.Write($"{p.Data:F3} ");
        }
        Console.WriteLine();
    }
}

// --- Program.cs (add to the command dispatch switch) ---
// case "ch5":
//     Chapter5Exercise.Run();
//     break;

// Run with:
//   dotnet run -- ch5
- Linear takes an input vector and a weight matrix, multiplies each row of weights element-by-element with the input, and sums each row into a single output value:
- Softmax takes a list of raw numbers and turns them into probabilities that add up to 1. For example, [2.0, 1.0, 0.1] becomes roughly [0.66, 0.24, 0.10]. The largest input gets the highest probability.