From 4f8acdaa98bcbb21c8950499fd2be5211bc39cb5 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Fri, 15 Nov 2024 14:27:11 +0200 Subject: [PATCH 01/15] Add initial support for TensorQ8 in the front-end of the API --- tornado-api/src/main/java/module-info.java | 1 + .../tornado/api/types/tensors/Float16.java | 7 ++ .../tornado/api/types/tensors/GGMLType.java | 66 +++++++++++ .../tornado/api/types/tensors/Shape.java | 3 +- .../tornado/api/types/tensors/TensorQ8.java | 105 ++++++++++++++++++ 5 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java create mode 100644 tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java create mode 100644 tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java diff --git a/tornado-api/src/main/java/module-info.java b/tornado-api/src/main/java/module-info.java index 3eb1b3838c..f1a7686948 100644 --- a/tornado-api/src/main/java/module-info.java +++ b/tornado-api/src/main/java/module-info.java @@ -16,6 +16,7 @@ * */ module tornado.api { + requires jdk.unsupported; exports uk.ac.manchester.tornado.api; exports uk.ac.manchester.tornado.api.annotations; exports uk.ac.manchester.tornado.api.common; diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java new file mode 100644 index 0000000000..be74aa2965 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java @@ -0,0 +1,7 @@ +package uk.ac.manchester.tornado.api.types.tensors; + + +public final class Float16 { + public static final int BYTES = 2; +} + diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java new file mode 100644 index 0000000000..8811de6914 --- 
/dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java @@ -0,0 +1,66 @@ +package uk.ac.manchester.tornado.api.types.tensors; + +public enum GGMLType { + F32(Float.BYTES), + F16(Float16.BYTES), + Q4_0(Float16.BYTES + 16 * Byte.BYTES, 32), + Q4_1(2 * Float16.BYTES + 16 * Byte.BYTES, 32), + UNSUPPORTED_Q4_2(Integer.MAX_VALUE), // support has been removed + UNSUPPORTED_Q4_3(Integer.MAX_VALUE), // support has been removed + Q5_0(Integer.MAX_VALUE), + Q5_1(Integer.MAX_VALUE), + Q8_0(Float16.BYTES + 32 * Byte.BYTES, 32), + Q8_1(32 * Byte.BYTES + 2 * Float.BYTES, 32), + // k-quantizations + Q2_K(Integer.MAX_VALUE), + Q3_K(Integer.MAX_VALUE), + Q4_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 2, GGMLType.QK_K), + Q5_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 8 + GGMLType.QK_K / 2, GGMLType.QK_K), + Q6_K(GGMLType.QK_K / 2 + GGMLType.QK_K / 4 + GGMLType.QK_K / 16 + Float16.BYTES, GGMLType.QK_K), + Q8_K(Integer.MAX_VALUE), + I8(Byte.BYTES), + I16(Short.BYTES), + I32(Integer.BYTES); + + private static final GGMLType[] VALUES = values(); + + private final int typeSize; + + private final int blockSize; + + public int getTypeSize() { + return typeSize; + } + + public int getBlockSize() { + return blockSize; + } + + public static GGMLType fromId(int id) { + return VALUES[id]; + } + + GGMLType(int typeSize) { + this(typeSize, 1); + } + + public long byteSizeFor(int numberOfElements) { + long t = numberOfElements * (long) getTypeSize(); + assert t % getBlockSize() == 0; + return Math.toIntExact(t / getBlockSize()); + } + + public static final int QK_K = 256; // or 64? 
+ + GGMLType(int typeSize, int blockSize) { + assert blockSize > 0; + assert typeSize > 0; + assert isPowerOf2(blockSize); + this.typeSize = typeSize; + this.blockSize = blockSize; + } + + private static boolean isPowerOf2(int n) { + return n > 0 && (n & (n - 1)) == 0; + } +} \ No newline at end of file diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java index a678cd01d5..d8138651cf 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java @@ -45,7 +45,8 @@ public long[] getDimensions() { * @return the total size of the shape as an int */ public int getSize() { - return (int) Arrays.stream(dimensions).reduce(1, (a, b) -> a * b); + assert Arrays.stream(dimensions).allMatch(i -> i > 0); + return (int) Arrays.stream(dimensions).reduce(Math::multiplyExact).orElseThrow(); } @Override diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java new file mode 100644 index 0000000000..4d17b5cab6 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -0,0 +1,105 @@ +package uk.ac.manchester.tornado.api.types.tensors; + +import sun.misc.Unsafe; +import uk.ac.manchester.tornado.api.types.arrays.HalfFloatArray; +import uk.ac.manchester.tornado.api.types.arrays.LongArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.reflect.Field; + +public class TensorQ8 extends Tensor { + private final DType dType; + private final Shape shape; + + private final HalfFloatArray tensorStorage; + + private int numberOfElements; + + + public TensorQ8(Shape shape) { + super(DType.HALF_FLOAT, shape); + this.shape = shape; + 
this.numberOfElements = shape.getSize(); + this.dType = DType.HALF_FLOAT; + this.tensorStorage = new HalfFloatArray(numberOfElements); + } + + + public TensorQ8(int size, MemorySegment memorySegment) { + super(DType.HALF_FLOAT, new Shape(size)); + this.dType = DType.HALF_FLOAT; + this.shape = new Shape(size); + this.numberOfElements = size; + this.tensorStorage = HalfFloatArray.fromSegment(memorySegment); + } + + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, memorySegment.address()+offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, memorySegment.address()+offset); + } + + public float getFloat(int index) { + assert 0 <= index && index < numberOfElements; + int blockIndex = index / GGMLType.Q8_0.getBlockSize(); + int withinBlockIndex = index % GGMLType.Q8_0.getBlockSize(); + int blockOffset = blockIndex * GGMLType.Q8_0.getTypeSize(); + byte quant = readByte(tensorStorage.getSegment(), blockOffset + Float16.BYTES + withinBlockIndex); + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegment(), blockOffset)); + return quant * scale; + } + + @Override + public Shape getShape() { + return null; + } + + @Override + public String getDTypeAsString() { + return ""; + } + + @Override + public DType getDType() { + return null; + } + + @Override + public int getSize() { + return 0; + } + + @Override + public MemorySegment getSegment() { + return null; + } + + @Override + public MemorySegment getSegmentWithHeader() { + return null; + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return 0; + } + + @Override + public long getNumBytesOfSegment() { + return 0; + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return 0; + } +} From 435cadc0afb5fb7de5a99d8f8025c713173d6e64 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Fri, 15 Nov 2024 15:04:15 
+0200 Subject: [PATCH 02/15] WIP --- .../tornado/api/types/tensors/TensorQ8.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 4d17b5cab6..56b1f9cd7a 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -48,14 +48,14 @@ public float getFloat(int index) { int blockIndex = index / GGMLType.Q8_0.getBlockSize(); int withinBlockIndex = index % GGMLType.Q8_0.getBlockSize(); int blockOffset = blockIndex * GGMLType.Q8_0.getTypeSize(); - byte quant = readByte(tensorStorage.getSegment(), blockOffset + Float16.BYTES + withinBlockIndex); + byte quant = readBy te(tensorStorage.getSegment(), blockOffset + Float16.BYTES + withinBlockIndex); float scale = Float.float16ToFloat(readShort(tensorStorage.getSegment(), blockOffset)); return quant * scale; } @Override public Shape getShape() { - return null; + return shape; } @Override @@ -65,32 +65,32 @@ public String getDTypeAsString() { @Override public DType getDType() { - return null; + return dType; } @Override public int getSize() { - return 0; + return numberOfElements; } @Override public MemorySegment getSegment() { - return null; + return tensorStorage.getSegment(); } @Override public MemorySegment getSegmentWithHeader() { - return null; + return tensorStorage.getSegmentWithHeader(); } @Override public long getNumBytesOfSegmentWithHeader() { - return 0; + return tensorStorage.getNumBytesOfSegmentWithHeader(); } @Override public long getNumBytesOfSegment() { - return 0; + return tensorStorage.getNumBytesOfSegment(); } @Override @@ -100,6 +100,6 @@ protected void clear() { @Override public int getElementSize() { - return 0; + return numberOfElements; } } From 
60abfbd08f286e437bdf2d1ee5b65c1f09fabb3d Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 18:51:55 +0200 Subject: [PATCH 03/15] Add working version of TensorQ8 with validated precision --- .../tornado/api/types/arrays/ByteArray.java | 26 ++ .../tornado/api/types/tensors/TensorQ8.java | 173 ++++++++-- .../unittests/tensors/TestTensorQ8.java | 312 ++++++++++++++++++ 3 files changed, 479 insertions(+), 32 deletions(-) create mode 100644 tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index 213f68fb2d..ab5f868da7 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -27,6 +27,7 @@ import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.internal.annotations.SegmentElementSize; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; /** * This class represents an array of bytes stored in native memory. 
@@ -61,6 +62,23 @@ public ByteArray(int numberOfElements) { segment.setAtIndex(JAVA_INT, 0, numberOfElements); } + public ByteArray(int numberOfElements, boolean noHeader) { + this.numberOfElements = numberOfElements; + baseIndex=0; + segmentByteSize = numberOfElements * BYTE_BYTES; + segment = Arena.ofAuto().allocate(segmentByteSize, 1); +// segment.setAtIndex(JAVA_INT, 0, numberOfElements); + } + + + public ByteArray(int numberOfElements, long requiredStorageSize) { + this.numberOfElements = numberOfElements; + baseIndex=0; +// segmentByteSize = numberOfElements * BYTE_BYTES; + segment = Arena.ofAuto().allocate(requiredStorageSize, 1); + // segment.setAtIndex(JAVA_INT, 0, numberOfElements); + } + /** * Constructs a new {@link ByteArray} instance by concatenating the contents of the given array of {@link ByteArray} instances. * @@ -123,6 +141,14 @@ public static ByteArray fromSegment(MemorySegment segment) { return byteArray; } + public static ByteArray fromSegment(MemorySegment segment, boolean noHeader) { + long byteSize = segment.byteSize(); + int numElements = (int) (byteSize / BYTE_BYTES); + ByteArray byteArray = new ByteArray(numElements, noHeader); + MemorySegment.copy(segment, 0, byteArray.segment, byteArray.baseIndex * BYTE_BYTES, byteSize); + return byteArray; + } + /** * Creates a new instance of the {@link ByteArray} class from a {@link ByteBuffer}. * diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 56b1f9cd7a..29fcd52369 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -1,56 +1,165 @@ +/* + * Copyright (c) 2013-2024, APT Group, Department of Computer Science, + * The University of Manchester. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ package uk.ac.manchester.tornado.api.types.tensors; -import sun.misc.Unsafe; -import uk.ac.manchester.tornado.api.types.arrays.HalfFloatArray; -import uk.ac.manchester.tornado.api.types.arrays.LongArray; +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; +import uk.ac.manchester.tornado.api.types.arrays.TornadoNativeArray; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; -import java.lang.reflect.Field; public class TensorQ8 extends Tensor { - private final DType dType; + private final boolean DEBUG_TENSOR_Q8 = false; + private final ByteArray tensorStorage; + private final int numberOfElements; private final Shape shape; + private final DType dType; - private final HalfFloatArray tensorStorage; - - private int numberOfElements; + private final int blockSize; + private final int bytesPerBlock; + private static final int HEADER_SIZE = (int) TornadoNativeArray.ARRAY_HEADER; public TensorQ8(Shape shape) { - super(DType.HALF_FLOAT, shape); + super(DType.QINT8, shape); this.shape = shape; this.numberOfElements = shape.getSize(); - this.dType = DType.HALF_FLOAT; - this.tensorStorage = new HalfFloatArray(numberOfElements); + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate 
number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize + HEADER_SIZE; + + if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Header size: " + HEADER_SIZE); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = new ByteArray(numberOfElements, totalSize); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset)); + for (int i = 0; i < blockSize; i++) { + byte quant = readByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + i); + values[i] = quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset)); + byte quant = readByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + 
withinBlockIndex); + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } } + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset, Float.floatToFloat16(scale)); + + // Write quantized values + for (int i = 0; i < blockValues.length; i++) { + int quantized = Math.min(127, Math.max(-128, Math.round(blockValues[i] / scale))); + writeByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + i, (byte)quantized); + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } - public TensorQ8(int size, MemorySegment memorySegment) { - super(DType.HALF_FLOAT, new Shape(size)); - this.dType = DType.HALF_FLOAT; - this.shape = new Shape(size); - this.numberOfElements = size; - this.tensorStorage = HalfFloatArray.fromSegment(memorySegment); + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 127.0f; } static short readShort(MemorySegment memorySegment, long offset) { - return memorySegment.get(ValueLayout.JAVA_SHORT, 
memorySegment.address()+offset); + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); } static byte readByte(MemorySegment memorySegment, long offset) { - return memorySegment.get(ValueLayout.JAVA_BYTE, memorySegment.address()+offset); + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); } - public float getFloat(int index) { - assert 0 <= index && index < numberOfElements; - int blockIndex = index / GGMLType.Q8_0.getBlockSize(); - int withinBlockIndex = index % GGMLType.Q8_0.getBlockSize(); - int blockOffset = blockIndex * GGMLType.Q8_0.getTypeSize(); - byte quant = readBy te(tensorStorage.getSegment(), blockOffset + Float16.BYTES + withinBlockIndex); - float scale = Float.float16ToFloat(readShort(tensorStorage.getSegment(), blockOffset)); - return quant * scale; + static void writeShort(MemorySegment memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); } @Override @@ -60,17 +169,17 @@ public Shape getShape() { @Override public String getDTypeAsString() { - return ""; + return dType.QINT8.toString(); } @Override public DType getDType() { - return dType; + return DType.QINT8; } @Override public int getSize() { - return numberOfElements; + return shape.getSize(); } @Override @@ -100,6 +209,6 @@ protected void clear() { @Override public int getElementSize() { - return numberOfElements; + return getSize(); } -} +} \ No newline at end of file diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java new file mode 100644 index 0000000000..b68e165f83 --- /dev/null +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -0,0 +1,312 @@ +package 
uk.ac.manchester.tornado.unittests.tensors; + +import org.junit.Assert; +import org.junit.Test; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; +import uk.ac.manchester.tornado.api.types.tensors.Shape; +import uk.ac.manchester.tornado.api.types.tensors.TensorQ8; +import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; + + +public class TestTensorQ8 extends TornadoTestBase { + + @Test + public void testBasicQuantization() { + // Test with a simple 1D tensor + Shape shape = new Shape(1); + TensorQ8 tensor = new TensorQ8(shape); + + // Test setting and getting a single value + float testValue = 1.5f; + tensor.setFloat(0, testValue); + float retrieved = tensor.getFloat(0); + System.out.println("Segment size for storing single value " + tensor.getSegment().byteSize()); + Assert.assertEquals(testValue, retrieved, 0.1f); + } + + @Test + public void testTensorQ8SetAndGetFloat() { + // Define the shape and create a tensor + Shape shape = new Shape(5); // 1D tensor with 128 elements + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Set some values in the tensor using setFloat and then retrieve them with getFloat + float[] valuesToSet = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ8.setFloat(i, valuesToSet[i]); + } + + // Check that each retrieved value matches the set value within tolerance + for (int i = 0; i < valuesToSet.length; i++) { + Assert.assertEquals(valuesToSet[i], tensorQ8.getFloat(i), 0.1f); + } + } + + @Test + public void testTensorQ8SetAndGetFloatVerify() { + // Use a size that's aligned with Q8_0 block size (typically 32 elements) + int blockSize = GGMLType.Q8_0.getBlockSize(); // Should be 32 + Shape shape = new Shape(blockSize); // Use full block size + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Create test values array matching the block size + float[] valuesToSet = new float[blockSize]; + // Fill with repeating pattern + float[] pattern = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; + for 
(int i = 0; i < blockSize; i++) { + valuesToSet[i] = pattern[i % pattern.length]; + } + + // Print expected layout information + System.out.println("Total elements: " + shape.getSize()); + System.out.println("Block size: " + blockSize); + System.out.println("Total allocated bytes: " + tensorQ8.getSegment().byteSize()); + + // Set values + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ8.setFloat(i, valuesToSet[i]); + // Immediately verify each value after setting + float retrieved = tensorQ8.getFloat(i); + System.out.printf("Index %d: Set=%.2f Retrieved=%.2f%n", + i, valuesToSet[i], retrieved); + Assert.assertEquals("Value mismatch at index " + i, + valuesToSet[i], retrieved, 0.1f); + } + + // Verify all values again + for (int i = 0; i < valuesToSet.length; i++) { + float retrieved = tensorQ8.getFloat(i); + Assert.assertEquals("Final verification failed at index " + i, + valuesToSet[i], retrieved, 0.1f); + } + } + + @Test + public void testMixedScaleValues() { + // Test handling of mixed scales within a block + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Set values with very different scales + tensorQ8.setFloat(0, 100.0f); + tensorQ8.setFloat(1, 0.001f); + tensorQ8.setFloat(2, -100.0f); + tensorQ8.setFloat(3, -0.001f); + + // Verify large values maintain reasonable accuracy + Assert.assertEquals(100.0f, tensorQ8.getFloat(0), 1.0f); + Assert.assertEquals(-100.0f, tensorQ8.getFloat(2), 1.0f); + + // Small values might have less precision but should maintain sign + float small1 = tensorQ8.getFloat(1); + float small2 = tensorQ8.getFloat(3); + Assert.assertTrue("Small positive value lost sign", small1 >= 0); + Assert.assertTrue("Small negative value lost sign", small2 <= 0); + } + + @Test + public void testQuantizationRange() { + // Test extreme values and quantization handling + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Test values in 
separate blocks to maintain scale independence + float[] testValues = { + 0.0f, // Zero + 1e-6f, // Very small positive + -1e-6f, // Very small negative + 100.0f, // Large positive + -100.0f, // Large negative + }; + + for (int i = 0; i < testValues.length; i++) { + tensorQ8.setFloat(i, testValues[i]); + float retrieved = tensorQ8.getFloat(i); + + // For very small values, check if they're close to zero + if (Math.abs(testValues[i]) < 1e-5f) { + Assert.assertTrue("Small value not close to zero", + Math.abs(retrieved) < 1e-4f); + } else { + // For larger values, check relative error + float relativeError = Math.abs((retrieved - testValues[i]) / testValues[i]); + Assert.assertTrue("Large relative error at index " + i + + ": expected=" + testValues[i] + ", got=" + retrieved, + relativeError < 0.01f); + } + } + } + + @Test + public void testInt8Range() { + // Test the full INT8 range in a dedicated test + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Set a few values at INT8 boundaries + float[] boundaryValues = { + -128.0f, // Min INT8 + -127.0f, + -64.0f, + 0.0f, + 63.0f, + 126.0f, + 127.0f // Max INT8 + }; + + // Set values one at a time to ensure same scale + for (int i = 0; i < boundaryValues.length; i++) { + tensorQ8.setFloat(i, boundaryValues[i]); + float retrieved = tensorQ8.getFloat(i); + System.out.printf("INT8 boundary test: Setting %.1f, got %.1f%n", + boundaryValues[i], retrieved); + Assert.assertEquals("Value mismatch at INT8 boundary " + boundaryValues[i], + boundaryValues[i], retrieved, 1.0f); // Allow 1.0 tolerance for boundary values + } + } + + @Test + public void testIndependentBlocks() { + // Test that blocks can handle different scales independently + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); // 3 blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + System.out.println("\nTesting independent blocks with different scales:"); + + // Block 1: 
Small values (0.1 to 1.0) + System.out.println("\nBlock 1 - Small values:"); + for (int i = 0; i < blockSize; i++) { + float value = 0.1f + (0.9f * i / blockSize); + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + // Block 2: Medium values (10 to 20) + System.out.println("\nBlock 2 - Medium values:"); + for (int i = 0; i < blockSize; i++) { + float value = 10.0f + (10.0f * i / blockSize); + tensorQ8.setFloat(blockSize + i, value); + float retrieved = tensorQ8.getFloat(blockSize + i); + System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + // Block 3: Large values (100 to 200) + System.out.println("\nBlock 3 - Large values:"); + for (int i = 0; i < blockSize; i++) { + float value = 100.0f + (100.0f * i / blockSize); + tensorQ8.setFloat(2 * blockSize + i, value); + float retrieved = tensorQ8.getFloat(2 * blockSize + i); + System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + // Verify blocks maintain reasonable accuracy + System.out.println("\nVerifying accuracy for each block:"); + + // Helper function to check max absolute difference in a block + for (int block = 0; block < 3; block++) { + float maxDiff = 0.0f; + float maxRelErr = 0.0f; + float minVal = Float.MAX_VALUE; + float maxVal = Float.MIN_VALUE; + + for (int i = 0; i < blockSize; i++) { + int idx = block * blockSize + i; + float original = (block == 0) ? (0.1f + (0.9f * i / blockSize)) : + (block == 1) ? 
(10.0f + (10.0f * i / blockSize)) : + (100.0f + (100.0f * i / blockSize)); + float retrieved = tensorQ8.getFloat(idx); + float diff = Math.abs(original - retrieved); + float relErr = diff / Math.abs(original); + + maxDiff = Math.max(maxDiff, diff); + maxRelErr = Math.max(maxRelErr, relErr); + minVal = Math.min(minVal, retrieved); + maxVal = Math.max(maxVal, retrieved); + } + + System.out.printf("Block %d stats:%n", block); + System.out.printf(" Value range: %.6f to %.6f%n", minVal, maxVal); + System.out.printf(" Max absolute difference: %.6f%n", maxDiff); + System.out.printf(" Max relative error: %.6f%%%n", maxRelErr * 100); + + // Verify block maintains reasonable range and accuracy + float expectedMaxErr; + if (block == 0) { // Small values + expectedMaxErr = 0.5f; // Larger relative error acceptable for small values + } else if (block == 1) { // Medium values + expectedMaxErr = 0.2f; // 20% error acceptable for medium values + } else { // Large values + expectedMaxErr = 0.1f; // 10% error acceptable for large values + } + + Assert.assertTrue( + String.format("Block %d error too large: %.2f%% > %.2f%%", + block, maxRelErr * 100, expectedMaxErr * 100), + maxRelErr < expectedMaxErr); + } + } + + @Test + public void testConstantBlock() { + // Test how well we can represent a constant value + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float testValue = 10.0f; + System.out.println("\nTesting constant value block:"); + + // Set all values in block to same value + for (int i = 0; i < blockSize; i++) { + tensorQ8.setFloat(i, testValue); + } + + // Verify values + float maxDiff = 0.0f; + for (int i = 0; i < blockSize; i++) { + float retrieved = tensorQ8.getFloat(i); + float diff = Math.abs(retrieved - testValue); + maxDiff = Math.max(maxDiff, diff); + System.out.printf("Index %d: Expected=%.6f Got=%.6f Diff=%.6f%n", + i, testValue, retrieved, diff); + } + + float relativeError = 
maxDiff / Math.abs(testValue); + System.out.printf("Maximum relative error: %.6f%%%n", relativeError * 100); + + Assert.assertTrue( + String.format("Relative error too large for constant block: %.2f%%", + relativeError * 100), + relativeError < 0.1f); // Expect very good accuracy for constant values + } + + @Test + public void testSingleBlockPrecision() { + // Test precision within a single block using relative error metrics + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float baseValue = 10.0f; // Use a reasonable base value + + System.out.println("\nTesting single block precision:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = baseValue * (i + 1) / shape.getSize(); // Spread values evenly + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + float relativeError = Math.abs((retrieved - value) / value); + + System.out.printf("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, value, retrieved, relativeError); + + Assert.assertTrue(String.format( + "Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", + i, value, retrieved, relativeError), + relativeError < 0.1f); // Allow 10% relative error + } + } +} From 26899d3d2bb6b74af6e1caa4c788b026e2874b98 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 18:53:09 +0200 Subject: [PATCH 04/15] Add test information and debug option --- .../tornado/api/types/tensors/TensorQ8.java | 2 +- .../unittests/tensors/TestTensorQ8.java | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 29fcd52369..32f4570154 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java 
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2024, APT Group, Department of Computer Science, + * Copyright (c) 2024, APT Group, Department of Computer Science, * The University of Manchester. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java index b68e165f83..bf7f60fef1 100644 --- a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -1,3 +1,20 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ package uk.ac.manchester.tornado.unittests.tensors; import org.junit.Assert; @@ -7,7 +24,14 @@ import uk.ac.manchester.tornado.api.types.tensors.TensorQ8; import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; - +/** + *

+ * How to run? + *

+ * + * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ8 + * + */ public class TestTensorQ8 extends TornadoTestBase { @Test From 6737a4ba5628e6ba32846bfd84f6e179baf4b691 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:09:15 +0200 Subject: [PATCH 05/15] Add more tests on precision the value modifications --- .../tornado/api/types/tensors/TensorQ8.java | 2 +- .../unittests/tensors/TestTensorQ8.java | 91 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 32f4570154..33e4dd1ebb 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -24,7 +24,7 @@ import java.lang.foreign.ValueLayout; public class TensorQ8 extends Tensor { - private final boolean DEBUG_TENSOR_Q8 = false; + private final boolean DEBUG_TENSOR_Q8 = true; private final ByteArray tensorStorage; private final int numberOfElements; private final Shape shape; diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java index bf7f60fef1..7fc53532e1 100644 --- a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -333,4 +333,95 @@ public void testSingleBlockPrecision() { relativeError < 0.1f); // Allow 10% relative error } } + + @Test + public void testNonAlignedBlockSize() { + // Test tensor with size not aligned to block size + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize + 5); // Intentionally non-aligned + TensorQ8 
tensorQ8 = new TensorQ8(shape); + + // Set values in both full and partial blocks + for (int i = 0; i < shape.getSize(); i++) { + float value = i * 1.5f; + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + Assert.assertEquals("Value mismatch in non-aligned blocks", + value, retrieved, 0.1f); + } + } + + @Test + public void testZeroCrossing() { + // Test values around zero to verify sign handling + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Test different ranges of values around zero + float[][] testRanges = { + // Small values - might get quantized to zero + {-0.001f, -0.0001f, 0.0f, 0.0001f, 0.001f}, + // Medium values - should preserve sign + {-0.1f, -0.05f, 0.0f, 0.05f, 0.1f}, + // Larger values - should definitely preserve sign + {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f}}; + + System.out.println("\nTesting zero crossing behavior:"); + for (int range = 0; range < testRanges.length; range++) { + System.out.printf("\nRange %d:%n", range); + + // Set values from current range + for (int i = 0; i < testRanges[range].length; i++) { + float value = testRanges[range][i]; + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + + System.out.printf("Value: %10.6f -> Retrieved: %10.6f%n", value, retrieved); + + if (Math.abs(value) >= 0.01f) { // Only check sign for values >= 0.01 + Assert.assertEquals(String.format("Sign mismatch for value %.6f", value), Math.signum(value), Math.signum(retrieved), 0.0f); + } else { + // For very small values, just verify they're close to zero + Assert.assertTrue(String.format("Small value %.6f not close enough to zero (got %.6f)", value, retrieved), Math.abs(retrieved) < 0.01f); + } + } + } + } + + @Test + public void testRepeatedUpdates() { + // Test stability when repeatedly updating values + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float testValue = 1.0f; + int testIndex = 0; + + // 
Repeatedly update same value + for (int i = 0; i < 100; i++) { + tensorQ8.setFloat(testIndex, testValue); + float retrieved = tensorQ8.getFloat(testIndex); + Assert.assertEquals("Value unstable after repeated updates", + testValue, retrieved, 0.1f); + } + } + + @Test + public void testAlternatingPatterns() { + // Test alternating positive/negative pattern + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + for (int i = 0; i < shape.getSize(); i++) { + float value = (i % 2 == 0) ? 1.0f : -1.0f; + tensorQ8.setFloat(i, value); + } + + for (int i = 0; i < shape.getSize(); i++) { + float expected = (i % 2 == 0) ? 1.0f : -1.0f; + float retrieved = tensorQ8.getFloat(i); + Assert.assertEquals("Alternating pattern not preserved", + expected, retrieved, 0.1f); + } + } } From be718bb37d19234c0df9030f56b41e1cc6c1ccfe Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:14:59 +0200 Subject: [PATCH 06/15] Add licenses were missing --- .../tornado/api/types/tensors/Float16.java | 17 +++++++++++++++++ .../tornado/api/types/tensors/GGMLType.java | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java index be74aa2965..5802f63d01 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java @@ -1,3 +1,20 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ package uk.ac.manchester.tornado.api.types.tensors; diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java index 8811de6914..447805dccd 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java @@ -1,3 +1,20 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ package uk.ac.manchester.tornado.api.types.tensors; public enum GGMLType { From 3818d74402e9ba0c543464ec81c47d1c6738d1e8 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:35:20 +0200 Subject: [PATCH 07/15] Simplify header handling --- .../tornado/api/types/arrays/ByteArray.java | 19 +------------ .../tornado/api/types/tensors/TensorQ8.java | 28 ++++++++----------- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index ab5f868da7..0bacb9ff15 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -62,21 +62,12 @@ public ByteArray(int numberOfElements) { segment.setAtIndex(JAVA_INT, 0, numberOfElements); } - public ByteArray(int numberOfElements, boolean noHeader) { - this.numberOfElements = numberOfElements; - baseIndex=0; - segmentByteSize = numberOfElements * BYTE_BYTES; - segment = Arena.ofAuto().allocate(segmentByteSize, 1); -// segment.setAtIndex(JAVA_INT, 0, numberOfElements); - } - public ByteArray(int numberOfElements, long requiredStorageSize) { this.numberOfElements = numberOfElements; baseIndex=0; -// segmentByteSize = numberOfElements * BYTE_BYTES; segment = Arena.ofAuto().allocate(requiredStorageSize, 1); - // segment.setAtIndex(JAVA_INT, 0, numberOfElements); + segment.setAtIndex(JAVA_INT, 0, numberOfElements); } /** @@ -141,14 +132,6 @@ public static ByteArray fromSegment(MemorySegment segment) { return byteArray; } - public static ByteArray fromSegment(MemorySegment segment, boolean noHeader) { - long byteSize = segment.byteSize(); - int numElements = (int) (byteSize / BYTE_BYTES); - ByteArray byteArray = new ByteArray(numElements, noHeader); - MemorySegment.copy(segment, 0, byteArray.segment, byteArray.baseIndex 
* BYTE_BYTES, byteSize); - return byteArray; - } - /** * Creates a new instance of the {@link ByteArray} class from a {@link ByteBuffer}. * diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 33e4dd1ebb..52f4df0057 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -33,7 +33,7 @@ public class TensorQ8 extends Tensor { private final int blockSize; private final int bytesPerBlock; - private static final int HEADER_SIZE = (int) TornadoNativeArray.ARRAY_HEADER; +// private static final int HEADER_SIZE = (int) TornadoNativeArray.ARRAY_HEADER; public TensorQ8(Shape shape) { super(DType.QINT8, shape); @@ -52,7 +52,7 @@ public TensorQ8(Shape shape) { // Calculate total storage size in bytes, including header long dataSize = (long)numBlocks * bytesPerBlock; - long totalSize = dataSize + HEADER_SIZE; + long totalSize = dataSize; if (DEBUG_TENSOR_Q8) { System.out.println("Debug info:"); @@ -61,7 +61,6 @@ public TensorQ8(Shape shape) { System.out.println("Bytes per block: " + bytesPerBlock); System.out.println("Number of blocks: " + numBlocks); System.out.println("Data size: " + dataSize); - System.out.println("Header size: " + HEADER_SIZE); System.out.println("Total size with header: " + totalSize); } @@ -73,14 +72,13 @@ private float[] getBlockValues(int blockIndex) { int blockOffset = blockIndex * bytesPerBlock; try { - float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset)); + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); for (int i = 0; i < blockSize; i++) { - byte quant = readByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + i); + byte quant = 
readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); values[i] = quant * scale; } } catch (Exception e) { - throw new RuntimeException("Failed to read block " + blockIndex + - " at offset " + blockOffset + ": " + e.getMessage()); + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); } return values; } @@ -95,12 +93,11 @@ public float getFloat(int index) { int blockOffset = blockIndex * bytesPerBlock; try { - float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset)); - byte quant = readByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + withinBlockIndex); + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + byte quant = readByte(tensorStorage.getSegmentWithHeader(), + blockOffset + Float16.BYTES + withinBlockIndex); return quant * scale; } catch (Exception e) { - throw new RuntimeException("Failed to get float at index " + index + - " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); } } @@ -124,16 +121,15 @@ public void setFloat(int index, float value) { try { // Write scale - writeShort(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset, Float.floatToFloat16(scale)); + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); // Write quantized values for (int i = 0; i < blockValues.length; i++) { int quantized = Math.min(127, Math.max(-128, Math.round(blockValues[i] / scale))); - writeByte(tensorStorage.getSegmentWithHeader(), HEADER_SIZE + blockOffset + Float16.BYTES + i, (byte)quantized); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte)quantized); } } catch (Exception e) { 
- throw new RuntimeException("Failed to set float at index " + index + - " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); } } @@ -184,7 +180,7 @@ public int getSize() { @Override public MemorySegment getSegment() { - return tensorStorage.getSegment(); + return tensorStorage.getSegmentWithHeader(); } @Override From 8c61d72d8ac646269a94c7af5be1e08b1a280d04 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:43:04 +0200 Subject: [PATCH 08/15] Add verbose control for precision unit-tests --- .../tornado/api/types/tensors/TensorQ8.java | 4 +- .../unittests/tensors/TestTensorQ8.java | 298 ++++++++---------- 2 files changed, 141 insertions(+), 161 deletions(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 52f4df0057..d972e60442 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -24,7 +24,7 @@ import java.lang.foreign.ValueLayout; public class TensorQ8 extends Tensor { - private final boolean DEBUG_TENSOR_Q8 = true; + private final boolean DEBUG_TENSOR_Q8 = false; private final ByteArray tensorStorage; private final int numberOfElements; private final Shape shape; @@ -205,6 +205,6 @@ protected void clear() { @Override public int getElementSize() { - return getSize(); + return DType.QINT8.getByteSize(); } } \ No newline at end of file diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java index 7fc53532e1..c036767251 100644 --- 
a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -24,6 +24,8 @@ import uk.ac.manchester.tornado.api.types.tensors.TensorQ8; import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; +import static java.lang.Boolean.FALSE; + /** *

* How to run? @@ -34,33 +36,42 @@ */ public class TestTensorQ8 extends TornadoTestBase { + private static final boolean VERBOSE = FALSE; // Control verbose output + + private void printVerbose(String message) { + if (VERBOSE) { + System.out.println(message); + } + } + + private void printVerboseF(String format, Object... args) { + if (VERBOSE) { + System.out.printf(format, args); + } + } + @Test public void testBasicQuantization() { - // Test with a simple 1D tensor Shape shape = new Shape(1); TensorQ8 tensor = new TensorQ8(shape); - // Test setting and getting a single value float testValue = 1.5f; tensor.setFloat(0, testValue); float retrieved = tensor.getFloat(0); - System.out.println("Segment size for storing single value " + tensor.getSegment().byteSize()); + printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize()); Assert.assertEquals(testValue, retrieved, 0.1f); } @Test public void testTensorQ8SetAndGetFloat() { - // Define the shape and create a tensor - Shape shape = new Shape(5); // 1D tensor with 128 elements + Shape shape = new Shape(5); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Set some values in the tensor using setFloat and then retrieve them with getFloat float[] valuesToSet = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; for (int i = 0; i < valuesToSet.length; i++) { tensorQ8.setFloat(i, valuesToSet[i]); } - // Check that each retrieved value matches the set value within tolerance for (int i = 0; i < valuesToSet.length; i++) { Assert.assertEquals(valuesToSet[i], tensorQ8.getFloat(i), 0.1f); } @@ -68,36 +79,29 @@ public void testTensorQ8SetAndGetFloat() { @Test public void testTensorQ8SetAndGetFloatVerify() { - // Use a size that's aligned with Q8_0 block size (typically 32 elements) - int blockSize = GGMLType.Q8_0.getBlockSize(); // Should be 32 - Shape shape = new Shape(blockSize); // Use full block size + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize); TensorQ8 tensorQ8 = new 
TensorQ8(shape); - // Create test values array matching the block size float[] valuesToSet = new float[blockSize]; - // Fill with repeating pattern float[] pattern = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; for (int i = 0; i < blockSize; i++) { valuesToSet[i] = pattern[i % pattern.length]; } - // Print expected layout information - System.out.println("Total elements: " + shape.getSize()); - System.out.println("Block size: " + blockSize); - System.out.println("Total allocated bytes: " + tensorQ8.getSegment().byteSize()); + printVerboseF("Total elements: %d%n", shape.getSize()); + printVerboseF("Block size: %d%n", blockSize); + printVerboseF("Total allocated bytes: %d%n", tensorQ8.getSegment().byteSize()); - // Set values for (int i = 0; i < valuesToSet.length; i++) { tensorQ8.setFloat(i, valuesToSet[i]); - // Immediately verify each value after setting float retrieved = tensorQ8.getFloat(i); - System.out.printf("Index %d: Set=%.2f Retrieved=%.2f%n", + printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n", i, valuesToSet[i], retrieved); Assert.assertEquals("Value mismatch at index " + i, valuesToSet[i], retrieved, 0.1f); } - // Verify all values again for (int i = 0; i < valuesToSet.length; i++) { float retrieved = tensorQ8.getFloat(i); Assert.assertEquals("Final verification failed at index " + i, @@ -107,21 +111,17 @@ public void testTensorQ8SetAndGetFloatVerify() { @Test public void testMixedScaleValues() { - // Test handling of mixed scales within a block Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Set values with very different scales tensorQ8.setFloat(0, 100.0f); tensorQ8.setFloat(1, 0.001f); tensorQ8.setFloat(2, -100.0f); tensorQ8.setFloat(3, -0.001f); - // Verify large values maintain reasonable accuracy Assert.assertEquals(100.0f, tensorQ8.getFloat(0), 1.0f); Assert.assertEquals(-100.0f, tensorQ8.getFloat(2), 1.0f); - // Small values might have less precision but should maintain sign float small1 = 
tensorQ8.getFloat(1); float small2 = tensorQ8.getFloat(3); Assert.assertTrue("Small positive value lost sign", small1 >= 0); @@ -130,29 +130,21 @@ public void testMixedScaleValues() { @Test public void testQuantizationRange() { - // Test extreme values and quantization handling Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Test values in separate blocks to maintain scale independence float[] testValues = { - 0.0f, // Zero - 1e-6f, // Very small positive - -1e-6f, // Very small negative - 100.0f, // Large positive - -100.0f, // Large negative + 0.0f, 1e-6f, -1e-6f, 100.0f, -100.0f, }; for (int i = 0; i < testValues.length; i++) { tensorQ8.setFloat(i, testValues[i]); float retrieved = tensorQ8.getFloat(i); - // For very small values, check if they're close to zero if (Math.abs(testValues[i]) < 1e-5f) { Assert.assertTrue("Small value not close to zero", Math.abs(retrieved) < 1e-4f); } else { - // For larger values, check relative error float relativeError = Math.abs((retrieved - testValues[i]) / testValues[i]); Assert.assertTrue("Large relative error at index " + i + ": expected=" + testValues[i] + ", got=" + retrieved, @@ -163,75 +155,60 @@ public void testQuantizationRange() { @Test public void testInt8Range() { - // Test the full INT8 range in a dedicated test Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Set a few values at INT8 boundaries float[] boundaryValues = { - -128.0f, // Min INT8 - -127.0f, - -64.0f, - 0.0f, - 63.0f, - 126.0f, - 127.0f // Max INT8 + -128.0f, -127.0f, -64.0f, 0.0f, 63.0f, 126.0f, 127.0f }; - // Set values one at a time to ensure same scale for (int i = 0; i < boundaryValues.length; i++) { tensorQ8.setFloat(i, boundaryValues[i]); float retrieved = tensorQ8.getFloat(i); - System.out.printf("INT8 boundary test: Setting %.1f, got %.1f%n", + printVerboseF("INT8 boundary test: Setting %.1f, got %.1f%n", boundaryValues[i], retrieved); 
Assert.assertEquals("Value mismatch at INT8 boundary " + boundaryValues[i], - boundaryValues[i], retrieved, 1.0f); // Allow 1.0 tolerance for boundary values + boundaryValues[i], retrieved, 1.0f); } } @Test public void testIndependentBlocks() { - // Test that blocks can handle different scales independently int blockSize = GGMLType.Q8_0.getBlockSize(); - Shape shape = new Shape(blockSize * 3); // 3 blocks + Shape shape = new Shape(blockSize * 3); TensorQ8 tensorQ8 = new TensorQ8(shape); - System.out.println("\nTesting independent blocks with different scales:"); + printVerbose("\nTesting independent blocks with different scales:"); - // Block 1: Small values (0.1 to 1.0) - System.out.println("\nBlock 1 - Small values:"); + printVerbose("\nBlock 1 - Small values:"); for (int i = 0; i < blockSize; i++) { float value = 0.1f + (0.9f * i / blockSize); tensorQ8.setFloat(i, value); float retrieved = tensorQ8.getFloat(i); - System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", i, value, retrieved, Math.abs(value - retrieved)); } - // Block 2: Medium values (10 to 20) - System.out.println("\nBlock 2 - Medium values:"); + printVerbose("\nBlock 2 - Medium values:"); for (int i = 0; i < blockSize; i++) { float value = 10.0f + (10.0f * i / blockSize); tensorQ8.setFloat(blockSize + i, value); float retrieved = tensorQ8.getFloat(blockSize + i); - System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", i, value, retrieved, Math.abs(value - retrieved)); } - // Block 3: Large values (100 to 200) - System.out.println("\nBlock 3 - Large values:"); + printVerbose("\nBlock 3 - Large values:"); for (int i = 0; i < blockSize; i++) { float value = 100.0f + (100.0f * i / blockSize); tensorQ8.setFloat(2 * blockSize + i, value); float retrieved = tensorQ8.getFloat(2 * blockSize + i); - System.out.printf("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + 
printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", i, value, retrieved, Math.abs(value - retrieved)); } - // Verify blocks maintain reasonable accuracy - System.out.println("\nVerifying accuracy for each block:"); + printVerbose("\nVerifying accuracy for each block:"); - // Helper function to check max absolute difference in a block for (int block = 0; block < 3; block++) { float maxDiff = 0.0f; float maxRelErr = 0.0f; @@ -253,20 +230,12 @@ public void testIndependentBlocks() { maxVal = Math.max(maxVal, retrieved); } - System.out.printf("Block %d stats:%n", block); - System.out.printf(" Value range: %.6f to %.6f%n", minVal, maxVal); - System.out.printf(" Max absolute difference: %.6f%n", maxDiff); - System.out.printf(" Max relative error: %.6f%%%n", maxRelErr * 100); - - // Verify block maintains reasonable range and accuracy - float expectedMaxErr; - if (block == 0) { // Small values - expectedMaxErr = 0.5f; // Larger relative error acceptable for small values - } else if (block == 1) { // Medium values - expectedMaxErr = 0.2f; // 20% error acceptable for medium values - } else { // Large values - expectedMaxErr = 0.1f; // 10% error acceptable for large values - } + printVerboseF("Block %d stats:%n", block); + printVerboseF(" Value range: %.6f to %.6f%n", minVal, maxVal); + printVerboseF(" Max absolute difference: %.6f%n", maxDiff); + printVerboseF(" Max relative error: %.6f%%%n", maxRelErr * 100); + + float expectedMaxErr = (block == 0) ? 0.5f : (block == 1) ? 
0.2f : 0.1f; Assert.assertTrue( String.format("Block %d error too large: %.2f%% > %.2f%%", @@ -275,77 +244,125 @@ public void testIndependentBlocks() { } } + @Test - public void testConstantBlock() { - // Test how well we can represent a constant value - int blockSize = GGMLType.Q8_0.getBlockSize(); - Shape shape = new Shape(blockSize); + public void testRepeatedUpdates() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - float testValue = 10.0f; - System.out.println("\nTesting constant value block:"); + float testValue = 1.0f; + int testIndex = 0; - // Set all values in block to same value - for (int i = 0; i < blockSize; i++) { - tensorQ8.setFloat(i, testValue); + printVerbose("\nTesting repeated updates stability:"); + for (int i = 0; i < 100; i++) { + tensorQ8.setFloat(testIndex, testValue); + float retrieved = tensorQ8.getFloat(testIndex); + printVerboseF("Update %d: Expected=%.6f Got=%.6f%n", + i, testValue, retrieved); + Assert.assertEquals("Value unstable after repeated updates", + testValue, retrieved, 0.1f); } + } - // Verify values - float maxDiff = 0.0f; - for (int i = 0; i < blockSize; i++) { + @Test + public void testAlternatingPatterns() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting alternating pattern preservation:"); + + // Set alternating values + printVerbose("Setting alternating values:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = (i % 2 == 0) ? 
1.0f : -1.0f; + tensorQ8.setFloat(i, value); float retrieved = tensorQ8.getFloat(i); - float diff = Math.abs(retrieved - testValue); - maxDiff = Math.max(maxDiff, diff); - System.out.printf("Index %d: Expected=%.6f Got=%.6f Diff=%.6f%n", - i, testValue, retrieved, diff); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); } - float relativeError = maxDiff / Math.abs(testValue); - System.out.printf("Maximum relative error: %.6f%%%n", relativeError * 100); - - Assert.assertTrue( - String.format("Relative error too large for constant block: %.2f%%", - relativeError * 100), - relativeError < 0.1f); // Expect very good accuracy for constant values + // Verify alternating values + printVerbose("\nVerifying alternating pattern:"); + for (int i = 0; i < shape.getSize(); i++) { + float expected = (i % 2 == 0) ? 1.0f : -1.0f; + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Expected=%.6f Got=%.6f%n", + i, expected, retrieved); + Assert.assertEquals("Alternating pattern not preserved", + expected, retrieved, 0.1f); + } } @Test public void testSingleBlockPrecision() { - // Test precision within a single block using relative error metrics Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - float baseValue = 10.0f; // Use a reasonable base value + float baseValue = 10.0f; - System.out.println("\nTesting single block precision:"); + printVerbose("\nTesting single block precision:"); for (int i = 0; i < shape.getSize(); i++) { - float value = baseValue * (i + 1) / shape.getSize(); // Spread values evenly + float value = baseValue * (i + 1) / shape.getSize(); tensorQ8.setFloat(i, value); float retrieved = tensorQ8.getFloat(i); float relativeError = Math.abs((retrieved - value) / value); - System.out.printf("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", + printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", i, value, retrieved, relativeError); - Assert.assertTrue(String.format( - "Relative 
error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", + Assert.assertTrue( + String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", i, value, retrieved, relativeError), - relativeError < 0.1f); // Allow 10% relative error + relativeError < 0.1f); } } + @Test + public void testConstantBlock() { + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float testValue = 10.0f; + printVerbose("\nTesting constant value block:"); + + printVerbose("Setting constant values:"); + for (int i = 0; i < blockSize; i++) { + tensorQ8.setFloat(i, testValue); + } + + float maxDiff = 0.0f; + printVerbose("\nVerifying constant values:"); + for (int i = 0; i < blockSize; i++) { + float retrieved = tensorQ8.getFloat(i); + float diff = Math.abs(retrieved - testValue); + maxDiff = Math.max(maxDiff, diff); + printVerboseF("Index %d: Expected=%.6f Got=%.6f Diff=%.6f%n", + i, testValue, retrieved, diff); + } + + float relativeError = maxDiff / Math.abs(testValue); + printVerboseF("Maximum relative error: %.6f%%%n", relativeError * 100); + + Assert.assertTrue( + String.format("Relative error too large for constant block: %.2f%%", + relativeError * 100), + relativeError < 0.1f); + } + @Test public void testNonAlignedBlockSize() { - // Test tensor with size not aligned to block size int blockSize = GGMLType.Q8_0.getBlockSize(); - Shape shape = new Shape(blockSize + 5); // Intentionally non-aligned + Shape shape = new Shape(blockSize + 5); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Set values in both full and partial blocks + printVerbose("\nTesting non-aligned block size:"); for (int i = 0; i < shape.getSize(); i++) { float value = i * 1.5f; tensorQ8.setFloat(i, value); float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); Assert.assertEquals("Value mismatch in non-aligned blocks", value, 
retrieved, 0.1f); } @@ -353,75 +370,38 @@ public void testNonAlignedBlockSize() { @Test public void testZeroCrossing() { - // Test values around zero to verify sign handling Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); TensorQ8 tensorQ8 = new TensorQ8(shape); - // Test different ranges of values around zero float[][] testRanges = { - // Small values - might get quantized to zero {-0.001f, -0.0001f, 0.0f, 0.0001f, 0.001f}, - // Medium values - should preserve sign {-0.1f, -0.05f, 0.0f, 0.05f, 0.1f}, - // Larger values - should definitely preserve sign - {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f}}; + {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f} + }; - System.out.println("\nTesting zero crossing behavior:"); + printVerbose("\nTesting zero crossing behavior:"); for (int range = 0; range < testRanges.length; range++) { - System.out.printf("\nRange %d:%n", range); + printVerboseF("\nRange %d:%n", range); - // Set values from current range for (int i = 0; i < testRanges[range].length; i++) { float value = testRanges[range][i]; tensorQ8.setFloat(i, value); float retrieved = tensorQ8.getFloat(i); - System.out.printf("Value: %10.6f -> Retrieved: %10.6f%n", value, retrieved); + printVerboseF("Value: %10.6f -> Retrieved: %10.6f%n", + value, retrieved); - if (Math.abs(value) >= 0.01f) { // Only check sign for values >= 0.01 - Assert.assertEquals(String.format("Sign mismatch for value %.6f", value), Math.signum(value), Math.signum(retrieved), 0.0f); + if (Math.abs(value) >= 0.01f) { + Assert.assertEquals( + String.format("Sign mismatch for value %.6f", value), + Math.signum(value), Math.signum(retrieved), 0.0f); } else { - // For very small values, just verify they're close to zero - Assert.assertTrue(String.format("Small value %.6f not close enough to zero (got %.6f)", value, retrieved), Math.abs(retrieved) < 0.01f); + Assert.assertTrue( + String.format("Small value %.6f not close enough to zero (got %.6f)", + value, retrieved), + Math.abs(retrieved) < 0.01f); } } } } - - @Test - public 
void testRepeatedUpdates() { - // Test stability when repeatedly updating values - Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); - TensorQ8 tensorQ8 = new TensorQ8(shape); - - float testValue = 1.0f; - int testIndex = 0; - - // Repeatedly update same value - for (int i = 0; i < 100; i++) { - tensorQ8.setFloat(testIndex, testValue); - float retrieved = tensorQ8.getFloat(testIndex); - Assert.assertEquals("Value unstable after repeated updates", - testValue, retrieved, 0.1f); - } - } - - @Test - public void testAlternatingPatterns() { - // Test alternating positive/negative pattern - Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); - TensorQ8 tensorQ8 = new TensorQ8(shape); - - for (int i = 0; i < shape.getSize(); i++) { - float value = (i % 2 == 0) ? 1.0f : -1.0f; - tensorQ8.setFloat(i, value); - } - - for (int i = 0; i < shape.getSize(); i++) { - float expected = (i % 2 == 0) ? 1.0f : -1.0f; - float retrieved = tensorQ8.getFloat(i); - Assert.assertEquals("Alternating pattern not preserved", - expected, retrieved, 0.1f); - } - } } From 31bac2e17badcceb6270da338b6b2132d902ba2e Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:46:32 +0200 Subject: [PATCH 09/15] Revert module info defaults --- tornado-api/src/main/java/module-info.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tornado-api/src/main/java/module-info.java b/tornado-api/src/main/java/module-info.java index f1a7686948..3eb1b3838c 100644 --- a/tornado-api/src/main/java/module-info.java +++ b/tornado-api/src/main/java/module-info.java @@ -16,7 +16,6 @@ * */ module tornado.api { - requires jdk.unsupported; exports uk.ac.manchester.tornado.api; exports uk.ac.manchester.tornado.api.annotations; exports uk.ac.manchester.tornado.api.common; From cb65fcfb1c0d339f3513b0d4c83112cefb518921 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 19:51:15 +0200 Subject: [PATCH 10/15] Add more mixed precision tests --- .../unittests/tensors/TestTensorQ8.java | 136 
++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java index c036767251..30d6d093e1 100644 --- a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -404,4 +404,140 @@ public void testZeroCrossing() { } } } + + @Test + public void testSequentialBlockUpdates() { + // Test updating blocks in sequence vs random order + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); // Three blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting sequential block updates:"); + + // Sequential updates + for (int block = 0; block < 3; block++) { + float blockValue = (block + 1) * 10.0f; + printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue); + + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + tensorQ8.setFloat(index, blockValue); + float retrieved = tensorQ8.getFloat(index); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + index, blockValue, retrieved); + Assert.assertEquals("Sequential block update failed", + blockValue, retrieved, 0.1f); + } + } + + // Verify all blocks maintain their values + printVerbose("\nVerifying all blocks after updates:"); + for (int block = 0; block < 3; block++) { + float expectedValue = (block + 1) * 10.0f; + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + float retrieved = tensorQ8.getFloat(index); + Assert.assertEquals("Block value changed unexpectedly", + expectedValue, retrieved, 0.1f); + } + } + } + + @Test + public void testMaximumPrecisionValues() { + // Test precision with values requiring maximum accuracy + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 
= new TensorQ8(shape); + + printVerbose("\nTesting maximum precision values:"); + + // Test precise decimal values + float[] preciseValues = { + 1.23456789f, + -1.23456789f, + 12.3456789f, + -12.3456789f, + 123.456789f, + -123.456789f + }; + + for (int i = 0; i < preciseValues.length; i++) { + tensorQ8.setFloat(i, preciseValues[i]); + float retrieved = tensorQ8.getFloat(i); + float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]); + + printVerboseF("Precise value test %d: Set=%.9f Got=%.9f RelError=%.9f%n", + i, preciseValues[i], retrieved, relativeError); + + // For high-precision values, we expect relative error < 1% + Assert.assertTrue( + String.format("Precision lost: expected=%.9f, got=%.9f, error=%.9f", + preciseValues[i], retrieved, relativeError), + relativeError < 0.01f); + } + } + + @Test + public void testBlockScaleInterference() { + // Test that updates in one block don't affect other blocks' scales + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); // Two blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting block scale interference:"); + + // Set first block to small values + printVerbose("\nSetting first block to small values:"); + for (int i = 0; i < blockSize; i++) { + float value = 0.1f + (0.1f * i / blockSize); + tensorQ8.setFloat(i, value); + printVerboseF("Block 1 index %d: Set=%.6f%n", i, value); + } + + // Set second block to large values + printVerbose("\nSetting second block to large values:"); + for (int i = 0; i < blockSize; i++) { + float value = 100.0f + (100.0f * i / blockSize); + tensorQ8.setFloat(blockSize + i, value); + printVerboseF("Block 2 index %d: Set=%.6f%n", i, value); + } + + // Verify first block maintained small values + printVerbose("\nVerifying first block maintained precision:"); + for (int i = 0; i < blockSize; i++) { + float expected = 0.1f + (0.1f * i / blockSize); + float retrieved = tensorQ8.getFloat(i); + float relativeError 
= Math.abs((retrieved - expected) / expected); + + printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f RelError=%.6f%n", + i, expected, retrieved, relativeError); + + Assert.assertTrue( + String.format("Block 1 precision lost after block 2 update at index %d", i), + relativeError < 0.1f); + } + } + + @Test + public void testBlockBoundaryUpdates() { + // Test updating values at block boundaries + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); // Two blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Set values around block boundary + float[] boundaryValues = {1.0f, 2.0f, 3.0f, 4.0f}; + int boundaryStart = blockSize - 2; // Two values before boundary + + printVerbose("\nTesting block boundary updates:"); + for (int i = 0; i < boundaryValues.length; i++) { + int index = boundaryStart + i; + tensorQ8.setFloat(index, boundaryValues[i]); + float retrieved = tensorQ8.getFloat(index); + printVerboseF("Index %d (block boundary +/- 2): Set=%.6f Got=%.6f%n", + index, boundaryValues[i], retrieved); + Assert.assertEquals("Value mismatch at block boundary", + boundaryValues[i], retrieved, 0.1f); + } + } } From 92deea997cb4bdecc1d3ab84f57e7d9e681a5a2f Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 20:06:30 +0200 Subject: [PATCH 11/15] Fix to copy raw memory segment --- .../tornado/api/types/arrays/ByteArray.java | 8 +++++ .../tornado/api/types/tensors/TensorQ8.java | 33 ++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index 0bacb9ff15..e7eb7f88c7 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -132,6 +132,14 @@ public static ByteArray fromSegment(MemorySegment 
segment) { return byteArray; } + // Temporary workaround to copy raw memory segment without a tornado header + public static ByteArray fromSegment(MemorySegment segment, int numberOfElements) { + long byteSize = segment.byteSize(); + ByteArray byteArray = new ByteArray(numberOfElements, byteSize); + MemorySegment.copy(segment, 0, byteArray.segment, byteArray.baseIndex * BYTE_BYTES, byteSize); + return byteArray; + } + /** * Creates a new instance of the {@link ByteArray} class from a {@link ByteBuffer}. * diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index d972e60442..b9058bf36c 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -33,7 +33,6 @@ public class TensorQ8 extends Tensor { private final int blockSize; private final int bytesPerBlock; -// private static final int HEADER_SIZE = (int) TornadoNativeArray.ARRAY_HEADER; public TensorQ8(Shape shape) { super(DType.QINT8, shape); @@ -67,6 +66,38 @@ public TensorQ8(Shape shape) { this.tensorStorage = new ByteArray(numberOfElements, totalSize); } + public TensorQ8(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize; + + 
if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + private float[] getBlockValues(int blockIndex) { float[] values = new float[blockSize]; int blockOffset = blockIndex * bytesPerBlock; From 483a12e0667d1c02a72ecbf1e03f57ffc25dabd6 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 20:26:39 +0200 Subject: [PATCH 12/15] Minor fix for tensor q8 --- .../uk/ac/manchester/tornado/api/types/tensors/DType.java | 5 ++++- .../uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java index 39da8b6c77..b169868de1 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java @@ -67,7 +67,10 @@ public enum DType { /** * Represents a quantized 8-bit unsigned integer used in specialized applications like machine learning, using 1 byte. 
*/ - QUINT8(1, ValueLayout.JAVA_BYTE); + QUINT8(1, ValueLayout.JAVA_BYTE), + + Q4_0(1, ValueLayout.JAVA_BYTE); + // @formatter:on /** diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index b9058bf36c..5bac48f929 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -18,7 +18,6 @@ package uk.ac.manchester.tornado.api.types.tensors; import uk.ac.manchester.tornado.api.types.arrays.ByteArray; -import uk.ac.manchester.tornado.api.types.arrays.TornadoNativeArray; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; From 38c8d817b20af5acc978b61d34326bb53058217d Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 20:27:28 +0200 Subject: [PATCH 13/15] Add TensorQ4 init support --- .../tornado/api/types/tensors/TensorQ4.java | 268 ++++++++++++ .../unittests/tensors/TestTensorQ4.java | 392 ++++++++++++++++++ 2 files changed, 660 insertions(+) create mode 100644 tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java create mode 100644 tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java new file mode 100644 index 0000000000..88e9da5260 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +public class TensorQ4 extends Tensor { + private final boolean DEBUG_TENSOR_Q4 = false; + private final ByteArray tensorStorage; + private final int numberOfElements; + private final Shape shape; + private final DType dType; + + private final int blockSize; + private final int bytesPerBlock; + + public TensorQ4(Shape shape) { + super(DType.Q4_0, shape); + this.shape = shape; + this.numberOfElements = shape.getSize(); + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = new 
ByteArray(numberOfElements, totalSize); + } + + public TensorQ4(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Read 4-bit quantized values + for (int i = 0; i < blockSize; i++) { + byte quant; + if (i < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2) >>> 4) & 0x0F); + } + // Convert from 4-bit value to float + quant -= 8; // Center at zero [-8, 7] + values[i] = 
quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Extract 4-bit value + byte quant; + if (withinBlockIndex < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex - blockSize / 2) >>> 4) & 0x0F); + } + quant -= 8; // Center at zero [-8, 7] + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); + + // Write 
quantized values + for (int i = 0; i < blockValues.length; i++) { + byte quant = (byte) (Math.round(blockValues[i] / scale) + 8); // Add 8 to shift to [0, 15] + quant = (byte) Math.min(15, Math.max(0, quant)); // Clamp to 4-bit range + + if (i < blockSize / 2) { + // Write to lower 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte) ((current & 0xF0) | (quant & 0x0F))); + } else { + // Write to upper 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2, (byte) ((current & 0x0F) | (quant << 4))); + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 7.0f; // Scale to [-7, 7] range for 4-bit values + } + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); + } + + static void writeShort(MemorySegment memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); + } + @Override + public Shape getShape() { + return shape; + } + + @Override + public String getDTypeAsString() { + return dType.QINT8.toString(); + } + + @Override + public DType getDType() { + return 
DType.QINT8; + } + + @Override + public int getSize() { + return shape.getSize(); + } + + @Override + public MemorySegment getSegment() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public MemorySegment getSegmentWithHeader() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return tensorStorage.getNumBytesOfSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegment() { + return tensorStorage.getNumBytesOfSegment(); + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return DType.QINT8.getByteSize(); + } +} \ No newline at end of file diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java new file mode 100644 index 0000000000..4f35d7fdd7 --- /dev/null +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java @@ -0,0 +1,392 @@ +package uk.ac.manchester.tornado.unittests.tensors; + +import org.junit.Assert; +import org.junit.Test; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; +import uk.ac.manchester.tornado.api.types.tensors.Shape; +import uk.ac.manchester.tornado.api.types.tensors.TensorQ4; +import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; + +import static java.lang.Boolean.FALSE; + +/** + *

+ * How to run? + *

+ * + * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ4 + * + */ +public class TestTensorQ4 extends TornadoTestBase { + private static final boolean VERBOSE = FALSE; + + private void printVerbose(String message) { + if (VERBOSE) System.out.println(message); + } + + private void printVerboseF(String format, Object... args) { + if (VERBOSE) System.out.printf(format, args); + } + + @Test + public void testBasicQuantization() { + // Unchanged - passing + Shape shape = new Shape(1); + TensorQ4 tensor = new TensorQ4(shape); + + float testValue = 1.0f; + tensor.setFloat(0, testValue); + float retrieved = tensor.getFloat(0); + printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize()); + Assert.assertEquals(testValue, retrieved, 0.2f); + } + + @Test + public void testFourBitRange() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test a single block to maintain consistent scale + float[] boundaryValues = { + -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f, 4.0f, 6.0f + }; + + printVerbose("\nTesting 4-bit range quantization:"); + for (int i = 0; i < boundaryValues.length; i++) { + tensorQ4.setFloat(i, boundaryValues[i]); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("4-bit value test: Setting %.1f, got %.1f%n", + boundaryValues[i], retrieved); + // Increased tolerance to account for quantization steps + Assert.assertEquals("Value mismatch at 4-bit value " + boundaryValues[i], + boundaryValues[i], retrieved, 0.6f); + } + } + + @Test + public void testPackedValues() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test both nibbles of each byte with values well within quantization range + float[] values = {-4.0f, -2.0f, 0.0f, 2.0f, 4.0f, -4.0f, -2.0f, 2.0f}; + + printVerbose("\nTesting packed 4-bit storage:"); + for (int i = 0; i < values.length; i++) { + tensorQ4.setFloat(i, values[i]); + float 
retrieved = tensorQ4.getFloat(i); + printVerboseF("Packed index %d: Set=%.1f Got=%.1f%n", + i, values[i], retrieved); + Assert.assertEquals("Value mismatch for packed storage", + values[i], retrieved, 0.5f); + } + } + + @Test + public void testBlockScaleInterference() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting block scale interference:"); + + // Use values well within the 4-bit quantization range + for (int i = 0; i < blockSize; i++) { + float value = -4.0f + (8.0f * i / blockSize); // Range from -4 to 4 + tensorQ4.setFloat(i, value); + printVerboseF("Block 1 index %d: Set=%.6f%n", i, value); + } + + for (int i = 0; i < blockSize; i++) { + float value = -2.0f + (4.0f * i / blockSize); // Range from -2 to 2 + tensorQ4.setFloat(blockSize + i, value); + printVerboseF("Block 2 index %d: Set=%.6f%n", i, value); + } + + // Verify first block maintained reasonable accuracy + for (int i = 0; i < blockSize; i++) { + float expected = -4.0f + (8.0f * i / blockSize); + float retrieved = tensorQ4.getFloat(i); + float absError = Math.abs(retrieved - expected); + + printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f AbsError=%.6f%n", + i, expected, retrieved, absError); + + Assert.assertTrue("Block 1 accuracy lost after block 2 update", + absError < 0.6f); + } + } + + @Test + public void testFullRangeQuantization() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test evenly spaced values within quantization range + float[] testValues = new float[16]; + for (int i = 0; i < 16; i++) { + testValues[i] = -7.0f + (i * 14.0f / 15.0f); // Range from -7 to 7 + } + + printVerbose("\nTesting quantization range:"); + for (int i = 0; i < testValues.length; i++) { + tensorQ4.setFloat(i, testValues[i]); + float retrieved = tensorQ4.getFloat(i); + + printVerboseF("Step %2d: Set=%.3f Got=%.3f%n", + i, 
testValues[i], retrieved); + + float absError = Math.abs(retrieved - testValues[i]); + Assert.assertTrue( + String.format("Excessive quantization error: expected=%.3f, got=%.3f, error=%.3f", + testValues[i], retrieved, absError), + absError < 0.6f); + } + } + @Test + public void testTensorQ4SetAndGetFloatVerify() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Use values within Q4 range (-8 to 7) + float[] pattern = {0.5f, -1.0f, 4.0f, -6.0f, 0.0f}; + float[] valuesToSet = new float[blockSize]; + for (int i = 0; i < blockSize; i++) { + valuesToSet[i] = pattern[i % pattern.length]; + } + + printVerboseF("Total elements: %d%n", shape.getSize()); + printVerboseF("Block size: %d%n", blockSize); + printVerboseF("Total allocated bytes: %d%n", tensorQ4.getSegment().byteSize()); + + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ4.setFloat(i, valuesToSet[i]); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n", + i, valuesToSet[i], retrieved); + Assert.assertEquals("Value mismatch at index " + i, + valuesToSet[i], retrieved, 0.5f); + } + } + + @Test + public void testSingleBlockPrecision() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + float baseValue = 4.0f; // Smaller base value for Q4 range + + printVerbose("\nTesting single block precision:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = baseValue * (i + 1) / shape.getSize(); + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + float relativeError = Math.abs((retrieved - value) / value); + + printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, value, retrieved, relativeError); + + Assert.assertTrue( + String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", + i, value, retrieved, relativeError), + relativeError < 0.3f); // 
Higher tolerance for Q4 + } + } + + @Test + public void testMaximumPrecisionValues() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting maximum precision values:"); + + float[] preciseValues = { + 1.234f, + -1.234f, + 3.456f, + -3.456f, + 6.789f, + -6.789f + }; + + for (int i = 0; i < preciseValues.length; i++) { + tensorQ4.setFloat(i, preciseValues[i]); + float retrieved = tensorQ4.getFloat(i); + float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]); + + printVerboseF("Precise value test %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, preciseValues[i], retrieved, relativeError); + + Assert.assertTrue( + String.format("Precision lost: expected=%.6f, got=%.6f, error=%.6f", + preciseValues[i], retrieved, relativeError), + relativeError < 0.2f); + } + } + + @Test + public void testSequentialBlockUpdates() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting sequential block updates:"); + + // Sequential updates with Q4-appropriate values + for (int block = 0; block < 3; block++) { + float blockValue = (block + 1) * 2.0f; // Values: 2, 4, 6 + printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue); + + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + tensorQ4.setFloat(index, blockValue); + float retrieved = tensorQ4.getFloat(index); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + index, blockValue, retrieved); + Assert.assertEquals("Sequential block update failed", + blockValue, retrieved, 0.5f); + } + } + } + + @Test + public void testNibbleBoundaryUpdates() { + // Test updating values at nibble boundaries + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Set values around nibble boundaries + float[] values = {1.0f, -1.0f, 2.0f, 
-2.0f}; + + // Test boundaries between nibbles + for (int i = 0; i < values.length; i++) { + int index = (i * blockSize/4); // Space out across block + tensorQ4.setFloat(index, values[i]); + float retrieved = tensorQ4.getFloat(index); + printVerboseF("Nibble boundary %d: Set=%.6f Got=%.6f%n", + index, values[i], retrieved); + Assert.assertEquals("Value mismatch at nibble boundary", + values[i], retrieved, 0.5f); + } + } + + @Test + public void testAlternatingNibblePatterns() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting alternating nibble pattern:"); + + // Set alternating values across nibble boundaries + for (int i = 0; i < shape.getSize(); i++) { + float value = (i % 2 == 0) ? 1.0f : -1.0f; + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); + Assert.assertEquals("Alternating pattern not preserved", + value, retrieved, 0.5f); + } + } + + @Test + public void testNibblePackingConsistency() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Create an array of expected quantized values + float[] expectedValues = { + -4.0f, -3.5f, -3.0f, -2.5f, + -2.0f, -1.5f, -1.0f, -0.5f, + 0.0f, 0.5f, 1.0f, 1.5f, + 2.0f, 2.5f, 3.0f, 3.5f + }; + + printVerbose("\nTesting nibble packing consistency:"); + + // Set values + for (int i = 0; i < expectedValues.length; i++) { + tensorQ4.setFloat(i, expectedValues[i]); + } + + // Verify quantization + for (int i = 0; i < expectedValues.length; i++) { + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Pattern %2d: Set=%.4f Got=%.4f Diff=%.4f%n", + i, expectedValues[i], retrieved, + Math.abs(expectedValues[i] - retrieved)); + + // Check if the retrieved value is within one quantization step + float quantStep = 0.5f; // Quantization step size for Q4 + Assert.assertTrue( + String.format("Quantization 
error too large at index %d: expected=%.4f, got=%.4f", + i, expectedValues[i], retrieved), + Math.abs(retrieved - expectedValues[i]) <= quantStep + ); + } + + // Additional verification for nibble boundaries + printVerbose("\nVerifying nibble boundaries:"); + for (int i = 0; i < expectedValues.length; i += 2) { + float val1 = tensorQ4.getFloat(i); + float val2 = tensorQ4.getFloat(i + 1); + printVerboseF("Nibble pair %d: %.4f %.4f%n", i/2, val1, val2); + + // Verify the difference between adjacent values is consistent + if (i < expectedValues.length - 2) { + float diff1 = val2 - val1; + float diff2 = tensorQ4.getFloat(i + 2) - val2; + Assert.assertTrue( + String.format("Inconsistent quantization steps: %.4f vs %.4f", diff1, diff2), + Math.abs(diff1 - diff2) <= 0.1f + ); + } + } + } + + @Test + public void testGradualValueTransitions() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test gradual transitions to check quantization steps + float step = 14.0f / shape.getSize(); // Range from -7 to 7 + for (int i = 0; i < shape.getSize(); i++) { + float value = -7.0f + (step * i); + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Step %d: Set=%.3f Got=%.3f%n", + i, value, retrieved); + Assert.assertEquals("Gradual transition not preserved", + value, retrieved, 0.5f); + } + } + + @Test + public void testQ4Symmetry() { + // Test symmetry of positive and negative values + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + for (int i = 0; i <= 7; i++) { + float positive = i * 1.0f; + float negative = -positive; + + tensorQ4.setFloat(i * 2, positive); + tensorQ4.setFloat(i * 2 + 1, negative); + + float retrievedPos = tensorQ4.getFloat(i * 2); + float retrievedNeg = tensorQ4.getFloat(i * 2 + 1); + + printVerboseF("Symmetry test %d: +%.1f->%.1f, %.1f->%.1f%n", + i, positive, retrievedPos, negative, retrievedNeg); + + 
Assert.assertEquals("Positive value not preserved", positive, retrievedPos, 0.5f); + Assert.assertEquals("Negative value not preserved", negative, retrievedNeg, 0.5f); + Assert.assertEquals("Asymmetric quantization", + Math.abs(retrievedPos), Math.abs(retrievedNeg), 0.1f); + } + } +} From 2e99bba1e70a09f68004118fa4342b2e4618bf91 Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Sat, 16 Nov 2024 20:32:30 +0200 Subject: [PATCH 14/15] Add javadocs on key methods --- .../tornado/api/types/tensors/TensorQ8.java | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java index 5bac48f929..ae3cf26530 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -24,15 +24,30 @@ public class TensorQ8 extends Tensor { private final boolean DEBUG_TENSOR_Q8 = false; + /** Storage for the quantized tensor data including scales and values. */ private final ByteArray tensorStorage; + + /** Total number of elements in the tensor. */ private final int numberOfElements; + + /** Shape information for the tensor. */ private final Shape shape; + + /** Data type of the tensor (QINT8). */ private final DType dType; + /** Number of values in each quantization block. */ private final int blockSize; - private final int bytesPerBlock; + /** Total bytes per block including scale and quantized values. */ + private final int bytesPerBlock; + /** + * Constructs a new Q8 tensor with the specified shape. + * Allocates memory and initializes the tensor storage. 
+ * + * @param shape The shape of the tensor to create + */ public TensorQ8(Shape shape) { super(DType.QINT8, shape); this.shape = shape; @@ -65,6 +80,13 @@ public TensorQ8(Shape shape) { this.tensorStorage = new ByteArray(numberOfElements, totalSize); } + /** + * Constructs a Q8 tensor using existing memory segment data. + * Used for creating a tensor view of pre-existing quantized data. + * + * @param numberOfElements The number of elements in the tensor + * @param memorySegment The memory segment containing the quantized data + */ public TensorQ8(int numberOfElements, MemorySegment memorySegment) { super(DType.QINT8, new Shape(numberOfElements)); this.shape = new Shape(numberOfElements); @@ -113,6 +135,15 @@ private float[] getBlockValues(int blockIndex) { return values; } + /** + * Gets a single float value from the tensor at the specified index. + * The value is dequantized using the scale factor from its containing block. + * + * @param index The index of the value to retrieve + * @return The dequantized float value + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error reading the value + */ public float getFloat(int index) { if (index < 0 || index >= numberOfElements) { throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); @@ -131,6 +162,15 @@ public float getFloat(int index) { } } + /** + * Sets a float value in the tensor at the specified index. + * Updates the entire block's scale factor when any value in the block changes. 
+ * + * @param index The index where the value should be set + * @param value The float value to set + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error writing the value + */ public void setFloat(int index, float value) { if (index < 0 || index >= numberOfElements) { throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); @@ -163,6 +203,14 @@ public void setFloat(int index, float value) { } } + /** + * Computes the optimal scale factor for a block of values. + * The scale is chosen to maximize the use of the INT8 range (-128 to 127). + * + * @param values The array of float values to compute the scale for + * @return The optimal scale factor for quantizing the values + */ + private float computeOptimalScale(float[] values) { float maxAbs = 1e-5f; for (float value : values) { From 9835e4e930932168abb12672c905045d576ef8de Mon Sep 17 00:00:00 2001 From: mikepapadim Date: Wed, 20 Nov 2024 16:05:08 +0000 Subject: [PATCH 15/15] Fix for not initialized bytes --- .../uk/ac/manchester/tornado/api/types/arrays/ByteArray.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index e7eb7f88c7..5b2f2ba6d3 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -66,6 +66,7 @@ public ByteArray(int numberOfElements) { public ByteArray(int numberOfElements, long requiredStorageSize) { this.numberOfElements = numberOfElements; baseIndex=0; + this.segmentByteSize = requiredStorageSize; segment = Arena.ofAuto().allocate(requiredStorageSize, 1); segment.setAtIndex(JAVA_INT, 0, numberOfElements); }