diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index 213f68fb2d..5b2f2ba6d3 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -27,6 +27,7 @@ import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.internal.annotations.SegmentElementSize; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; /** * This class represents an array of bytes stored in native memory. @@ -61,6 +62,15 @@ public ByteArray(int numberOfElements) { segment.setAtIndex(JAVA_INT, 0, numberOfElements); } + + public ByteArray(int numberOfElements, long requiredStorageSize) { + this.numberOfElements = numberOfElements; + baseIndex=0; + this.segmentByteSize = requiredStorageSize; + segment = Arena.ofAuto().allocate(requiredStorageSize, 1); + segment.setAtIndex(JAVA_INT, 0, numberOfElements); + } + /** * Constructs a new {@link ByteArray} instance by concatenating the contents of the given array of {@link ByteArray} instances. * @@ -123,6 +133,14 @@ public static ByteArray fromSegment(MemorySegment segment) { return byteArray; } + // Temporary workaround to copy raw memory segment without a tornado header + public static ByteArray fromSegment(MemorySegment segment, int numberOfElements) { + long byteSize = segment.byteSize(); + ByteArray byteArray = new ByteArray(numberOfElements, byteSize); + MemorySegment.copy(segment, 0, byteArray.segment, byteArray.baseIndex * BYTE_BYTES, byteSize); + return byteArray; + } + /** * Creates a new instance of the {@link ByteArray} class from a {@link ByteBuffer}. * diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java index 39da8b6c77..b169868de1 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java @@ -67,7 +67,10 @@ public enum DType { /** * Represents a quantized 8-bit unsigned integer used in specialized applications like machine learning, using 1 byte. */ - QUINT8(1, ValueLayout.JAVA_BYTE); + QUINT8(1, ValueLayout.JAVA_BYTE), + + Q4_0(1, ValueLayout.JAVA_BYTE); + // @formatter:on /** diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java new file mode 100644 index 0000000000..5802f63d01 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package uk.ac.manchester.tornado.api.types.tensors; + + +public final class Float16 { + public static final int BYTES = 2; +} + diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java new file mode 100644 index 0000000000..447805dccd --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +public enum GGMLType { + F32(Float.BYTES), + F16(Float16.BYTES), + Q4_0(Float16.BYTES + 16 * Byte.BYTES, 32), + Q4_1(2 * Float16.BYTES + 16 * Byte.BYTES, 32), + UNSUPPORTED_Q4_2(Integer.MAX_VALUE), // support has been removed + UNSUPPORTED_Q4_3(Integer.MAX_VALUE), // support has been removed + Q5_0(Integer.MAX_VALUE), + Q5_1(Integer.MAX_VALUE), + Q8_0(Float16.BYTES + 32 * Byte.BYTES, 32), + Q8_1(32 * Byte.BYTES + 2 * Float.BYTES, 32), + // k-quantizations + Q2_K(Integer.MAX_VALUE), + Q3_K(Integer.MAX_VALUE), + Q4_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 2, GGMLType.QK_K), + Q5_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 8 + GGMLType.QK_K / 2, GGMLType.QK_K), + Q6_K(GGMLType.QK_K / 2 + GGMLType.QK_K / 4 + GGMLType.QK_K / 16 + Float16.BYTES, GGMLType.QK_K), + Q8_K(Integer.MAX_VALUE), + I8(Byte.BYTES), + I16(Short.BYTES), + I32(Integer.BYTES); + + private static final GGMLType[] VALUES = values(); + + private final int typeSize; + + private final int blockSize; + + public int getTypeSize() { + return typeSize; + } + + public int getBlockSize() { + return blockSize; + } + + public static GGMLType fromId(int id) { + return VALUES[id]; + } + + GGMLType(int typeSize) { + this(typeSize, 1); + } + + public long byteSizeFor(int numberOfElements) { + long t = numberOfElements * (long) getTypeSize(); + assert t % getBlockSize() == 0; + return Math.toIntExact(t / getBlockSize()); + } + + public static final int QK_K = 256; // or 64? 
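+ // Worked example (illustrative, derived from the sizes above): Q4_0 stores 32 elements per block as a
+ // float16 scale plus 16 bytes of packed 4-bit values, so getTypeSize() == 2 + 16 == 18 and
+ // byteSizeFor(4096) == 4096 * 18 / 32 == 2304 bytes. Note that byteSizeFor() asserts that
+ // numberOfElements * getTypeSize() is a multiple of getBlockSize().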
+ + GGMLType(int typeSize, int blockSize) { + assert blockSize > 0; + assert typeSize > 0; + assert isPowerOf2(blockSize); + this.typeSize = typeSize; + this.blockSize = blockSize; + } + + private static boolean isPowerOf2(int n) { + return n > 0 && (n & (n - 1)) == 0; + } +} \ No newline at end of file diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java index a678cd01d5..d8138651cf 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java @@ -45,7 +45,8 @@ public long[] getDimensions() { * @return the total size of the shape as an int */ public int getSize() { - return (int) Arrays.stream(dimensions).reduce(1, (a, b) -> a * b); + assert Arrays.stream(dimensions).allMatch(i -> i > 0); + return (int) Arrays.stream(dimensions).reduce(Math::multiplyExact).orElseThrow(); } @Override diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java new file mode 100644 index 0000000000..88e9da5260 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +public class TensorQ4 extends Tensor { + private final boolean DEBUG_TENSOR_Q4 = false; + private final ByteArray tensorStorage; + private final int numberOfElements; + private final Shape shape; + private final DType dType; + + private final int blockSize; + private final int bytesPerBlock; + + public TensorQ4(Shape shape) { + super(DType.Q4_0, shape); + this.shape = shape; + this.numberOfElements = shape.getSize(); + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = new ByteArray(numberOfElements, totalSize); + } + + public TensorQ4(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Read 4-bit quantized values + for (int i = 0; i < blockSize; i++) { + byte quant; + if (i < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2) >>> 4) & 0x0F); + } + // Convert from 4-bit value to float + quant -= 8; // Center at zero [-8, 7] + 
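+ // Illustrative numbers: with scale == 0.5f a stored nibble of 0x3 decodes to (3 - 8) * 0.5f = -2.5f,
+ // and 0xF decodes to (15 - 8) * 0.5f = 3.5f, so a block can only represent values in [-8 * scale, 7 * scale].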
values[i] = quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Extract 4-bit value + byte quant; + if (withinBlockIndex < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex - blockSize / 2) >>> 4) & 0x0F); + } + quant -= 8; // Center at zero [-8, 7] + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); + + // Write quantized values + for (int i = 0; i < blockValues.length; i++) { + byte quant = (byte) (Math.round(blockValues[i] / scale) + 8); // Add 8 to shift to [0, 15] + quant = (byte) Math.min(15, Math.max(0, quant)); // Clamp to 4-bit range + + if (i < blockSize / 2) { + // Write to lower 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte) ((current & 0xF0) | (quant & 0x0F))); + } else { + // Write to upper 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2, (byte) ((current & 0x0F) | (quant << 4))); + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 7.0f; // Scale to [-7, 7] range for 4-bit values + } + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); + } + + static void writeShort(MemorySegment 
memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); + } + @Override + public Shape getShape() { + return shape; + } + + @Override + public String getDTypeAsString() { + return dType.QINT8.toString(); + } + + @Override + public DType getDType() { + return DType.QINT8; + } + + @Override + public int getSize() { + return shape.getSize(); + } + + @Override + public MemorySegment getSegment() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public MemorySegment getSegmentWithHeader() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return tensorStorage.getNumBytesOfSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegment() { + return tensorStorage.getNumBytesOfSegment(); + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return DType.QINT8.getByteSize(); + } +} \ No newline at end of file diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java new file mode 100644 index 0000000000..ae3cf26530 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +public class TensorQ8 extends Tensor { + private final boolean DEBUG_TENSOR_Q8 = false; + /** Storage for the quantized tensor data including scales and values. */ + private final ByteArray tensorStorage; + + /** Total number of elements in the tensor. */ + private final int numberOfElements; + + /** Shape information for the tensor. */ + private final Shape shape; + + /** Data type of the tensor (QINT8). */ + private final DType dType; + + /** Number of values in each quantization block. */ + private final int blockSize; + + /** Total bytes per block including scale and quantized values. */ + private final int bytesPerBlock; + + /** + * Constructs a new Q8 tensor with the specified shape. + * Allocates memory and initializes the tensor storage. 
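+ * For example (illustrative): with the Q8_0 block size of 32, a shape of 64 elements needs
+ * 2 blocks of 34 bytes each (2-byte float16 scale + 32 quantized bytes), i.e. 68 bytes of storage.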
+ * + * @param shape The shape of the tensor to create + */ + public TensorQ8(Shape shape) { + super(DType.QINT8, shape); + this.shape = shape; + this.numberOfElements = shape.getSize(); + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = new ByteArray(numberOfElements, totalSize); + } + + /** + * Constructs a Q8 tensor using existing memory segment data. + * Used for creating a tensor view of pre-existing quantized data. + * + * @param numberOfElements The number of elements in the tensor + * @param memorySegment The memory segment containing the quantized data + */ + public TensorQ8(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + for (int i = 0; i < blockSize; i++) { + byte quant = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); + values[i] = quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + /** + * Gets a single float value from the tensor at the specified index. + * The value is dequantized using the scale factor from its containing block. 
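+ * For example (illustrative): if the block's float16 scale decodes to 0.25f and the stored
+ * byte is -12, the returned value is -12 * 0.25f = -3.0f.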
+ * + * @param index The index of the value to retrieve + * @return The dequantized float value + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error reading the value + */ + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + byte quant = readByte(tensorStorage.getSegmentWithHeader(), + blockOffset + Float16.BYTES + withinBlockIndex); + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + /** + * Sets a float value in the tensor at the specified index. + * Updates the entire block's scale factor when any value in the block changes. + * + * @param index The index where the value should be set + * @param value The float value to set + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error writing the value + */ + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); + + // Write quantized values + for (int i = 0; i < blockValues.length; i++) { + int quantized = Math.min(127, Math.max(-128, Math.round(blockValues[i] / scale))); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte)quantized); + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + /** + * Computes the optimal scale factor for a block of values. + * The scale is chosen to maximize the use of the INT8 range (-128 to 127). 
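+ * For example (illustrative): a block whose largest magnitude is 25.4f gets scale 25.4f / 127 = 0.2f,
+ * and each value v in the block is then stored as round(v / 0.2f), clamped to [-128, 127].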
+ * + * @param values The array of float values to compute the scale for + * @return The optimal scale factor for quantizing the values + */ + + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 127.0f; + } + + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); + } + + static void writeShort(MemorySegment memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); + } + + @Override + public Shape getShape() { + return shape; + } + + @Override + public String getDTypeAsString() { + return dType.QINT8.toString(); + } + + @Override + public DType getDType() { + return DType.QINT8; + } + + @Override + public int getSize() { + return shape.getSize(); + } + + @Override + public MemorySegment getSegment() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public MemorySegment getSegmentWithHeader() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return tensorStorage.getNumBytesOfSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegment() { + return tensorStorage.getNumBytesOfSegment(); + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return DType.QINT8.getByteSize(); + } +} \ No newline at end of file diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java new file mode 100644 index 0000000000..4f35d7fdd7 --- /dev/null +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java @@ -0,0 +1,392 @@ +package uk.ac.manchester.tornado.unittests.tensors; + +import org.junit.Assert; +import org.junit.Test; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; +import uk.ac.manchester.tornado.api.types.tensors.Shape; +import uk.ac.manchester.tornado.api.types.tensors.TensorQ4; +import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; + +import static java.lang.Boolean.FALSE; + +/** + *

+ * How to run? + *

+ * + * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ4 + * + */ +public class TestTensorQ4 extends TornadoTestBase { + private static final boolean VERBOSE = FALSE; + + private void printVerbose(String message) { + if (VERBOSE) System.out.println(message); + } + + private void printVerboseF(String format, Object... args) { + if (VERBOSE) System.out.printf(format, args); + } + + @Test + public void testBasicQuantization() { + // Unchanged - passing + Shape shape = new Shape(1); + TensorQ4 tensor = new TensorQ4(shape); + + float testValue = 1.0f; + tensor.setFloat(0, testValue); + float retrieved = tensor.getFloat(0); + printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize()); + Assert.assertEquals(testValue, retrieved, 0.2f); + } + + @Test + public void testFourBitRange() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test a single block to maintain consistent scale + float[] boundaryValues = { + -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f, 4.0f, 6.0f + }; + + printVerbose("\nTesting 4-bit range quantization:"); + for (int i = 0; i < boundaryValues.length; i++) { + tensorQ4.setFloat(i, boundaryValues[i]); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("4-bit value test: Setting %.1f, got %.1f%n", + boundaryValues[i], retrieved); + // Increased tolerance to account for quantization steps + Assert.assertEquals("Value mismatch at 4-bit value " + boundaryValues[i], + boundaryValues[i], retrieved, 0.6f); + } + } + + @Test + public void testPackedValues() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test both nibbles of each byte with values well within quantization range + float[] values = {-4.0f, -2.0f, 0.0f, 2.0f, 4.0f, -4.0f, -2.0f, 2.0f}; + + printVerbose("\nTesting packed 4-bit storage:"); + for (int i = 0; i < values.length; i++) { + tensorQ4.setFloat(i, values[i]); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Packed index %d: Set=%.1f Got=%.1f%n", + i, values[i], retrieved); + Assert.assertEquals("Value mismatch for packed storage", + values[i], retrieved, 0.5f); + } + } + + @Test + public void testBlockScaleInterference() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting block scale interference:"); + + // Use values well within the 4-bit quantization range + for (int i = 0; i < blockSize; i++) { + float value = -4.0f + (8.0f * i / blockSize); // Range from -4 to 4 + tensorQ4.setFloat(i, value); + printVerboseF("Block 1 index %d: Set=%.6f%n", i, value); + } + + for (int i = 0; i < blockSize; i++) { + float value = -2.0f + (4.0f * i / blockSize); // Range from -2 to 2 + tensorQ4.setFloat(blockSize + i, value); + printVerboseF("Block 2 index %d: Set=%.6f%n", i, value); + } + + // Verify first block maintained reasonable accuracy + for (int i = 0; i < blockSize; i++) { + float expected = -4.0f + (8.0f * i / blockSize); + float retrieved = tensorQ4.getFloat(i); + float absError = Math.abs(retrieved - expected); + + printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f AbsError=%.6f%n", + i, expected, retrieved, absError); + + Assert.assertTrue("Block 1 accuracy lost after block 2 update", + absError < 0.6f); + } + } + + @Test + public void testFullRangeQuantization() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new 
TensorQ4(shape); + + // Test evenly spaced values within quantization range + float[] testValues = new float[16]; + for (int i = 0; i < 16; i++) { + testValues[i] = -7.0f + (i * 14.0f / 15.0f); // Range from -7 to 7 + } + + printVerbose("\nTesting quantization range:"); + for (int i = 0; i < testValues.length; i++) { + tensorQ4.setFloat(i, testValues[i]); + float retrieved = tensorQ4.getFloat(i); + + printVerboseF("Step %2d: Set=%.3f Got=%.3f%n", + i, testValues[i], retrieved); + + float absError = Math.abs(retrieved - testValues[i]); + Assert.assertTrue( + String.format("Excessive quantization error: expected=%.3f, got=%.3f, error=%.3f", + testValues[i], retrieved, absError), + absError < 0.6f); + } + } + @Test + public void testTensorQ4SetAndGetFloatVerify() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Use values within Q4 range (-8 to 7) + float[] pattern = {0.5f, -1.0f, 4.0f, -6.0f, 0.0f}; + float[] valuesToSet = new float[blockSize]; + for (int i = 0; i < blockSize; i++) { + valuesToSet[i] = pattern[i % pattern.length]; + } + + printVerboseF("Total elements: %d%n", shape.getSize()); + printVerboseF("Block size: %d%n", blockSize); + printVerboseF("Total allocated bytes: %d%n", tensorQ4.getSegment().byteSize()); + + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ4.setFloat(i, valuesToSet[i]); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n", + i, valuesToSet[i], retrieved); + Assert.assertEquals("Value mismatch at index " + i, + valuesToSet[i], retrieved, 0.5f); + } + } + + @Test + public void testSingleBlockPrecision() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + float baseValue = 4.0f; // Smaller base value for Q4 range + + printVerbose("\nTesting single block precision:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = baseValue * (i + 1) / shape.getSize(); + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + float relativeError = Math.abs((retrieved - value) / value); + + printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, value, retrieved, relativeError); + + Assert.assertTrue( + String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", + i, value, retrieved, relativeError), + relativeError < 0.3f); // Higher tolerance for Q4 + } + } + + @Test + public void testMaximumPrecisionValues() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting maximum precision values:"); + + float[] preciseValues = { + 1.234f, + -1.234f, + 3.456f, + -3.456f, + 6.789f, + -6.789f + }; + + for (int i = 0; i < preciseValues.length; i++) { + tensorQ4.setFloat(i, preciseValues[i]); + float retrieved = tensorQ4.getFloat(i); + float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]); + + printVerboseF("Precise value test %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, preciseValues[i], retrieved, relativeError); + + Assert.assertTrue( + String.format("Precision lost: expected=%.6f, got=%.6f, error=%.6f", + preciseValues[i], retrieved, relativeError), + relativeError < 0.2f); + } + } + + @Test + public void testSequentialBlockUpdates() { + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting sequential block 
updates:"); + + // Sequential updates with Q4-appropriate values + for (int block = 0; block < 3; block++) { + float blockValue = (block + 1) * 2.0f; // Values: 2, 4, 6 + printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue); + + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + tensorQ4.setFloat(index, blockValue); + float retrieved = tensorQ4.getFloat(index); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + index, blockValue, retrieved); + Assert.assertEquals("Sequential block update failed", + blockValue, retrieved, 0.5f); + } + } + } + + @Test + public void testNibbleBoundaryUpdates() { + // Test updating values at nibble boundaries + int blockSize = GGMLType.Q4_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Set values around nibble boundaries + float[] values = {1.0f, -1.0f, 2.0f, -2.0f}; + + // Test boundaries between nibbles + for (int i = 0; i < values.length; i++) { + int index = (i * blockSize/4); // Space out across block + tensorQ4.setFloat(index, values[i]); + float retrieved = tensorQ4.getFloat(index); + printVerboseF("Nibble boundary %d: Set=%.6f Got=%.6f%n", + index, values[i], retrieved); + Assert.assertEquals("Value mismatch at nibble boundary", + values[i], retrieved, 0.5f); + } + } + + @Test + public void testAlternatingNibblePatterns() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + printVerbose("\nTesting alternating nibble pattern:"); + + // Set alternating values across nibble boundaries + for (int i = 0; i < shape.getSize(); i++) { + float value = (i % 2 == 0) ? 1.0f : -1.0f; + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); + Assert.assertEquals("Alternating pattern not preserved", + value, retrieved, 0.5f); + } + } + + @Test + public void testNibblePackingConsistency() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Create an array of expected quantized values + float[] expectedValues = { + -4.0f, -3.5f, -3.0f, -2.5f, + -2.0f, -1.5f, -1.0f, -0.5f, + 0.0f, 0.5f, 1.0f, 1.5f, + 2.0f, 2.5f, 3.0f, 3.5f + }; + + printVerbose("\nTesting nibble packing consistency:"); + + // Set values + for (int i = 0; i < expectedValues.length; i++) { + tensorQ4.setFloat(i, expectedValues[i]); + } + + // Verify quantization + for (int i = 0; i < expectedValues.length; i++) { + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Pattern %2d: Set=%.4f Got=%.4f Diff=%.4f%n", + i, expectedValues[i], retrieved, + Math.abs(expectedValues[i] - retrieved)); + + // Check if the retrieved value is within one quantization step + float quantStep = 0.5f; // Quantization step size for Q4 + Assert.assertTrue( + String.format("Quantization error too large at index %d: expected=%.4f, got=%.4f", + i, expectedValues[i], retrieved), + Math.abs(retrieved - expectedValues[i]) <= quantStep + ); + } + + // Additional verification for nibble boundaries + printVerbose("\nVerifying nibble boundaries:"); + for (int i = 0; i < expectedValues.length; i += 2) { + float val1 = tensorQ4.getFloat(i); + float val2 = tensorQ4.getFloat(i + 1); + printVerboseF("Nibble pair %d: %.4f %.4f%n", i/2, val1, val2); + + // Verify the difference between adjacent values is consistent + if (i < expectedValues.length - 2) { + float diff1 = val2 - val1; + float diff2 = tensorQ4.getFloat(i + 2) - val2; + Assert.assertTrue( 
+ String.format("Inconsistent quantization steps: %.4f vs %.4f", diff1, diff2), + Math.abs(diff1 - diff2) <= 0.1f + ); + } + } + } + + @Test + public void testGradualValueTransitions() { + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + // Test gradual transitions to check quantization steps + float step = 14.0f / shape.getSize(); // Range from -7 to 7 + for (int i = 0; i < shape.getSize(); i++) { + float value = -7.0f + (step * i); + tensorQ4.setFloat(i, value); + float retrieved = tensorQ4.getFloat(i); + printVerboseF("Step %d: Set=%.3f Got=%.3f%n", + i, value, retrieved); + Assert.assertEquals("Gradual transition not preserved", + value, retrieved, 0.5f); + } + } + + @Test + public void testQ4Symmetry() { + // Test symmetry of positive and negative values + Shape shape = new Shape(GGMLType.Q4_0.getBlockSize()); + TensorQ4 tensorQ4 = new TensorQ4(shape); + + for (int i = 0; i <= 7; i++) { + float positive = i * 1.0f; + float negative = -positive; + + tensorQ4.setFloat(i * 2, positive); + tensorQ4.setFloat(i * 2 + 1, negative); + + float retrievedPos = tensorQ4.getFloat(i * 2); + float retrievedNeg = tensorQ4.getFloat(i * 2 + 1); + + printVerboseF("Symmetry test %d: +%.1f->%.1f, %.1f->%.1f%n", + i, positive, retrievedPos, negative, retrievedNeg); + + Assert.assertEquals("Positive value not preserved", positive, retrievedPos, 0.5f); + Assert.assertEquals("Negative value not preserved", negative, retrievedNeg, 0.5f); + Assert.assertEquals("Asymmetric quantization", + Math.abs(retrievedPos), Math.abs(retrievedNeg), 0.1f); + } + } +} diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java new file mode 100644 index 0000000000..30d6d093e1 --- /dev/null +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.unittests.tensors; + +import org.junit.Assert; +import org.junit.Test; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; +import uk.ac.manchester.tornado.api.types.tensors.Shape; +import uk.ac.manchester.tornado.api.types.tensors.TensorQ8; +import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; + +import static java.lang.Boolean.FALSE; + +/** + *

+ * How to run? + *

+ * + * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ8 + * + */ +public class TestTensorQ8 extends TornadoTestBase { + + private static final boolean VERBOSE = FALSE; // Control verbose output + + private void printVerbose(String message) { + if (VERBOSE) { + System.out.println(message); + } + } + + private void printVerboseF(String format, Object... args) { + if (VERBOSE) { + System.out.printf(format, args); + } + } + + @Test + public void testBasicQuantization() { + Shape shape = new Shape(1); + TensorQ8 tensor = new TensorQ8(shape); + + float testValue = 1.5f; + tensor.setFloat(0, testValue); + float retrieved = tensor.getFloat(0); + printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize()); + Assert.assertEquals(testValue, retrieved, 0.1f); + } + + @Test + public void testTensorQ8SetAndGetFloat() { + Shape shape = new Shape(5); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float[] valuesToSet = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ8.setFloat(i, valuesToSet[i]); + } + + for (int i = 0; i < valuesToSet.length; i++) { + Assert.assertEquals(valuesToSet[i], tensorQ8.getFloat(i), 0.1f); + } + } + + @Test + public void testTensorQ8SetAndGetFloatVerify() { + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float[] valuesToSet = new float[blockSize]; + float[] pattern = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f}; + for (int i = 0; i < blockSize; i++) { + valuesToSet[i] = pattern[i % pattern.length]; + } + + printVerboseF("Total elements: %d%n", shape.getSize()); + printVerboseF("Block size: %d%n", blockSize); + printVerboseF("Total allocated bytes: %d%n", tensorQ8.getSegment().byteSize()); + + for (int i = 0; i < valuesToSet.length; i++) { + tensorQ8.setFloat(i, valuesToSet[i]); + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n", + i, valuesToSet[i], retrieved); + Assert.assertEquals("Value mismatch at index " + i, + valuesToSet[i], retrieved, 0.1f); + } + + for (int i = 0; i < valuesToSet.length; i++) { + float retrieved = tensorQ8.getFloat(i); + Assert.assertEquals("Final verification failed at index " + i, + valuesToSet[i], retrieved, 0.1f); + } + } + + @Test + public void testMixedScaleValues() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + tensorQ8.setFloat(0, 100.0f); + tensorQ8.setFloat(1, 0.001f); + tensorQ8.setFloat(2, -100.0f); + tensorQ8.setFloat(3, -0.001f); + + Assert.assertEquals(100.0f, tensorQ8.getFloat(0), 1.0f); + Assert.assertEquals(-100.0f, tensorQ8.getFloat(2), 1.0f); + + float small1 = tensorQ8.getFloat(1); + float small2 = tensorQ8.getFloat(3); + Assert.assertTrue("Small positive value lost sign", small1 >= 0); + Assert.assertTrue("Small negative value lost sign", small2 <= 0); + } + + @Test + public void testQuantizationRange() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float[] testValues = { + 0.0f, 1e-6f, -1e-6f, 100.0f, -100.0f, + }; + + for (int i = 0; i < testValues.length; i++) { + tensorQ8.setFloat(i, testValues[i]); + float retrieved = tensorQ8.getFloat(i); + + if (Math.abs(testValues[i]) < 1e-5f) { + Assert.assertTrue("Small value not close to zero", + Math.abs(retrieved) < 1e-4f); + } else { + float relativeError = Math.abs((retrieved - testValues[i]) / testValues[i]); + Assert.assertTrue("Large relative error 
at index " + i + + ": expected=" + testValues[i] + ", got=" + retrieved, + relativeError < 0.01f); + } + } + } + + @Test + public void testInt8Range() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float[] boundaryValues = { + -128.0f, -127.0f, -64.0f, 0.0f, 63.0f, 126.0f, 127.0f + }; + + for (int i = 0; i < boundaryValues.length; i++) { + tensorQ8.setFloat(i, boundaryValues[i]); + float retrieved = tensorQ8.getFloat(i); + printVerboseF("INT8 boundary test: Setting %.1f, got %.1f%n", + boundaryValues[i], retrieved); + Assert.assertEquals("Value mismatch at INT8 boundary " + boundaryValues[i], + boundaryValues[i], retrieved, 1.0f); + } + } + + @Test + public void testIndependentBlocks() { + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting independent blocks with different scales:"); + + printVerbose("\nBlock 1 - Small values:"); + for (int i = 0; i < blockSize; i++) { + float value = 0.1f + (0.9f * i / blockSize); + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + printVerbose("\nBlock 2 - Medium values:"); + for (int i = 0; i < blockSize; i++) { + float value = 10.0f + (10.0f * i / blockSize); + tensorQ8.setFloat(blockSize + i, value); + float retrieved = tensorQ8.getFloat(blockSize + i); + printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + printVerbose("\nBlock 3 - Large values:"); + for (int i = 0; i < blockSize; i++) { + float value = 100.0f + (100.0f * i / blockSize); + tensorQ8.setFloat(2 * blockSize + i, value); + float retrieved = tensorQ8.getFloat(2 * blockSize + i); + printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n", + i, value, retrieved, Math.abs(value - retrieved)); + } + + printVerbose("\nVerifying accuracy for each block:"); + + for (int block = 0; block < 3; block++) { + float maxDiff = 0.0f; + float maxRelErr = 0.0f; + float minVal = Float.MAX_VALUE; + float maxVal = Float.MIN_VALUE; + + for (int i = 0; i < blockSize; i++) { + int idx = block * blockSize + i; + float original = (block == 0) ? (0.1f + (0.9f * i / blockSize)) : + (block == 1) ? (10.0f + (10.0f * i / blockSize)) : + (100.0f + (100.0f * i / blockSize)); + float retrieved = tensorQ8.getFloat(idx); + float diff = Math.abs(original - retrieved); + float relErr = diff / Math.abs(original); + + maxDiff = Math.max(maxDiff, diff); + maxRelErr = Math.max(maxRelErr, relErr); + minVal = Math.min(minVal, retrieved); + maxVal = Math.max(maxVal, retrieved); + } + + printVerboseF("Block %d stats:%n", block); + printVerboseF(" Value range: %.6f to %.6f%n", minVal, maxVal); + printVerboseF(" Max absolute difference: %.6f%n", maxDiff); + printVerboseF(" Max relative error: %.6f%%%n", maxRelErr * 100); + + float expectedMaxErr = (block == 0) ? 0.5f : (block == 1) ? 
0.2f : 0.1f; + + Assert.assertTrue( + String.format("Block %d error too large: %.2f%% > %.2f%%", + block, maxRelErr * 100, expectedMaxErr * 100), + maxRelErr < expectedMaxErr); + } + } + + + @Test + public void testRepeatedUpdates() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float testValue = 1.0f; + int testIndex = 0; + + printVerbose("\nTesting repeated updates stability:"); + for (int i = 0; i < 100; i++) { + tensorQ8.setFloat(testIndex, testValue); + float retrieved = tensorQ8.getFloat(testIndex); + printVerboseF("Update %d: Expected=%.6f Got=%.6f%n", + i, testValue, retrieved); + Assert.assertEquals("Value unstable after repeated updates", + testValue, retrieved, 0.1f); + } + } + + @Test + public void testAlternatingPatterns() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting alternating pattern preservation:"); + + // Set alternating values + printVerbose("Setting alternating values:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = (i % 2 == 0) ? 1.0f : -1.0f; + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); + } + + // Verify alternating values + printVerbose("\nVerifying alternating pattern:"); + for (int i = 0; i < shape.getSize(); i++) { + float expected = (i % 2 == 0) ? 1.0f : -1.0f; + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Expected=%.6f Got=%.6f%n", + i, expected, retrieved); + Assert.assertEquals("Alternating pattern not preserved", + expected, retrieved, 0.1f); + } + } + + @Test + public void testSingleBlockPrecision() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float baseValue = 10.0f; + + printVerbose("\nTesting single block precision:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = baseValue * (i + 1) / shape.getSize(); + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + float relativeError = Math.abs((retrieved - value) / value); + + printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n", + i, value, retrieved, relativeError); + + Assert.assertTrue( + String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f", + i, value, retrieved, relativeError), + relativeError < 0.1f); + } + } + + @Test + public void testConstantBlock() { + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float testValue = 10.0f; + printVerbose("\nTesting constant value block:"); + + printVerbose("Setting constant values:"); + for (int i = 0; i < blockSize; i++) { + tensorQ8.setFloat(i, testValue); + } + + float maxDiff = 0.0f; + printVerbose("\nVerifying constant values:"); + for (int i = 0; i < blockSize; i++) { + float retrieved = tensorQ8.getFloat(i); + float diff = Math.abs(retrieved - testValue); + maxDiff = Math.max(maxDiff, diff); + printVerboseF("Index %d: Expected=%.6f Got=%.6f Diff=%.6f%n", + i, testValue, retrieved, diff); + } + + float relativeError = maxDiff / Math.abs(testValue); + printVerboseF("Maximum relative error: %.6f%%%n", relativeError * 100); + + Assert.assertTrue( + String.format("Relative error too large for constant block: %.2f%%", + relativeError * 100), + relativeError < 0.1f); + } + + @Test + public void testNonAlignedBlockSize() { + int blockSize = 
GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize + 5); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting non-aligned block size:"); + for (int i = 0; i < shape.getSize(); i++) { + float value = i * 1.5f; + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + i, value, retrieved); + Assert.assertEquals("Value mismatch in non-aligned blocks", + value, retrieved, 0.1f); + } + } + + @Test + public void testZeroCrossing() { + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + float[][] testRanges = { + {-0.001f, -0.0001f, 0.0f, 0.0001f, 0.001f}, + {-0.1f, -0.05f, 0.0f, 0.05f, 0.1f}, + {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f} + }; + + printVerbose("\nTesting zero crossing behavior:"); + for (int range = 0; range < testRanges.length; range++) { + printVerboseF("\nRange %d:%n", range); + + for (int i = 0; i < testRanges[range].length; i++) { + float value = testRanges[range][i]; + tensorQ8.setFloat(i, value); + float retrieved = tensorQ8.getFloat(i); + + printVerboseF("Value: %10.6f -> Retrieved: %10.6f%n", + value, retrieved); + + if (Math.abs(value) >= 0.01f) { + Assert.assertEquals( + String.format("Sign mismatch for value %.6f", value), + Math.signum(value), Math.signum(retrieved), 0.0f); + } else { + Assert.assertTrue( + String.format("Small value %.6f not close enough to zero (got %.6f)", + value, retrieved), + Math.abs(retrieved) < 0.01f); + } + } + } + } + + @Test + public void testSequentialBlockUpdates() { + // Test updating blocks in sequence vs random order + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 3); // Three blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting sequential block updates:"); + + // Sequential updates + for (int block = 0; block < 3; block++) { + float blockValue = (block + 1) * 10.0f; + printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue); + + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + tensorQ8.setFloat(index, blockValue); + float retrieved = tensorQ8.getFloat(index); + printVerboseF("Index %d: Set=%.6f Got=%.6f%n", + index, blockValue, retrieved); + Assert.assertEquals("Sequential block update failed", + blockValue, retrieved, 0.1f); + } + } + + // Verify all blocks maintain their values + printVerbose("\nVerifying all blocks after updates:"); + for (int block = 0; block < 3; block++) { + float expectedValue = (block + 1) * 10.0f; + for (int i = 0; i < blockSize; i++) { + int index = block * blockSize + i; + float retrieved = tensorQ8.getFloat(index); + Assert.assertEquals("Block value changed unexpectedly", + expectedValue, retrieved, 0.1f); + } + } + } + + @Test + public void testMaximumPrecisionValues() { + // Test precision with values requiring maximum accuracy + Shape shape = new Shape(GGMLType.Q8_0.getBlockSize()); + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting maximum precision values:"); + + // Test precise decimal values + float[] preciseValues = { + 1.23456789f, + -1.23456789f, + 12.3456789f, + -12.3456789f, + 123.456789f, + -123.456789f + }; + + for (int i = 0; i < preciseValues.length; i++) { + tensorQ8.setFloat(i, preciseValues[i]); + float retrieved = tensorQ8.getFloat(i); + float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]); + + printVerboseF("Precise value test %d: Set=%.9f Got=%.9f RelError=%.9f%n", + i, preciseValues[i], retrieved, 
relativeError); + + // For high-precision values, we expect relative error < 1% + Assert.assertTrue( + String.format("Precision lost: expected=%.9f, got=%.9f, error=%.9f", + preciseValues[i], retrieved, relativeError), + relativeError < 0.01f); + } + } + + @Test + public void testBlockScaleInterference() { + // Test that updates in one block don't affect other blocks' scales + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); // Two blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + printVerbose("\nTesting block scale interference:"); + + // Set first block to small values + printVerbose("\nSetting first block to small values:"); + for (int i = 0; i < blockSize; i++) { + float value = 0.1f + (0.1f * i / blockSize); + tensorQ8.setFloat(i, value); + printVerboseF("Block 1 index %d: Set=%.6f%n", i, value); + } + + // Set second block to large values + printVerbose("\nSetting second block to large values:"); + for (int i = 0; i < blockSize; i++) { + float value = 100.0f + (100.0f * i / blockSize); + tensorQ8.setFloat(blockSize + i, value); + printVerboseF("Block 2 index %d: Set=%.6f%n", i, value); + } + + // Verify first block maintained small values + printVerbose("\nVerifying first block maintained precision:"); + for (int i = 0; i < blockSize; i++) { + float expected = 0.1f + (0.1f * i / blockSize); + float retrieved = tensorQ8.getFloat(i); + float relativeError = Math.abs((retrieved - expected) / expected); + + printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f RelError=%.6f%n", + i, expected, retrieved, relativeError); + + Assert.assertTrue( + String.format("Block 1 precision lost after block 2 update at index %d", i), + relativeError < 0.1f); + } + } + + @Test + public void testBlockBoundaryUpdates() { + // Test updating values at block boundaries + int blockSize = GGMLType.Q8_0.getBlockSize(); + Shape shape = new Shape(blockSize * 2); // Two blocks + TensorQ8 tensorQ8 = new TensorQ8(shape); + + // Set values around block boundary + float[] boundaryValues = {1.0f, 2.0f, 3.0f, 4.0f}; + int boundaryStart = blockSize - 2; // Two values before boundary + + printVerbose("\nTesting block boundary updates:"); + for (int i = 0; i < boundaryValues.length; i++) { + int index = boundaryStart + i; + tensorQ8.setFloat(index, boundaryValues[i]); + float retrieved = tensorQ8.getFloat(index); + printVerboseF("Index %d (block boundary +/- 2): Set=%.6f Got=%.6f%n", + index, boundaryValues[i], retrieved); + Assert.assertEquals("Value mismatch at block boundary", + boundaryValues[i], retrieved, 0.1f); + } + } +}