diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java index 213f68fb2d..5b2f2ba6d3 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/arrays/ByteArray.java @@ -27,6 +27,7 @@ import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.internal.annotations.SegmentElementSize; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; /** * This class represents an array of bytes stored in native memory. @@ -61,6 +62,15 @@ public ByteArray(int numberOfElements) { segment.setAtIndex(JAVA_INT, 0, numberOfElements); } + + public ByteArray(int numberOfElements, long requiredStorageSize) { + this.numberOfElements = numberOfElements; + baseIndex=0; + this.segmentByteSize = requiredStorageSize; + segment = Arena.ofAuto().allocate(requiredStorageSize, 1); + segment.setAtIndex(JAVA_INT, 0, numberOfElements); + } + /** * Constructs a new {@link ByteArray} instance by concatenating the contents of the given array of {@link ByteArray} instances. * @@ -123,6 +133,14 @@ public static ByteArray fromSegment(MemorySegment segment) { return byteArray; } + // Temporary workaround to copy raw memory segment without a tornado header + public static ByteArray fromSegment(MemorySegment segment, int numberOfElements) { + long byteSize = segment.byteSize(); + ByteArray byteArray = new ByteArray(numberOfElements, byteSize); + MemorySegment.copy(segment, 0, byteArray.segment, byteArray.baseIndex * BYTE_BYTES, byteSize); + return byteArray; + } + /** * Creates a new instance of the {@link ByteArray} class from a {@link ByteBuffer}. * diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java index 39da8b6c77..b169868de1 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/DType.java @@ -67,7 +67,10 @@ public enum DType { /** * Represents a quantized 8-bit unsigned integer used in specialized applications like machine learning, using 1 byte. */ - QUINT8(1, ValueLayout.JAVA_BYTE); + QUINT8(1, ValueLayout.JAVA_BYTE), + + Q4_0(1, ValueLayout.JAVA_BYTE); + // @formatter:on /** diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java new file mode 100644 index 0000000000..5802f63d01 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Float16.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package uk.ac.manchester.tornado.api.types.tensors; + + +public final class Float16 { + public static final int BYTES = 2; +} + diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java new file mode 100644 index 0000000000..447805dccd --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/GGMLType.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +public enum GGMLType { + F32(Float.BYTES), + F16(Float16.BYTES), + Q4_0(Float16.BYTES + 16 * Byte.BYTES, 32), + Q4_1(2 * Float16.BYTES + 16 * Byte.BYTES, 32), + UNSUPPORTED_Q4_2(Integer.MAX_VALUE), // support has been removed + UNSUPPORTED_Q4_3(Integer.MAX_VALUE), // support has been removed + Q5_0(Integer.MAX_VALUE), + Q5_1(Integer.MAX_VALUE), + Q8_0(Float16.BYTES + 32 * Byte.BYTES, 32), + Q8_1(32 * Byte.BYTES + 2 * Float.BYTES, 32), + // k-quantizations + Q2_K(Integer.MAX_VALUE), + Q3_K(Integer.MAX_VALUE), + Q4_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 2, GGMLType.QK_K), + Q5_K(2 * Float16.BYTES + ((GGMLType.QK_K / 16) / 8 * 6) + GGMLType.QK_K / 8 + GGMLType.QK_K / 2, GGMLType.QK_K), + Q6_K(GGMLType.QK_K / 2 + GGMLType.QK_K / 4 + GGMLType.QK_K / 16 + Float16.BYTES, GGMLType.QK_K), + Q8_K(Integer.MAX_VALUE), + I8(Byte.BYTES), + I16(Short.BYTES), + I32(Integer.BYTES); + + private static final GGMLType[] VALUES = values(); + + private final int typeSize; + + private final int blockSize; + + public int getTypeSize() { + return typeSize; + } + + public int getBlockSize() { + return blockSize; + } + + public static GGMLType fromId(int id) { + return VALUES[id]; + } + + GGMLType(int typeSize) { + this(typeSize, 1); + } + + public long byteSizeFor(int numberOfElements) { + long t = numberOfElements * (long) getTypeSize(); + assert t % getBlockSize() == 0; + return Math.toIntExact(t / getBlockSize()); + } + + public static final int QK_K = 256; // or 64? 
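+
+ // Worked example of the sizes declared above: a Q8_0 block packs 32 values as a 2-byte float16
+ // scale plus 32 signed bytes (34 bytes per block), while a Q4_0 block packs 32 values as a
+ // 2-byte scale plus 16 bytes of 4-bit codes (18 bytes per block).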
+ + GGMLType(int typeSize, int blockSize) { + assert blockSize > 0; + assert typeSize > 0; + assert isPowerOf2(blockSize); + this.typeSize = typeSize; + this.blockSize = blockSize; + } + + private static boolean isPowerOf2(int n) { + return n > 0 && (n & (n - 1)) == 0; + } +} \ No newline at end of file diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java index a678cd01d5..d8138651cf 100644 --- a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/Shape.java @@ -45,7 +45,8 @@ public long[] getDimensions() { * @return the total size of the shape as an int */ public int getSize() { - return (int) Arrays.stream(dimensions).reduce(1, (a, b) -> a * b); + assert Arrays.stream(dimensions).allMatch(i -> i > 0); + return (int) Arrays.stream(dimensions).reduce(Math::multiplyExact).orElseThrow(); } @Override diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java new file mode 100644 index 0000000000..88e9da5260 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ4.java @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +public class TensorQ4 extends Tensor { + private final boolean DEBUG_TENSOR_Q4 = false; + private final ByteArray tensorStorage; + private final int numberOfElements; + private final Shape shape; + private final DType dType; + + private final int blockSize; + private final int bytesPerBlock; + + public TensorQ4(Shape shape) { + super(DType.Q4_0, shape); + this.shape = shape; + this.numberOfElements = shape.getSize(); + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = new ByteArray(numberOfElements, totalSize); + } + + public TensorQ4(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.Q4_0; + this.blockSize = GGMLType.Q4_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize/2 bytes for quantized values (4-bits per value) + this.bytesPerBlock = Float16.BYTES + blockSize / 2; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes + long dataSize = (long) numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q4) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Read 4-bit quantized values + for (int i = 0; i < blockSize; i++) { + byte quant; + if (i < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2) >>> 4) & 0x0F); + } + // Convert from 4-bit value to float + quant -= 8; // Center at zero [-8, 7] + 
values[i] = quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + + // Extract 4-bit value + byte quant; + if (withinBlockIndex < blockSize / 2) { + // Lower 4 bits + quant = (byte) (readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex) & 0x0F); + } else { + // Upper 4 bits + quant = (byte) ((readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + withinBlockIndex - blockSize / 2) >>> 4) & 0x0F); + } + quant -= 8; // Center at zero [-8, 7] + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); + + // Write quantized values + for (int i = 0; i < blockValues.length; i++) { + byte quant = (byte) (Math.round(blockValues[i] / scale) + 8); // Add 8 to shift to [0, 15] + quant = (byte) Math.min(15, Math.max(0, quant)); // Clamp to 4-bit range + + if (i < blockSize / 2) { + // Write to lower 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte) ((current & 0xF0) | (quant & 0x0F))); + } else { + // Write to upper 4 bits + byte current = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i - blockSize / 2, (byte) ((current & 0x0F) | (quant << 4))); + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 7.0f; // Scale to [-7, 7] range for 4-bit values + } + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); + } + + static void writeShort(MemorySegment 
memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); + } + @Override + public Shape getShape() { + return shape; + } + + @Override + public String getDTypeAsString() { + return dType.QINT8.toString(); + } + + @Override + public DType getDType() { + return DType.QINT8; + } + + @Override + public int getSize() { + return shape.getSize(); + } + + @Override + public MemorySegment getSegment() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public MemorySegment getSegmentWithHeader() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return tensorStorage.getNumBytesOfSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegment() { + return tensorStorage.getNumBytesOfSegment(); + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return DType.QINT8.getByteSize(); + } +} \ No newline at end of file diff --git a/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java new file mode 100644 index 0000000000..ae3cf26530 --- /dev/null +++ b/tornado-api/src/main/java/uk/ac/manchester/tornado/api/types/tensors/TensorQ8.java @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2024, APT Group, Department of Computer Science, + * The University of Manchester. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package uk.ac.manchester.tornado.api.types.tensors; + +import uk.ac.manchester.tornado.api.types.arrays.ByteArray; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +public class TensorQ8 extends Tensor { + private final boolean DEBUG_TENSOR_Q8 = false; + /** Storage for the quantized tensor data including scales and values. */ + private final ByteArray tensorStorage; + + /** Total number of elements in the tensor. */ + private final int numberOfElements; + + /** Shape information for the tensor. */ + private final Shape shape; + + /** Data type of the tensor (QINT8). */ + private final DType dType; + + /** Number of values in each quantization block. */ + private final int blockSize; + + /** Total bytes per block including scale and quantized values. */ + private final int bytesPerBlock; + + /** + * Constructs a new Q8 tensor with the specified shape. + * Allocates memory and initializes the tensor storage. 
+ * + * @param shape The shape of the tensor to create + */ + public TensorQ8(Shape shape) { + super(DType.QINT8, shape); + this.shape = shape; + this.numberOfElements = shape.getSize(); + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = new ByteArray(numberOfElements, totalSize); + } + + /** + * Constructs a Q8 tensor using existing memory segment data. + * Used for creating a tensor view of pre-existing quantized data. + * + * @param numberOfElements The number of elements in the tensor + * @param memorySegment The memory segment containing the quantized data + */ + public TensorQ8(int numberOfElements, MemorySegment memorySegment) { + super(DType.QINT8, new Shape(numberOfElements)); + this.shape = new Shape(numberOfElements); + this.numberOfElements = numberOfElements; + this.dType = DType.QINT8; + this.blockSize = GGMLType.Q8_0.getBlockSize(); + + // Each block contains: + // - 2 bytes for float16 scale + // - blockSize bytes for quantized values + this.bytesPerBlock = Float16.BYTES + blockSize; + + // Calculate number of blocks needed to store all elements + int numBlocks = (numberOfElements + blockSize - 1) / blockSize; + + // Calculate total storage size in bytes, including header + long dataSize = (long)numBlocks * bytesPerBlock; + long totalSize = dataSize; + + if (DEBUG_TENSOR_Q8) { + System.out.println("Debug info:"); + System.out.println("Number of elements: " + numberOfElements); + System.out.println("Block size: " + blockSize); + System.out.println("Bytes per block: " + bytesPerBlock); + System.out.println("Number of blocks: " + numBlocks); + System.out.println("Data size: " + dataSize); + System.out.println("Total size with header: " + totalSize); + } + + this.tensorStorage = ByteArray.fromSegment(memorySegment, numberOfElements); + } + + private float[] getBlockValues(int blockIndex) { + float[] values = new float[blockSize]; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + for (int i = 0; i < blockSize; i++) { + byte quant = readByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i); + values[i] = quant * scale; + } + } catch (Exception e) { + throw new RuntimeException("Failed to read block " + blockIndex + " at offset " + blockOffset + ": " + e.getMessage()); + } + return values; + } + + /** + * Gets a single float value from the tensor at the specified index. + * The value is dequantized using the scale factor from its containing block. 
+ * + * @param index The index of the value to retrieve + * @return The dequantized float value + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error reading the value + */ + public float getFloat(int index) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + int blockOffset = blockIndex * bytesPerBlock; + + try { + float scale = Float.float16ToFloat(readShort(tensorStorage.getSegmentWithHeader(), blockOffset)); + byte quant = readByte(tensorStorage.getSegmentWithHeader(), + blockOffset + Float16.BYTES + withinBlockIndex); + return quant * scale; + } catch (Exception e) { + throw new RuntimeException("Failed to get float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + /** + * Sets a float value in the tensor at the specified index. + * Updates the entire block's scale factor when any value in the block changes. + * + * @param index The index where the value should be set + * @param value The float value to set + * @throws IndexOutOfBoundsException if the index is out of bounds + * @throws RuntimeException if there is an error writing the value + */ + public void setFloat(int index, float value) { + if (index < 0 || index >= numberOfElements) { + throw new IndexOutOfBoundsException("Index " + index + " out of bounds for length " + numberOfElements); + } + + int blockIndex = index / blockSize; + int withinBlockIndex = index % blockSize; + + // Get current block values + float[] blockValues = getBlockValues(blockIndex); + blockValues[withinBlockIndex] = value; + + // Compute optimal scale for block + float scale = computeOptimalScale(blockValues); + + // Update block + int blockOffset = blockIndex * bytesPerBlock; + + try { + // Write scale + writeShort(tensorStorage.getSegmentWithHeader(), blockOffset, Float.floatToFloat16(scale)); + + // Write quantized values + for (int i = 0; i < blockValues.length; i++) { + int quantized = Math.min(127, Math.max(-128, Math.round(blockValues[i] / scale))); + writeByte(tensorStorage.getSegmentWithHeader(), blockOffset + Float16.BYTES + i, (byte)quantized); + } + } catch (Exception e) { + throw new RuntimeException("Failed to set float at index " + index + " (block " + blockIndex + ", offset " + blockOffset + "): " + e.getMessage()); + } + } + + /** + * Computes the optimal scale factor for a block of values. + * The scale is chosen to maximize the use of the INT8 range (-128 to 127). 
+ * + * @param values The array of float values to compute the scale for + * @return The optimal scale factor for quantizing the values + */ + + private float computeOptimalScale(float[] values) { + float maxAbs = 1e-5f; + for (float value : values) { + maxAbs = Math.max(maxAbs, Math.abs(value)); + } + return maxAbs / 127.0f; + } + + + static short readShort(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_SHORT, offset); + } + + static byte readByte(MemorySegment memorySegment, long offset) { + return memorySegment.get(ValueLayout.JAVA_BYTE, offset); + } + + static void writeShort(MemorySegment memorySegment, long offset, short value) { + memorySegment.set(ValueLayout.JAVA_SHORT, offset, value); + } + + static void writeByte(MemorySegment memorySegment, long offset, byte value) { + memorySegment.set(ValueLayout.JAVA_BYTE, offset, value); + } + + @Override + public Shape getShape() { + return shape; + } + + @Override + public String getDTypeAsString() { + return dType.QINT8.toString(); + } + + @Override + public DType getDType() { + return DType.QINT8; + } + + @Override + public int getSize() { + return shape.getSize(); + } + + @Override + public MemorySegment getSegment() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public MemorySegment getSegmentWithHeader() { + return tensorStorage.getSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegmentWithHeader() { + return tensorStorage.getNumBytesOfSegmentWithHeader(); + } + + @Override + public long getNumBytesOfSegment() { + return tensorStorage.getNumBytesOfSegment(); + } + + @Override + protected void clear() { + + } + + @Override + public int getElementSize() { + return DType.QINT8.getByteSize(); + } +} \ No newline at end of file diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java new file mode 100644 index 0000000000..4f35d7fdd7 --- /dev/null +++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ4.java @@ -0,0 +1,392 @@ +package uk.ac.manchester.tornado.unittests.tensors; + +import org.junit.Assert; +import org.junit.Test; +import uk.ac.manchester.tornado.api.types.tensors.GGMLType; +import uk.ac.manchester.tornado.api.types.tensors.Shape; +import uk.ac.manchester.tornado.api.types.tensors.TensorQ4; +import uk.ac.manchester.tornado.unittests.common.TornadoTestBase; + +import static java.lang.Boolean.FALSE; + +/** + *
+ * How to run? + *
+ *
+ * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ4
+ *
+ */
+public class TestTensorQ4 extends TornadoTestBase {
+ private static final boolean VERBOSE = FALSE;
+
+ private void printVerbose(String message) {
+ if (VERBOSE) {
+ System.out.println(message);
+ }
+ }
+
+ private void printVerboseF(String format, Object... args) {
+ if (VERBOSE) {
+ System.out.printf(format, args);
+ }
+ }
+
+ @Test
+ public void testBasicQuantization() {
+ // Round-trip a single value through a one-element Q4 tensor
+ Shape shape = new Shape(1);
+ TensorQ4 tensor = new TensorQ4(shape);
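+ // Even a single element occupies one full Q4_0 block in this layout: a 2-byte float16 scale
+ // plus blockSize/2 = 16 packed bytes, i.e. 18 bytes, which is the segment size printed below.
+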
+
+ float testValue = 1.0f;
+ tensor.setFloat(0, testValue);
+ float retrieved = tensor.getFloat(0);
+ printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize());
+ Assert.assertEquals(testValue, retrieved, 0.2f);
+ }
+
+ @Test
+ public void testFourBitRange() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Test a single block to maintain consistent scale
+ float[] boundaryValues = {
+ -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f, 4.0f, 6.0f
+ };
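+ // With these inputs the block scale is maxAbs / 7 = 8 / 7 ~ 1.14, so the worst-case round-trip
+ // error is about half a quantization step (~0.57); hence the 0.6 tolerance below.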
+
+ printVerbose("\nTesting 4-bit range quantization:");
+ for (int i = 0; i < boundaryValues.length; i++) {
+ tensorQ4.setFloat(i, boundaryValues[i]);
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("4-bit value test: Setting %.1f, got %.1f%n",
+ boundaryValues[i], retrieved);
+ // Increased tolerance to account for quantization steps
+ Assert.assertEquals("Value mismatch at 4-bit value " + boundaryValues[i],
+ boundaryValues[i], retrieved, 0.6f);
+ }
+ }
+
+ @Test
+ public void testPackedValues() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Test both nibbles of each byte with values well within quantization range
+ float[] values = {-4.0f, -2.0f, 0.0f, 2.0f, 4.0f, -4.0f, -2.0f, 2.0f};
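+ // In this implementation elements 0..15 of a block live in the low nibbles of bytes 0..15 and
+ // elements 16..31 in the high nibbles of the same bytes, so index i and index i+16 share one
+ // storage byte.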
+
+ printVerbose("\nTesting packed 4-bit storage:");
+ for (int i = 0; i < values.length; i++) {
+ tensorQ4.setFloat(i, values[i]);
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("Packed index %d: Set=%.1f Got=%.1f%n",
+ i, values[i], retrieved);
+ Assert.assertEquals("Value mismatch for packed storage",
+ values[i], retrieved, 0.5f);
+ }
+ }
+
+ @Test
+ public void testBlockScaleInterference() {
+ int blockSize = GGMLType.Q4_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 2);
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
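+ // Each 32-element block stores its own float16 scale, so rewriting block 2 only recomputes
+ // block 2's scale; block 1 stays quantized against its own maximum magnitude.
+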
+
+ printVerbose("\nTesting block scale interference:");
+
+ // Use values well within the 4-bit quantization range
+ for (int i = 0; i < blockSize; i++) {
+ float value = -4.0f + (8.0f * i / blockSize); // Range from -4 to 4
+ tensorQ4.setFloat(i, value);
+ printVerboseF("Block 1 index %d: Set=%.6f%n", i, value);
+ }
+
+ for (int i = 0; i < blockSize; i++) {
+ float value = -2.0f + (4.0f * i / blockSize); // Range from -2 to 2
+ tensorQ4.setFloat(blockSize + i, value);
+ printVerboseF("Block 2 index %d: Set=%.6f%n", i, value);
+ }
+
+ // Verify first block maintained reasonable accuracy
+ for (int i = 0; i < blockSize; i++) {
+ float expected = -4.0f + (8.0f * i / blockSize);
+ float retrieved = tensorQ4.getFloat(i);
+ float absError = Math.abs(retrieved - expected);
+
+ printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f AbsError=%.6f%n",
+ i, expected, retrieved, absError);
+
+ Assert.assertTrue("Block 1 accuracy lost after block 2 update",
+ absError < 0.6f);
+ }
+ }
+
+ @Test
+ public void testFullRangeQuantization() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Test evenly spaced values within quantization range
+ float[] testValues = new float[16];
+ for (int i = 0; i < 16; i++) {
+ testValues[i] = -7.0f + (i * 14.0f / 15.0f); // Range from -7 to 7
+ }
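+ // maxAbs is 7.0 here, so the block scale is 7 / 7 = 1.0 and stored levels are one unit apart;
+ // the round-trip error should therefore stay below ~0.5, within the 0.6 bound used below.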
+
+ printVerbose("\nTesting quantization range:");
+ for (int i = 0; i < testValues.length; i++) {
+ tensorQ4.setFloat(i, testValues[i]);
+ float retrieved = tensorQ4.getFloat(i);
+
+ printVerboseF("Step %2d: Set=%.3f Got=%.3f%n",
+ i, testValues[i], retrieved);
+
+ float absError = Math.abs(retrieved - testValues[i]);
+ Assert.assertTrue(
+ String.format("Excessive quantization error: expected=%.3f, got=%.3f, error=%.3f",
+ testValues[i], retrieved, absError),
+ absError < 0.6f);
+ }
+ }
+ @Test
+ public void testTensorQ4SetAndGetFloatVerify() {
+ int blockSize = GGMLType.Q4_0.getBlockSize();
+ Shape shape = new Shape(blockSize);
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Use values within Q4 range (-8 to 7)
+ float[] pattern = {0.5f, -1.0f, 4.0f, -6.0f, 0.0f};
+ float[] valuesToSet = new float[blockSize];
+ for (int i = 0; i < blockSize; i++) {
+ valuesToSet[i] = pattern[i % pattern.length];
+ }
+
+ printVerboseF("Total elements: %d%n", shape.getSize());
+ printVerboseF("Block size: %d%n", blockSize);
+ printVerboseF("Total allocated bytes: %d%n", tensorQ4.getSegment().byteSize());
+
+ for (int i = 0; i < valuesToSet.length; i++) {
+ tensorQ4.setFloat(i, valuesToSet[i]);
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n",
+ i, valuesToSet[i], retrieved);
+ Assert.assertEquals("Value mismatch at index " + i,
+ valuesToSet[i], retrieved, 0.5f);
+ }
+ }
+
+ @Test
+ public void testSingleBlockPrecision() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ float baseValue = 4.0f; // Smaller base value for Q4 range
+
+ printVerbose("\nTesting single block precision:");
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = baseValue * (i + 1) / shape.getSize();
+ tensorQ4.setFloat(i, value);
+ float retrieved = tensorQ4.getFloat(i);
+ float relativeError = Math.abs((retrieved - value) / value);
+
+ printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n",
+ i, value, retrieved, relativeError);
+
+ Assert.assertTrue(
+ String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f",
+ i, value, retrieved, relativeError),
+ relativeError < 0.3f); // Higher tolerance for Q4
+ }
+ }
+
+ @Test
+ public void testMaximumPrecisionValues() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ printVerbose("\nTesting maximum precision values:");
+
+ float[] preciseValues = {
+ 1.234f,
+ -1.234f,
+ 3.456f,
+ -3.456f,
+ 6.789f,
+ -6.789f
+ };
+
+ for (int i = 0; i < preciseValues.length; i++) {
+ tensorQ4.setFloat(i, preciseValues[i]);
+ float retrieved = tensorQ4.getFloat(i);
+ float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]);
+
+ printVerboseF("Precise value test %d: Set=%.6f Got=%.6f RelError=%.6f%n",
+ i, preciseValues[i], retrieved, relativeError);
+
+ Assert.assertTrue(
+ String.format("Precision lost: expected=%.6f, got=%.6f, error=%.6f",
+ preciseValues[i], retrieved, relativeError),
+ relativeError < 0.2f);
+ }
+ }
+
+ @Test
+ public void testSequentialBlockUpdates() {
+ int blockSize = GGMLType.Q4_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 3);
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ printVerbose("\nTesting sequential block updates:");
+
+ // Sequential updates with Q4-appropriate values
+ for (int block = 0; block < 3; block++) {
+ float blockValue = (block + 1) * 2.0f; // Values: 2, 4, 6
+ printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue);
+
+ for (int i = 0; i < blockSize; i++) {
+ int index = block * blockSize + i;
+ tensorQ4.setFloat(index, blockValue);
+ float retrieved = tensorQ4.getFloat(index);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f%n",
+ index, blockValue, retrieved);
+ Assert.assertEquals("Sequential block update failed",
+ blockValue, retrieved, 0.5f);
+ }
+ }
+ }
+
+ @Test
+ public void testNibbleBoundaryUpdates() {
+ // Test updating values at nibble boundaries
+ int blockSize = GGMLType.Q4_0.getBlockSize();
+ Shape shape = new Shape(blockSize);
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Set values around nibble boundaries
+ float[] values = {1.0f, -1.0f, 2.0f, -2.0f};
+
+ // Test boundaries between nibbles
+ for (int i = 0; i < values.length; i++) {
+ int index = (i * blockSize/4); // Space out across block
+ tensorQ4.setFloat(index, values[i]);
+ float retrieved = tensorQ4.getFloat(index);
+ printVerboseF("Nibble boundary %d: Set=%.6f Got=%.6f%n",
+ index, values[i], retrieved);
+ Assert.assertEquals("Value mismatch at nibble boundary",
+ values[i], retrieved, 0.5f);
+ }
+ }
+
+ @Test
+ public void testAlternatingNibblePatterns() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ printVerbose("\nTesting alternating nibble pattern:");
+
+ // Set alternating values across nibble boundaries
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = (i % 2 == 0) ? 1.0f : -1.0f;
+ tensorQ4.setFloat(i, value);
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f%n",
+ i, value, retrieved);
+ Assert.assertEquals("Alternating pattern not preserved",
+ value, retrieved, 0.5f);
+ }
+ }
+
+ @Test
+ public void testNibblePackingConsistency() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Create an array of expected quantized values
+ float[] expectedValues = {
+ -4.0f, -3.5f, -3.0f, -2.5f,
+ -2.0f, -1.5f, -1.0f, -0.5f,
+ 0.0f, 0.5f, 1.0f, 1.5f,
+ 2.0f, 2.5f, 3.0f, 3.5f
+ };
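+ // With maxAbs = 4.0 the block scale is about 4 / 7 ~ 0.57, so stored levels are ~0.57 apart and
+ // any of the inputs above is at most ~0.29 from a level, within the 0.5 quantStep check below.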
+
+ printVerbose("\nTesting nibble packing consistency:");
+
+ // Set values
+ for (int i = 0; i < expectedValues.length; i++) {
+ tensorQ4.setFloat(i, expectedValues[i]);
+ }
+
+ // Verify quantization
+ for (int i = 0; i < expectedValues.length; i++) {
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("Pattern %2d: Set=%.4f Got=%.4f Diff=%.4f%n",
+ i, expectedValues[i], retrieved,
+ Math.abs(expectedValues[i] - retrieved));
+
+ // Check if the retrieved value is within one quantization step
+ float quantStep = 0.5f; // Quantization step size for Q4
+ Assert.assertTrue(
+ String.format("Quantization error too large at index %d: expected=%.4f, got=%.4f",
+ i, expectedValues[i], retrieved),
+ Math.abs(retrieved - expectedValues[i]) <= quantStep
+ );
+ }
+
+ // Additional verification for nibble boundaries
+ printVerbose("\nVerifying nibble boundaries:");
+ for (int i = 0; i < expectedValues.length; i += 2) {
+ float val1 = tensorQ4.getFloat(i);
+ float val2 = tensorQ4.getFloat(i + 1);
+ printVerboseF("Nibble pair %d: %.4f %.4f%n", i/2, val1, val2);
+
+ // Verify the difference between adjacent values is consistent
+ if (i < expectedValues.length - 2) {
+ float diff1 = val2 - val1;
+ float diff2 = tensorQ4.getFloat(i + 2) - val2;
+ Assert.assertTrue(
+ String.format("Inconsistent quantization steps: %.4f vs %.4f", diff1, diff2),
+ Math.abs(diff1 - diff2) <= 0.1f
+ );
+ }
+ }
+ }
+
+ @Test
+ public void testGradualValueTransitions() {
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
+
+ // Test gradual transitions to check quantization steps
+ float step = 14.0f / shape.getSize(); // Sweep from -7.0 toward +7.0 in equal steps
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = -7.0f + (step * i);
+ tensorQ4.setFloat(i, value);
+ float retrieved = tensorQ4.getFloat(i);
+ printVerboseF("Step %d: Set=%.3f Got=%.3f%n",
+ i, value, retrieved);
+ Assert.assertEquals("Gradual transition not preserved",
+ value, retrieved, 0.5f);
+ }
+ }
+
+ @Test
+ public void testQ4Symmetry() {
+ // Test symmetry of positive and negative values
+ Shape shape = new Shape(GGMLType.Q4_0.getBlockSize());
+ TensorQ4 tensorQ4 = new TensorQ4(shape);
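+ // The 4-bit code covers levels -8*scale .. +7*scale, so the grid has one extra level on the
+ // negative side; the values used here stay within +/-7, where positive and negative levels
+ // mirror each other and the symmetry check is meaningful.
+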
+
+ for (int i = 0; i <= 7; i++) {
+ float positive = i * 1.0f;
+ float negative = -positive;
+
+ tensorQ4.setFloat(i * 2, positive);
+ tensorQ4.setFloat(i * 2 + 1, negative);
+
+ float retrievedPos = tensorQ4.getFloat(i * 2);
+ float retrievedNeg = tensorQ4.getFloat(i * 2 + 1);
+
+ printVerboseF("Symmetry test %d: +%.1f->%.1f, %.1f->%.1f%n",
+ i, positive, retrievedPos, negative, retrievedNeg);
+
+ Assert.assertEquals("Positive value not preserved", positive, retrievedPos, 0.5f);
+ Assert.assertEquals("Negative value not preserved", negative, retrievedNeg, 0.5f);
+ Assert.assertEquals("Asymmetric quantization",
+ Math.abs(retrievedPos), Math.abs(retrievedNeg), 0.1f);
+ }
+ }
+}
diff --git a/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java
new file mode 100644
index 0000000000..30d6d093e1
--- /dev/null
+++ b/tornado-unittests/src/main/java/uk/ac/manchester/tornado/unittests/tensors/TestTensorQ8.java
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2024, APT Group, Department of Computer Science,
+ * The University of Manchester.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package uk.ac.manchester.tornado.unittests.tensors;
+
+import org.junit.Assert;
+import org.junit.Test;
+import uk.ac.manchester.tornado.api.types.tensors.GGMLType;
+import uk.ac.manchester.tornado.api.types.tensors.Shape;
+import uk.ac.manchester.tornado.api.types.tensors.TensorQ8;
+import uk.ac.manchester.tornado.unittests.common.TornadoTestBase;
+
+import static java.lang.Boolean.FALSE;
+
+/**
+ * How to run?
+ *
+ * tornado-test -V uk.ac.manchester.tornado.unittests.tensors.TestTensorQ8
+ *
+ */
+public class TestTensorQ8 extends TornadoTestBase {
+
+ private static final boolean VERBOSE = FALSE; // Control verbose output
+
+ private void printVerbose(String message) {
+ if (VERBOSE) {
+ System.out.println(message);
+ }
+ }
+
+ private void printVerboseF(String format, Object... args) {
+ if (VERBOSE) {
+ System.out.printf(format, args);
+ }
+ }
+
+ @Test
+ public void testBasicQuantization() {
+ Shape shape = new Shape(1);
+ TensorQ8 tensor = new TensorQ8(shape);
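+ // A single element still occupies one full Q8_0 block: a 2-byte float16 scale plus 32 one-byte
+ // values, i.e. 34 bytes, which is what the segment-size printout below reports.
+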
+
+ float testValue = 1.5f;
+ tensor.setFloat(0, testValue);
+ float retrieved = tensor.getFloat(0);
+ printVerboseF("Segment size for storing single value %d%n", tensor.getSegment().byteSize());
+ Assert.assertEquals(testValue, retrieved, 0.1f);
+ }
+
+ @Test
+ public void testTensorQ8SetAndGetFloat() {
+ Shape shape = new Shape(5);
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float[] valuesToSet = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f};
+ for (int i = 0; i < valuesToSet.length; i++) {
+ tensorQ8.setFloat(i, valuesToSet[i]);
+ }
+
+ for (int i = 0; i < valuesToSet.length; i++) {
+ Assert.assertEquals(valuesToSet[i], tensorQ8.getFloat(i), 0.1f);
+ }
+ }
+
+ @Test
+ public void testTensorQ8SetAndGetFloatVerify() {
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize);
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float[] valuesToSet = new float[blockSize];
+ float[] pattern = {0.5f, -1.0f, 25.0f, -30.5f, 0.0f};
+ for (int i = 0; i < blockSize; i++) {
+ valuesToSet[i] = pattern[i % pattern.length];
+ }
+
+ printVerboseF("Total elements: %d%n", shape.getSize());
+ printVerboseF("Block size: %d%n", blockSize);
+ printVerboseF("Total allocated bytes: %d%n", tensorQ8.getSegment().byteSize());
+
+ for (int i = 0; i < valuesToSet.length; i++) {
+ tensorQ8.setFloat(i, valuesToSet[i]);
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("Index %d: Set=%.2f Retrieved=%.2f%n",
+ i, valuesToSet[i], retrieved);
+ Assert.assertEquals("Value mismatch at index " + i,
+ valuesToSet[i], retrieved, 0.1f);
+ }
+
+ for (int i = 0; i < valuesToSet.length; i++) {
+ float retrieved = tensorQ8.getFloat(i);
+ Assert.assertEquals("Final verification failed at index " + i,
+ valuesToSet[i], retrieved, 0.1f);
+ }
+ }
+
+ @Test
+ public void testMixedScaleValues() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ tensorQ8.setFloat(0, 100.0f);
+ tensorQ8.setFloat(1, 0.001f);
+ tensorQ8.setFloat(2, -100.0f);
+ tensorQ8.setFloat(3, -0.001f);
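+
+ // All four values share one block, so the scale is driven by the 100.0 magnitude
+ // (step ~ 100 / 127 ~ 0.79); +/-0.001 is far below half a step and quantizes to zero,
+ // which is why only the sign (or zero) is asserted for the small values below.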
+
+ Assert.assertEquals(100.0f, tensorQ8.getFloat(0), 1.0f);
+ Assert.assertEquals(-100.0f, tensorQ8.getFloat(2), 1.0f);
+
+ float small1 = tensorQ8.getFloat(1);
+ float small2 = tensorQ8.getFloat(3);
+ Assert.assertTrue("Small positive value lost sign", small1 >= 0);
+ Assert.assertTrue("Small negative value lost sign", small2 <= 0);
+ }
+
+ @Test
+ public void testQuantizationRange() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float[] testValues = {
+ 0.0f, 1e-6f, -1e-6f, 100.0f, -100.0f,
+ };
+
+ for (int i = 0; i < testValues.length; i++) {
+ tensorQ8.setFloat(i, testValues[i]);
+ float retrieved = tensorQ8.getFloat(i);
+
+ if (Math.abs(testValues[i]) < 1e-5f) {
+ Assert.assertTrue("Small value not close to zero",
+ Math.abs(retrieved) < 1e-4f);
+ } else {
+ float relativeError = Math.abs((retrieved - testValues[i]) / testValues[i]);
+ Assert.assertTrue("Large relative error at index " + i +
+ ": expected=" + testValues[i] + ", got=" + retrieved,
+ relativeError < 0.01f);
+ }
+ }
+ }
+
+ @Test
+ public void testInt8Range() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float[] boundaryValues = {
+ -128.0f, -127.0f, -64.0f, 0.0f, 63.0f, 126.0f, 127.0f
+ };
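+ // Once -128 is written the block scale becomes 128 / 127 ~ 1.008, so every boundary value
+ // round-trips to within about half a step (~0.5); the 1.0 tolerance leaves headroom on top.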
+
+ for (int i = 0; i < boundaryValues.length; i++) {
+ tensorQ8.setFloat(i, boundaryValues[i]);
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("INT8 boundary test: Setting %.1f, got %.1f%n",
+ boundaryValues[i], retrieved);
+ Assert.assertEquals("Value mismatch at INT8 boundary " + boundaryValues[i],
+ boundaryValues[i], retrieved, 1.0f);
+ }
+ }
+
+ @Test
+ public void testIndependentBlocks() {
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 3);
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting independent blocks with different scales:");
+
+ printVerbose("\nBlock 1 - Small values:");
+ for (int i = 0; i < blockSize; i++) {
+ float value = 0.1f + (0.9f * i / blockSize);
+ tensorQ8.setFloat(i, value);
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n",
+ i, value, retrieved, Math.abs(value - retrieved));
+ }
+
+ printVerbose("\nBlock 2 - Medium values:");
+ for (int i = 0; i < blockSize; i++) {
+ float value = 10.0f + (10.0f * i / blockSize);
+ tensorQ8.setFloat(blockSize + i, value);
+ float retrieved = tensorQ8.getFloat(blockSize + i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n",
+ i, value, retrieved, Math.abs(value - retrieved));
+ }
+
+ printVerbose("\nBlock 3 - Large values:");
+ for (int i = 0; i < blockSize; i++) {
+ float value = 100.0f + (100.0f * i / blockSize);
+ tensorQ8.setFloat(2 * blockSize + i, value);
+ float retrieved = tensorQ8.getFloat(2 * blockSize + i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f Diff=%.6f%n",
+ i, value, retrieved, Math.abs(value - retrieved));
+ }
+
+ printVerbose("\nVerifying accuracy for each block:");
+
+ for (int block = 0; block < 3; block++) {
+ float maxDiff = 0.0f;
+ float maxRelErr = 0.0f;
+ float minVal = Float.MAX_VALUE;
+ float maxVal = -Float.MAX_VALUE; // most negative float; Float.MIN_VALUE is the smallest positive value
+
+ for (int i = 0; i < blockSize; i++) {
+ int idx = block * blockSize + i;
+ float original = (block == 0) ? (0.1f + (0.9f * i / blockSize)) :
+ (block == 1) ? (10.0f + (10.0f * i / blockSize)) :
+ (100.0f + (100.0f * i / blockSize));
+ float retrieved = tensorQ8.getFloat(idx);
+ float diff = Math.abs(original - retrieved);
+ float relErr = diff / Math.abs(original);
+
+ maxDiff = Math.max(maxDiff, diff);
+ maxRelErr = Math.max(maxRelErr, relErr);
+ minVal = Math.min(minVal, retrieved);
+ maxVal = Math.max(maxVal, retrieved);
+ }
+
+ printVerboseF("Block %d stats:%n", block);
+ printVerboseF(" Value range: %.6f to %.6f%n", minVal, maxVal);
+ printVerboseF(" Max absolute difference: %.6f%n", maxDiff);
+ printVerboseF(" Max relative error: %.6f%%%n", maxRelErr * 100);
+
+ float expectedMaxErr = (block == 0) ? 0.5f : (block == 1) ? 0.2f : 0.1f;
+
+ Assert.assertTrue(
+ String.format("Block %d error too large: %.2f%% > %.2f%%",
+ block, maxRelErr * 100, expectedMaxErr * 100),
+ maxRelErr < expectedMaxErr);
+ }
+ }
+
+
+ @Test
+ public void testRepeatedUpdates() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float testValue = 1.0f;
+ int testIndex = 0;
+
+ printVerbose("\nTesting repeated updates stability:");
+ for (int i = 0; i < 100; i++) {
+ tensorQ8.setFloat(testIndex, testValue);
+ float retrieved = tensorQ8.getFloat(testIndex);
+ printVerboseF("Update %d: Expected=%.6f Got=%.6f%n",
+ i, testValue, retrieved);
+ Assert.assertEquals("Value unstable after repeated updates",
+ testValue, retrieved, 0.1f);
+ }
+ }
+
+ @Test
+ public void testAlternatingPatterns() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting alternating pattern preservation:");
+
+ // Set alternating values
+ printVerbose("Setting alternating values:");
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = (i % 2 == 0) ? 1.0f : -1.0f;
+ tensorQ8.setFloat(i, value);
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f%n",
+ i, value, retrieved);
+ }
+
+ // Verify alternating values
+ printVerbose("\nVerifying alternating pattern:");
+ for (int i = 0; i < shape.getSize(); i++) {
+ float expected = (i % 2 == 0) ? 1.0f : -1.0f;
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("Index %d: Expected=%.6f Got=%.6f%n",
+ i, expected, retrieved);
+ Assert.assertEquals("Alternating pattern not preserved",
+ expected, retrieved, 0.1f);
+ }
+ }
+
+ @Test
+ public void testSingleBlockPrecision() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float baseValue = 10.0f;
+
+ printVerbose("\nTesting single block precision:");
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = baseValue * (i + 1) / shape.getSize();
+ tensorQ8.setFloat(i, value);
+ float retrieved = tensorQ8.getFloat(i);
+ float relativeError = Math.abs((retrieved - value) / value);
+
+ printVerboseF("Index %d: Set=%.6f Got=%.6f RelError=%.6f%n",
+ i, value, retrieved, relativeError);
+
+ Assert.assertTrue(
+ String.format("Relative error too large at index %d: expected=%.6f, got=%.6f, relative error=%.6f",
+ i, value, retrieved, relativeError),
+ relativeError < 0.1f);
+ }
+ }
+
+ @Test
+ public void testConstantBlock() {
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize);
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float testValue = 10.0f;
+ printVerbose("\nTesting constant value block:");
+
+ printVerbose("Setting constant values:");
+ for (int i = 0; i < blockSize; i++) {
+ tensorQ8.setFloat(i, testValue);
+ }
+
+ float maxDiff = 0.0f;
+ printVerbose("\nVerifying constant values:");
+ for (int i = 0; i < blockSize; i++) {
+ float retrieved = tensorQ8.getFloat(i);
+ float diff = Math.abs(retrieved - testValue);
+ maxDiff = Math.max(maxDiff, diff);
+ printVerboseF("Index %d: Expected=%.6f Got=%.6f Diff=%.6f%n",
+ i, testValue, retrieved, diff);
+ }
+
+ float relativeError = maxDiff / Math.abs(testValue);
+ printVerboseF("Maximum relative error: %.6f%%%n", relativeError * 100);
+
+ Assert.assertTrue(
+ String.format("Relative error too large for constant block: %.2f%%",
+ relativeError * 100),
+ relativeError < 0.1f);
+ }
+
+ @Test
+ public void testNonAlignedBlockSize() {
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize + 5);
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting non-aligned block size:");
+ for (int i = 0; i < shape.getSize(); i++) {
+ float value = i * 1.5f;
+ tensorQ8.setFloat(i, value);
+ float retrieved = tensorQ8.getFloat(i);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f%n",
+ i, value, retrieved);
+ Assert.assertEquals("Value mismatch in non-aligned blocks",
+ value, retrieved, 0.1f);
+ }
+ }
+
+ @Test
+ public void testZeroCrossing() {
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ float[][] testRanges = {
+ {-0.001f, -0.0001f, 0.0f, 0.0001f, 0.001f},
+ {-0.1f, -0.05f, 0.0f, 0.05f, 0.1f},
+ {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f}
+ };
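+ // Each range rewrites indices 0..4 of the same block, so the block scale follows the current
+ // range's maximum. Values of at least 0.01 in magnitude are expected to keep their sign after
+ // quantization; anything smaller may legitimately collapse to zero, so it is only required to
+ // stay near zero.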
+
+ printVerbose("\nTesting zero crossing behavior:");
+ for (int range = 0; range < testRanges.length; range++) {
+ printVerboseF("\nRange %d:%n", range);
+
+ for (int i = 0; i < testRanges[range].length; i++) {
+ float value = testRanges[range][i];
+ tensorQ8.setFloat(i, value);
+ float retrieved = tensorQ8.getFloat(i);
+
+ printVerboseF("Value: %10.6f -> Retrieved: %10.6f%n",
+ value, retrieved);
+
+ if (Math.abs(value) >= 0.01f) {
+ Assert.assertEquals(
+ String.format("Sign mismatch for value %.6f", value),
+ Math.signum(value), Math.signum(retrieved), 0.0f);
+ } else {
+ Assert.assertTrue(
+ String.format("Small value %.6f not close enough to zero (got %.6f)",
+ value, retrieved),
+ Math.abs(retrieved) < 0.01f);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSequentialBlockUpdates() {
+ // Test updating blocks in sequence vs random order
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 3); // Three blocks
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting sequential block updates:");
+
+ // Sequential updates
+ for (int block = 0; block < 3; block++) {
+ float blockValue = (block + 1) * 10.0f;
+ printVerboseF("\nSetting block %d to %.2f:%n", block, blockValue);
+
+ for (int i = 0; i < blockSize; i++) {
+ int index = block * blockSize + i;
+ tensorQ8.setFloat(index, blockValue);
+ float retrieved = tensorQ8.getFloat(index);
+ printVerboseF("Index %d: Set=%.6f Got=%.6f%n",
+ index, blockValue, retrieved);
+ Assert.assertEquals("Sequential block update failed",
+ blockValue, retrieved, 0.1f);
+ }
+ }
+
+ // Verify all blocks maintain their values
+ printVerbose("\nVerifying all blocks after updates:");
+ for (int block = 0; block < 3; block++) {
+ float expectedValue = (block + 1) * 10.0f;
+ for (int i = 0; i < blockSize; i++) {
+ int index = block * blockSize + i;
+ float retrieved = tensorQ8.getFloat(index);
+ Assert.assertEquals("Block value changed unexpectedly",
+ expectedValue, retrieved, 0.1f);
+ }
+ }
+ }
+
+ @Test
+ public void testMaximumPrecisionValues() {
+ // Test precision with values requiring maximum accuracy
+ Shape shape = new Shape(GGMLType.Q8_0.getBlockSize());
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting maximum precision values:");
+
+ // Test precise decimal values
+ float[] preciseValues = {
+ 1.23456789f,
+ -1.23456789f,
+ 12.3456789f,
+ -12.3456789f,
+ 123.456789f,
+ -123.456789f
+ };
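+ // The magnitudes above are non-decreasing and each value is checked right after it is set, so
+ // it quantizes to the top level (+/-127 * scale) and the remaining error is essentially the
+ // float16 rounding of the scale, comfortably below the 1% bound.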
+
+ for (int i = 0; i < preciseValues.length; i++) {
+ tensorQ8.setFloat(i, preciseValues[i]);
+ float retrieved = tensorQ8.getFloat(i);
+ float relativeError = Math.abs((retrieved - preciseValues[i]) / preciseValues[i]);
+
+ printVerboseF("Precise value test %d: Set=%.9f Got=%.9f RelError=%.9f%n",
+ i, preciseValues[i], retrieved, relativeError);
+
+ // For high-precision values, we expect relative error < 1%
+ Assert.assertTrue(
+ String.format("Precision lost: expected=%.9f, got=%.9f, error=%.9f",
+ preciseValues[i], retrieved, relativeError),
+ relativeError < 0.01f);
+ }
+ }
+
+ @Test
+ public void testBlockScaleInterference() {
+ // Test that updates in one block don't affect other blocks' scales
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 2); // Two blocks
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ printVerbose("\nTesting block scale interference:");
+
+ // Set first block to small values
+ printVerbose("\nSetting first block to small values:");
+ for (int i = 0; i < blockSize; i++) {
+ float value = 0.1f + (0.1f * i / blockSize);
+ tensorQ8.setFloat(i, value);
+ printVerboseF("Block 1 index %d: Set=%.6f%n", i, value);
+ }
+
+ // Set second block to large values
+ printVerbose("\nSetting second block to large values:");
+ for (int i = 0; i < blockSize; i++) {
+ float value = 100.0f + (100.0f * i / blockSize);
+ tensorQ8.setFloat(blockSize + i, value);
+ printVerboseF("Block 2 index %d: Set=%.6f%n", i, value);
+ }
+
+ // Verify first block maintained small values
+ printVerbose("\nVerifying first block maintained precision:");
+ for (int i = 0; i < blockSize; i++) {
+ float expected = 0.1f + (0.1f * i / blockSize);
+ float retrieved = tensorQ8.getFloat(i);
+ float relativeError = Math.abs((retrieved - expected) / expected);
+
+ printVerboseF("Block 1 verification index %d: Expected=%.6f Got=%.6f RelError=%.6f%n",
+ i, expected, retrieved, relativeError);
+
+ Assert.assertTrue(
+ String.format("Block 1 precision lost after block 2 update at index %d", i),
+ relativeError < 0.1f);
+ }
+ }
+
+ @Test
+ public void testBlockBoundaryUpdates() {
+ // Test updating values at block boundaries
+ int blockSize = GGMLType.Q8_0.getBlockSize();
+ Shape shape = new Shape(blockSize * 2); // Two blocks
+ TensorQ8 tensorQ8 = new TensorQ8(shape);
+
+ // Set values around block boundary
+ float[] boundaryValues = {1.0f, 2.0f, 3.0f, 4.0f};
+ int boundaryStart = blockSize - 2; // Two values before the block boundary, two after
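+
+ // Indices blockSize-2 .. blockSize+1 straddle the boundary between block 0 and block 1: the
+ // first two writes are quantized with block 0's scale and the last two with block 1's,
+ // exercising the index -> block mapping (index / blockSize, index % blockSize).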
+
+ printVerbose("\nTesting block boundary updates:");
+ for (int i = 0; i < boundaryValues.length; i++) {
+ int index = boundaryStart + i;
+ tensorQ8.setFloat(index, boundaryValues[i]);
+ float retrieved = tensorQ8.getFloat(index);
+ printVerboseF("Index %d (block boundary +/- 2): Set=%.6f Got=%.6f%n",
+ index, boundaryValues[i], retrieved);
+ Assert.assertEquals("Value mismatch at block boundary",
+ boundaryValues[i], retrieved, 0.1f);
+ }
+ }
+}