properties() {
- return recordBatch.properties();
- }
-
- @Override
- public ByteBuffer rawPayload() {
- return recordBatch.rawPayload().duplicate();
- }
-
- public byte[] encode() {
- ByteBuffer buffer = ByteBuffer.allocate(8 + 4 + recordBatch.rawPayload().remaining())
- .putLong(baseOffset)
- .putInt(recordBatch.count())
- .put(recordBatch.rawPayload().duplicate())
- .flip();
- return buffer.array();
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/WrappedByteBuf.java b/s3stream/src/main/java/com/automq/stream/WrappedByteBuf.java
deleted file mode 100644
index cb32e041a..000000000
--- a/s3stream/src/main/java/com/automq/stream/WrappedByteBuf.java
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright 2013 The Netty Project
- *
- * The Netty Project licenses this file to you under the Apache License,
- * version 2.0 (the "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at:
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-package com.automq.stream;
-
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.ByteBufAllocator;
-import io.netty.buffer.ByteBufUtil;
-import io.netty.util.ByteProcessor;
-import io.netty.util.internal.ObjectUtil;
-import io.netty.util.internal.StringUtil;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.channels.FileChannel;
-import java.nio.channels.GatheringByteChannel;
-import java.nio.channels.ScatteringByteChannel;
-import java.nio.charset.Charset;
-
-/**
- * Modified based on io.netty.buffer.WrappedByteBuf.
- * Wraps another {@link ByteBuf}.
- *
- * It's important that the {@link #readerIndex()} and {@link #writerIndex()} will not do any adjustments on the
- * indices on the fly because of internal optimizations made by {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)}
- * and {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)}.
- */
-public class WrappedByteBuf extends ByteBuf {
- private final ByteBuf root;
- protected final ByteBuf buf;
- private final Runnable releaseHook;
-
- public WrappedByteBuf(ByteBuf buf, Runnable releaseHook) {
- this(buf, buf, releaseHook);
- }
-
- public WrappedByteBuf(ByteBuf root, ByteBuf buf, Runnable releaseHook) {
- this.root = root;
- this.buf = ObjectUtil.checkNotNull(buf, "buf");
- this.releaseHook = ObjectUtil.checkNotNull(releaseHook, "releaseHook");
- }
-
- @Override
- public final boolean hasMemoryAddress() {
- return buf.hasMemoryAddress();
- }
-
- @Override
- public boolean isContiguous() {
- return buf.isContiguous();
- }
-
- @Override
- public final long memoryAddress() {
- return buf.memoryAddress();
- }
-
- @Override
- public final int capacity() {
- return buf.capacity();
- }
-
- @Override
- public ByteBuf capacity(int newCapacity) {
- buf.capacity(newCapacity);
- return this;
- }
-
- @Override
- public final int maxCapacity() {
- return buf.maxCapacity();
- }
-
- @Override
- public final ByteBufAllocator alloc() {
- return buf.alloc();
- }
-
- @Override
- public final ByteOrder order() {
- return buf.order();
- }
-
- @Override
- public ByteBuf order(ByteOrder endianness) {
- return new WrappedByteBuf(root, buf.order(endianness), releaseHook);
- }
-
- @Override
- public final ByteBuf unwrap() {
- return buf;
- }
-
- @Override
- public ByteBuf asReadOnly() {
- return buf.asReadOnly();
- }
-
- @Override
- public boolean isReadOnly() {
- return buf.isReadOnly();
- }
-
- @Override
- public final boolean isDirect() {
- return buf.isDirect();
- }
-
- @Override
- public final int readerIndex() {
- return buf.readerIndex();
- }
-
- @Override
- public final ByteBuf readerIndex(int readerIndex) {
- buf.readerIndex(readerIndex);
- return this;
- }
-
- @Override
- public final int writerIndex() {
- return buf.writerIndex();
- }
-
- @Override
- public final ByteBuf writerIndex(int writerIndex) {
- buf.writerIndex(writerIndex);
- return this;
- }
-
- @Override
- public ByteBuf setIndex(int readerIndex, int writerIndex) {
- buf.setIndex(readerIndex, writerIndex);
- return this;
- }
-
- @Override
- public final int readableBytes() {
- return buf.readableBytes();
- }
-
- @Override
- public final int writableBytes() {
- return buf.writableBytes();
- }
-
- @Override
- public final int maxWritableBytes() {
- return buf.maxWritableBytes();
- }
-
- @Override
- public int maxFastWritableBytes() {
- return buf.maxFastWritableBytes();
- }
-
- @Override
- public final boolean isReadable() {
- return buf.isReadable();
- }
-
- @Override
- public final boolean isWritable() {
- return buf.isWritable();
- }
-
- @Override
- public final ByteBuf clear() {
- buf.clear();
- return this;
- }
-
- @Override
- public final ByteBuf markReaderIndex() {
- buf.markReaderIndex();
- return this;
- }
-
- @Override
- public final ByteBuf resetReaderIndex() {
- buf.resetReaderIndex();
- return this;
- }
-
- @Override
- public final ByteBuf markWriterIndex() {
- buf.markWriterIndex();
- return this;
- }
-
- @Override
- public final ByteBuf resetWriterIndex() {
- buf.resetWriterIndex();
- return this;
- }
-
- @Override
- public ByteBuf discardReadBytes() {
- buf.discardReadBytes();
- return this;
- }
-
- @Override
- public ByteBuf discardSomeReadBytes() {
- buf.discardSomeReadBytes();
- return this;
- }
-
- @Override
- public ByteBuf ensureWritable(int minWritableBytes) {
- buf.ensureWritable(minWritableBytes);
- return this;
- }
-
- @Override
- public int ensureWritable(int minWritableBytes, boolean force) {
- return buf.ensureWritable(minWritableBytes, force);
- }
-
- @Override
- public boolean getBoolean(int index) {
- return buf.getBoolean(index);
- }
-
- @Override
- public byte getByte(int index) {
- return buf.getByte(index);
- }
-
- @Override
- public short getUnsignedByte(int index) {
- return buf.getUnsignedByte(index);
- }
-
- @Override
- public short getShort(int index) {
- return buf.getShort(index);
- }
-
- @Override
- public short getShortLE(int index) {
- return buf.getShortLE(index);
- }
-
- @Override
- public int getUnsignedShort(int index) {
- return buf.getUnsignedShort(index);
- }
-
- @Override
- public int getUnsignedShortLE(int index) {
- return buf.getUnsignedShortLE(index);
- }
-
- @Override
- public int getMedium(int index) {
- return buf.getMedium(index);
- }
-
- @Override
- public int getMediumLE(int index) {
- return buf.getMediumLE(index);
- }
-
- @Override
- public int getUnsignedMedium(int index) {
- return buf.getUnsignedMedium(index);
- }
-
- @Override
- public int getUnsignedMediumLE(int index) {
- return buf.getUnsignedMediumLE(index);
- }
-
- @Override
- public int getInt(int index) {
- return buf.getInt(index);
- }
-
- @Override
- public int getIntLE(int index) {
- return buf.getIntLE(index);
- }
-
- @Override
- public long getUnsignedInt(int index) {
- return buf.getUnsignedInt(index);
- }
-
- @Override
- public long getUnsignedIntLE(int index) {
- return buf.getUnsignedIntLE(index);
- }
-
- @Override
- public long getLong(int index) {
- return buf.getLong(index);
- }
-
- @Override
- public long getLongLE(int index) {
- return buf.getLongLE(index);
- }
-
- @Override
- public char getChar(int index) {
- return buf.getChar(index);
- }
-
- @Override
- public float getFloat(int index) {
- return buf.getFloat(index);
- }
-
- @Override
- public double getDouble(int index) {
- return buf.getDouble(index);
- }
-
- @Override
- public ByteBuf getBytes(int index, ByteBuf dst) {
- buf.getBytes(index, dst);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, ByteBuf dst, int length) {
- buf.getBytes(index, dst, length);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) {
- buf.getBytes(index, dst, dstIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, byte[] dst) {
- buf.getBytes(index, dst);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) {
- buf.getBytes(index, dst, dstIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, ByteBuffer dst) {
- buf.getBytes(index, dst);
- return this;
- }
-
- @Override
- public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException {
- buf.getBytes(index, out, length);
- return this;
- }
-
- @Override
- public int getBytes(int index, GatheringByteChannel out, int length) throws IOException {
- return buf.getBytes(index, out, length);
- }
-
- @Override
- public int getBytes(int index, FileChannel out, long position, int length) throws IOException {
- return buf.getBytes(index, out, position, length);
- }
-
- @Override
- public CharSequence getCharSequence(int index, int length, Charset charset) {
- return buf.getCharSequence(index, length, charset);
- }
-
- @Override
- public ByteBuf setBoolean(int index, boolean value) {
- buf.setBoolean(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setByte(int index, int value) {
- buf.setByte(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setShort(int index, int value) {
- buf.setShort(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setShortLE(int index, int value) {
- buf.setShortLE(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setMedium(int index, int value) {
- buf.setMedium(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setMediumLE(int index, int value) {
- buf.setMediumLE(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setInt(int index, int value) {
- buf.setInt(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setIntLE(int index, int value) {
- buf.setIntLE(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setLong(int index, long value) {
- buf.setLong(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setLongLE(int index, long value) {
- buf.setLongLE(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setChar(int index, int value) {
- buf.setChar(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setFloat(int index, float value) {
- buf.setFloat(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setDouble(int index, double value) {
- buf.setDouble(index, value);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, ByteBuf src) {
- buf.setBytes(index, src);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, ByteBuf src, int length) {
- buf.setBytes(index, src, length);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) {
- buf.setBytes(index, src, srcIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, byte[] src) {
- buf.setBytes(index, src);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) {
- buf.setBytes(index, src, srcIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf setBytes(int index, ByteBuffer src) {
- buf.setBytes(index, src);
- return this;
- }
-
- @Override
- public int setBytes(int index, InputStream in, int length) throws IOException {
- return buf.setBytes(index, in, length);
- }
-
- @Override
- public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException {
- return buf.setBytes(index, in, length);
- }
-
- @Override
- public int setBytes(int index, FileChannel in, long position, int length) throws IOException {
- return buf.setBytes(index, in, position, length);
- }
-
- @Override
- public ByteBuf setZero(int index, int length) {
- buf.setZero(index, length);
- return this;
- }
-
- @Override
- public int setCharSequence(int index, CharSequence sequence, Charset charset) {
- return buf.setCharSequence(index, sequence, charset);
- }
-
- @Override
- public boolean readBoolean() {
- return buf.readBoolean();
- }
-
- @Override
- public byte readByte() {
- return buf.readByte();
- }
-
- @Override
- public short readUnsignedByte() {
- return buf.readUnsignedByte();
- }
-
- @Override
- public short readShort() {
- return buf.readShort();
- }
-
- @Override
- public short readShortLE() {
- return buf.readShortLE();
- }
-
- @Override
- public int readUnsignedShort() {
- return buf.readUnsignedShort();
- }
-
- @Override
- public int readUnsignedShortLE() {
- return buf.readUnsignedShortLE();
- }
-
- @Override
- public int readMedium() {
- return buf.readMedium();
- }
-
- @Override
- public int readMediumLE() {
- return buf.readMediumLE();
- }
-
- @Override
- public int readUnsignedMedium() {
- return buf.readUnsignedMedium();
- }
-
- @Override
- public int readUnsignedMediumLE() {
- return buf.readUnsignedMediumLE();
- }
-
- @Override
- public int readInt() {
- return buf.readInt();
- }
-
- @Override
- public int readIntLE() {
- return buf.readIntLE();
- }
-
- @Override
- public long readUnsignedInt() {
- return buf.readUnsignedInt();
- }
-
- @Override
- public long readUnsignedIntLE() {
- return buf.readUnsignedIntLE();
- }
-
- @Override
- public long readLong() {
- return buf.readLong();
- }
-
- @Override
- public long readLongLE() {
- return buf.readLongLE();
- }
-
- @Override
- public char readChar() {
- return buf.readChar();
- }
-
- @Override
- public float readFloat() {
- return buf.readFloat();
- }
-
- @Override
- public double readDouble() {
- return buf.readDouble();
- }
-
- @Override
- public ByteBuf readBytes(int length) {
- return buf.readBytes(length);
- }
-
- @Override
- public ByteBuf readSlice(int length) {
- return new WrappedByteBuf(root, buf.readSlice(length), releaseHook);
- }
-
- @Override
- public ByteBuf readRetainedSlice(int length) {
- return new WrappedByteBuf(root, buf.readRetainedSlice(length), releaseHook);
- }
-
- @Override
- public ByteBuf readBytes(ByteBuf dst) {
- buf.readBytes(dst);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(ByteBuf dst, int length) {
- buf.readBytes(dst, length);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(ByteBuf dst, int dstIndex, int length) {
- buf.readBytes(dst, dstIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(byte[] dst) {
- buf.readBytes(dst);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(byte[] dst, int dstIndex, int length) {
- buf.readBytes(dst, dstIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(ByteBuffer dst) {
- buf.readBytes(dst);
- return this;
- }
-
- @Override
- public ByteBuf readBytes(OutputStream out, int length) throws IOException {
- buf.readBytes(out, length);
- return this;
- }
-
- @Override
- public int readBytes(GatheringByteChannel out, int length) throws IOException {
- return buf.readBytes(out, length);
- }
-
- @Override
- public int readBytes(FileChannel out, long position, int length) throws IOException {
- return buf.readBytes(out, position, length);
- }
-
- @Override
- public CharSequence readCharSequence(int length, Charset charset) {
- return buf.readCharSequence(length, charset);
- }
-
- @Override
- public ByteBuf skipBytes(int length) {
- buf.skipBytes(length);
- return this;
- }
-
- @Override
- public ByteBuf writeBoolean(boolean value) {
- buf.writeBoolean(value);
- return this;
- }
-
- @Override
- public ByteBuf writeByte(int value) {
- buf.writeByte(value);
- return this;
- }
-
- @Override
- public ByteBuf writeShort(int value) {
- buf.writeShort(value);
- return this;
- }
-
- @Override
- public ByteBuf writeShortLE(int value) {
- buf.writeShortLE(value);
- return this;
- }
-
- @Override
- public ByteBuf writeMedium(int value) {
- buf.writeMedium(value);
- return this;
- }
-
- @Override
- public ByteBuf writeMediumLE(int value) {
- buf.writeMediumLE(value);
- return this;
- }
-
- @Override
- public ByteBuf writeInt(int value) {
- buf.writeInt(value);
- return this;
- }
-
- @Override
- public ByteBuf writeIntLE(int value) {
- buf.writeIntLE(value);
- return this;
- }
-
- @Override
- public ByteBuf writeLong(long value) {
- buf.writeLong(value);
- return this;
- }
-
- @Override
- public ByteBuf writeLongLE(long value) {
- buf.writeLongLE(value);
- return this;
- }
-
- @Override
- public ByteBuf writeChar(int value) {
- buf.writeChar(value);
- return this;
- }
-
- @Override
- public ByteBuf writeFloat(float value) {
- buf.writeFloat(value);
- return this;
- }
-
- @Override
- public ByteBuf writeDouble(double value) {
- buf.writeDouble(value);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(ByteBuf src) {
- buf.writeBytes(src);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(ByteBuf src, int length) {
- buf.writeBytes(src, length);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(ByteBuf src, int srcIndex, int length) {
- buf.writeBytes(src, srcIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(byte[] src) {
- buf.writeBytes(src);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(byte[] src, int srcIndex, int length) {
- buf.writeBytes(src, srcIndex, length);
- return this;
- }
-
- @Override
- public ByteBuf writeBytes(ByteBuffer src) {
- buf.writeBytes(src);
- return this;
- }
-
- @Override
- public int writeBytes(InputStream in, int length) throws IOException {
- return buf.writeBytes(in, length);
- }
-
- @Override
- public int writeBytes(ScatteringByteChannel in, int length) throws IOException {
- return buf.writeBytes(in, length);
- }
-
- @Override
- public int writeBytes(FileChannel in, long position, int length) throws IOException {
- return buf.writeBytes(in, position, length);
- }
-
- @Override
- public ByteBuf writeZero(int length) {
- buf.writeZero(length);
- return this;
- }
-
- @Override
- public int writeCharSequence(CharSequence sequence, Charset charset) {
- return buf.writeCharSequence(sequence, charset);
- }
-
- @Override
- public int indexOf(int fromIndex, int toIndex, byte value) {
- return buf.indexOf(fromIndex, toIndex, value);
- }
-
- @Override
- public int bytesBefore(byte value) {
- return buf.bytesBefore(value);
- }
-
- @Override
- public int bytesBefore(int length, byte value) {
- return buf.bytesBefore(length, value);
- }
-
- @Override
- public int bytesBefore(int index, int length, byte value) {
- return buf.bytesBefore(index, length, value);
- }
-
- @Override
- public int forEachByte(ByteProcessor processor) {
- return buf.forEachByte(processor);
- }
-
- @Override
- public int forEachByte(int index, int length, ByteProcessor processor) {
- return buf.forEachByte(index, length, processor);
- }
-
- @Override
- public int forEachByteDesc(ByteProcessor processor) {
- return buf.forEachByteDesc(processor);
- }
-
- @Override
- public int forEachByteDesc(int index, int length, ByteProcessor processor) {
- return buf.forEachByteDesc(index, length, processor);
- }
-
- @Override
- public ByteBuf copy() {
- return buf.copy();
- }
-
- @Override
- public ByteBuf copy(int index, int length) {
- return buf.copy(index, length);
- }
-
- @Override
- public ByteBuf slice() {
- return new WrappedByteBuf(root, buf.slice(), releaseHook);
- }
-
- @Override
- public ByteBuf retainedSlice() {
- return new WrappedByteBuf(root, buf.retainedSlice(), releaseHook);
- }
-
- @Override
- public ByteBuf slice(int index, int length) {
- return new WrappedByteBuf(root, buf.slice(index, length), releaseHook);
- }
-
- @Override
- public ByteBuf retainedSlice(int index, int length) {
- return new WrappedByteBuf(root, buf.retainedSlice(index, length), releaseHook);
- }
-
- @Override
- public ByteBuf duplicate() {
- return new WrappedByteBuf(root, buf.duplicate(), releaseHook);
- }
-
- @Override
- public ByteBuf retainedDuplicate() {
- return new WrappedByteBuf(root, buf.retainedDuplicate(), releaseHook);
- }
-
- @Override
- public int nioBufferCount() {
- return buf.nioBufferCount();
- }
-
- @Override
- public ByteBuffer nioBuffer() {
- return buf.nioBuffer();
- }
-
- @Override
- public ByteBuffer nioBuffer(int index, int length) {
- return buf.nioBuffer(index, length);
- }
-
- @Override
- public ByteBuffer[] nioBuffers() {
- return buf.nioBuffers();
- }
-
- @Override
- public ByteBuffer[] nioBuffers(int index, int length) {
- return buf.nioBuffers(index, length);
- }
-
- @Override
- public ByteBuffer internalNioBuffer(int index, int length) {
- return buf.internalNioBuffer(index, length);
- }
-
- @Override
- public boolean hasArray() {
- return buf.hasArray();
- }
-
- @Override
- public byte[] array() {
- return buf.array();
- }
-
- @Override
- public int arrayOffset() {
- return buf.arrayOffset();
- }
-
- @Override
- public String toString(Charset charset) {
- return buf.toString(charset);
- }
-
- @Override
- public String toString(int index, int length, Charset charset) {
- return buf.toString(index, length, charset);
- }
-
- @Override
- public int hashCode() {
- return buf.hashCode();
- }
-
- @Override
- @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
- public boolean equals(Object obj) {
- return buf.equals(obj);
- }
-
- @Override
- public int compareTo(ByteBuf buffer) {
- return buf.compareTo(buffer);
- }
-
- @Override
- public String toString() {
- return StringUtil.simpleClassName(this) + '(' + buf.toString() + ')';
- }
-
- @Override
- public ByteBuf retain(int increment) {
- buf.retain(increment);
- return this;
- }
-
- @Override
- public ByteBuf retain() {
- buf.retain();
- return this;
- }
-
- @Override
- public ByteBuf touch() {
- buf.touch();
- return this;
- }
-
- @Override
- public ByteBuf touch(Object hint) {
- buf.touch(hint);
- return this;
- }
-
- @Override
- public final boolean isReadable(int size) {
- return buf.isReadable(size);
- }
-
- @Override
- public final boolean isWritable(int size) {
- return buf.isWritable(size);
- }
-
- @Override
- public final int refCnt() {
- return buf.refCnt();
- }
-
- @Override
- public boolean release() {
- boolean rst = buf.release();
- if (rst && root != null && root.refCnt() == 0) {
- releaseHook.run();
- }
- return rst;
- }
-
- @Override
- public boolean release(int decrement) {
- boolean rst = buf.release(decrement);
- if (rst && root != null && root.refCnt() == 0) {
- releaseHook.run();
- }
- return rst;
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/AppendResult.java b/s3stream/src/main/java/com/automq/stream/api/AppendResult.java
deleted file mode 100644
index 9034643b3..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/AppendResult.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-/**
- * Result of appending a RecordBatch to a stream.
- */
-public interface AppendResult {
-
- /**
- * Get record batch base offset.
- *
- * @return record batch base offset.
- */
- long baseOffset();
-
-}
\ No newline at end of file
diff --git a/s3stream/src/main/java/com/automq/stream/api/Client.java b/s3stream/src/main/java/com/automq/stream/api/Client.java
deleted file mode 100644
index 1a4f6df67..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/Client.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.s3.failover.FailoverRequest;
-import com.automq.stream.s3.failover.FailoverResponse;
-import java.util.concurrent.CompletableFuture;
-
-/**
- * Elastic Stream client.
- */
-public interface Client {
- void start();
-
- void shutdown();
-
- /**
- * Get stream client.
- *
- * @return {@link StreamClient}
- */
- StreamClient streamClient();
-
- /**
- * Get KV client.
- *
- * @return {@link KVClient}
- */
- KVClient kvClient();
-
- /**
- * Failover another node's volume.
- */
- CompletableFuture<FailoverResponse> failover(FailoverRequest request);
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/CreateStreamOptions.java b/s3stream/src/main/java/com/automq/stream/api/CreateStreamOptions.java
deleted file mode 100644
index 1c62c0033..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/CreateStreamOptions.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.utils.Arguments;
-
-public class CreateStreamOptions {
- private int replicaCount;
- private long epoch;
-
- private CreateStreamOptions() {
- }
-
- public static Builder builder() {
- return new Builder();
- }
-
- public int replicaCount() {
- return replicaCount;
- }
-
- public long epoch() {
- return epoch;
- }
-
- public static class Builder {
- private final CreateStreamOptions options = new CreateStreamOptions();
-
- public Builder replicaCount(int replicaCount) {
- Arguments.check(replicaCount > 0, "replica count should be larger than 0");
- options.replicaCount = replicaCount;
- return this;
- }
-
- public Builder epoch(long epoch) {
- options.epoch = epoch;
- return this;
- }
-
- public CreateStreamOptions build() {
- return options;
- }
-
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/FetchResult.java b/s3stream/src/main/java/com/automq/stream/api/FetchResult.java
deleted file mode 100644
index d63746412..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/FetchResult.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.s3.cache.CacheAccessType;
-import java.util.List;
-
-public interface FetchResult {
-
- /**
- * Get fetched RecordBatch list.
- *
- * @return {@link RecordBatchWithContext} list.
- */
- List<RecordBatchWithContext> recordBatchList();
-
- default CacheAccessType getCacheAccessType() {
- return CacheAccessType.DELTA_WAL_CACHE_HIT;
- }
-
- /**
- * Free fetch result backend memory.
- */
- default void free() {
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/KVClient.java b/s3stream/src/main/java/com/automq/stream/api/KVClient.java
deleted file mode 100644
index 7a0bd7ca1..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/KVClient.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.api.KeyValue.Key;
-import com.automq.stream.api.KeyValue.Value;
-import java.util.concurrent.CompletableFuture;
-
-/**
- * Light KV client, support light & simple kv operations.
- */
-public interface KVClient {
- /**
- * Put key value if key not exist, return current key value after putting.
- *
- * @param keyValue {@link KeyValue} k-v pair
- * @return async put result. {@link Value} current value after putting.
- */
- CompletableFuture<Value> putKVIfAbsent(KeyValue keyValue);
-
- /**
- * Put key value, overwrite if key exist, return current key value after putting.
- *
- * @param keyValue {@link KeyValue} k-v pair
- * @return async put result. {@link KeyValue} current value after putting.
- */
- CompletableFuture<KeyValue> putKV(KeyValue keyValue);
-
- /**
- * Get value by key.
- *
- * @param key key.
- * @return async get result. {@link KeyValue} k-v pair, null if key not exist.
- */
- CompletableFuture<KeyValue> getKV(Key key);
-
- /**
- * Delete key value by key. If key not exist, return null.
- *
- * @param key key.
- * @return async delete result. {@link Value} deleted value, null if key not exist.
- */
- CompletableFuture<Value> delKV(Key key);
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/KeyValue.java b/s3stream/src/main/java/com/automq/stream/api/KeyValue.java
deleted file mode 100644
index 2d8dee94d..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/KeyValue.java
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import java.nio.ByteBuffer;
-import java.util.Objects;
-
-public class KeyValue {
- private final Key key;
- private final Value value;
-
- private KeyValue(Key key, Value value) {
- this.key = key;
- this.value = value;
- }
-
- public static KeyValue of(String key, ByteBuffer value) {
- return new KeyValue(Key.of(key), Value.of(value));
- }
-
- public Key key() {
- return key;
- }
-
- public Value value() {
- return value;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o)
- return true;
- if (o == null || getClass() != o.getClass())
- return false;
- KeyValue keyValue = (KeyValue) o;
- return Objects.equals(key, keyValue.key) && Objects.equals(value, keyValue.value);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(key, value);
- }
-
- @Override
- public String toString() {
- return "KeyValue{" +
- "key=" + key +
- ", value=" + value +
- '}';
- }
-
- public static class Key {
- private final String key;
-
- private Key(String key) {
- this.key = key;
- }
-
- public static Key of(String key) {
- return new Key(key);
- }
-
- public String get() {
- return key;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
- Key key1 = (Key) o;
- return Objects.equals(key, key1.key);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(key);
- }
-
- @Override
- public String toString() {
- return "Key{" +
- "key='" + key + '\'' +
- '}';
- }
- }
-
- public static class Value {
- private final ByteBuffer value;
-
- private Value(ByteBuffer value) {
- this.value = value;
- }
-
- public static Value of(ByteBuffer value) {
- return new Value(value);
- }
-
- public static Value of(byte[] value) {
- if (value == null) {
- return new Value(null);
- }
- return new Value(ByteBuffer.wrap(value));
- }
-
- public ByteBuffer get() {
- return value;
- }
-
- public boolean isNull() {
- return value == null;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o)
- return true;
- if (!(o instanceof Value))
- return false;
- Value value1 = (Value) o;
- return Objects.equals(value, value1.value);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(value);
- }
-
- @Override
- public String toString() {
- return "Value{" +
- "value=" + value +
- '}';
- }
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/OpenStreamOptions.java b/s3stream/src/main/java/com/automq/stream/api/OpenStreamOptions.java
deleted file mode 100644
index 6e9beeffb..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/OpenStreamOptions.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.utils.Arguments;
-
-public class OpenStreamOptions {
- private WriteMode writeMode = WriteMode.SINGLE;
- private ReadMode readMode = ReadMode.MULTIPLE;
- private long epoch;
-
- private OpenStreamOptions() {
- }
-
- public static Builder builder() {
- return new Builder();
- }
-
- public WriteMode writeMode() {
- return writeMode;
- }
-
- public ReadMode readMode() {
- return readMode;
- }
-
- public long epoch() {
- return epoch;
- }
-
- public enum WriteMode {
- SINGLE(0), MULTIPLE(1);
-
- final int code;
-
- WriteMode(int code) {
- this.code = code;
- }
-
- public int getCode() {
- return code;
- }
- }
-
- public enum ReadMode {
- SINGLE(0), MULTIPLE(1);
-
- final int code;
-
- ReadMode(int code) {
- this.code = code;
- }
-
- public int getCode() {
- return code;
- }
- }
-
- public static class Builder {
- private final OpenStreamOptions options = new OpenStreamOptions();
-
- public Builder writeMode(WriteMode writeMode) {
- Arguments.isNotNull(writeMode, "WriteMode should be set with SINGLE or MULTIPLE");
- options.writeMode = writeMode;
- return this;
- }
-
- public Builder readMode(ReadMode readMode) {
- Arguments.isNotNull(readMode, "ReadMode should be set with SINGLE or MULTIPLE");
- options.readMode = readMode;
- return this;
- }
-
- public Builder epoch(long epoch) {
- options.epoch = epoch;
- return this;
- }
-
- public OpenStreamOptions build() {
- return options;
- }
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/ReadOptions.java b/s3stream/src/main/java/com/automq/stream/api/ReadOptions.java
deleted file mode 100644
index 04dea7b90..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/ReadOptions.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.api.exceptions.FastReadFailFastException;
-
-public class ReadOptions {
- public static final ReadOptions DEFAULT = new ReadOptions();
-
- private boolean fastRead;
- private boolean pooledBuf;
-
- public static Builder builder() {
- return new Builder();
- }
-
- public boolean fastRead() {
- return fastRead;
- }
-
- public boolean pooledBuf() {
- return pooledBuf;
- }
-
- public static class Builder {
- private final ReadOptions options = new ReadOptions();
-
- /**
- * Read from cache, if the data is not in cache, then fail fast with {@link FastReadFailFastException}.
- */
- public Builder fastRead(boolean fastRead) {
- options.fastRead = fastRead;
- return this;
- }
-
- /**
- * Use pooled buffer for reading. The caller is responsible for releasing the buffer.
- */
- public Builder pooledBuf(boolean pooledBuf) {
- options.pooledBuf = pooledBuf;
- return this;
- }
-
- public ReadOptions build() {
- return options;
- }
- }
-
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/RecordBatch.java b/s3stream/src/main/java/com/automq/stream/api/RecordBatch.java
deleted file mode 100644
index 0ee6c0733..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/RecordBatch.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import java.nio.ByteBuffer;
-import java.util.Map;
-
-/**
- * Record batch.
- */
-public interface RecordBatch {
-
- /**
- * Get payload record count.
- *
- * @return record count.
- */
- int count();
-
- /**
- * Get min timestamp of records.
- *
- * @return min timestamp of records.
- */
- long baseTimestamp();
-
- /**
- * Get record batch extension properties.
- *
- * @return batch extension properties.
- */
- Map<String, String> properties();
-
- /**
- * Get raw payload.
- *
- * @return raw payload.
- */
- ByteBuffer rawPayload();
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/RecordBatchWithContext.java b/s3stream/src/main/java/com/automq/stream/api/RecordBatchWithContext.java
deleted file mode 100644
index 0075e05a0..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/RecordBatchWithContext.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-public interface RecordBatchWithContext extends RecordBatch {
-
- /**
- * Get record batch base offset.
- *
- * @return base offset.
- */
- long baseOffset();
-
- /**
- * Get record batch exclusive last offset.
- *
- * @return exclusive last offset.
- */
- long lastOffset();
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/Stream.java b/s3stream/src/main/java/com/automq/stream/api/Stream.java
deleted file mode 100644
index 5999332cb..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/Stream.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import com.automq.stream.api.exceptions.StreamClientException;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.context.FetchContext;
-import java.util.concurrent.CompletableFuture;
-
-/**
- * Record stream.
- */
-public interface Stream {
-
- /**
- * Get stream id
- */
- long streamId();
-
- /**
- * Get stream epoch.
- */
- long streamEpoch();
-
- /**
- * Get stream start offset.
- */
- long startOffset();
-
- /**
- * Get stream confirm record offset.
- */
- long confirmOffset();
-
- /**
- * Get stream next append record offset.
- */
- long nextOffset();
-
- /**
- * Append recordBatch to stream.
- *
- * @param recordBatch {@link RecordBatch}.
- * @return - complete success with async {@link AppendResult}, when append success.
- * - complete exception with {@link StreamClientException}, when append fail. TODO: specify the exception.
- */
- CompletableFuture<AppendResult> append(AppendContext context, RecordBatch recordBatch);
-
- default CompletableFuture<AppendResult> append(RecordBatch recordBatch) {
- return append(AppendContext.DEFAULT, recordBatch);
- }
-
- /**
- * Fetch recordBatch list from stream. Note that the startOffset may be in the middle of the first recordBatch.
- * It is strongly recommended to handle the completion of the returned CompletableFuture in a separate thread.
- *
- * @param context fetch context, {@link FetchContext}.
- * @param startOffset start offset, if the startOffset is in the middle of a recordBatch, the recordBatch will be returned.
- * @param endOffset exclusive end offset, if the endOffset is in the middle of a recordBatch, the recordBatch will be returned.
- * @param maxBytesHint max fetch data size hint, the real return data size may be larger than maxBytesHint.
- * @return - complete success with {@link FetchResult}, when fetch success.
- * - complete exception with {@link StreamClientException}, when startOffset is bigger than stream end offset.
- */
- CompletableFuture<FetchResult> fetch(FetchContext context, long startOffset, long endOffset, int maxBytesHint);
-
- default CompletableFuture<FetchResult> fetch(long startOffset, long endOffset, int maxBytesHint) {
- return fetch(FetchContext.DEFAULT, startOffset, endOffset, maxBytesHint);
- }
-
- /**
- * Trim stream.
- *
- * @param newStartOffset new start offset.
- * @return - complete success with async {@link Void}, when trim success.
- * - complete exception with {@link StreamClientException}, when trim fail.
- */
- CompletableFuture<Void> trim(long newStartOffset);
-
- /**
- * Close the stream.
- */
- CompletableFuture<Void> close();
-
- /**
- * Destroy stream.
- */
- CompletableFuture<Void> destroy();
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/StreamClient.java b/s3stream/src/main/java/com/automq/stream/api/StreamClient.java
deleted file mode 100644
index 7211127b4..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/StreamClient.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api;
-
-import java.util.Optional;
-import java.util.concurrent.CompletableFuture;
-
-/**
- * Stream client, support stream create and open operation.
- */
-public interface StreamClient {
- /**
- * Create and open stream.
- *
- * @param options create stream options.
- * @return {@link Stream}.
- */
- CompletableFuture<Stream> createAndOpenStream(CreateStreamOptions options);
-
- /**
- * Open stream.
- *
- * @param streamId stream id.
- * @param options open stream options.
- * @return {@link Stream}.
- */
- CompletableFuture<Stream> openStream(long streamId, OpenStreamOptions options);
-
- /**
- * Retrieve an opened stream.
- *
- * @param streamId stream id.
- * @return {@link Optional}.
- */
- Optional<Stream> getStream(long streamId);
-
- void shutdown();
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/exceptions/ErrorCode.java b/s3stream/src/main/java/com/automq/stream/api/exceptions/ErrorCode.java
deleted file mode 100644
index 66543a61d..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/exceptions/ErrorCode.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api.exceptions;
-
-public class ErrorCode {
-
- public static final short UNEXPECTED = 99;
-
- public static final short STREAM_ALREADY_CLOSED = 1;
- public static final short STREAM_NOT_EXIST = 2;
- public static final short EXPIRED_STREAM_EPOCH = 3;
- public static final short STREAM_NOT_CLOSED = 4;
-
- public static final short OFFSET_OUT_OF_RANGE_BOUNDS = 10;
- public static final short FAST_READ_FAIL_FAST = 11;
-
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/exceptions/FastReadFailFastException.java b/s3stream/src/main/java/com/automq/stream/api/exceptions/FastReadFailFastException.java
deleted file mode 100644
index 6d8d0147c..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/exceptions/FastReadFailFastException.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api.exceptions;
-
-/**
- * Fail-fast exception thrown when fast read is enabled but the read would need to read from S3.
- */
-public class FastReadFailFastException extends StreamClientException {
- public FastReadFailFastException() {
- super(ErrorCode.FAST_READ_FAIL_FAST, "", false);
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/api/exceptions/StreamClientException.java b/s3stream/src/main/java/com/automq/stream/api/exceptions/StreamClientException.java
deleted file mode 100644
index 443fa7afd..000000000
--- a/s3stream/src/main/java/com/automq/stream/api/exceptions/StreamClientException.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.api.exceptions;
-
-/**
- * All stream client exceptions extend StreamClientException and are listed here.
- */
-public class StreamClientException extends RuntimeException {
- private final int code;
-
- public StreamClientException(int code, String str) {
- this(code, str, null);
- }
-
- public StreamClientException(int code, String str, Throwable e) {
- super("code: " + code + ", " + str, e);
- this.code = code;
- }
-
- public StreamClientException(int code, String str, boolean writableStackTrace) {
- super("code: " + code + ", " + str, null, false, writableStackTrace);
- this.code = code;
- }
-
- public int getCode() {
- return this.code;
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/ByteBufAlloc.java b/s3stream/src/main/java/com/automq/stream/s3/ByteBufAlloc.java
deleted file mode 100644
index dfe72bede..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/ByteBufAlloc.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.WrappedByteBuf;
-import io.netty.buffer.AbstractByteBufAllocator;
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.ByteBufAllocatorMetric;
-import io.netty.buffer.ByteBufAllocatorMetricProvider;
-import io.netty.buffer.CompositeByteBuf;
-import io.netty.buffer.PooledByteBufAllocator;
-import io.netty.buffer.UnpooledByteBufAllocator;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.LongAdder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class ByteBufAlloc {
- public static final boolean MEMORY_USAGE_DETECT = Boolean.parseBoolean(System.getenv("AUTOMQ_MEMORY_USAGE_DETECT"));
- public static final boolean ALLOCATOR_USAGE_UNPOOLED = Boolean.parseBoolean(System.getenv("AUTOMQ_ALLOCATOR_USAGE_UNPOOLED"));
- public static final boolean BUFFER_USAGE_HEAPED = Boolean.parseBoolean(System.getenv("AUTOMQ_BUFFER_USAGE_HEAPED"));
-
- private static final Logger LOGGER = LoggerFactory.getLogger(ByteBufAlloc.class);
- private static final AbstractByteBufAllocator ALLOC = ALLOCATOR_USAGE_UNPOOLED ? UnpooledByteBufAllocator.DEFAULT : PooledByteBufAllocator.DEFAULT;
- private static final Map<Integer, LongAdder> USAGE_STATS = new ConcurrentHashMap<>();
- private static long lastMetricLogTime = System.currentTimeMillis();
- private static final Map<Integer, String> ALLOC_TYPE = new HashMap<>();
-
- public static final int DEFAULT = 0;
- public static final int ENCODE_RECORD = 1;
- public static final int DECODE_RECORD = 2;
- public static final int WRITE_INDEX_BLOCK = 3;
- public static final int READ_INDEX_BLOCK = 4;
- public static final int WRITE_DATA_BLOCK_HEADER = 5;
- public static final int WRITE_FOOTER = 6;
- public static final int STREAM_OBJECT_COMPACTION_READ = 7;
- public static final int STREAM_OBJECT_COMPACTION_WRITE = 8;
- public static final int STREAM_SET_OBJECT_COMPACTION_READ = 9;
- public static final int STREAM_SET_OBJECT_COMPACTION_WRITE = 10;
- public static ByteBufAllocMetric byteBufAllocMetric = null;
-
- static {
- registerAllocType(DEFAULT, "default");
- registerAllocType(ENCODE_RECORD, "write_record");
- registerAllocType(DECODE_RECORD, "read_record");
- registerAllocType(WRITE_INDEX_BLOCK, "write_index_block");
- registerAllocType(READ_INDEX_BLOCK, "read_index_block");
- registerAllocType(WRITE_DATA_BLOCK_HEADER, "write_data_block_header");
- registerAllocType(WRITE_FOOTER, "write_footer");
- registerAllocType(STREAM_OBJECT_COMPACTION_READ, "stream_object_compaction_read");
- registerAllocType(STREAM_OBJECT_COMPACTION_WRITE, "stream_object_compaction_write");
- registerAllocType(STREAM_SET_OBJECT_COMPACTION_READ, "stream_set_object_compaction_read");
- registerAllocType(STREAM_SET_OBJECT_COMPACTION_WRITE, "stream_set_object_compaction_write");
-
- }
-
- public static CompositeByteBuf compositeByteBuffer() {
- return ALLOC.compositeDirectBuffer(Integer.MAX_VALUE);
- }
-
- public static ByteBuf byteBuffer(int initCapacity) {
- return byteBuffer(initCapacity, DEFAULT);
- }
-
- public static ByteBuf byteBuffer(int initCapacity, int type) {
- try {
- if (MEMORY_USAGE_DETECT) {
- LongAdder usage = USAGE_STATS.compute(type, (k, v) -> {
- if (v == null) {
- v = new LongAdder();
- }
- v.add(initCapacity);
- return v;
- });
- long now = System.currentTimeMillis();
- if (now - lastMetricLogTime > 60000) {
- // it's ok to be not thread safe
- lastMetricLogTime = now;
- ByteBufAlloc.byteBufAllocMetric = new ByteBufAllocMetric();
- LOGGER.info("Buffer usage: {}", ByteBufAlloc.byteBufAllocMetric);
- }
- return new WrappedByteBuf(BUFFER_USAGE_HEAPED ? ALLOC.heapBuffer(initCapacity) : ALLOC.directBuffer(initCapacity), () -> usage.add(-initCapacity));
- } else {
- return BUFFER_USAGE_HEAPED ? ALLOC.heapBuffer(initCapacity) : ALLOC.directBuffer(initCapacity);
- }
- } catch (OutOfMemoryError e) {
- if (MEMORY_USAGE_DETECT) {
- ByteBufAlloc.byteBufAllocMetric = new ByteBufAllocMetric();
- LOGGER.error("alloc buffer OOM, {}", ByteBufAlloc.byteBufAllocMetric, e);
- } else {
- LOGGER.error("alloc buffer OOM", e);
- }
- System.err.println("alloc buffer OOM");
- Runtime.getRuntime().halt(1);
- throw e;
- }
- }
-
- public static void registerAllocType(int type, String name) {
- if (ALLOC_TYPE.containsKey(type)) {
- throw new IllegalArgumentException("type already registered: " + type + "=" + ALLOC_TYPE.get(type));
- }
- ALLOC_TYPE.put(type, name);
- }
-
- public static class ByteBufAllocMetric {
- private final long usedMemory;
- private final long allocatedMemory;
- private final Map<String, Long> detail = new HashMap<>();
-
- public ByteBufAllocMetric() {
- USAGE_STATS.forEach((k, v) -> {
- detail.put(k + "/" + ALLOC_TYPE.get(k), v.longValue());
- });
- ByteBufAllocatorMetric metric = ((ByteBufAllocatorMetricProvider) ALLOC).metric();
- this.usedMemory = BUFFER_USAGE_HEAPED ? metric.usedHeapMemory() : metric.usedDirectMemory();
- this.allocatedMemory = this.detail.values().stream().mapToLong(Long::longValue).sum();
- }
-
- public long getUsedMemory() {
- return usedMemory;
- }
-
- public Map<String, Long> getDetailedMap() {
- return detail;
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder("ByteBufAllocMetric{usedMemory=");
- sb.append(usedMemory);
- sb.append(", allocatedMemory=");
- sb.append(allocatedMemory);
- sb.append(", detail=");
- for (Map.Entry<String, Long> entry : detail.entrySet()) {
- sb.append(entry.getKey()).append("=").append(entry.getValue()).append(",");
- }
- sb.append(", pooled=");
- sb.append(!ALLOCATOR_USAGE_UNPOOLED);
- sb.append(", direct=");
- sb.append(!BUFFER_USAGE_HEAPED);
- sb.append("}");
- return sb.toString();
- }
- }
-
- public interface OOMHandler {
- /**
- * Try handle OOM exception.
- *
- * @param memoryRequired the memory required
- * @return freed memory.
- */
- int handle(int memoryRequired);
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/Config.java b/s3stream/src/main/java/com/automq/stream/s3/Config.java
deleted file mode 100644
index 596d73290..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/Config.java
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-// TODO: rename & init
-public class Config {
- private int nodeId;
- private String endpoint;
- private String region;
- private String bucket;
- private boolean forcePathStyle = false;
- private String walPath = "/tmp/s3stream_wal";
- private long walCacheSize = 200 * 1024 * 1024;
- private long walCapacity = 1024L * 1024 * 1024;
- private int walInitBufferSize = 1024 * 1024;
- private int walMaxBufferSize = 16 * 1024 * 1024;
- private int walThread = 8;
- private long walWindowInitial = 1048576L;
- private long walWindowIncrement = 4194304L;
- private long walWindowMax = 536870912L;
- private long walBlockSoftLimit = 256 * 1024;
- private int walWriteRateLimit = 3000;
- private long walUploadThreshold = 100 * 1024 * 1024;
- private int streamSplitSize = 16777216;
- private int objectBlockSize = 1048576;
- private int objectPartSize = 16777216;
- private long blockCacheSize = 100 * 1024 * 1024;
- private int streamObjectCompactionIntervalMinutes = 60;
- private long streamObjectCompactionMaxSizeBytes = 10737418240L;
- private int controllerRequestRetryMaxCount = Integer.MAX_VALUE;
- private long controllerRequestRetryBaseDelayMs = 500;
- private long nodeEpoch = 0L;
- private int streamSetObjectCompactionInterval = 20;
- private long streamSetObjectCompactionCacheSize = 200 * 1024 * 1024;
- private int streamSetObjectCompactionUploadConcurrency = 8;
- private long streamSetObjectCompactionStreamSplitSize = 16 * 1024 * 1024;
- private int streamSetObjectCompactionForceSplitPeriod = 120;
- private int streamSetObjectCompactionMaxObjectNum = 500;
- private int maxStreamNumPerStreamSetObject = 100000;
- private int maxStreamObjectNumPerCommit = 10000;
- private boolean mockEnable = false;
- private boolean objectLogEnable = false;
- // 100MB/s
- private long networkBaselineBandwidth = 100 * 1024 * 1024;
- private int refillPeriodMs = 1000;
- private long objectRetentionTimeInSecond = 10 * 60; // 10min
- private boolean failoverEnable = false;
-
- public int nodeId() {
- return nodeId;
- }
-
- public String endpoint() {
- return endpoint;
- }
-
- public String region() {
- return region;
- }
-
- public String bucket() {
- return bucket;
- }
-
- public boolean forcePathStyle() {
- return forcePathStyle;
- }
-
- public String walPath() {
- return walPath;
- }
-
- public long walCacheSize() {
- return walCacheSize;
- }
-
- public long walCapacity() {
- return walCapacity;
- }
-
- public int walInitBufferSize() {
- return walInitBufferSize;
- }
-
- public int walMaxBufferSize() {
- return walMaxBufferSize;
- }
-
- public int walThread() {
- return walThread;
- }
-
- public long walWindowInitial() {
- return walWindowInitial;
- }
-
- public long walWindowIncrement() {
- return walWindowIncrement;
- }
-
- public long walWindowMax() {
- return walWindowMax;
- }
-
- public long walBlockSoftLimit() {
- return walBlockSoftLimit;
- }
-
- public int walWriteRateLimit() {
- return walWriteRateLimit;
- }
-
- public long walUploadThreshold() {
- return walUploadThreshold;
- }
-
- public int streamSplitSize() {
- return streamSplitSize;
- }
-
- public int objectBlockSize() {
- return objectBlockSize;
- }
-
- public int objectPartSize() {
- return objectPartSize;
- }
-
- public long blockCacheSize() {
- return blockCacheSize;
- }
-
- public int streamObjectCompactionIntervalMinutes() {
- return streamObjectCompactionIntervalMinutes;
- }
-
- public long streamObjectCompactionMaxSizeBytes() {
- return streamObjectCompactionMaxSizeBytes;
- }
-
- public int controllerRequestRetryMaxCount() {
- return controllerRequestRetryMaxCount;
- }
-
- public long controllerRequestRetryBaseDelayMs() {
- return controllerRequestRetryBaseDelayMs;
- }
-
- public long nodeEpoch() {
- return nodeEpoch;
- }
-
- public int streamSetObjectCompactionInterval() {
- return streamSetObjectCompactionInterval;
- }
-
- public long streamSetObjectCompactionCacheSize() {
- return streamSetObjectCompactionCacheSize;
- }
-
- public int streamSetObjectCompactionUploadConcurrency() {
- return streamSetObjectCompactionUploadConcurrency;
- }
-
- public long streamSetObjectCompactionStreamSplitSize() {
- return streamSetObjectCompactionStreamSplitSize;
- }
-
- public int streamSetObjectCompactionForceSplitPeriod() {
- return streamSetObjectCompactionForceSplitPeriod;
- }
-
- public int streamSetObjectCompactionMaxObjectNum() {
- return streamSetObjectCompactionMaxObjectNum;
- }
-
- public int maxStreamNumPerStreamSetObject() {
- return maxStreamNumPerStreamSetObject;
- }
-
- public int maxStreamObjectNumPerCommit() {
- return maxStreamObjectNumPerCommit;
- }
-
- public boolean mockEnable() {
- return mockEnable;
- }
-
- public boolean objectLogEnable() {
- return objectLogEnable;
- }
-
- public long networkBaselineBandwidth() {
- return networkBaselineBandwidth;
- }
-
- public int refillPeriodMs() {
- return refillPeriodMs;
- }
-
- public Config nodeId(int brokerId) {
- this.nodeId = brokerId;
- return this;
- }
-
- public Config endpoint(String s3Endpoint) {
- this.endpoint = s3Endpoint;
- return this;
- }
-
- public Config region(String s3Region) {
- this.region = s3Region;
- return this;
- }
-
- public Config bucket(String s3Bucket) {
- this.bucket = s3Bucket;
- return this;
- }
-
- public Config forcePathStyle(boolean s3ForcePathStyle) {
- this.forcePathStyle = s3ForcePathStyle;
- return this;
- }
-
- public Config walPath(String s3WALPath) {
- this.walPath = s3WALPath;
- return this;
- }
-
- public Config walCacheSize(long s3WALCacheSize) {
- this.walCacheSize = s3WALCacheSize;
- return this;
- }
-
- public Config walCapacity(long s3WALCapacity) {
- this.walCapacity = s3WALCapacity;
- return this;
- }
-
- public Config walInitBufferSize(int walInitBufferSize) {
- this.walInitBufferSize = walInitBufferSize;
- return this;
- }
-
- public Config walMaxBufferSize(int walMaxBufferSize) {
- this.walMaxBufferSize = walMaxBufferSize;
- return this;
- }
-
- public Config walThread(int s3WALThread) {
- this.walThread = s3WALThread;
- return this;
- }
-
- public Config walWindowInitial(long s3WALWindowInitial) {
- this.walWindowInitial = s3WALWindowInitial;
- return this;
- }
-
- public Config walWindowIncrement(long s3WALWindowIncrement) {
- this.walWindowIncrement = s3WALWindowIncrement;
- return this;
- }
-
- public Config walWindowMax(long s3WALWindowMax) {
- this.walWindowMax = s3WALWindowMax;
- return this;
- }
-
- public Config walBlockSoftLimit(long s3WALBlockSoftLimit) {
- this.walBlockSoftLimit = s3WALBlockSoftLimit;
- return this;
- }
-
- public Config walWriteRateLimit(int s3WALWriteRateLimit) {
- this.walWriteRateLimit = s3WALWriteRateLimit;
- return this;
- }
-
- public Config walUploadThreshold(long s3WALObjectSize) {
- this.walUploadThreshold = s3WALObjectSize;
- return this;
- }
-
- public Config streamSplitSize(int s3StreamSplitSize) {
- this.streamSplitSize = s3StreamSplitSize;
- return this;
- }
-
- public Config objectBlockSize(int s3ObjectBlockSize) {
- this.objectBlockSize = s3ObjectBlockSize;
- return this;
- }
-
- public Config objectPartSize(int s3ObjectPartSize) {
- this.objectPartSize = s3ObjectPartSize;
- return this;
- }
-
- public Config blockCacheSize(long s3CacheSize) {
- this.blockCacheSize = s3CacheSize;
- return this;
- }
-
- public Config streamObjectCompactionIntervalMinutes(int s3StreamObjectCompactionIntervalMinutes) {
- this.streamObjectCompactionIntervalMinutes = s3StreamObjectCompactionIntervalMinutes;
- return this;
- }
-
- public Config streamObjectCompactionMaxSizeBytes(long s3StreamObjectCompactionMaxSizeBytes) {
- this.streamObjectCompactionMaxSizeBytes = s3StreamObjectCompactionMaxSizeBytes;
- return this;
- }
-
- public Config controllerRequestRetryMaxCount(int s3ControllerRequestRetryMaxCount) {
- this.controllerRequestRetryMaxCount = s3ControllerRequestRetryMaxCount;
- return this;
- }
-
- public Config controllerRequestRetryBaseDelayMs(long s3ControllerRequestRetryBaseDelayMs) {
- this.controllerRequestRetryBaseDelayMs = s3ControllerRequestRetryBaseDelayMs;
- return this;
- }
-
- public Config nodeEpoch(long brokerEpoch) {
- this.nodeEpoch = brokerEpoch;
- return this;
- }
-
- public Config streamSetObjectCompactionInterval(int streamSetObjectCompactionInterval) {
- this.streamSetObjectCompactionInterval = streamSetObjectCompactionInterval;
- return this;
- }
-
- public Config streamSetObjectCompactionCacheSize(long streamSetObjectCompactionCacheSize) {
- this.streamSetObjectCompactionCacheSize = streamSetObjectCompactionCacheSize;
- return this;
- }
-
- public Config streamSetObjectCompactionUploadConcurrency(int streamSetObjectCompactionUploadConcurrency) {
- this.streamSetObjectCompactionUploadConcurrency = streamSetObjectCompactionUploadConcurrency;
- return this;
- }
-
- public Config streamSetObjectCompactionStreamSplitSize(long streamSetObjectCompactionStreamSplitSize) {
- this.streamSetObjectCompactionStreamSplitSize = streamSetObjectCompactionStreamSplitSize;
- return this;
- }
-
- public Config streamSetObjectCompactionForceSplitPeriod(int streamSetObjectCompactionForceSplitPeriod) {
- this.streamSetObjectCompactionForceSplitPeriod = streamSetObjectCompactionForceSplitPeriod;
- return this;
- }
-
- public Config streamSetObjectCompactionMaxObjectNum(int streamSetObjectCompactionMaxObjectNum) {
- this.streamSetObjectCompactionMaxObjectNum = streamSetObjectCompactionMaxObjectNum;
- return this;
- }
-
- public Config maxStreamNumPerStreamSetObject(int maxStreamNumPerStreamSetObject) {
- this.maxStreamNumPerStreamSetObject = maxStreamNumPerStreamSetObject;
- return this;
- }
-
- public Config maxStreamObjectNumPerCommit(int maxStreamObjectNumPerCommit) {
- this.maxStreamObjectNumPerCommit = maxStreamObjectNumPerCommit;
- return this;
- }
-
- public Config mockEnable(boolean s3MockEnable) {
- this.mockEnable = s3MockEnable;
- return this;
- }
-
- public Config objectLogEnable(boolean s3ObjectLogEnable) {
- this.objectLogEnable = s3ObjectLogEnable;
- return this;
- }
-
- public Config networkBaselineBandwidth(long networkBaselineBandwidth) {
- this.networkBaselineBandwidth = networkBaselineBandwidth;
- return this;
- }
-
- public Config refillPeriodMs(int refillPeriodMs) {
- this.refillPeriodMs = refillPeriodMs;
- return this;
- }
-
- public Config objectRetentionTimeInSecond(long seconds) {
- objectRetentionTimeInSecond = seconds;
- return this;
- }
-
- public long objectRetentionTimeInSecond() {
- return objectRetentionTimeInSecond;
- }
-
- public Config failoverEnable(boolean failoverEnable) {
- this.failoverEnable = failoverEnable;
- return this;
- }
-
- public boolean failoverEnable() {
- return failoverEnable;
- }
-
-}
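Editor's note: the Config class removed above exposes a fluent, chainable setter API (each setter returns this). The snippet below is a minimal configuration sketch, assuming the class's implicit no-arg constructor; the endpoint, bucket, and WAL path are hypothetical placeholders and every value is illustrative rather than a recommended default.

// Illustrative only: wires up a Config for one node via the fluent setters removed above.
Config config = new Config()
    .nodeId(1)
    .nodeEpoch(1L)
    .endpoint("https://s3.example.com")      // hypothetical endpoint
    .region("us-east-1")
    .bucket("example-bucket")                // hypothetical bucket name
    .walPath("/tmp/s3stream/wal")            // hypothetical local WAL path
    .walCapacity(1024L * 1024 * 1024)        // 1 GiB delta WAL
    .walUploadThreshold(100L * 1024 * 1024)  // upload once 100 MiB is buffered
    .objectBlockSize(1024 * 1024)            // 1 MiB data blocks
    .objectPartSize(16 * 1024 * 1024);       // 16 MiB multipart parts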
diff --git a/s3stream/src/main/java/com/automq/stream/s3/Constants.java b/s3stream/src/main/java/com/automq/stream/s3/Constants.java
deleted file mode 100644
index d36122d26..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/Constants.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-package com.automq.stream.s3;
-
-public class Constants {
- public static final int CAPACITY_NOT_SET = -1;
- public static final int NOOP_NODE_ID = -1;
- public static final long NOOP_EPOCH = -1L;
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/DataBlockIndex.java b/s3stream/src/main/java/com/automq/stream/s3/DataBlockIndex.java
deleted file mode 100644
index eea88fd23..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/DataBlockIndex.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import io.netty.buffer.ByteBuf;
-import java.util.Objects;
-
-public final class DataBlockIndex {
-
- public static final int BLOCK_INDEX_SIZE = 8/* streamId */ + 8 /* startOffset */ + 4 /* endOffset delta */
- + 4 /* record count */ + 8 /* block position */ + 4 /* block size */;
- private final long streamId;
- private final long startOffset;
- private final int endOffsetDelta;
- private final int recordCount;
- private final long startPosition;
- private final int size;
-
- public DataBlockIndex(long streamId, long startOffset, int endOffsetDelta, int recordCount, long startPosition,
- int size) {
- this.streamId = streamId;
- this.startOffset = startOffset;
- this.endOffsetDelta = endOffsetDelta;
- this.recordCount = recordCount;
- this.startPosition = startPosition;
- this.size = size;
- }
-
- public long endOffset() {
- return startOffset + endOffsetDelta;
- }
-
- public long endPosition() {
- return startPosition + size;
- }
-
- public void encode(ByteBuf buf) {
- buf.writeLong(streamId);
- buf.writeLong(startOffset);
- buf.writeInt(endOffsetDelta);
- buf.writeInt(recordCount);
- buf.writeLong(startPosition);
- buf.writeInt(size);
- }
-
- public long streamId() {
- return streamId;
- }
-
- public long startOffset() {
- return startOffset;
- }
-
- public int endOffsetDelta() {
- return endOffsetDelta;
- }
-
- public int recordCount() {
- return recordCount;
- }
-
- public long startPosition() {
- return startPosition;
- }
-
- public int size() {
- return size;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (DataBlockIndex) obj;
- return this.streamId == that.streamId &&
- this.startOffset == that.startOffset &&
- this.endOffsetDelta == that.endOffsetDelta &&
- this.recordCount == that.recordCount &&
- this.startPosition == that.startPosition &&
- this.size == that.size;
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(streamId, startOffset, endOffsetDelta, recordCount, startPosition, size);
- }
-
- @Override
- public String toString() {
- return "DataBlockIndex[" +
- "streamId=" + streamId + ", " +
- "startOffset=" + startOffset + ", " +
- "endOffsetDelta=" + endOffsetDelta + ", " +
- "recordCount=" + recordCount + ", " +
- "startPosition=" + startPosition + ", " +
- "size=" + size + ']';
- }
-
-}
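Editor's note: DataBlockIndex serializes to a fixed 36-byte layout (8 + 8 + 4 + 4 + 8 + 4 = BLOCK_INDEX_SIZE), written in the field order shown by encode(ByteBuf) above. The deleted class carries no decode counterpart in this file; the helper below is a hypothetical sketch that simply mirrors that field order.

// Hypothetical decode helper (not part of the deleted class): reads one index
// entry back in the same field order that encode(ByteBuf) writes it.
static DataBlockIndex decode(io.netty.buffer.ByteBuf buf) {
    long streamId = buf.readLong();        // 8 bytes
    long startOffset = buf.readLong();     // 8 bytes
    int endOffsetDelta = buf.readInt();    // 4 bytes
    int recordCount = buf.readInt();       // 4 bytes
    long startPosition = buf.readLong();   // 8 bytes
    int size = buf.readInt();              // 4 bytes -> 36 bytes total
    return new DataBlockIndex(streamId, startOffset, endOffsetDelta, recordCount, startPosition, size);
}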
diff --git a/s3stream/src/main/java/com/automq/stream/s3/DeltaWALUploadTask.java b/s3stream/src/main/java/com/automq/stream/s3/DeltaWALUploadTask.java
deleted file mode 100644
index ab2e54fc0..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/DeltaWALUploadTask.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.objects.CommitStreamSetObjectRequest;
-import com.automq.stream.s3.objects.ObjectManager;
-import com.automq.stream.s3.objects.ObjectStreamRange;
-import com.automq.stream.s3.objects.StreamObject;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.utils.AsyncRateLimiter;
-import com.automq.stream.utils.FutureUtil;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.TimeUnit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.s3.metadata.ObjectUtils.NOOP_OBJECT_ID;
-
-public class DeltaWALUploadTask {
- private static final Logger LOGGER = LoggerFactory.getLogger(DeltaWALUploadTask.class);
- final boolean forceSplit;
- private final Logger s3ObjectLogger;
- private final Map<Long, List<StreamRecordBatch>> streamRecordsMap;
- private final int objectBlockSize;
- private final int objectPartSize;
- private final int streamSplitSizeThreshold;
- private final ObjectManager objectManager;
- private final S3Operator s3Operator;
- private final boolean s3ObjectLogEnable;
- private final CompletableFuture<Long> prepareCf = new CompletableFuture<>();
- private final CompletableFuture<CommitStreamSetObjectRequest> uploadCf = new CompletableFuture<>();
- private final ExecutorService executor;
- private final double rate;
- private final AsyncRateLimiter limiter;
- private long startTimestamp;
- private volatile CommitStreamSetObjectRequest commitStreamSetObjectRequest;
-
- public DeltaWALUploadTask(Config config, Map<Long, List<StreamRecordBatch>> streamRecordsMap,
- ObjectManager objectManager, S3Operator s3Operator,
- ExecutorService executor, boolean forceSplit, double rate) {
- this.s3ObjectLogger = S3ObjectLogger.logger(String.format("[DeltaWALUploadTask id=%d] ", config.nodeId()));
- this.streamRecordsMap = streamRecordsMap;
- this.objectBlockSize = config.objectBlockSize();
- this.objectPartSize = config.objectPartSize();
- this.streamSplitSizeThreshold = config.streamSplitSize();
- this.s3ObjectLogEnable = config.objectLogEnable();
- this.objectManager = objectManager;
- this.s3Operator = s3Operator;
- this.forceSplit = forceSplit;
- this.executor = executor;
- this.rate = rate;
- this.limiter = new AsyncRateLimiter(rate);
- }
-
- public static Builder builder() {
- return new Builder();
- }
-
- public CompletableFuture<Long> prepare() {
- startTimestamp = System.currentTimeMillis();
- if (forceSplit) {
- prepareCf.complete(NOOP_OBJECT_ID);
- } else {
- objectManager
- .prepareObject(1, TimeUnit.MINUTES.toMillis(60))
- .thenAcceptAsync(prepareCf::complete, executor)
- .exceptionally(ex -> {
- prepareCf.completeExceptionally(ex);
- return null;
- });
- }
- return prepareCf;
- }
-
- public CompletableFuture<CommitStreamSetObjectRequest> upload() {
- prepareCf.thenAcceptAsync(objectId -> FutureUtil.exec(() -> upload0(objectId), uploadCf, LOGGER, "upload"), executor);
- return uploadCf;
- }
-
- private void upload0(long objectId) {
- List<Long> streamIds = new ArrayList<>(streamRecordsMap.keySet());
- Collections.sort(streamIds);
- CommitStreamSetObjectRequest request = new CommitStreamSetObjectRequest();
-
- ObjectWriter streamSetObject;
- if (forceSplit) {
- // when there is only one stream, we only need to write the stream data.
- streamSetObject = ObjectWriter.noop(objectId);
- } else {
- streamSetObject = ObjectWriter.writer(objectId, s3Operator, objectBlockSize, objectPartSize);
- }
-
- List<CompletableFuture<Void>> streamObjectCfList = new LinkedList<>();
-
- List<CompletableFuture<Void>> streamSetWriteCfList = new LinkedList<>();
- for (Long streamId : streamIds) {
- List<StreamRecordBatch> streamRecords = streamRecordsMap.get(streamId);
- int streamSize = streamRecords.stream().mapToInt(StreamRecordBatch::size).sum();
- if (forceSplit || streamSize >= streamSplitSizeThreshold) {
- streamObjectCfList.add(writeStreamObject(streamRecords, streamSize).thenAccept(so -> {
- synchronized (request) {
- request.addStreamObject(so);
- }
- }));
- } else {
- streamSetWriteCfList.add(limiter.acquire(streamSize).thenAccept(nil -> streamSetObject.write(streamId, streamRecords)));
- long startOffset = streamRecords.get(0).getBaseOffset();
- long endOffset = streamRecords.get(streamRecords.size() - 1).getLastOffset();
- request.addStreamRange(new ObjectStreamRange(streamId, -1L, startOffset, endOffset, streamSize));
- }
- }
- request.setObjectId(objectId);
- request.setOrderId(objectId);
- CompletableFuture<Void> streamSetObjectCf = CompletableFuture.allOf(streamSetWriteCfList.toArray(new CompletableFuture[0]))
- .thenCompose(nil -> streamSetObject.close().thenAccept(nil2 -> request.setObjectSize(streamSetObject.size())));
- List<CompletableFuture<Void>> allCf = new LinkedList<>(streamObjectCfList);
- allCf.add(streamSetObjectCf);
- CompletableFuture.allOf(allCf.toArray(new CompletableFuture[0])).thenAccept(nil -> {
- commitStreamSetObjectRequest = request;
- uploadCf.complete(request);
- }).exceptionally(ex -> {
- uploadCf.completeExceptionally(ex);
- return null;
- });
- }
-
- public CompletableFuture<Void> commit() {
- return uploadCf.thenCompose(request -> objectManager.commitStreamSetObject(request).thenAccept(resp -> {
- LOGGER.info("Upload delta WAL {}, cost {}ms, rate limiter {}bytes/s", commitStreamSetObjectRequest,
- System.currentTimeMillis() - startTimestamp, rate);
- if (s3ObjectLogEnable) {
- s3ObjectLogger.trace("{}", commitStreamSetObjectRequest);
- }
- }).whenComplete((nil, ex) -> limiter.close()));
- }
-
- private CompletableFuture<StreamObject> writeStreamObject(List<StreamRecordBatch> streamRecords, int streamSize) {
- CompletableFuture<Long> cf = objectManager.prepareObject(1, TimeUnit.MINUTES.toMillis(60));
- cf = cf.thenCompose(objectId -> limiter.acquire(streamSize).thenApply(nil -> objectId));
- return cf.thenComposeAsync(objectId -> {
- ObjectWriter streamObjectWriter = ObjectWriter.writer(objectId, s3Operator, objectBlockSize, objectPartSize);
- long streamId = streamRecords.get(0).getStreamId();
- streamObjectWriter.write(streamId, streamRecords);
- long startOffset = streamRecords.get(0).getBaseOffset();
- long endOffset = streamRecords.get(streamRecords.size() - 1).getLastOffset();
- StreamObject streamObject = new StreamObject();
- streamObject.setObjectId(objectId);
- streamObject.setStreamId(streamId);
- streamObject.setStartOffset(startOffset);
- streamObject.setEndOffset(endOffset);
- return streamObjectWriter.close().thenApply(nil -> {
- streamObject.setObjectSize(streamObjectWriter.size());
- return streamObject;
- });
- }, executor);
- }
-
- public static class Builder {
- private Config config;
- private Map<Long, List<StreamRecordBatch>> streamRecordsMap;
- private ObjectManager objectManager;
- private S3Operator s3Operator;
- private ExecutorService executor;
- private Boolean forceSplit;
- private double rate = Long.MAX_VALUE;
-
- public Builder config(Config config) {
- this.config = config;
- return this;
- }
-
- public Builder streamRecordsMap(Map<Long, List<StreamRecordBatch>> streamRecordsMap) {
- this.streamRecordsMap = streamRecordsMap;
- return this;
- }
-
- public Builder objectManager(ObjectManager objectManager) {
- this.objectManager = objectManager;
- return this;
- }
-
- public Builder s3Operator(S3Operator s3Operator) {
- this.s3Operator = s3Operator;
- return this;
- }
-
- public Builder executor(ExecutorService executor) {
- this.executor = executor;
- return this;
- }
-
- public Builder forceSplit(boolean forceSplit) {
- this.forceSplit = forceSplit;
- return this;
- }
-
- public Builder rate(double rate) {
- this.rate = rate;
- return this;
- }
-
- public DeltaWALUploadTask build() {
- if (forceSplit == null) {
- boolean forceSplit = streamRecordsMap.size() == 1;
- if (!forceSplit) {
- Optional<Boolean> hasStreamSetData = streamRecordsMap.values()
- .stream()
- .map(records -> records.stream().mapToLong(StreamRecordBatch::size).sum() >= config.streamSplitSize())
- .filter(split -> !split)
- .findAny();
- if (hasStreamSetData.isEmpty()) {
- forceSplit = true;
- }
- }
- this.forceSplit = forceSplit;
- }
- return new DeltaWALUploadTask(config, streamRecordsMap, objectManager, s3Operator, executor, forceSplit, rate);
- }
- }
-
-}
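Editor's note: the lifecycle of the removed DeltaWALUploadTask is prepare() (reserve an object id), upload() (write the stream-set object and any split stream objects), then commit() (register them with the ObjectManager); the recovery path in S3Storage chains the three calls exactly this way. A minimal driver sketch, assuming the config, streamRecordsMap, objectManager, s3Operator, and executor instances already exist; checked-exception handling around get() is omitted.

// Minimal sketch of driving a delta WAL upload end to end.
DeltaWALUploadTask task = DeltaWALUploadTask.builder()
    .config(config)
    .streamRecordsMap(streamRecordsMap)   // Map<Long, List<StreamRecordBatch>>
    .objectManager(objectManager)
    .s3Operator(s3Operator)
    .executor(executor)
    .build();                             // forceSplit is derived when left unset
task.prepare()                            // reserve an object id (skipped on force split)
    .thenCompose(objectId -> task.upload())
    .thenCompose(request -> task.commit())
    .get();                               // block until the commit is acknowledged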
diff --git a/s3stream/src/main/java/com/automq/stream/s3/ObjectReader.java b/s3stream/src/main/java/com/automq/stream/s3/ObjectReader.java
deleted file mode 100644
index d5441408a..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/ObjectReader.java
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.s3.metadata.S3ObjectMetadata;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.network.ThrottleStrategy;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.utils.CloseableIterator;
-import com.automq.stream.utils.biniarysearch.IndexBlockOrderedBytes;
-import io.netty.buffer.ByteBuf;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.Objects;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.s3.ByteBufAlloc.READ_INDEX_BLOCK;
-import static com.automq.stream.s3.ObjectWriter.Footer.FOOTER_SIZE;
-import static com.automq.stream.s3.metadata.ObjectUtils.NOOP_OFFSET;
-
-public class ObjectReader implements AutoCloseable {
- private static final Logger LOGGER = LoggerFactory.getLogger(ObjectReader.class);
- private final S3ObjectMetadata metadata;
- private final String objectKey;
- private final S3Operator s3Operator;
- private final CompletableFuture<BasicObjectInfo> basicObjectInfoCf;
- private final AtomicInteger refCount = new AtomicInteger(1);
-
- public ObjectReader(S3ObjectMetadata metadata, S3Operator s3Operator) {
- this.metadata = metadata;
- this.objectKey = metadata.key();
- this.s3Operator = s3Operator;
- this.basicObjectInfoCf = new CompletableFuture<>();
- asyncGetBasicObjectInfo();
- }
-
- public String objectKey() {
- return objectKey;
- }
-
- public CompletableFuture<BasicObjectInfo> basicObjectInfo() {
- return basicObjectInfoCf;
- }
-
- public CompletableFuture<FindIndexResult> find(long streamId, long startOffset, long endOffset) {
- return find(streamId, startOffset, endOffset, Integer.MAX_VALUE);
- }
-
- public CompletableFuture<FindIndexResult> find(long streamId, long startOffset, long endOffset, int maxBytes) {
- return basicObjectInfoCf.thenApply(basicObjectInfo -> basicObjectInfo.indexBlock().find(streamId, startOffset, endOffset, maxBytes));
- }
-
- public CompletableFuture<DataBlockGroup> read(DataBlockIndex block) {
- CompletableFuture<ByteBuf> rangeReadCf = s3Operator.rangeRead(objectKey, block.startPosition(), block.endPosition(), ThrottleStrategy.THROTTLE_1);
- return rangeReadCf.thenApply(DataBlockGroup::new);
- }
-
- void asyncGetBasicObjectInfo() {
- int guessIndexBlockSize = 1024 + (int) (metadata.objectSize() / (1024 * 1024 /* 1MB */) * 36 /* index unit size*/);
- asyncGetBasicObjectInfo0(Math.max(0, metadata.objectSize() - guessIndexBlockSize), true);
- }
-
- private void asyncGetBasicObjectInfo0(long startPosition, boolean firstAttempt) {
- CompletableFuture<ByteBuf> cf = s3Operator.rangeRead(objectKey, startPosition, metadata.objectSize());
- cf.thenAccept(buf -> {
- try {
- BasicObjectInfo basicObjectInfo = BasicObjectInfo.parse(buf, metadata);
- basicObjectInfoCf.complete(basicObjectInfo);
- } catch (IndexBlockParseException ex) {
- asyncGetBasicObjectInfo0(ex.indexBlockPosition, false);
- }
- }).exceptionally(ex -> {
- LOGGER.warn("s3 range read from {} [{}, {}) failed", objectKey, startPosition, metadata.objectSize(), ex);
- // TODO: delay retry.
- if (firstAttempt) {
- asyncGetBasicObjectInfo0(startPosition, false);
- } else {
- basicObjectInfoCf.completeExceptionally(ex);
- }
- return null;
- });
- }
-
- public ObjectReader retain() {
- refCount.incrementAndGet();
- return this;
- }
-
- public ObjectReader release() {
- if (refCount.decrementAndGet() == 0) {
- close0();
- }
- return this;
- }
-
- @Override
- public void close() {
- release();
- }
-
- public void close0() {
- basicObjectInfoCf.thenAccept(BasicObjectInfo::close);
- }
-
- /**
- *
- */
- public static final class BasicObjectInfo {
- private final long dataBlockSize;
- private final IndexBlock indexBlock;
-
- /**
- * @param dataBlockSize The total size of the data blocks, which equals to index start position.
- * @param indexBlock raw index data.
- */
- public BasicObjectInfo(long dataBlockSize, IndexBlock indexBlock) {
- this.dataBlockSize = dataBlockSize;
- this.indexBlock = indexBlock;
- }
-
- public static BasicObjectInfo parse(ByteBuf objectTailBuf,
- S3ObjectMetadata s3ObjectMetadata) throws IndexBlockParseException {
- objectTailBuf = objectTailBuf.slice();
- long indexBlockPosition = objectTailBuf.getLong(objectTailBuf.readableBytes() - FOOTER_SIZE);
- int indexBlockSize = objectTailBuf.getInt(objectTailBuf.readableBytes() - 40);
- if (indexBlockPosition + objectTailBuf.readableBytes() < s3ObjectMetadata.objectSize()) {
- objectTailBuf.release();
- throw new IndexBlockParseException(indexBlockPosition);
- } else {
- int indexRelativePosition = objectTailBuf.readableBytes() - (int) (s3ObjectMetadata.objectSize() - indexBlockPosition);
-
- // trim the ByteBuf to avoid extra memory occupy.
- ByteBuf indexBlockBuf = objectTailBuf.slice(objectTailBuf.readerIndex() + indexRelativePosition, indexBlockSize);
- ByteBuf copy = ByteBufAlloc.byteBuffer(indexBlockBuf.readableBytes(), READ_INDEX_BLOCK);
- indexBlockBuf.readBytes(copy, indexBlockBuf.readableBytes());
- objectTailBuf.release();
- indexBlockBuf = copy;
- return new BasicObjectInfo(indexBlockPosition, new IndexBlock(s3ObjectMetadata, indexBlockBuf));
- }
- }
-
- public int size() {
- return indexBlock.size();
- }
-
- void close() {
- indexBlock.close();
- }
-
- public long dataBlockSize() {
- return dataBlockSize;
- }
-
- public IndexBlock indexBlock() {
- return indexBlock;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (BasicObjectInfo) obj;
- return this.dataBlockSize == that.dataBlockSize &&
- Objects.equals(this.indexBlock, that.indexBlock);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(dataBlockSize, indexBlock);
- }
-
- @Override
- public String toString() {
- return "BasicObjectInfo[" +
- "dataBlockSize=" + dataBlockSize + ", " +
- "indexBlock=" + indexBlock + ']';
- }
-
- }
-
- public static class IndexBlock {
- public static final int INDEX_BLOCK_UNIT_SIZE = 8/* streamId */ + 8 /* startOffset */ + 4 /* endOffset delta */
- + 4 /* record count */ + 8 /* block position */ + 4 /* block size */;
- private final S3ObjectMetadata s3ObjectMetadata;
- private final ByteBuf buf;
- private final int size;
- private final int count;
-
- public IndexBlock(S3ObjectMetadata s3ObjectMetadata, ByteBuf buf) {
- this.s3ObjectMetadata = s3ObjectMetadata;
- this.buf = buf;
- this.size = buf.readableBytes();
- this.count = buf.readableBytes() / INDEX_BLOCK_UNIT_SIZE;
- }
-
- public Iterator<DataBlockIndex> iterator() {
- AtomicInteger getIndex = new AtomicInteger(0);
- return new Iterator<>() {
- @Override
- public boolean hasNext() {
- return getIndex.get() < count;
- }
-
- @Override
- public DataBlockIndex next() {
- return get(getIndex.getAndIncrement());
- }
- };
- }
-
- public DataBlockIndex get(int index) {
- if (index < 0 || index >= count) {
- throw new IllegalArgumentException("index " + index + " is out of range [0, " + count + ")");
- }
- int base = index * INDEX_BLOCK_UNIT_SIZE;
- long streamId = buf.getLong(base);
- long startOffset = buf.getLong(base + 8);
- int endOffsetDelta = buf.getInt(base + 16);
- int recordCount = buf.getInt(base + 20);
- long blockPosition = buf.getLong(base + 24);
- int blockSize = buf.getInt(base + 32);
- return new DataBlockIndex(streamId, startOffset, endOffsetDelta, recordCount, blockPosition, blockSize);
- }
-
- public FindIndexResult find(long streamId, long startOffset, long endOffset) {
- return find(streamId, startOffset, endOffset, Integer.MAX_VALUE);
- }
-
- public FindIndexResult find(long streamId, long startOffset, long endOffset, int maxBytes) {
- long nextStartOffset = startOffset;
- int nextMaxBytes = maxBytes;
- boolean matched = false;
- boolean isFulfilled = false;
- List<StreamDataBlock> rst = new LinkedList<>();
- IndexBlockOrderedBytes indexBlockOrderedBytes = new IndexBlockOrderedBytes(this);
- int startIndex = indexBlockOrderedBytes.search(new IndexBlockOrderedBytes.TargetStreamOffset(streamId, startOffset));
- if (startIndex == -1) {
- // mismatched
- return new FindIndexResult(false, nextStartOffset, nextMaxBytes, rst);
- }
- for (int i = startIndex; i < count(); i++) {
- DataBlockIndex index = get(i);
- if (index.streamId() == streamId) {
- if (nextStartOffset < index.startOffset()) {
- break;
- }
- if (index.endOffset() <= nextStartOffset) {
- continue;
- }
- matched = nextStartOffset == index.startOffset();
- nextStartOffset = index.endOffset();
- rst.add(new StreamDataBlock(s3ObjectMetadata.objectId(), index));
-
- // we consider first block as not matched because we do not know exactly how many bytes are within
- // the range in first block, as a result we may read one more block than expected.
- if (matched) {
- int recordPayloadSize = index.size()
- - index.recordCount() * StreamRecordBatchCodec.HEADER_SIZE // sum of encoded record header size
- - ObjectWriter.DataBlock.BLOCK_HEADER_SIZE; // block header size
- nextMaxBytes -= Math.min(nextMaxBytes, recordPayloadSize);
- }
- if ((endOffset != NOOP_OFFSET && nextStartOffset >= endOffset) || nextMaxBytes == 0) {
- isFulfilled = true;
- break;
- }
- } else if (matched) {
- break;
- }
- }
- return new FindIndexResult(isFulfilled, nextStartOffset, nextMaxBytes, rst);
- }
-
- public int size() {
- return size;
- }
-
- public int count() {
- return count;
- }
-
- void close() {
- buf.release();
- }
- }
-
- public static final class FindIndexResult {
- private final boolean isFulfilled;
- private final long nextStartOffset;
- private final int nextMaxBytes;
- private final List<StreamDataBlock> streamDataBlocks;
-
- public FindIndexResult(boolean isFulfilled, long nextStartOffset, int nextMaxBytes,
- List<StreamDataBlock> streamDataBlocks) {
- this.isFulfilled = isFulfilled;
- this.nextStartOffset = nextStartOffset;
- this.nextMaxBytes = nextMaxBytes;
- this.streamDataBlocks = streamDataBlocks;
- }
-
- public boolean isFulfilled() {
- return isFulfilled;
- }
-
- public long nextStartOffset() {
- return nextStartOffset;
- }
-
- public int nextMaxBytes() {
- return nextMaxBytes;
- }
-
- public List<StreamDataBlock> streamDataBlocks() {
- return streamDataBlocks;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (FindIndexResult) obj;
- return this.isFulfilled == that.isFulfilled &&
- this.nextStartOffset == that.nextStartOffset &&
- this.nextMaxBytes == that.nextMaxBytes &&
- Objects.equals(this.streamDataBlocks, that.streamDataBlocks);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(isFulfilled, nextStartOffset, nextMaxBytes, streamDataBlocks);
- }
-
- @Override
- public String toString() {
- return "FindIndexResult[" +
- "isFulfilled=" + isFulfilled + ", " +
- "nextStartOffset=" + nextStartOffset + ", " +
- "nextMaxBytes=" + nextMaxBytes + ", " +
- "streamDataBlocks=" + streamDataBlocks + ']';
- }
-
- }
-
- public static class IndexBlockParseException extends Exception {
- long indexBlockPosition;
-
- public IndexBlockParseException(long indexBlockPosition) {
- this.indexBlockPosition = indexBlockPosition;
- }
-
- }
-
- public static class DataBlockGroup implements AutoCloseable {
- private final ByteBuf buf;
- private final int recordCount;
-
- public DataBlockGroup(ByteBuf buf) {
- this.buf = buf.duplicate();
- this.recordCount = check(buf);
- }
-
- private static int check(ByteBuf buf) {
- buf = buf.duplicate();
- int recordCount = 0;
- while (buf.readableBytes() > 0) {
- byte magicCode = buf.readByte();
- if (magicCode != ObjectWriter.DATA_BLOCK_MAGIC) {
- LOGGER.error("magic code mismatch, expected {}, actual {}", ObjectWriter.DATA_BLOCK_MAGIC, magicCode);
- throw new RuntimeException("[FATAL] magic code mismatch, data is corrupted");
- }
- buf.readByte(); // flag
- recordCount += buf.readInt();
- int dataLength = buf.readInt();
- buf.skipBytes(dataLength);
- }
- return recordCount;
- }
-
- public CloseableIterator<StreamRecordBatch> iterator() {
- ByteBuf buf = this.buf.duplicate();
- AtomicInteger currentBlockRecordCount = new AtomicInteger(0);
- AtomicInteger remainingRecordCount = new AtomicInteger(recordCount);
- return new CloseableIterator<>() {
- @Override
- public boolean hasNext() {
- // in.available() is not reliable. ZstdInputStreamNoFinalizer might return 1 when there is no more data.
- return remainingRecordCount.get() != 0;
- }
-
- @Override
- public StreamRecordBatch next() {
- if (remainingRecordCount.decrementAndGet() < 0) {
- throw new NoSuchElementException();
- }
- if (currentBlockRecordCount.get() == 0) {
- buf.skipBytes(1 /* magic */ + 1 /* flag */);
- currentBlockRecordCount.set(buf.readInt());
- buf.skipBytes(4);
- }
- currentBlockRecordCount.decrementAndGet();
- return StreamRecordBatchCodec.duplicateDecode(buf);
- }
-
- @Override
- public void close() {
- }
- };
- }
-
- public int recordCount() {
- return recordCount;
- }
-
- @Override
- public void close() {
- buf.release();
- }
- }
-
-}
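Editor's note: reads against the removed ObjectReader are two-step: resolve DataBlockIndex entries from the object's index block, then range-read each data block and iterate its records. The sketch below scans a whole object, assuming metadata (an S3ObjectMetadata) and s3Operator already exist and that CloseableIterator extends Iterator; error handling and ref-count edge cases are omitted.

// Sketch of a full-object scan using only the APIs shown above.
ObjectReader reader = new ObjectReader(metadata, s3Operator);
reader.basicObjectInfo().thenAccept(info ->
    info.indexBlock().iterator().forEachRemaining(index ->
        reader.read(index).thenAccept(blockGroup -> {
            CloseableIterator<StreamRecordBatch> records = blockGroup.iterator();
            records.forEachRemaining(record -> {
                // consume the record payload here ...
                record.release();       // release the decoded record buffer
            });
            records.close();
            blockGroup.close();         // release the range-read buffer
        })));
// reader.release() once all reads are done, to drop the constructor's reference.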
diff --git a/s3stream/src/main/java/com/automq/stream/s3/ObjectWriter.java b/s3stream/src/main/java/com/automq/stream/s3/ObjectWriter.java
deleted file mode 100644
index 85863fb5e..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/ObjectWriter.java
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.s3.metadata.ObjectUtils;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.objects.ObjectStreamRange;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.s3.operator.Writer;
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.CompositeByteBuf;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.CompletableFuture;
-
-import static com.automq.stream.s3.ByteBufAlloc.WRITE_DATA_BLOCK_HEADER;
-import static com.automq.stream.s3.ByteBufAlloc.WRITE_FOOTER;
-import static com.automq.stream.s3.ByteBufAlloc.WRITE_INDEX_BLOCK;
-
-/**
- * Write stream records to a single object.
- */
-public interface ObjectWriter {
-
- byte DATA_BLOCK_MAGIC = 0x5A;
- // TODO: first n bit is the compressed flag
- byte DATA_BLOCK_DEFAULT_FLAG = 0x02;
-
- static ObjectWriter writer(long objectId, S3Operator s3Operator, int blockSizeThreshold, int partSizeThreshold) {
- return new DefaultObjectWriter(objectId, s3Operator, blockSizeThreshold, partSizeThreshold);
- }
-
- static ObjectWriter noop(long objectId) {
- return new NoopObjectWriter(objectId);
- }
-
- void write(long streamId, List<StreamRecordBatch> records);
-
- CompletableFuture<Void> close();
-
- List<ObjectStreamRange> getStreamRanges();
-
- long objectId();
-
- long size();
-
- class DefaultObjectWriter implements ObjectWriter {
-
- private final int blockSizeThreshold;
- private final int partSizeThreshold;
- private final List<DataBlock> waitingUploadBlocks;
- private final List<DataBlock> completedBlocks;
- private final Writer writer;
- private final long objectId;
- private int waitingUploadBlocksSize;
- private IndexBlock indexBlock;
- private long size;
-
- /**
- * Create a new object writer.
- *
- * @param objectId object id
- * @param s3Operator S3 operator
- * @param blockSizeThreshold the max size of a block
- * @param partSizeThreshold the max size of a part. If it is smaller than {@link Writer#MIN_PART_SIZE}, it will be set to {@link Writer#MIN_PART_SIZE}.
- */
- public DefaultObjectWriter(long objectId, S3Operator s3Operator, int blockSizeThreshold,
- int partSizeThreshold) {
- this.objectId = objectId;
- String objectKey = ObjectUtils.genKey(0, objectId);
- this.blockSizeThreshold = blockSizeThreshold;
- this.partSizeThreshold = Math.max(Writer.MIN_PART_SIZE, partSizeThreshold);
- waitingUploadBlocks = new LinkedList<>();
- completedBlocks = new LinkedList<>();
- writer = s3Operator.writer(objectKey);
- }
-
- public void write(long streamId, List<StreamRecordBatch> records) {
- List<List<StreamRecordBatch>> blocks = groupByBlock(records);
- blocks.forEach(blockRecords -> {
- DataBlock block = new DataBlock(streamId, blockRecords);
- waitingUploadBlocks.add(block);
- waitingUploadBlocksSize += block.size();
- });
- if (waitingUploadBlocksSize >= partSizeThreshold) {
- tryUploadPart();
- }
- }
-
- private List<List<StreamRecordBatch>> groupByBlock(List<StreamRecordBatch> records) {
- List<List<StreamRecordBatch>> blocks = new LinkedList<>();
- List<StreamRecordBatch> blockRecords = new ArrayList<>(records.size());
- for (StreamRecordBatch record : records) {
- size += record.size();
- blockRecords.add(record);
- if (size >= blockSizeThreshold) {
- blocks.add(blockRecords);
- blockRecords = new ArrayList<>(records.size());
- size = 0;
- }
- }
- if (!blockRecords.isEmpty()) {
- blocks.add(blockRecords);
- }
- return blocks;
- }
-
- private synchronized void tryUploadPart() {
- for (; ; ) {
- List<DataBlock> uploadBlocks = new ArrayList<>(waitingUploadBlocks.size());
- boolean partFull = false;
- int size = 0;
- for (DataBlock block : waitingUploadBlocks) {
- uploadBlocks.add(block);
- size += block.size();
- if (size >= partSizeThreshold) {
- partFull = true;
- break;
- }
- }
- if (partFull) {
- CompositeByteBuf partBuf = ByteBufAlloc.compositeByteBuffer();
- for (DataBlock block : uploadBlocks) {
- waitingUploadBlocksSize -= block.size();
- partBuf.addComponent(true, block.buffer());
- }
- writer.write(partBuf);
- completedBlocks.addAll(uploadBlocks);
- waitingUploadBlocks.removeIf(uploadBlocks::contains);
- } else {
- break;
- }
- }
- }
-
- public CompletableFuture<Void> close() {
- CompositeByteBuf buf = ByteBufAlloc.compositeByteBuffer();
- for (DataBlock block : waitingUploadBlocks) {
- buf.addComponent(true, block.buffer());
- completedBlocks.add(block);
- }
- waitingUploadBlocks.clear();
- indexBlock = new IndexBlock();
- buf.addComponent(true, indexBlock.buffer());
- Footer footer = new Footer(indexBlock.position(), indexBlock.size());
- buf.addComponent(true, footer.buffer());
- writer.write(buf.duplicate());
- size = indexBlock.position() + indexBlock.size() + footer.size();
- return writer.close();
- }
-
- public List<ObjectStreamRange> getStreamRanges() {
- List<ObjectStreamRange> streamRanges = new LinkedList<>();
- ObjectStreamRange lastStreamRange = null;
- for (DataBlock block : completedBlocks) {
- ObjectStreamRange streamRange = block.getStreamRange();
- if (lastStreamRange == null || lastStreamRange.getStreamId() != streamRange.getStreamId()) {
- if (lastStreamRange != null) {
- streamRanges.add(lastStreamRange);
- }
- lastStreamRange = new ObjectStreamRange();
- lastStreamRange.setStreamId(streamRange.getStreamId());
- lastStreamRange.setEpoch(streamRange.getEpoch());
- lastStreamRange.setStartOffset(streamRange.getStartOffset());
- }
- lastStreamRange.setEndOffset(streamRange.getEndOffset());
- }
- if (lastStreamRange != null) {
- streamRanges.add(lastStreamRange);
- }
- return streamRanges;
- }
-
- public long objectId() {
- return objectId;
- }
-
- public long size() {
- return size;
- }
-
- class IndexBlock {
- private final ByteBuf buf;
- private final long position;
-
- public IndexBlock() {
- long nextPosition = 0;
- int indexBlockSize = DataBlockIndex.BLOCK_INDEX_SIZE * completedBlocks.size();
- buf = ByteBufAlloc.byteBuffer(indexBlockSize, WRITE_INDEX_BLOCK);
- for (DataBlock block : completedBlocks) {
- ObjectStreamRange streamRange = block.getStreamRange();
- new DataBlockIndex(streamRange.getStreamId(), streamRange.getStartOffset(), (int) (streamRange.getEndOffset() - streamRange.getStartOffset()),
- block.recordCount(), nextPosition, block.size()).encode(buf);
- nextPosition += block.size();
- }
- position = nextPosition;
- }
-
- public ByteBuf buffer() {
- return buf.duplicate();
- }
-
- public long position() {
- return position;
- }
-
- public int size() {
- return buf.readableBytes();
- }
- }
- }
-
- class DataBlock {
- public static final int BLOCK_HEADER_SIZE = 1 /* magic */ + 1/* flag */ + 4 /* record count*/ + 4 /* data length */;
- private final CompositeByteBuf encodedBuf;
- private final ObjectStreamRange streamRange;
- private final int recordCount;
- private final int size;
-
- public DataBlock(long streamId, List<StreamRecordBatch> records) {
- this.recordCount = records.size();
- this.encodedBuf = ByteBufAlloc.compositeByteBuffer();
- ByteBuf header = ByteBufAlloc.byteBuffer(BLOCK_HEADER_SIZE, WRITE_DATA_BLOCK_HEADER);
- header.writeByte(DATA_BLOCK_MAGIC);
- header.writeByte(DATA_BLOCK_DEFAULT_FLAG);
- header.writeInt(recordCount);
- header.writeInt(0); // data length
- encodedBuf.addComponent(true, header);
- records.forEach(r -> encodedBuf.addComponent(true, r.encoded().retain()));
- this.size = encodedBuf.readableBytes();
- encodedBuf.setInt(BLOCK_HEADER_SIZE - 4, size - BLOCK_HEADER_SIZE);
- this.streamRange = new ObjectStreamRange(streamId, records.get(0).getEpoch(), records.get(0).getBaseOffset(), records.get(records.size() - 1).getLastOffset(), size);
- }
-
- public int size() {
- return size;
- }
-
- public int recordCount() {
- return recordCount;
- }
-
- public ObjectStreamRange getStreamRange() {
- return streamRange;
- }
-
- public ByteBuf buffer() {
- return encodedBuf.duplicate();
- }
- }
-
- class Footer {
- public static final int FOOTER_SIZE = 48;
- private static final long MAGIC = 0x88e241b785f4cff7L;
- private final ByteBuf buf;
-
- public Footer(long indexStartPosition, int indexBlockLength) {
- buf = ByteBufAlloc.byteBuffer(FOOTER_SIZE, WRITE_FOOTER);
- // start position of index block
- buf.writeLong(indexStartPosition);
- // size of index block
- buf.writeInt(indexBlockLength);
- // reserved for future
- buf.writeZero(40 - 8 - 4);
- buf.writeLong(MAGIC);
- }
-
- public ByteBuf buffer() {
- return buf.duplicate();
- }
-
- public int size() {
- return FOOTER_SIZE;
- }
-
- }
-
- class NoopObjectWriter implements ObjectWriter {
- private final long objectId;
-
- public NoopObjectWriter(long objectId) {
- this.objectId = objectId;
- }
-
- @Override
- public void write(long streamId, List<StreamRecordBatch> records) {
- }
-
- @Override
- public CompletableFuture<Void> close() {
- return CompletableFuture.completedFuture(null);
- }
-
- @Override
- public List<ObjectStreamRange> getStreamRanges() {
- return Collections.emptyList();
- }
-
- @Override
- public long objectId() {
- return objectId;
- }
-
- @Override
- public long size() {
- return 0;
- }
- }
-
-}
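Editor's note: DefaultObjectWriter lays an object out as data blocks, followed by one index block (36 bytes per data block), followed by a 48-byte footer holding the index position, index size, reserved padding, and magic. Below is a minimal write-path sketch, mirroring how DeltaWALUploadTask drives the interface; objectId, s3Operator, streamId, and records are assumed to exist already.

// Sketch: write one stream's records into a prepared object, then close to
// flush the remaining data blocks, the index block, and the footer.
ObjectWriter writer = ObjectWriter.writer(objectId, s3Operator,
    1024 * 1024 /* block threshold */, 16 * 1024 * 1024 /* part threshold */);
writer.write(streamId, records);               // records: List<StreamRecordBatch>
writer.close().thenAccept(nil -> {
    long objectSize = writer.size();           // meaningful only after close()
    List<ObjectStreamRange> ranges = writer.getStreamRanges();
    // hand objectSize and ranges to the ObjectManager when committing ...
});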
diff --git a/s3stream/src/main/java/com/automq/stream/s3/S3ObjectLogger.java b/s3stream/src/main/java/com/automq/stream/s3/S3ObjectLogger.java
deleted file mode 100644
index d703511bb..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/S3ObjectLogger.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.utils.LogContext;
-import org.slf4j.Logger;
-
-public class S3ObjectLogger {
-
- public static Logger logger(String prefix) {
- return new LogContext(prefix).logger("s3.object.logger");
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/S3Storage.java b/s3stream/src/main/java/com/automq/stream/s3/S3Storage.java
deleted file mode 100644
index 966623d4e..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/S3Storage.java
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.api.exceptions.FastReadFailFastException;
-import com.automq.stream.s3.cache.CacheAccessType;
-import com.automq.stream.s3.cache.LogCache;
-import com.automq.stream.s3.cache.ReadDataBlock;
-import com.automq.stream.s3.cache.S3BlockCache;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.context.FetchContext;
-import com.automq.stream.s3.metadata.StreamMetadata;
-import com.automq.stream.s3.metrics.S3StreamMetricsManager;
-import com.automq.stream.s3.metrics.TimerUtil;
-import com.automq.stream.s3.metrics.stats.StorageOperationStats;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.objects.ObjectManager;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.s3.streams.StreamManager;
-import com.automq.stream.s3.trace.context.TraceContext;
-import com.automq.stream.s3.wal.WriteAheadLog;
-import com.automq.stream.utils.FutureTicker;
-import com.automq.stream.utils.FutureUtil;
-import com.automq.stream.utils.ThreadUtils;
-import com.automq.stream.utils.Threads;
-import io.netty.buffer.ByteBuf;
-import io.netty.util.HashedWheelTimer;
-import io.netty.util.Timeout;
-import io.opentelemetry.instrumentation.annotations.SpanAttribute;
-import io.opentelemetry.instrumentation.annotations.WithSpan;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.PriorityQueue;
-import java.util.Queue;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.utils.FutureUtil.suppress;
-
-public class S3Storage implements Storage {
- private static final Logger LOGGER = LoggerFactory.getLogger(S3Storage.class);
- private static final FastReadFailFastException FAST_READ_FAIL_FAST_EXCEPTION = new FastReadFailFastException();
- private static final int NUM_STREAM_CALLBACK_LOCKS = 128;
- private final long maxDeltaWALCacheSize;
- private final Config config;
- private final WriteAheadLog deltaWAL;
- /**
- * WAL log cache
- */
- private final LogCache deltaWALCache;
- /**
- * WAL out of order callback sequencer. {@link #streamCallbackLocks} will ensure the memory safety.
- */
- private final WALCallbackSequencer callbackSequencer = new WALCallbackSequencer();
- private final WALConfirmOffsetCalculator confirmOffsetCalculator = new WALConfirmOffsetCalculator();
- private final Queue<DeltaWALUploadTaskContext> walPrepareQueue = new LinkedList<>();
- private final Queue<DeltaWALUploadTaskContext> walCommitQueue = new LinkedList<>();
- private final List<DeltaWALUploadTaskContext> inflightWALUploadTasks = new CopyOnWriteArrayList<>();
- private final ScheduledExecutorService backgroundExecutor = Threads.newSingleThreadScheduledExecutor(
- ThreadUtils.createThreadFactory("s3-storage-background", true), LOGGER);
- private final ExecutorService uploadWALExecutor = Threads.newFixedThreadPoolWithMonitor(
- 4, "s3-storage-upload-wal", true, LOGGER);
- /**
- * A ticker used for batching force upload WAL.
- *
- * @see #forceUpload
- */
- private final FutureTicker forceUploadTicker = new FutureTicker(500, TimeUnit.MILLISECONDS, backgroundExecutor);
- private final Queue<WalWriteRequest> backoffRecords = new LinkedBlockingQueue<>();
- private final ScheduledFuture<?> drainBackoffTask;
- private final StreamManager streamManager;
- private final ObjectManager objectManager;
- private final S3Operator s3Operator;
- private final S3BlockCache blockCache;
- /**
- * Stream callback locks. Used to ensure the stream callbacks will not be called concurrently.
- *
- * @see #handleAppendCallback
- */
- private final Lock[] streamCallbackLocks = IntStream.range(0, NUM_STREAM_CALLBACK_LOCKS).mapToObj(i -> new ReentrantLock()).toArray(Lock[]::new);
- private final HashedWheelTimer timeoutDetect = new HashedWheelTimer(
- ThreadUtils.createThreadFactory("storage-timeout-detect", true), 1, TimeUnit.SECONDS, 100);
- private long lastLogTimestamp = 0L;
- private volatile double maxDataWriteRate = 0.0;
-
- public S3Storage(Config config, WriteAheadLog deltaWAL, StreamManager streamManager, ObjectManager objectManager,
- S3BlockCache blockCache, S3Operator s3Operator) {
- this.config = config;
- this.maxDeltaWALCacheSize = config.walCacheSize();
- this.deltaWAL = deltaWAL;
- this.blockCache = blockCache;
- this.deltaWALCache = new LogCache(config.walCacheSize(), config.walUploadThreshold(), config.maxStreamNumPerStreamSetObject());
- this.streamManager = streamManager;
- this.objectManager = objectManager;
- this.s3Operator = s3Operator;
- this.drainBackoffTask = this.backgroundExecutor.scheduleWithFixedDelay(this::tryDrainBackoffRecords, 100, 100, TimeUnit.MILLISECONDS);
- S3StreamMetricsManager.registerInflightWALUploadTasksCountSupplier(this.inflightWALUploadTasks::size);
- }
-
- static LogCache.LogCacheBlock recoverContinuousRecords(Iterator<WriteAheadLog.RecoverResult> it,
- List<StreamMetadata> openingStreams) {
- return recoverContinuousRecords(it, openingStreams, LOGGER);
- }
-
- /**
- * Recover continuous records in each stream from the WAL, and put them into the returned {@link LogCache.LogCacheBlock}.
- * It will filter out
- *
- * - the records that are not in the opening streams
- * - the records that have been committed
- * - the records that are not continuous, which means, all records after the first discontinuous record
- *
- *
- * It throws {@link IllegalStateException} if the start offset of the first recovered record mismatches
- * the end offset of any opening stream, which indicates data loss.
- *
- * If there are out of order records (which should never happen or there is a BUG), it will try to re-order them.
- *
- * For example, if we recover following records from the WAL in a stream:
- *
1, 2, 3, 5, 4, 6, 10, 11
- * and the {@link StreamMetadata#endOffset()} of this stream is 3. Then the returned {@link LogCache.LogCacheBlock}
- * will contain records
- * 3, 4, 5, 6
- * Here,
- *
- * - The record 1 and 2 are discarded because they have been committed (less than 3, the end offset of the stream)
- * - The record 10 and 11 are discarded because they are not continuous (10 is not 7, the next offset of 6)
- * - The record 5 and 4 are reordered because they are out of order, and we handle this bug here
- *
- */
- static LogCache.LogCacheBlock recoverContinuousRecords(Iterator<WriteAheadLog.RecoverResult> it,
- List<StreamMetadata> openingStreams, Logger logger) {
- Map<Long, Long> openingStreamEndOffsets = openingStreams.stream().collect(Collectors.toMap(StreamMetadata::streamId, StreamMetadata::endOffset));
- LogCache.LogCacheBlock cacheBlock = new LogCache.LogCacheBlock(1024L * 1024 * 1024);
- long logEndOffset = -1L;
- Map<Long, Long> streamNextOffsets = new HashMap<>();
- Map<Long, Queue<StreamRecordBatch>> streamDiscontinuousRecords = new HashMap<>();
- while (it.hasNext()) {
- WriteAheadLog.RecoverResult recoverResult = it.next();
- logEndOffset = recoverResult.recordOffset();
- ByteBuf recordBuf = recoverResult.record().duplicate();
- StreamRecordBatch streamRecordBatch = StreamRecordBatchCodec.decode(recordBuf);
- long streamId = streamRecordBatch.getStreamId();
- Long openingStreamEndOffset = openingStreamEndOffsets.get(streamId);
- if (openingStreamEndOffset == null) {
- // stream is already safe closed. so skip the stream records.
- recordBuf.release();
- continue;
- }
- if (streamRecordBatch.getBaseOffset() < openingStreamEndOffset) {
- // filter committed records.
- recordBuf.release();
- continue;
- }
-
- Long expectNextOffset = streamNextOffsets.get(streamId);
- Queue<StreamRecordBatch> discontinuousRecords = streamDiscontinuousRecords.get(streamId);
- if (expectNextOffset == null || expectNextOffset == streamRecordBatch.getBaseOffset()) {
- // continuous record, put it into cache.
- cacheBlock.put(streamRecordBatch);
- expectNextOffset = streamRecordBatch.getLastOffset();
- // check if there are some out of order records in the queue.
- if (discontinuousRecords != null) {
- while (!discontinuousRecords.isEmpty()) {
- StreamRecordBatch peek = discontinuousRecords.peek();
- if (peek.getBaseOffset() == expectNextOffset) {
- // should never happen, log it.
- logger.error("[BUG] recover an out of order record, streamId={}, expectNextOffset={}, record={}", streamId, expectNextOffset, peek);
- cacheBlock.put(peek);
- discontinuousRecords.poll();
- expectNextOffset = peek.getLastOffset();
- } else {
- break;
- }
- }
- }
- // update next offset.
- streamNextOffsets.put(streamRecordBatch.getStreamId(), expectNextOffset);
- } else {
- // unexpected record, put it into discontinuous records queue.
- if (discontinuousRecords == null) {
- discontinuousRecords = new PriorityQueue<>(Comparator.comparingLong(StreamRecordBatch::getBaseOffset));
- streamDiscontinuousRecords.put(streamId, discontinuousRecords);
- }
- discontinuousRecords.add(streamRecordBatch);
- }
- }
- // release all discontinuous records.
- streamDiscontinuousRecords.values().forEach(queue -> {
- if (queue.isEmpty()) {
- return;
- }
- logger.info("drop discontinuous records, records={}", queue);
- queue.forEach(StreamRecordBatch::release);
- });
-
- if (logEndOffset >= 0L) {
- cacheBlock.confirmOffset(logEndOffset);
- }
- cacheBlock.records().forEach((streamId, records) -> {
- if (!records.isEmpty()) {
- long startOffset = records.get(0).getBaseOffset();
- long expectedStartOffset = openingStreamEndOffsets.getOrDefault(streamId, startOffset);
- if (startOffset != expectedStartOffset) {
- throw new IllegalStateException(String.format("[BUG] WAL data may be lost, streamId %d endOffset=%d from controller, " +
- "but WAL recovered records startOffset=%s", streamId, expectedStartOffset, startOffset));
- }
- }
-
- });
-
- return cacheBlock;
- }
-
- @Override
- public void startup() {
- try {
- LOGGER.info("S3Storage starting");
- recover();
- LOGGER.info("S3Storage start completed");
- } catch (Throwable e) {
- LOGGER.error("S3Storage start fail", e);
- throw new RuntimeException(e);
- }
- }
-
- /**
- * Upload WAL to S3 and close opening streams.
- */
- public void recover() throws Throwable {
- recover0(this.deltaWAL, this.streamManager, this.objectManager, LOGGER);
- }
-
- public void recover(WriteAheadLog deltaWAL, StreamManager streamManager, ObjectManager objectManager,
- Logger logger) throws Throwable {
- recover0(deltaWAL, streamManager, objectManager, logger);
- }
-
- void recover0(WriteAheadLog deltaWAL, StreamManager streamManager, ObjectManager objectManager,
- Logger logger) throws Throwable {
- deltaWAL.start();
- List<StreamMetadata> streams = streamManager.getOpeningStreams().get();
-
- LogCache.LogCacheBlock cacheBlock = recoverContinuousRecords(deltaWAL.recover(), streams, logger);
- Map<Long, Long> streamEndOffsets = new HashMap<>();
- cacheBlock.records().forEach((streamId, records) -> {
- if (!records.isEmpty()) {
- streamEndOffsets.put(streamId, records.get(records.size() - 1).getLastOffset());
- }
- });
-
- if (cacheBlock.size() != 0) {
- logger.info("try recover from crash, recover records bytes size {}", cacheBlock.size());
- DeltaWALUploadTask task = DeltaWALUploadTask.builder().config(config).streamRecordsMap(cacheBlock.records())
- .objectManager(objectManager).s3Operator(s3Operator).executor(uploadWALExecutor).build();
- task.prepare().thenCompose(nil -> task.upload()).thenCompose(nil -> task.commit()).get();
- cacheBlock.records().forEach((streamId, records) -> records.forEach(StreamRecordBatch::release));
- }
- deltaWAL.reset().get();
- for (StreamMetadata stream : streams) {
- long newEndOffset = streamEndOffsets.getOrDefault(stream.streamId(), stream.endOffset());
- logger.info("recover try close stream {} with new end offset {}", stream, newEndOffset);
- }
- CompletableFuture.allOf(
- streams
- .stream()
- .map(s -> streamManager.closeStream(s.streamId(), s.epoch()))
- .toArray(CompletableFuture[]::new)
- ).get();
- }
-
- @Override
- public void shutdown() {
- drainBackoffTask.cancel(false);
- for (WalWriteRequest request : backoffRecords) {
- request.cf.completeExceptionally(new IOException("S3Storage is shutdown"));
- }
- deltaWAL.shutdownGracefully();
- backgroundExecutor.shutdown();
- try {
- if (!backgroundExecutor.awaitTermination(10, TimeUnit.SECONDS)) {
- LOGGER.warn("await backgroundExecutor timeout 10s");
- }
- } catch (InterruptedException e) {
- backgroundExecutor.shutdownNow();
- LOGGER.warn("await backgroundExecutor close fail", e);
- }
- }
-
- @Override
- @WithSpan
- public CompletableFuture<Void> append(AppendContext context, StreamRecordBatch streamRecord) {
- TimerUtil timerUtil = new TimerUtil();
- CompletableFuture<Void> cf = new CompletableFuture<>();
- // encode the record before append so the heap ByteBuf can be freed early.
- streamRecord.encoded();
- WalWriteRequest writeRequest = new WalWriteRequest(streamRecord, -1L, cf, context);
- handleAppendRequest(writeRequest);
- append0(context, writeRequest, false);
- cf.whenComplete((nil, ex) -> {
- streamRecord.release();
- StorageOperationStats.getInstance().appendStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- });
- return cf;
- }
-
- /**
- * Append record to WAL.
- *
- * @return backoff status.
- */
- public boolean append0(AppendContext context, WalWriteRequest request, boolean fromBackoff) {
- // TODO: storage status check, fast-fail the request when storage is closed.
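- // Preserve append order: once any request has backed off, new requests must queue behind it.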
- if (!fromBackoff && !backoffRecords.isEmpty()) {
- backoffRecords.offer(request);
- return true;
- }
- if (!tryAcquirePermit()) {
- if (!fromBackoff) {
- backoffRecords.offer(request);
- }
- StorageOperationStats.getInstance().appendLogCacheFullStats.record(0L);
- if (System.currentTimeMillis() - lastLogTimestamp > 1000L) {
- LOGGER.warn("[BACKOFF] log cache size {} is larger than {}", deltaWALCache.size(), maxDeltaWALCacheSize);
- lastLogTimestamp = System.currentTimeMillis();
- }
- return true;
- }
- WriteAheadLog.AppendResult appendResult;
- try {
- try {
- StreamRecordBatch streamRecord = request.record;
- streamRecord.retain();
- Lock lock = confirmOffsetCalculator.addLock();
- lock.lock();
- try {
- appendResult = deltaWAL.append(new TraceContext(context), streamRecord.encoded());
- } finally {
- lock.unlock();
- }
- } catch (WriteAheadLog.OverCapacityException e) {
- // The WAL writes data aligned to blocks, so 'WAL is full but LogCacheBlock is not full' may happen.
- confirmOffsetCalculator.update();
- forceUpload(LogCache.MATCH_ALL_STREAMS);
- if (!fromBackoff) {
- backoffRecords.offer(request);
- }
- if (System.currentTimeMillis() - lastLogTimestamp > 1000L) {
- LOGGER.warn("[BACKOFF] log over capacity", e);
- lastLogTimestamp = System.currentTimeMillis();
- }
- return true;
- }
- request.offset = appendResult.recordOffset();
- confirmOffsetCalculator.add(request);
- } catch (Throwable e) {
- LOGGER.error("[UNEXPECTED] append WAL fail", e);
- request.cf.completeExceptionally(e);
- return false;
- }
- appendResult.future().whenComplete((nil, ex) -> {
- if (ex != null) {
- // no exception should be thrown from the WAL
- LOGGER.error("[UNEXPECTED] append WAL fail, request {}", request, ex);
- return;
- }
- handleAppendCallback(request);
- });
- return false;
- }
-
- @SuppressWarnings("BooleanMethodIsAlwaysInverted")
- private boolean tryAcquirePermit() {
- return deltaWALCache.size() < maxDeltaWALCacheSize;
- }
-
- private void tryDrainBackoffRecords() {
- try {
- for (; ; ) {
- WalWriteRequest request = backoffRecords.peek();
- if (request == null) {
- break;
- }
- if (append0(request.context, request, true)) {
- LOGGER.warn("try drain backoff record fail, still backoff");
- break;
- }
- backoffRecords.poll();
- }
- } catch (Throwable e) {
- LOGGER.error("[UNEXPECTED] tryDrainBackoffRecords fail", e);
- }
- }
-
- @Override
- @WithSpan
- public CompletableFuture<ReadDataBlock> read(FetchContext context,
- @SpanAttribute long streamId,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes) {
- TimerUtil timerUtil = new TimerUtil();
- CompletableFuture<ReadDataBlock> cf = new CompletableFuture<>();
- FutureUtil.propagate(read0(context, streamId, startOffset, endOffset, maxBytes), cf);
- cf.whenComplete((nil, ex) -> StorageOperationStats.getInstance().readStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS)));
- return cf;
- }
-
- @WithSpan
- private CompletableFuture<ReadDataBlock> read0(FetchContext context,
- @SpanAttribute long streamId,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes) {
- List<StreamRecordBatch> logCacheRecords = deltaWALCache.get(context, streamId, startOffset, endOffset, maxBytes);
- if (!logCacheRecords.isEmpty() && logCacheRecords.get(0).getBaseOffset() <= startOffset) {
- return CompletableFuture.completedFuture(new ReadDataBlock(logCacheRecords, CacheAccessType.DELTA_WAL_CACHE_HIT));
- }
- if (context.readOptions().fastRead()) {
- // fast read fails fast when it needs to read from the block cache.
- logCacheRecords.forEach(StreamRecordBatch::release);
- logCacheRecords.clear();
- return CompletableFuture.failedFuture(FAST_READ_FAIL_FAST_EXCEPTION);
- }
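- // The log cache only holds the tail of the requested range; read the missing head from the block cache below.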
- if (!logCacheRecords.isEmpty()) {
- endOffset = logCacheRecords.get(0).getBaseOffset();
- }
- Timeout timeout = timeoutDetect.newTimeout(t -> LOGGER.warn("read from block cache timeout, stream={}, {}, maxBytes: {}", streamId, startOffset, maxBytes), 1, TimeUnit.MINUTES);
- long finalEndOffset = endOffset;
- return blockCache.read(context, streamId, startOffset, endOffset, maxBytes).thenApply(blockCacheRst -> {
- List<StreamRecordBatch> rst = new ArrayList<>(blockCacheRst.getRecords());
- int remainingBytesSize = maxBytes - rst.stream().mapToInt(StreamRecordBatch::size).sum();
- int readIndex = -1;
- for (int i = 0; i < logCacheRecords.size() && remainingBytesSize > 0; i++) {
- readIndex = i;
- StreamRecordBatch record = logCacheRecords.get(i);
- rst.add(record);
- remainingBytesSize -= record.size();
- }
- try {
- continuousCheck(rst);
- } catch (IllegalArgumentException e) {
- blockCacheRst.getRecords().forEach(StreamRecordBatch::release);
- throw e;
- }
- if (readIndex < logCacheRecords.size()) {
- // release unnecessary record
- logCacheRecords.subList(readIndex + 1, logCacheRecords.size()).forEach(StreamRecordBatch::release);
- }
- return new ReadDataBlock(rst, blockCacheRst.getCacheAccessType());
- }).whenComplete((rst, ex) -> {
- timeout.cancel();
- if (ex != null) {
- LOGGER.error("read from block cache failed, stream={}, {}-{}, maxBytes: {}",
- streamId, startOffset, finalEndOffset, maxBytes, ex);
- logCacheRecords.forEach(StreamRecordBatch::release);
- }
- });
- }
-
- private void continuousCheck(List<StreamRecordBatch> records) {
- long expectStartOffset = -1L;
- for (StreamRecordBatch record : records) {
- if (expectStartOffset == -1L || record.getBaseOffset() == expectStartOffset) {
- expectStartOffset = record.getLastOffset();
- } else {
- throw new IllegalArgumentException(String.format("Continuous check failed, expect offset: %d," +
- " actual: %d, records: %s", expectStartOffset, record.getBaseOffset(), records));
- }
- }
- }
-
- /**
- * Force upload stream WAL cache to S3. Use group upload to avoid generating too many S3 objects when the broker shuts down.
- * {@code streamId} can be {@link LogCache#MATCH_ALL_STREAMS} to force upload all streams.
- */
- @Override
- public CompletableFuture<Void> forceUpload(long streamId) {
- TimerUtil timer = new TimerUtil();
- CompletableFuture<Void> cf = new CompletableFuture<>();
- // Wait for a while to group force upload tasks.
- forceUploadTicker.tick().whenComplete((nil, ex) -> {
- StorageOperationStats.getInstance().forceUploadWALAwaitStats.record(timer.elapsedAs(TimeUnit.NANOSECONDS));
- uploadDeltaWAL(streamId, true);
- // Wait for all tasks that contain the streamId to complete.
- FutureUtil.propagate(CompletableFuture.allOf(this.inflightWALUploadTasks.stream()
- .filter(it -> it.cache.containsStream(streamId))
- .map(it -> it.cf).toArray(CompletableFuture[]::new)), cf);
- if (LogCache.MATCH_ALL_STREAMS != streamId) {
- callbackSequencer.tryFree(streamId);
- }
- });
- cf.whenComplete((nil, ex) -> StorageOperationStats.getInstance().forceUploadWALCompleteStats.record(timer.elapsedAs(TimeUnit.NANOSECONDS)));
- return cf;
- }
-
- private void handleAppendRequest(WalWriteRequest request) {
- callbackSequencer.before(request);
- }
-
- private void handleAppendCallback(WalWriteRequest request) {
- suppress(() -> handleAppendCallback0(request), LOGGER);
- }
-
- private void handleAppendCallback0(WalWriteRequest request) {
- TimerUtil timer = new TimerUtil();
- List<WalWriteRequest> waitingAckRequests;
- Lock lock = getStreamCallbackLock(request.record.getStreamId());
- lock.lock();
- try {
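- // Pop, in offset order, all requests of this stream whose WAL writes are confirmed, then move them into the log cache.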
- waitingAckRequests = callbackSequencer.after(request);
- waitingAckRequests.forEach(r -> r.record.retain());
- for (WalWriteRequest waitingAckRequest : waitingAckRequests) {
- boolean full = deltaWALCache.put(waitingAckRequest.record);
- waitingAckRequest.confirmed = true;
- if (full) {
- // cache block is full, trigger WAL upload.
- uploadDeltaWAL();
- }
- }
- } finally {
- lock.unlock();
- }
- for (WalWriteRequest waitingAckRequest : waitingAckRequests) {
- waitingAckRequest.cf.complete(null);
- }
- StorageOperationStats.getInstance().appendCallbackStats.record(timer.elapsedAs(TimeUnit.NANOSECONDS));
- }
-
- private Lock getStreamCallbackLock(long streamId) {
- return streamCallbackLocks[(int) ((streamId & Long.MAX_VALUE) % NUM_STREAM_CALLBACK_LOCKS)];
- }
-
- @SuppressWarnings("UnusedReturnValue")
- CompletableFuture<Void> uploadDeltaWAL() {
- return uploadDeltaWAL(LogCache.MATCH_ALL_STREAMS, false);
- }
-
- CompletableFuture<Void> uploadDeltaWAL(long streamId, boolean force) {
- synchronized (deltaWALCache) {
- deltaWALCache.setConfirmOffset(confirmOffsetCalculator.get());
- Optional<LogCache.LogCacheBlock> blockOpt = deltaWALCache.archiveCurrentBlockIfContains(streamId);
- if (blockOpt.isPresent()) {
- LogCache.LogCacheBlock logCacheBlock = blockOpt.get();
- DeltaWALUploadTaskContext context = new DeltaWALUploadTaskContext(logCacheBlock);
- context.objectManager = this.objectManager;
- context.force = force;
- return uploadDeltaWAL(context);
- } else {
- return CompletableFuture.completedFuture(null);
- }
- }
- }
-
- // only for test
- CompletableFuture<Void> uploadDeltaWAL(LogCache.LogCacheBlock logCacheBlock) {
- DeltaWALUploadTaskContext context = new DeltaWALUploadTaskContext(logCacheBlock);
- context.objectManager = this.objectManager;
- return uploadDeltaWAL(context);
- }
-
- /**
- * Upload cache block to S3. Earlier cache blocks get smaller object IDs and are committed first.
- */
- CompletableFuture<Void> uploadDeltaWAL(DeltaWALUploadTaskContext context) {
- context.timer = new TimerUtil();
- CompletableFuture<Void> cf = new CompletableFuture<>();
- context.cf = cf;
- inflightWALUploadTasks.add(context);
- backgroundExecutor.execute(() -> FutureUtil.exec(() -> uploadDeltaWAL0(context), cf, LOGGER, "uploadDeltaWAL"));
- cf.whenComplete((nil, ex) -> {
- StorageOperationStats.getInstance().uploadWALCompleteStats.record(context.timer.elapsedAs(TimeUnit.NANOSECONDS));
- inflightWALUploadTasks.remove(context);
- if (ex != null) {
- LOGGER.error("upload delta WAL fail", ex);
- }
- });
- return cf;
- }
-
- private void uploadDeltaWAL0(DeltaWALUploadTaskContext context) {
- // calculate upload rate
- long elapsed = System.currentTimeMillis() - context.cache.createdTimestamp();
- double rate;
- if (context.force || elapsed <= 100L) {
- rate = Long.MAX_VALUE;
- } else {
- rate = context.cache.size() * 1000.0 / Math.min(5000L, elapsed);
- if (rate > maxDataWriteRate) {
- maxDataWriteRate = rate;
- }
- rate = maxDataWriteRate;
- }
- context.task = DeltaWALUploadTask.builder()
- .config(config)
- .streamRecordsMap(context.cache.records())
- .objectManager(objectManager)
- .s3Operator(s3Operator)
- .executor(uploadWALExecutor)
- .rate(rate)
- .build();
- boolean walObjectPrepareQueueEmpty = walPrepareQueue.isEmpty();
- walPrepareQueue.add(context);
- if (!walObjectPrepareQueueEmpty) {
- // another WAL upload task is already preparing, just return.
- return;
- }
- prepareDeltaWALUpload(context);
- }
-
- private void prepareDeltaWALUpload(DeltaWALUploadTaskContext context) {
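- // prepare -> upload -> commit are pipelined through walPrepareQueue and walCommitQueue, so earlier cache blocks get smaller object IDs and commit first.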
- context.task.prepare().thenAcceptAsync(nil -> {
- StorageOperationStats.getInstance().uploadWALPrepareStats.record(context.timer.elapsedAs(TimeUnit.NANOSECONDS));
- // 1. poll out current task and trigger upload.
- DeltaWALUploadTaskContext peek = walPrepareQueue.poll();
- Objects.requireNonNull(peek).task.upload().thenAccept(nil2 -> StorageOperationStats.getInstance()
- .uploadWALUploadStats.record(context.timer.elapsedAs(TimeUnit.NANOSECONDS)));
- // 2. add task to commit queue.
- boolean walObjectCommitQueueEmpty = walCommitQueue.isEmpty();
- walCommitQueue.add(peek);
- if (walObjectCommitQueueEmpty) {
- commitDeltaWALUpload(peek);
- }
- // 3. trigger next task to prepare.
- DeltaWALUploadTaskContext next = walPrepareQueue.peek();
- if (next != null) {
- prepareDeltaWALUpload(next);
- }
- }, backgroundExecutor);
- }
-
- private void commitDeltaWALUpload(DeltaWALUploadTaskContext context) {
- context.task.commit().thenAcceptAsync(nil -> {
- StorageOperationStats.getInstance().uploadWALCommitStats.record(context.timer.elapsedAs(TimeUnit.NANOSECONDS));
- // 1. poll out current task
- walCommitQueue.poll();
- if (context.cache.confirmOffset() != 0) {
- LOGGER.info("try trim WAL to {}", context.cache.confirmOffset());
- deltaWAL.trim(context.cache.confirmOffset());
- }
- // transfer records ownership to block cache.
- freeCache(context.cache);
- context.cf.complete(null);
-
- // 2. trigger next task to commit.
- DeltaWALUploadTaskContext next = walCommitQueue.peek();
- if (next != null) {
- commitDeltaWALUpload(next);
- }
- }, backgroundExecutor).exceptionally(ex -> {
- LOGGER.error("Unexpected exception when commit stream set object", ex);
- context.cf.completeExceptionally(ex);
- System.err.println("Unexpected exception when commit stream set object");
- //noinspection CallToPrintStackTrace
- ex.printStackTrace();
- Runtime.getRuntime().halt(1);
- return null;
- });
- }
-
- private void freeCache(LogCache.LogCacheBlock cacheBlock) {
- deltaWALCache.markFree(cacheBlock);
- }
-
- /**
- * WALConfirmOffsetCalculator is used to calculate the confirmed offset of WAL.
- */
- static class WALConfirmOffsetCalculator {
- public static final long NOOP_OFFSET = -1L;
- private final ReadWriteLock rwLock = new ReentrantReadWriteLock();
- private final Queue<WalWriteRequestWrapper> queue = new ConcurrentLinkedQueue<>();
- private final AtomicLong confirmOffset = new AtomicLong(NOOP_OFFSET);
-
- public WALConfirmOffsetCalculator() {
- // Update the confirmed offset periodically.
- Threads.newSingleThreadScheduledExecutor(ThreadUtils.createThreadFactory("wal-calculator-update-confirm-offset", true), LOGGER)
- .scheduleAtFixedRate(this::update, 100, 100, TimeUnit.MILLISECONDS);
- }
-
- /**
- * Lock of {@link #add}.
- * Operations of assigning offsets, for example {@link WriteAheadLog#append}, need to be performed while holding the lock.
- */
- public Lock addLock() {
- return rwLock.readLock();
- }
-
- public void add(WalWriteRequest request) {
- assert null != request;
- queue.add(new WalWriteRequestWrapper(request));
- }
-
- /**
- * Return the offset before and including which all records have been persisted.
- * Note: It is updated by {@link #update} periodically, and is not real-time.
- */
- public Long get() {
- return confirmOffset.get();
- }
-
- /**
- * Calculate and update the confirmed offset.
- */
- public void update() {
- long offset = calculate();
- if (offset != NOOP_OFFSET) {
- confirmOffset.set(offset);
- }
- }
-
- /**
- * Calculate the offset before and including which all records have been persisted.
- * All records whose offset is not larger than the returned offset will be removed from the queue.
- * It returns {@link #NOOP_OFFSET} if the first record is not persisted yet.
- */
- private synchronized long calculate() {
- Lock lock = rwLock.writeLock();
- lock.lock();
- try {
- // Insert a flag.
- queue.add(WalWriteRequestWrapper.flag());
- } finally {
- lock.unlock();
- }
-
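- // Requests added before the flag (under the read lock) form a consistent snapshot; only they are inspected below.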
- long minUnconfirmedOffset = Long.MAX_VALUE;
- boolean reachFlag = false;
- for (WalWriteRequestWrapper wrapper : queue) {
- // Iterate the queue to find the min unconfirmed offset.
- if (wrapper.isFlag()) {
- // Reach the flag.
- reachFlag = true;
- break;
- }
- WalWriteRequest request = wrapper.request;
- assert request.offset != NOOP_OFFSET;
- if (!request.confirmed) {
- minUnconfirmedOffset = Math.min(minUnconfirmedOffset, request.offset);
- }
- }
- assert reachFlag;
-
- long confirmedOffset = NOOP_OFFSET;
- // Iterate the queue to find the max offset less than minUnconfirmedOffset.
- // Remove all records whose offset is less than minUnconfirmedOffset.
- for (Iterator<WalWriteRequestWrapper> iterator = queue.iterator(); iterator.hasNext(); ) {
- WalWriteRequestWrapper wrapper = iterator.next();
- if (wrapper.isFlag()) {
- // Reach and remove the flag.
- iterator.remove();
- break;
- }
- WalWriteRequest request = wrapper.request;
- if (request.confirmed && request.offset < minUnconfirmedOffset) {
- confirmedOffset = Math.max(confirmedOffset, request.offset);
- iterator.remove();
- }
- }
- return confirmedOffset;
- }
-
- /**
- * Wrapper of {@link WalWriteRequest}.
- * When the {@code request} is null, it is used as a flag.
- */
- static final class WalWriteRequestWrapper {
- private final WalWriteRequest request;
-
- /**
- *
- */
- WalWriteRequestWrapper(WalWriteRequest request) {
- this.request = request;
- }
-
- static WalWriteRequestWrapper flag() {
- return new WalWriteRequestWrapper(null);
- }
-
- public boolean isFlag() {
- return request == null;
- }
-
- public WalWriteRequest request() {
- return request;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (WalWriteRequestWrapper) obj;
- return Objects.equals(this.request, that.request);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(request);
- }
-
- @Override
- public String toString() {
- return "WalWriteRequestWrapper[" +
- "request=" + request + ']';
- }
-
- }
- }
-
- /**
- * WALCallbackSequencer is used to sequence the unordered returned persistent data.
- */
- static class WALCallbackSequencer {
- private final Map<Long, Queue<WalWriteRequest>> stream2requests = new ConcurrentHashMap<>();
-
- /**
- * Add request to stream sequence queue.
- * When the {@code request.record.getStreamId()} is different, concurrent calls are allowed.
- * When the {@code request.record.getStreamId()} is the same, concurrent calls are not allowed. And it is
- * necessary to ensure that calls are made in the order of increasing offsets.
- */
- public void before(WalWriteRequest request) {
- try {
- Queue<WalWriteRequest> streamRequests = stream2requests.computeIfAbsent(request.record.getStreamId(),
- s -> new ConcurrentLinkedQueue<>());
- streamRequests.add(request);
- } catch (Throwable ex) {
- request.cf.completeExceptionally(ex);
- }
- }
-
- /**
- * Try to pop sequentially persisted requests from the stream queue and move the WAL inclusive confirm offset forward.
- * When the {@code request.record.getStreamId()} is different, concurrent calls are allowed.
- * When the {@code request.record.getStreamId()} is the same, concurrent calls are not allowed.
- *
- * @return the popped sequentially persisted requests.
- */
- public List<WalWriteRequest> after(WalWriteRequest request) {
- request.persisted = true;
-
- // Try to pop sequential persisted requests from the queue.
- long streamId = request.record.getStreamId();
- Queue<WalWriteRequest> streamRequests = stream2requests.get(streamId);
- WalWriteRequest peek = streamRequests.peek();
- if (peek == null || peek.offset != request.offset) {
- return Collections.emptyList();
- }
-
- LinkedList<WalWriteRequest> rst = new LinkedList<>();
- WalWriteRequest poll = streamRequests.poll();
- assert poll == peek;
- rst.add(poll);
-
- for (; ; ) {
- peek = streamRequests.peek();
- if (peek == null || !peek.persisted) {
- break;
- }
- poll = streamRequests.poll();
- assert poll == peek;
- assert poll.record.getBaseOffset() == rst.getLast().record.getLastOffset();
- rst.add(poll);
- }
-
- return rst;
- }
-
- /**
- * Try to free stream-related resources.
- */
- public void tryFree(long streamId) {
- Queue<WalWriteRequest> queue = stream2requests.get(streamId);
- if (queue != null && queue.isEmpty()) {
- stream2requests.remove(streamId, queue);
- }
- }
- }
-
- public static class DeltaWALUploadTaskContext {
- TimerUtil timer;
- LogCache.LogCacheBlock cache;
- DeltaWALUploadTask task;
- CompletableFuture<Void> cf;
- ObjectManager objectManager;
- /**
- * Indicates whether to force upload the delta WAL.
- * If true, the delta WAL will be uploaded without rate limiting.
- */
- boolean force;
-
- public DeltaWALUploadTaskContext(LogCache.LogCacheBlock cache) {
- this.cache = cache;
- }
- }
-
- class LogCacheEvictOOMHandler implements ByteBufAlloc.OOMHandler {
- @Override
- public int handle(int memoryRequired) {
- try {
- CompletableFuture<Integer> cf = new CompletableFuture<>();
- FutureUtil.exec(() -> cf.complete(deltaWALCache.forceFree(memoryRequired)), cf, LOGGER, "handleOOM");
- return cf.get();
- } catch (Throwable e) {
- return 0;
- }
- }
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/S3Stream.java b/s3stream/src/main/java/com/automq/stream/s3/S3Stream.java
deleted file mode 100644
index a2890f60a..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/S3Stream.java
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.DefaultAppendResult;
-import com.automq.stream.RecordBatchWithContextWrapper;
-import com.automq.stream.api.AppendResult;
-import com.automq.stream.api.FetchResult;
-import com.automq.stream.api.RecordBatch;
-import com.automq.stream.api.RecordBatchWithContext;
-import com.automq.stream.api.Stream;
-import com.automq.stream.api.exceptions.ErrorCode;
-import com.automq.stream.api.exceptions.FastReadFailFastException;
-import com.automq.stream.api.exceptions.StreamClientException;
-import com.automq.stream.s3.cache.CacheAccessType;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.context.FetchContext;
-import com.automq.stream.s3.metrics.TimerUtil;
-import com.automq.stream.s3.metrics.stats.StreamOperationStats;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.network.AsyncNetworkBandwidthLimiter;
-import com.automq.stream.s3.streams.StreamManager;
-import com.automq.stream.utils.FutureUtil;
-import com.automq.stream.utils.GlobalSwitch;
-import io.netty.buffer.Unpooled;
-import io.opentelemetry.instrumentation.annotations.SpanAttribute;
-import io.opentelemetry.instrumentation.annotations.WithSpan;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.atomic.LongAdder;
-import java.util.concurrent.locks.ReentrantLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.utils.FutureUtil.exec;
-import static com.automq.stream.utils.FutureUtil.propagate;
-
-public class S3Stream implements Stream {
- private static final Logger LOGGER = LoggerFactory.getLogger(S3Stream.class);
- final AtomicLong confirmOffset;
- private final String logIdent;
- private final long streamId;
- private final long epoch;
- private final AtomicLong nextOffset;
- private final Storage storage;
- private final StreamManager streamManager;
- private final Status status;
- private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
- private final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
- private final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
- private final ReentrantLock appendLock = new ReentrantLock();
- private final Set<CompletableFuture<AppendResult>> pendingAppends = ConcurrentHashMap.newKeySet();
- private final Set<CompletableFuture<FetchResult>> pendingFetches = ConcurrentHashMap.newKeySet();
- private final AsyncNetworkBandwidthLimiter networkInboundLimiter;
- private final AsyncNetworkBandwidthLimiter networkOutboundLimiter;
- private long startOffset;
- private CompletableFuture<Void> lastPendingTrim = CompletableFuture.completedFuture(null);
-
- public S3Stream(long streamId, long epoch, long startOffset, long nextOffset, Storage storage,
- StreamManager streamManager) {
- this(streamId, epoch, startOffset, nextOffset, storage, streamManager, null, null);
- }
-
- public S3Stream(long streamId, long epoch, long startOffset, long nextOffset, Storage storage,
- StreamManager streamManager, AsyncNetworkBandwidthLimiter networkInboundLimiter, AsyncNetworkBandwidthLimiter networkOutboundLimiter) {
- this.streamId = streamId;
- this.epoch = epoch;
- this.startOffset = startOffset;
- this.logIdent = "[Stream id=" + streamId + " epoch=" + epoch + "]";
- this.nextOffset = new AtomicLong(nextOffset);
- this.confirmOffset = new AtomicLong(nextOffset);
- this.status = new Status();
- this.storage = storage;
- this.streamManager = streamManager;
- this.networkInboundLimiter = networkInboundLimiter;
- this.networkOutboundLimiter = networkOutboundLimiter;
- }
-
- public boolean isClosed() {
- return status.isClosed();
- }
-
- @Override
- public long streamId() {
- return this.streamId;
- }
-
- @Override
- public long streamEpoch() {
- return this.epoch;
- }
-
- @Override
- public long startOffset() {
- return this.startOffset;
- }
-
- @Override
- public long confirmOffset() {
- return this.confirmOffset.get();
- }
-
- @Override
- public long nextOffset() {
- return nextOffset.get();
- }
-
- @Override
- @WithSpan
- public CompletableFuture<AppendResult> append(AppendContext context, RecordBatch recordBatch) {
- TimerUtil timerUtil = new TimerUtil();
- readLock.lock();
- try {
- CompletableFuture<AppendResult> cf = exec(() -> {
- if (networkInboundLimiter != null) {
- networkInboundLimiter.forceConsume(recordBatch.rawPayload().remaining());
- }
- appendLock.lock();
- try {
- return append0(context, recordBatch);
- } finally {
- appendLock.unlock();
- }
- }, LOGGER, "append");
- pendingAppends.add(cf);
- cf.whenComplete((nil, ex) -> {
- StreamOperationStats.getInstance().appendStreamStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- pendingAppends.remove(cf);
- });
- return cf;
- } finally {
- readLock.unlock();
- }
- }
-
- @WithSpan
- private CompletableFuture<AppendResult> append0(AppendContext context, RecordBatch recordBatch) {
- if (!status.isWritable()) {
- return FutureUtil.failedFuture(new StreamClientException(ErrorCode.STREAM_ALREADY_CLOSED, logIdent + " stream is not writable"));
- }
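- // Allocate a contiguous offset range for this batch; the appendLock held by the caller keeps allocation order consistent with the append order.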
- long offset = nextOffset.getAndAdd(recordBatch.count());
- StreamRecordBatch streamRecordBatch = new StreamRecordBatch(streamId, epoch, offset, recordBatch.count(), Unpooled.wrappedBuffer(recordBatch.rawPayload()));
- CompletableFuture<AppendResult> cf = storage.append(context, streamRecordBatch).thenApply(nil -> {
- updateConfirmOffset(offset + recordBatch.count());
- return new DefaultAppendResult(offset);
- });
- return cf.whenComplete((rst, ex) -> {
- if (ex == null) {
- return;
- }
- // The WAL should keep retrying append until the stream is fenced or the WAL is closed.
- status.markFenced();
- if (ex instanceof StreamClientException && ((StreamClientException) ex).getCode() == ErrorCode.EXPIRED_STREAM_EPOCH) {
- LOGGER.info("{} stream append, stream is fenced", logIdent);
- } else {
- LOGGER.warn("{} stream append fail", logIdent, ex);
- }
- });
- }
-
- @Override
- @WithSpan
- public CompletableFuture<FetchResult> fetch(FetchContext context,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes) {
- TimerUtil timerUtil = new TimerUtil();
- readLock.lock();
- try {
- CompletableFuture<FetchResult> cf = exec(() -> fetch0(context, startOffset, endOffset, maxBytes), LOGGER, "fetch");
- pendingFetches.add(cf);
- cf.whenComplete((rs, ex) -> {
- StreamOperationStats.getInstance().fetchStreamStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- if (ex != null) {
- Throwable cause = FutureUtil.cause(ex);
- if (!(cause instanceof FastReadFailFastException)) {
- LOGGER.error("{} stream fetch [{}, {}) {} fail", logIdent, startOffset, endOffset, maxBytes, ex);
- }
- } else if (networkOutboundLimiter != null) {
- long totalSize = 0L;
- for (RecordBatch recordBatch : rs.recordBatchList()) {
- totalSize += recordBatch.rawPayload().remaining();
- }
- networkOutboundLimiter.forceConsume(totalSize);
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] fetch data, stream={}, {}-{}, total bytes: {}, cost={}ms", streamId,
- startOffset, endOffset, totalSize, timerUtil.elapsedAs(TimeUnit.MILLISECONDS));
- }
- }
- pendingFetches.remove(cf);
- });
- return cf;
- } finally {
- readLock.unlock();
- }
- }
-
- @WithSpan
- private CompletableFuture<FetchResult> fetch0(FetchContext context, long startOffset, long endOffset,
- int maxBytes) {
- if (!status.isReadable()) {
- return FutureUtil.failedFuture(new StreamClientException(ErrorCode.STREAM_ALREADY_CLOSED, logIdent + " stream is already closed"));
- }
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("{} stream try fetch, startOffset: {}, endOffset: {}, maxBytes: {}", logIdent, startOffset, endOffset, maxBytes);
- }
- long confirmOffset = this.confirmOffset.get();
- if (startOffset < startOffset() || endOffset > confirmOffset) {
- return FutureUtil.failedFuture(
- new StreamClientException(
- ErrorCode.OFFSET_OUT_OF_RANGE_BOUNDS,
- String.format("fetch range[%s, %s) is out of stream bound [%s, %s)", startOffset, endOffset, startOffset(), confirmOffset)
- ));
- }
- if (startOffset > endOffset) {
- return FutureUtil.failedFuture(new IllegalArgumentException(String.format("fetch startOffset %s is greater than endOffset %s", startOffset, endOffset)));
- }
- if (startOffset == endOffset) {
- return CompletableFuture.completedFuture(new DefaultFetchResult(Collections.emptyList(), CacheAccessType.DELTA_WAL_CACHE_HIT, false));
- }
- return storage.read(context, streamId, startOffset, endOffset, maxBytes).thenApply(dataBlock -> {
- List<StreamRecordBatch> records = dataBlock.getRecords();
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("{} stream fetch, startOffset: {}, endOffset: {}, maxBytes: {}, records: {}", logIdent, startOffset, endOffset, maxBytes, records.size());
- }
- return new DefaultFetchResult(records, dataBlock.getCacheAccessType(), context.readOptions().pooledBuf());
- });
- }
-
- @Override
- public CompletableFuture<Void> trim(long newStartOffset) {
- writeLock.lock();
- try {
- TimerUtil timerUtil = new TimerUtil();
- return exec(() -> {
- CompletableFuture<Void> cf = new CompletableFuture<>();
- lastPendingTrim.whenComplete((nil, ex) -> propagate(trim0(newStartOffset), cf));
- this.lastPendingTrim = cf;
- cf.whenComplete((nil, ex) -> StreamOperationStats.getInstance().trimStreamStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS)));
- return cf;
- }, LOGGER, "trim");
- } finally {
- writeLock.unlock();
- }
- }
-
- private CompletableFuture<Void> trim0(long newStartOffset) {
- if (newStartOffset < this.startOffset) {
- LOGGER.warn("{} trim newStartOffset[{}] less than current start offset[{}]", logIdent, newStartOffset, startOffset);
- return CompletableFuture.completedFuture(null);
- }
- this.startOffset = newStartOffset;
- CompletableFuture<Void> trimCf = new CompletableFuture<>();
- // await all pending fetches to complete, to avoid the trim offset intersecting with in-flight fetches.
- CompletableFuture<Void> awaitPendingFetchesCf = CompletableFuture.allOf(pendingFetches.toArray(new CompletableFuture[0]));
- awaitPendingFetchesCf.whenComplete((nil, ex) -> propagate(streamManager.trimStream(streamId, epoch, newStartOffset), trimCf));
- trimCf.whenComplete((nil, ex) -> {
- if (ex != null) {
- LOGGER.error("{} trim fail", logIdent, ex);
- } else {
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("{} trim to {}", logIdent, newStartOffset);
- }
- }
- });
- return trimCf;
- }
-
- @Override
- public CompletableFuture<Void> close() {
- TimerUtil timerUtil = new TimerUtil();
- writeLock.lock();
- try {
- status.markClosed();
-
- // await all pending append/fetch/trim request
- List<CompletableFuture<?>> pendingRequests = new ArrayList<>(pendingAppends);
- if (GlobalSwitch.STRICT) {
- pendingRequests.addAll(pendingFetches);
- }
- pendingRequests.add(lastPendingTrim);
- CompletableFuture<Void> awaitPendingRequestsCf = CompletableFuture.allOf(pendingRequests.toArray(new CompletableFuture[0]));
- CompletableFuture<Void> closeCf = new CompletableFuture<>();
-
- awaitPendingRequestsCf.whenComplete((nil, ex) -> propagate(exec(this::close0, LOGGER, "close"), closeCf));
-
- closeCf.whenComplete((nil, ex) -> {
- if (ex != null) {
- LOGGER.error("{} close fail", logIdent, ex);
- StreamOperationStats.getInstance().closeStreamStats(false).record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- } else {
- LOGGER.info("{} closed", logIdent);
- StreamOperationStats.getInstance().closeStreamStats(true).record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- }
- });
-
- return closeCf;
- } finally {
- writeLock.unlock();
- }
- }
-
- private CompletableFuture<Void> close0() {
- return storage.forceUpload(streamId)
- .thenCompose(nil -> streamManager.closeStream(streamId, epoch));
- }
-
- @Override
- public CompletableFuture<Void> destroy() {
- writeLock.lock();
- try {
- CompletableFuture<Void> destroyCf = close().thenCompose(nil -> exec(this::destroy0, LOGGER, "destroy"));
- destroyCf.whenComplete((nil, ex) -> {
- if (ex != null) {
- LOGGER.error("{} destroy fail", logIdent, ex);
- } else {
- LOGGER.info("{} destroyed", logIdent);
- }
- });
- return destroyCf;
- } finally {
- writeLock.unlock();
- }
- }
-
- private CompletableFuture<Void> destroy0() {
- status.markDestroy();
- startOffset = this.confirmOffset.get();
- return streamManager.deleteStream(streamId, epoch);
- }
-
- private void updateConfirmOffset(long newOffset) {
- for (; ; ) {
- long oldConfirmOffset = confirmOffset.get();
- if (oldConfirmOffset >= newOffset) {
- break;
- }
- if (confirmOffset.compareAndSet(oldConfirmOffset, newOffset)) {
- LOGGER.trace("{} stream update confirm offset from {} to {}", logIdent, oldConfirmOffset, newOffset);
- break;
- }
- }
- }
-
- static class DefaultFetchResult implements FetchResult {
- private static final LongAdder INFLIGHT = new LongAdder();
- private final List<StreamRecordBatch> pooledRecords;
- private final List<RecordBatchWithContext> records;
- private final CacheAccessType cacheAccessType;
- private final boolean pooledBuf;
- private volatile boolean freed = false;
-
- public DefaultFetchResult(List<StreamRecordBatch> streamRecords, CacheAccessType cacheAccessType,
- boolean pooledBuf) {
- this.pooledRecords = streamRecords;
- this.pooledBuf = pooledBuf;
- this.records = new ArrayList<>(streamRecords.size());
- for (StreamRecordBatch streamRecordBatch : streamRecords) {
- RecordBatch recordBatch = covert(streamRecordBatch, pooledBuf);
- records.add(new RecordBatchWithContextWrapper(recordBatch, streamRecordBatch.getBaseOffset()));
- }
- this.cacheAccessType = cacheAccessType;
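- // Non-pooled results copied the payload above, so the records can be released now; pooled results keep the reference until free().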
- if (!pooledBuf) {
- streamRecords.forEach(StreamRecordBatch::release);
- } else {
- INFLIGHT.increment();
- }
- }
-
- private static RecordBatch covert(StreamRecordBatch streamRecordBatch, boolean pooledBuf) {
- ByteBuffer buf;
- if (pooledBuf) {
- buf = streamRecordBatch.getPayload().nioBuffer();
- } else {
- buf = ByteBuffer.allocate(streamRecordBatch.size());
- streamRecordBatch.getPayload().duplicate().readBytes(buf);
- buf.flip();
- }
- return new RecordBatch() {
- @Override
- public int count() {
- return streamRecordBatch.getCount();
- }
-
- @Override
- public long baseTimestamp() {
- return streamRecordBatch.getEpoch();
- }
-
- @Override
- public Map<String, String> properties() {
- return Collections.emptyMap();
- }
-
- @Override
- public ByteBuffer rawPayload() {
- return buf;
- }
- };
- }
-
- @Override
- public List<RecordBatchWithContext> recordBatchList() {
- return records;
- }
-
- @Override
- public CacheAccessType getCacheAccessType() {
- return cacheAccessType;
- }
-
- @Override
- public void free() {
- if (!freed && pooledBuf) {
- pooledRecords.forEach(StreamRecordBatch::release);
- INFLIGHT.decrement();
- }
- freed = true;
- }
- }
-
- static class Status {
- private static final int CLOSED_MARK = 1;
- private static final int FENCED_MARK = 1 << 1;
- private static final int DESTROY_MARK = 1 << 2;
- private final AtomicInteger status = new AtomicInteger();
-
- public void markFenced() {
- status.getAndUpdate(operand -> operand | FENCED_MARK);
- }
-
- public void markClosed() {
- status.getAndUpdate(operand -> operand | CLOSED_MARK);
- }
-
- public void markDestroy() {
- status.getAndUpdate(operand -> operand | DESTROY_MARK);
- }
-
- public boolean isClosed() {
- return (status.get() & CLOSED_MARK) != 0;
- }
-
- public boolean isWritable() {
- return status.get() == 0;
- }
-
- public boolean isReadable() {
- return status.get() == 0;
- }
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/S3StreamClient.java b/s3stream/src/main/java/com/automq/stream/s3/S3StreamClient.java
deleted file mode 100644
index 2dee62a96..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/S3StreamClient.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.api.AppendResult;
-import com.automq.stream.api.CreateStreamOptions;
-import com.automq.stream.api.FetchResult;
-import com.automq.stream.api.OpenStreamOptions;
-import com.automq.stream.api.RecordBatch;
-import com.automq.stream.api.Stream;
-import com.automq.stream.api.StreamClient;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.context.FetchContext;
-import com.automq.stream.s3.metrics.TimerUtil;
-import com.automq.stream.s3.metrics.stats.StreamOperationStats;
-import com.automq.stream.s3.network.AsyncNetworkBandwidthLimiter;
-import com.automq.stream.s3.objects.ObjectManager;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.s3.streams.StreamManager;
-import com.automq.stream.utils.FutureUtil;
-import com.automq.stream.utils.ThreadUtils;
-import com.automq.stream.utils.Threads;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class S3StreamClient implements StreamClient {
- private static final Logger LOGGER = LoggerFactory.getLogger(S3StreamClient.class);
- private static final long STREAM_OBJECT_COMPACTION_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1);
- private final ScheduledExecutorService streamObjectCompactionScheduler = Threads.newSingleThreadScheduledExecutor(
- ThreadUtils.createThreadFactory("stream-object-compaction-scheduler", true), LOGGER, true);
- private final Map<Long, StreamWrapper> openedStreams;
- private final StreamManager streamManager;
- private final Storage storage;
- private final ObjectManager objectManager;
- private final S3Operator s3Operator;
- private final Config config;
- private final AsyncNetworkBandwidthLimiter networkInboundBucket;
- private final AsyncNetworkBandwidthLimiter networkOutboundBucket;
- private ScheduledFuture<?> scheduledCompactionTaskFuture;
-
- @SuppressWarnings("unused")
- public S3StreamClient(StreamManager streamManager, Storage storage, ObjectManager objectManager,
- S3Operator s3Operator, Config config) {
- this(streamManager, storage, objectManager, s3Operator, config, null, null);
- }
-
- public S3StreamClient(StreamManager streamManager, Storage storage, ObjectManager objectManager,
- S3Operator s3Operator, Config config,
- AsyncNetworkBandwidthLimiter networkInboundBucket, AsyncNetworkBandwidthLimiter networkOutboundBucket) {
- this.streamManager = streamManager;
- this.storage = storage;
- this.openedStreams = new ConcurrentHashMap<>();
- this.objectManager = objectManager;
- this.s3Operator = s3Operator;
- this.config = config;
- this.networkInboundBucket = networkInboundBucket;
- this.networkOutboundBucket = networkOutboundBucket;
- startStreamObjectsCompactions();
- }
-
- @Override
- public CompletableFuture<Stream> createAndOpenStream(CreateStreamOptions options) {
- TimerUtil timerUtil = new TimerUtil();
- return FutureUtil.exec(() -> streamManager.createStream().thenCompose(streamId -> {
- StreamOperationStats.getInstance().createStreamStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- return openStream0(streamId, options.epoch());
- }), LOGGER, "createAndOpenStream");
- }
-
- @Override
- public CompletableFuture<Stream> openStream(long streamId, OpenStreamOptions openStreamOptions) {
- return FutureUtil.exec(() -> openStream0(streamId, openStreamOptions.epoch()), LOGGER, "openStream");
- }
-
- @Override
- public Optional<Stream> getStream(long streamId) {
- return Optional.ofNullable(openedStreams.get(streamId));
- }
-
- /**
- * Start stream objects compactions.
- */
- private void startStreamObjectsCompactions() {
- scheduledCompactionTaskFuture = streamObjectCompactionScheduler.scheduleWithFixedDelay(() -> {
- List<StreamWrapper> operationStreams = new ArrayList<>(openedStreams.values());
- operationStreams.forEach(StreamWrapper::compactStreamObject);
- }, config.streamObjectCompactionIntervalMinutes(), config.streamObjectCompactionIntervalMinutes(), TimeUnit.MINUTES);
- }
-
- private CompletableFuture<Stream> openStream0(long streamId, long epoch) {
- TimerUtil timerUtil = new TimerUtil();
- return streamManager.openStream(streamId, epoch).
- thenApply(metadata -> {
- StreamWrapper stream = new StreamWrapper(new S3Stream(
- metadata.streamId(), metadata.epoch(),
- metadata.startOffset(), metadata.endOffset(),
- storage, streamManager, networkInboundBucket, networkOutboundBucket));
- openedStreams.put(streamId, stream);
- StreamOperationStats.getInstance().openStreamStats.record(timerUtil.elapsedAs(TimeUnit.NANOSECONDS));
- return stream;
- });
- }
-
- @Override
- public void shutdown() {
- // cancel the submitted task if not started; do not interrupt the task if it is running.
- if (scheduledCompactionTaskFuture != null) {
- scheduledCompactionTaskFuture.cancel(false);
- }
- streamObjectCompactionScheduler.shutdown();
- try {
- if (!streamObjectCompactionScheduler.awaitTermination(10, TimeUnit.SECONDS)) {
- LOGGER.warn("await streamObjectCompactionExecutor timeout 10s");
- streamObjectCompactionScheduler.shutdownNow();
- }
- } catch (InterruptedException e) {
- streamObjectCompactionScheduler.shutdownNow();
- LOGGER.warn("await streamObjectCompactionExecutor close fail", e);
- }
-
- TimerUtil timerUtil = new TimerUtil();
- Map<Long, CompletableFuture<Void>> streamCloseFutures = new ConcurrentHashMap<>();
- openedStreams.forEach((streamId, stream) -> streamCloseFutures.put(streamId, stream.close()));
- for (; ; ) {
- Threads.sleep(1000);
- List<Long> closingStreams = streamCloseFutures.entrySet().stream().filter(e -> !e.getValue().isDone()).map(Map.Entry::getKey).collect(Collectors.toList());
- LOGGER.info("waiting streams close, closed {} / all {}, closing[{}]", streamCloseFutures.size() - closingStreams.size(), streamCloseFutures.size(), closingStreams);
- if (closingStreams.isEmpty()) {
- break;
- }
- }
- LOGGER.info("wait streams[{}] closed cost {}ms", streamCloseFutures.keySet(), timerUtil.elapsedAs(TimeUnit.MILLISECONDS));
- }
-
- class StreamWrapper implements Stream {
- private final S3Stream stream;
- private final Semaphore trimCompactionSemaphore = new Semaphore(1);
- private volatile long lastCompactionTimestamp = 0;
-
- public StreamWrapper(S3Stream stream) {
- this.stream = stream;
- }
-
- @Override
- public long streamId() {
- return stream.streamId();
- }
-
- @Override
- public long streamEpoch() {
- return stream.streamEpoch();
- }
-
- @Override
- public long startOffset() {
- return stream.startOffset();
- }
-
- @Override
- public long confirmOffset() {
- return stream.confirmOffset();
- }
-
- @Override
- public long nextOffset() {
- return stream.nextOffset();
- }
-
- @Override
- public CompletableFuture<AppendResult> append(AppendContext context, RecordBatch recordBatch) {
- return stream.append(context, recordBatch);
- }
-
- @Override
- public CompletableFuture<FetchResult> fetch(FetchContext context, long startOffset, long endOffset,
- int maxBytesHint) {
- return stream.fetch(context, startOffset, endOffset, maxBytesHint);
- }
-
- @Override
- public CompletableFuture<Void> trim(long newStartOffset) {
- return stream.trim(newStartOffset).whenComplete((nil, ex) -> {
- if (!trimCompactionSemaphore.tryAcquire()) {
- // ensure only one trim-triggered compaction task runs at a time
- return;
- }
- streamObjectCompactionScheduler.execute(() -> {
- try {
- // trigger compaction after trim to clean up the expired stream objects.
- this.cleanupStreamObject();
- } finally {
- trimCompactionSemaphore.release();
- }
- });
- });
-
- }
-
- @Override
- public CompletableFuture<Void> close() {
- return stream.close().whenComplete((v, e) -> openedStreams.remove(streamId(), this));
- }
-
- @Override
- public CompletableFuture<Void> destroy() {
- return stream.destroy().whenComplete((v, e) -> openedStreams.remove(streamId(), this));
- }
-
- public boolean isClosed() {
- return stream.isClosed();
- }
-
- public void cleanupStreamObject() {
- compactStreamObject0(true);
- }
-
- public void compactStreamObject() {
- compactStreamObject0(false);
- }
-
- public void compactStreamObject0(boolean onlyCleanup) {
- if (isClosed()) {
- // the compaction task may be taking a long time,
- // so we need to check if the stream is closed before starting the compaction.
- return;
- }
- if (System.currentTimeMillis() - lastCompactionTimestamp < STREAM_OBJECT_COMPACTION_INTERVAL_MS) {
- // skip compaction if the last compaction is within the interval.
- return;
- }
- StreamObjectCompactor task = StreamObjectCompactor.builder().objectManager(objectManager).stream(this)
- .s3Operator(s3Operator).maxStreamObjectSize(config.streamObjectCompactionMaxSizeBytes()).build();
- if (onlyCleanup) {
- task.cleanup();
- } else {
- task.compact();
- }
- lastCompactionTimestamp = System.currentTimeMillis();
- }
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/Storage.java b/s3stream/src/main/java/com/automq/stream/s3/Storage.java
deleted file mode 100644
index b2b3219e9..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/Storage.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.s3.cache.ReadDataBlock;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.context.FetchContext;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import java.util.concurrent.CompletableFuture;
-
-/**
- * Write ahead log for server.
- */
-public interface Storage {
-
- void startup();
-
- void shutdown();
-
- /**
- * Append stream record.
- *
- * @param streamRecord {@link StreamRecordBatch}
- */
- CompletableFuture<Void> append(AppendContext context, StreamRecordBatch streamRecord);
-
- default CompletableFuture<Void> append(StreamRecordBatch streamRecord) {
- return append(AppendContext.DEFAULT, streamRecord);
- }
-
- CompletableFuture<ReadDataBlock> read(FetchContext context, long streamId, long startOffset, long endOffset,
- int maxBytes);
-
- default CompletableFuture<ReadDataBlock> read(long streamId, long startOffset, long endOffset, int maxBytes) {
- return read(FetchContext.DEFAULT, streamId, startOffset, endOffset, maxBytes);
- }
-
- /**
- * Force stream records in the WAL to be uploaded to S3.
- */
- CompletableFuture<Void> forceUpload(long streamId);
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/StreamDataBlock.java b/s3stream/src/main/java/com/automq/stream/s3/StreamDataBlock.java
deleted file mode 100644
index 040d6567f..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/StreamDataBlock.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import io.netty.buffer.ByteBuf;
-import java.util.Comparator;
-import java.util.Objects;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.atomic.AtomicInteger;
-
-public class StreamDataBlock {
- public static final Comparator<StreamDataBlock> STREAM_OFFSET_COMPARATOR = Comparator.comparingLong(StreamDataBlock::getStartOffset);
- public static final Comparator<StreamDataBlock> BLOCK_POSITION_COMPARATOR = Comparator.comparingLong(StreamDataBlock::getBlockStartPosition);
- private final long objectId;
- private final DataBlockIndex dataBlockIndex;
- private final CompletableFuture<ByteBuf> dataCf = new CompletableFuture<>();
- private final AtomicInteger refCount = new AtomicInteger(1);
-
- public StreamDataBlock(long objectId, DataBlockIndex dataBlockIndex) {
- this.dataBlockIndex = dataBlockIndex;
- this.objectId = objectId;
- }
-
- public StreamDataBlock(long streamId, long startOffset, long endOffset,
- long objectId, long blockPosition, int blockSize, int recordCount) {
- this.objectId = objectId;
- this.dataBlockIndex = new DataBlockIndex(streamId, startOffset, (int) (endOffset - startOffset), recordCount, blockPosition, blockSize);
- }
-
- public long getStreamId() {
- return dataBlockIndex.streamId();
- }
-
- public long getStartOffset() {
- return dataBlockIndex.startOffset();
- }
-
- public long getEndOffset() {
- return dataBlockIndex.endOffset();
- }
-
- public long getStreamRangeSize() {
- return dataBlockIndex.endOffsetDelta();
- }
-
- public long getObjectId() {
- return objectId;
- }
-
- public long getBlockStartPosition() {
- return dataBlockIndex.startPosition();
- }
-
- public long getBlockEndPosition() {
- return dataBlockIndex.endPosition();
- }
-
- public int getBlockSize() {
- return dataBlockIndex.size();
- }
-
- public DataBlockIndex dataBlockIndex() {
- return dataBlockIndex;
- }
-
- public CompletableFuture<ByteBuf> getDataCf() {
- return this.dataCf;
- }
-
- public void releaseRef() {
- refCount.decrementAndGet();
- }
-
- public void release() {
- if (refCount.decrementAndGet() == 0) {
- dataCf.thenAccept(buf -> {
- if (buf != null) {
- buf.release();
- }
- });
- }
- }
-
- @Override
- public String toString() {
- return "StreamDataBlock{" +
- "objectId=" + objectId +
- ", dataBlockIndex=" + dataBlockIndex +
- '}';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o)
- return true;
- if (o == null || getClass() != o.getClass())
- return false;
- StreamDataBlock that = (StreamDataBlock) o;
- return objectId == that.objectId && dataBlockIndex.equals(that.dataBlockIndex);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(objectId, dataBlockIndex);
- }
-
-}
\ No newline at end of file
diff --git a/s3stream/src/main/java/com/automq/stream/s3/StreamObjectCompactor.java b/s3stream/src/main/java/com/automq/stream/s3/StreamObjectCompactor.java
deleted file mode 100644
index d0218a5c9..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/StreamObjectCompactor.java
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.api.Stream;
-import com.automq.stream.s3.metadata.ObjectUtils;
-import com.automq.stream.s3.metadata.S3ObjectMetadata;
-import com.automq.stream.s3.network.ThrottleStrategy;
-import com.automq.stream.s3.objects.CompactStreamObjectRequest;
-import com.automq.stream.s3.objects.ObjectManager;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.s3.operator.Writer;
-import io.netty.buffer.ByteBuf;
-import io.netty.buffer.CompositeByteBuf;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Optional;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.s3.ByteBufAlloc.STREAM_OBJECT_COMPACTION_READ;
-import static com.automq.stream.s3.ByteBufAlloc.STREAM_OBJECT_COMPACTION_WRITE;
-import static com.automq.stream.s3.metadata.ObjectUtils.NOOP_OBJECT_ID;
-import static com.automq.stream.s3.metadata.ObjectUtils.NOOP_OFFSET;
-
-/**
- * Stream objects compaction task.
- * It intends to:
- * 1. Clean up expired stream objects.
- * 2. Compact some stream objects with the same stream ID into bigger stream objects.
- */
-public class StreamObjectCompactor {
- /**
- * Max object count in one group. The group count limits the compact request size sent to KRaft and the
- * multipart object part count (less than {@code Writer.MAX_PART_COUNT}).
- */
- private static final int MAX_OBJECT_GROUP_COUNT = Math.min(5000, Writer.MAX_PART_COUNT / 2);
- private static final Logger LOGGER = LoggerFactory.getLogger(StreamObjectCompactor.class);
- public static final int DEFAULT_DATA_BLOCK_GROUP_SIZE_THRESHOLD = 1024 * 1024; // 1MiB
- private final Logger s3ObjectLogger;
- private final long maxStreamObjectSize;
- private final Stream stream;
- private final ObjectManager objectManager;
- private final S3Operator s3Operator;
- private final int dataBlockGroupSizeThreshold;
- private CompactStreamObjectRequest request;
-
- private StreamObjectCompactor(ObjectManager objectManager, S3Operator s3Operator, Stream stream,
- long maxStreamObjectSize, int dataBlockGroupSizeThreshold) {
- this.objectManager = objectManager;
- this.s3Operator = s3Operator;
- this.stream = stream;
- this.maxStreamObjectSize = Math.min(maxStreamObjectSize, Writer.MAX_OBJECT_SIZE);
- String logIdent = "[StreamObjectsCompactionTask streamId=" + stream.streamId() + "] ";
- this.s3ObjectLogger = S3ObjectLogger.logger(logIdent);
- this.dataBlockGroupSizeThreshold = dataBlockGroupSizeThreshold;
- }
-
- public void compact() {
- try {
- compact0(false);
- } catch (Throwable e) {
- handleCompactException(false, e);
- }
- }
-
- /**
- * Cleanup expired stream objects
- */
- public void cleanup() {
- try {
- compact0(true);
- } catch (Throwable e) {
- handleCompactException(true, e);
- }
- }
-
- private void handleCompactException(boolean onlyCleanup, Throwable e) {
- if (stream instanceof S3StreamClient.StreamWrapper && ((S3StreamClient.StreamWrapper) stream).isClosed()) {
- LOGGER.warn("[STREAM_OBJECT_COMPACT_FAIL],[STREAM_CLOSED],{},onlyCleanup={},req={}", stream.streamId(), onlyCleanup, request, e);
- } else {
- LOGGER.error("[STREAM_OBJECT_COMPACT_FAIL],[UNEXPECTED],{},onlyCleanup={},req={}", stream.streamId(), onlyCleanup, request, e);
- }
- }
-
- void compact0(boolean onlyCleanup) throws ExecutionException, InterruptedException {
- long streamId = stream.streamId();
- long startOffset = stream.startOffset();
-
- List<S3ObjectMetadata> objects = objectManager.getStreamObjects(stream.streamId(), 0L, stream.confirmOffset(), Integer.MAX_VALUE).get();
- List<S3ObjectMetadata> expiredObjects = new ArrayList<>(objects.size());
- List<S3ObjectMetadata> livingObjects = new ArrayList<>(objects.size());
- for (S3ObjectMetadata object : objects) {
- if (object.endOffset() <= startOffset) {
- expiredObjects.add(object);
- } else {
- livingObjects.add(object);
- }
- }
-
- // clean up the expired objects
- if (!expiredObjects.isEmpty()) {
- List<Long> compactedObjectIds = expiredObjects.stream().map(S3ObjectMetadata::objectId).collect(Collectors.toList());
- request = new CompactStreamObjectRequest(NOOP_OBJECT_ID, 0,
- streamId, stream.streamEpoch(), NOOP_OFFSET, NOOP_OFFSET, compactedObjectIds);
- objectManager.compactStreamObject(request).get();
- if (s3ObjectLogger.isTraceEnabled()) {
- s3ObjectLogger.trace("{}", request);
- }
- }
-
- if (onlyCleanup) {
- return;
- }
-
- // compact the living objects
- List<List<S3ObjectMetadata>> objectGroups = group0(livingObjects, maxStreamObjectSize);
- for (List<S3ObjectMetadata> objectGroup : objectGroups) {
- // skip the group when it is a single object and none of its data blocks need to be removed.
- if (objectGroup.size() == 1 && objectGroup.get(0).startOffset() >= startOffset) {
- continue;
- }
- long objectId = objectManager.prepareObject(1, TimeUnit.MINUTES.toMillis(60)).get();
- Optional<CompactStreamObjectRequest> requestOpt = new StreamObjectGroupCompactor(streamId, stream.streamEpoch(),
- startOffset, objectGroup, objectId, dataBlockGroupSizeThreshold, s3Operator).compact();
- if (requestOpt.isPresent()) {
- request = requestOpt.get();
- objectManager.compactStreamObject(request).get();
- if (s3ObjectLogger.isTraceEnabled()) {
- s3ObjectLogger.trace("{}", request);
- }
- }
- }
- }
-
- static class StreamObjectGroupCompactor {
- private final List<S3ObjectMetadata> objectGroup;
- private final long streamId;
- private final long streamEpoch;
- private final long startOffset;
- // compact object group to the new object
- private final long objectId;
- private final S3Operator s3Operator;
- private final int dataBlockGroupSizeThreshold;
-
- public StreamObjectGroupCompactor(long streamId, long streamEpoch, long startOffset,
- List<S3ObjectMetadata> objectGroup,
- long objectId, int dataBlockGroupSizeThreshold, S3Operator s3Operator) {
- this.streamId = streamId;
- this.streamEpoch = streamEpoch;
- this.startOffset = startOffset;
- this.objectGroup = objectGroup;
- this.objectId = objectId;
- this.dataBlockGroupSizeThreshold = dataBlockGroupSizeThreshold;
- this.s3Operator = s3Operator;
- }
-
- public Optional<CompactStreamObjectRequest> compact() throws ExecutionException, InterruptedException {
- long nextBlockPosition = 0;
- long objectSize = 0;
- long compactedStartOffset = objectGroup.get(0).startOffset();
- long compactedEndOffset = objectGroup.get(objectGroup.size() - 1).endOffset();
- List<Long> compactedObjectIds = new LinkedList<>();
- CompositeByteBuf indexes = ByteBufAlloc.compositeByteBuffer();
- Writer writer = s3Operator.writer(new Writer.Context(STREAM_OBJECT_COMPACTION_READ), ObjectUtils.genKey(0, objectId), ThrottleStrategy.THROTTLE_2);
- long groupStartOffset = -1L;
- long groupStartPosition = -1L;
- int groupSize = 0;
- int groupRecordCount = 0;
- DataBlockIndex lastIndex = null;
- for (S3ObjectMetadata object : objectGroup) {
- try (ObjectReader reader = new ObjectReader(object, s3Operator)) {
- ObjectReader.BasicObjectInfo basicObjectInfo = reader.basicObjectInfo().get();
- ByteBuf subIndexes = ByteBufAlloc.byteBuffer(basicObjectInfo.indexBlock().count() * DataBlockIndex.BLOCK_INDEX_SIZE, STREAM_OBJECT_COMPACTION_WRITE);
- Iterator<DataBlockIndex> it = basicObjectInfo.indexBlock().iterator();
- long validDataBlockStartPosition = 0;
- while (it.hasNext()) {
- DataBlockIndex dataBlock = it.next();
- if (dataBlock.endOffset() <= startOffset) {
- validDataBlockStartPosition = dataBlock.endPosition();
- compactedStartOffset = dataBlock.endOffset();
- continue;
- }
- if (groupSize == 0 // the first data block
- || (long) groupSize + dataBlock.size() > dataBlockGroupSizeThreshold
- || (long) groupRecordCount + dataBlock.recordCount() > Integer.MAX_VALUE
- || dataBlock.endOffset() - groupStartOffset > Integer.MAX_VALUE) {
- if (groupSize != 0) {
- new DataBlockIndex(streamId, groupStartOffset, (int) (lastIndex.endOffset() - groupStartOffset),
- groupRecordCount, groupStartPosition, groupSize).encode(subIndexes);
- }
- groupStartOffset = dataBlock.startOffset();
- groupStartPosition = nextBlockPosition;
- groupSize = 0;
- groupRecordCount = 0;
- }
- groupSize += dataBlock.size();
- groupRecordCount += dataBlock.recordCount();
- nextBlockPosition += dataBlock.size();
- lastIndex = dataBlock;
- }
- writer.copyWrite(ObjectUtils.genKey(0, object.objectId()), validDataBlockStartPosition, basicObjectInfo.dataBlockSize());
- objectSize += basicObjectInfo.dataBlockSize() - validDataBlockStartPosition;
- indexes.addComponent(true, subIndexes);
- compactedObjectIds.add(object.objectId());
- }
- }
- if (lastIndex != null) {
- ByteBuf subIndexes = ByteBufAlloc.byteBuffer(DataBlockIndex.BLOCK_INDEX_SIZE, STREAM_OBJECT_COMPACTION_WRITE);
- new DataBlockIndex(streamId, groupStartOffset, (int) (lastIndex.endOffset() - groupStartOffset),
- groupRecordCount, groupStartPosition, groupSize).encode(subIndexes);
- indexes.addComponent(true, subIndexes);
- }
-
- CompositeByteBuf indexBlockAndFooter = ByteBufAlloc.compositeByteBuffer();
- indexBlockAndFooter.addComponent(true, indexes);
- indexBlockAndFooter.addComponent(true, new ObjectWriter.Footer(nextBlockPosition, indexBlockAndFooter.readableBytes()).buffer());
-
- objectSize += indexBlockAndFooter.readableBytes();
- writer.write(indexBlockAndFooter.duplicate());
- writer.close().get();
- return Optional.of(new CompactStreamObjectRequest(objectId, objectSize, streamId, streamEpoch,
- compactedStartOffset, compactedEndOffset, compactedObjectIds));
- }
-
- }
-
- static List<List<S3ObjectMetadata>> group0(List<S3ObjectMetadata> objects, long maxStreamObjectSize) {
- List<List<S3ObjectMetadata>> objectGroups = new LinkedList<>();
- long groupSize = 0;
- long groupNextOffset = -1L;
- List<S3ObjectMetadata> group = new LinkedList<>();
- int partCount = 0;
- for (S3ObjectMetadata object : objects) {
- int objectPartCount = (int) ((object.objectSize() + Writer.MAX_PART_SIZE - 1) / Writer.MAX_PART_SIZE);
- if (objectPartCount >= Writer.MAX_PART_COUNT) {
- continue;
- }
- if (groupNextOffset == -1L) {
- groupNextOffset = object.startOffset();
- }
- // start a new group when the object's range is not continuous with the previous one,
- if (groupNextOffset != object.startOffset()
- // or adding the object would push the group size over maxStreamObjectSize,
- || (groupSize + object.objectSize() > maxStreamObjectSize && !group.isEmpty())
- // or the object count in the group has reached MAX_OBJECT_GROUP_COUNT,
- || group.size() >= MAX_OBJECT_GROUP_COUNT
- || partCount + objectPartCount > Writer.MAX_PART_COUNT
- ) {
- objectGroups.add(group);
- group = new LinkedList<>();
- groupSize = 0;
- }
- group.add(object);
- groupSize += object.objectSize();
- groupNextOffset = object.endOffset();
- partCount += objectPartCount;
- }
- if (!group.isEmpty()) {
- objectGroups.add(group);
- }
- return objectGroups;
- }
-
- // no operation for now.
- public void close() {
- }
-
- public static Builder builder() {
- return new Builder();
- }
-
- public static class Builder {
- private ObjectManager objectManager;
- private S3Operator s3Operator;
- private Stream stream;
- private long maxStreamObjectSize;
- private int dataBlockGroupSizeThreshold = DEFAULT_DATA_BLOCK_GROUP_SIZE_THRESHOLD;
-
- public Builder objectManager(ObjectManager objectManager) {
- this.objectManager = objectManager;
- return this;
- }
-
- public Builder s3Operator(S3Operator s3Operator) {
- this.s3Operator = s3Operator;
- return this;
- }
-
- public Builder stream(Stream stream) {
- this.stream = stream;
- return this;
- }
-
- /**
- * Set compacted stream object max size.
- *
- * @param maxStreamObjectSize compacted stream object max size in bytes.
- * If it is bigger than {@link Writer#MAX_OBJECT_SIZE},
- * it will be set to {@link Writer#MAX_OBJECT_SIZE}.
- * @return builder.
- */
- public Builder maxStreamObjectSize(long maxStreamObjectSize) {
- this.maxStreamObjectSize = maxStreamObjectSize;
- return this;
- }
-
- public Builder dataBlockGroupSizeThreshold(int dataBlockGroupSizeThreshold) {
- this.dataBlockGroupSizeThreshold = dataBlockGroupSizeThreshold;
- return this;
- }
-
- public StreamObjectCompactor build() {
- return new StreamObjectCompactor(objectManager, s3Operator, stream, maxStreamObjectSize, dataBlockGroupSizeThreshold);
- }
- }
-}
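A minimal usage sketch of the builder API above, assuming objectManager, s3Operator, and stream instances are already wired up by the surrounding storage layer:

    // hypothetical call site; the compactor caps maxStreamObjectSize at Writer.MAX_OBJECT_SIZE internally
    StreamObjectCompactor compactor = StreamObjectCompactor.builder()
        .objectManager(objectManager)             // assumed to exist in the caller's context
        .s3Operator(s3Operator)                   // assumed to exist in the caller's context
        .stream(stream)                           // assumed to exist in the caller's context
        .maxStreamObjectSize(1024L * 1024 * 1024) // example value, 1 GiB
        .build();
    compactor.compact();  // cleans up expired stream objects, then compacts the living ones
    compactor.cleanup();  // only cleans up expired stream objects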
diff --git a/s3stream/src/main/java/com/automq/stream/s3/StreamRecordBatchCodec.java b/s3stream/src/main/java/com/automq/stream/s3/StreamRecordBatchCodec.java
deleted file mode 100644
index a7a4033ad..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/StreamRecordBatchCodec.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.ByteBufSeqAlloc;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import io.netty.buffer.ByteBuf;
-
-import static com.automq.stream.s3.ByteBufAlloc.ENCODE_RECORD;
-
-public class StreamRecordBatchCodec {
- public static final byte MAGIC_V0 = 0x22;
- public static final int HEADER_SIZE =
- 1 // magic
- + 8 // streamId
- + 8 // epoch
- + 8 // baseOffset
- + 4 // lastOffsetDelta
- + 4; // payload length
- private static final ByteBufSeqAlloc ENCODE_ALLOC = new ByteBufSeqAlloc(ENCODE_RECORD, 8);
-
- public static ByteBuf encode(StreamRecordBatch streamRecord) {
- int totalLength = HEADER_SIZE + streamRecord.size(); // payload
- // use sequential allocator to avoid memory fragmentation
- ByteBuf buf = ENCODE_ALLOC.byteBuffer(totalLength);
- buf.writeByte(MAGIC_V0);
- buf.writeLong(streamRecord.getStreamId());
- buf.writeLong(streamRecord.getEpoch());
- buf.writeLong(streamRecord.getBaseOffset());
- buf.writeInt(streamRecord.getCount());
- buf.writeInt(streamRecord.size());
- buf.writeBytes(streamRecord.getPayload().duplicate());
- return buf;
- }
-
- /**
- * Decode a stream record batch from a byte buffer and move the reader index.
- * The returned stream record batch does NOT share the payload buffer with the input buffer.
- */
- public static StreamRecordBatch duplicateDecode(ByteBuf buf) {
- byte magic = buf.readByte(); // magic
- if (magic != MAGIC_V0) {
- throw new RuntimeException("Invalid magic byte " + magic);
- }
- long streamId = buf.readLong();
- long epoch = buf.readLong();
- long baseOffset = buf.readLong();
- int lastOffsetDelta = buf.readInt();
- int payloadLength = buf.readInt();
- ByteBuf payload = ByteBufAlloc.byteBuffer(payloadLength, ByteBufAlloc.DECODE_RECORD);
- buf.readBytes(payload);
- return new StreamRecordBatch(streamId, epoch, baseOffset, lastOffsetDelta, payload);
- }
-
- /**
- * Decode a stream record batch from a byte buffer and move the reader index.
- * The returned stream record batch shares the payload buffer with the input buffer.
- */
- public static StreamRecordBatch decode(ByteBuf buf) {
- buf.readByte(); // magic
- long streamId = buf.readLong();
- long epoch = buf.readLong();
- long baseOffset = buf.readLong();
- int lastOffsetDelta = buf.readInt();
- int payloadLength = buf.readInt();
- ByteBuf payload = buf.slice(buf.readerIndex(), payloadLength);
- buf.skipBytes(payloadLength);
- return new StreamRecordBatch(streamId, epoch, baseOffset, lastOffsetDelta, payload);
- }
-}
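A small round-trip sketch of the codec above; the StreamRecordBatch constructor arguments mirror the ones used in decode(), and Unpooled (io.netty.buffer.Unpooled) is only an illustrative way to build a payload ByteBuf:

    // encode once, then decode a copy (duplicateDecode) and a zero-copy view (decode)
    ByteBuf payload = Unpooled.wrappedBuffer(new byte[] {1, 2, 3});
    StreamRecordBatch batch = new StreamRecordBatch(42L, 0L, 100L, 1, payload); // streamId, epoch, baseOffset, count, payload
    ByteBuf encoded = StreamRecordBatchCodec.encode(batch);
    StreamRecordBatch copied = StreamRecordBatchCodec.duplicateDecode(encoded.duplicate()); // payload copied into a new buffer
    StreamRecordBatch shared = StreamRecordBatchCodec.decode(encoded.duplicate());          // payload shared with the encoded buffer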
diff --git a/s3stream/src/main/java/com/automq/stream/s3/WalWriteRequest.java b/s3stream/src/main/java/com/automq/stream/s3/WalWriteRequest.java
deleted file mode 100644
index c843e095f..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/WalWriteRequest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3;
-
-import com.automq.stream.s3.cache.LogCache;
-import com.automq.stream.s3.context.AppendContext;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.wal.WriteAheadLog;
-import java.util.concurrent.CompletableFuture;
-
- public class WalWriteRequest implements Comparable<WalWriteRequest> {
- final StreamRecordBatch record;
- final AppendContext context;
- final CompletableFuture<Void> cf;
- long offset;
- /**
- * Whether the record has been persisted to the {@link WriteAheadLog}
- * When a continuous series of records IN A STREAM have been persisted to the WAL, they can be uploaded to S3.
- *
- * @see S3Storage.WALCallbackSequencer
- */
- boolean persisted;
-
- /**
- * Whether the record has been put to the {@link LogCache}
- * When a continuous series of records have been persisted to the WAL and uploaded to S3, they can be trimmed.
- *
- * @see S3Storage.WALConfirmOffsetCalculator
- */
- boolean confirmed;
-
- public WalWriteRequest(StreamRecordBatch record, long offset, CompletableFuture<Void> cf) {
- this(record, offset, cf, AppendContext.DEFAULT);
- }
-
- public WalWriteRequest(StreamRecordBatch record, long offset, CompletableFuture<Void> cf, AppendContext context) {
- this.record = record;
- this.offset = offset;
- this.cf = cf;
- this.context = context;
- }
-
- @Override
- public int compareTo(WalWriteRequest o) {
- return record.compareTo(o.record);
- }
-
- @Override
- public String toString() {
- return "WalWriteRequest{" +
- "record=" + record +
- ", offset=" + offset +
- ", persisted=" + persisted +
- ", confirmed=" + confirmed +
- '}';
- }
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/cache/BlockCache.java b/s3stream/src/main/java/com/automq/stream/s3/cache/BlockCache.java
deleted file mode 100644
index 497d3b6da..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/cache/BlockCache.java
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3.cache;
-
-import com.automq.stream.s3.ByteBufAlloc;
-import com.automq.stream.s3.cache.DefaultS3BlockCache.ReadAheadRecord;
-import com.automq.stream.s3.metrics.S3StreamMetricsManager;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.trace.context.TraceContext;
-import com.automq.stream.utils.biniarysearch.StreamRecordBatchList;
-import io.opentelemetry.instrumentation.annotations.SpanAttribute;
-import io.opentelemetry.instrumentation.annotations.WithSpan;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Objects;
-import java.util.SortedMap;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.s3.model.StreamRecordBatch.OBJECT_OVERHEAD;
-
-public class BlockCache implements ByteBufAlloc.OOMHandler {
- public static final Integer ASYNC_READ_AHEAD_NOOP_OFFSET = -1;
- static final int BLOCK_SIZE = 1024 * 1024;
- private static final Logger LOGGER = LoggerFactory.getLogger(BlockCache.class);
- final Map<Long, StreamCache> stream2cache = new HashMap<>();
- private final long maxSize;
- private final LRUCache<CacheBlockKey, Integer> inactive = new LRUCache<>();
- private final LRUCache<CacheBlockKey, Integer> active = new LRUCache<>();
- private final AtomicLong size = new AtomicLong();
- private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
- private final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
- private final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
- private final List<CacheEvictListener> cacheEvictListeners = new ArrayList<>();
-
- public BlockCache(long maxSize) {
- this.maxSize = maxSize;
- S3StreamMetricsManager.registerBlockCacheSizeSupplier(size::get);
- }
-
- public void registerListener(CacheEvictListener listener) {
- cacheEvictListeners.add(listener);
- }
-
- public void put(long streamId, List<StreamRecordBatch> records) {
- put(streamId, ASYNC_READ_AHEAD_NOOP_OFFSET, ASYNC_READ_AHEAD_NOOP_OFFSET, records);
- }
-
- public void put(long streamId, long raAsyncOffset, long raEndOffset, List<StreamRecordBatch> records) {
- writeLock.lock();
- try {
- put0(streamId, raAsyncOffset, raEndOffset, records);
- } finally {
- writeLock.unlock();
- }
- }
-
- void put0(long streamId, long raAsyncOffset, long raEndOffset, List<StreamRecordBatch> records) {
- if (maxSize == 0 || records.isEmpty()) {
- records.forEach(StreamRecordBatch::release);
- return;
- }
- records = new ArrayList<>(records);
- StreamCache streamCache = stream2cache.computeIfAbsent(streamId, id -> new StreamCache());
- long startOffset = records.get(0).getBaseOffset();
- long endOffset = records.get(records.size() - 1).getLastOffset();
-
- if (raAsyncOffset != ASYNC_READ_AHEAD_NOOP_OFFSET && (raAsyncOffset < startOffset || raAsyncOffset >= endOffset)) {
- LOGGER.warn("raAsyncOffset out of range, stream={}, raAsyncOffset: {}, startOffset: {}, endOffset: {}", streamId, raAsyncOffset, startOffset, endOffset);
- }
-
- int size = records.stream().mapToInt(StreamRecordBatch::size).sum();
- size += records.size() * OBJECT_OVERHEAD;
-
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] put block cache, stream={}, {}-{}, raAsyncOffset: {}, raEndOffset: {}, total bytes: {} ", streamId, startOffset, endOffset, raAsyncOffset, raEndOffset, size);
- }
-
- // remove overlapped part.
- SortedMap<Long, CacheBlock> tailMap = streamCache.tailBlocks(startOffset);
- for (Map.Entry<Long, CacheBlock> entry : tailMap.entrySet()) {
- CacheBlock cacheBlock = entry.getValue();
- if (cacheBlock.firstOffset >= endOffset) {
- break;
- }
- if (isWithinRange(raAsyncOffset, cacheBlock.firstOffset, cacheBlock.lastOffset) && cacheBlock.readAheadRecord == null) {
- cacheBlock.readAheadRecord = new ReadAheadRecord(raEndOffset);
- }
- // overlap is a rare case, so removeIf is fine performance-wise.
- records.removeIf(record -> {
- boolean remove = record.getLastOffset() > cacheBlock.firstOffset && record.getBaseOffset() < cacheBlock.lastOffset;
- if (remove) {
- record.release();
- }
- return remove;
- });
- }
-
- // ensure the cache size.
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] block cache size: {}/{}, ensure size: {} ", this.size.get(), maxSize, size);
- }
- ensureCapacity(size);
-
- // split into 1MiB cache blocks, each of which contains sequential records.
- long expectStartOffset = -1L;
- LinkedList<StreamRecordBatch> batchList = new LinkedList<>();
- int partSize = 0;
- for (StreamRecordBatch record : records) {
- if ((expectStartOffset == -1L || record.getBaseOffset() == expectStartOffset) && partSize < BLOCK_SIZE) {
- batchList.add(record);
- partSize += record.size();
- } else {
- ReadAheadRecord raRecord = isWithinRange(raAsyncOffset, batchList.getFirst().getBaseOffset(), batchList.getLast().getLastOffset()) ?
- new ReadAheadRecord(raEndOffset) : null;
- put(streamId, streamCache, new CacheBlock(batchList, raRecord));
- batchList = new LinkedList<>();
- batchList.add(record);
- partSize = record.size();
- }
- expectStartOffset = record.getLastOffset();
- }
- if (!batchList.isEmpty()) {
- ReadAheadRecord raRecord = isWithinRange(raAsyncOffset, batchList.getFirst().getBaseOffset(), batchList.getLast().getLastOffset()) ?
- new ReadAheadRecord(raEndOffset) : null;
- put(streamId, streamCache, new CacheBlock(batchList, raRecord));
- }
- }
-
- public void setReadAheadRecord(long streamId, long raAsyncOffset, long raEndOffset) {
- writeLock.lock();
- try {
- StreamCache streamCache = stream2cache.get(streamId);
- if (streamCache == null) {
- return;
- }
- NavigableMap<Long, CacheBlock> streamCacheBlocks = streamCache.tailBlocks(raAsyncOffset);
- for (Map.Entry<Long, CacheBlock> entry : streamCacheBlocks.entrySet()) {
- CacheBlock cacheBlock = entry.getValue();
- if (isWithinRange(raAsyncOffset, cacheBlock.firstOffset, cacheBlock.lastOffset)) {
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] set read ahead record, stream={}, raAsyncOffset: {}, raEndOffset: {}", streamId, raAsyncOffset, raEndOffset);
- }
- cacheBlock.readAheadRecord = new ReadAheadRecord(raEndOffset);
- break;
- }
- }
- } finally {
- writeLock.unlock();
- }
- }
-
- private boolean isWithinRange(long raAsyncOffset, long startOffset, long endOffset) {
- return raAsyncOffset >= startOffset && raAsyncOffset < endOffset;
- }
-
- public boolean checkRange(long streamId, long startOffset, int maxBytes) {
- if (maxBytes <= 0) {
- return true;
- }
- readLock.lock();
- try {
- return checkRange0(streamId, startOffset, maxBytes);
- } finally {
- readLock.unlock();
- }
- }
-
- boolean checkRange0(long streamId, long startOffset, int maxBytes) {
- StreamCache streamCache = stream2cache.get(streamId);
- if (streamCache == null) {
- return false;
- }
-
- NavigableMap<Long, CacheBlock> streamCacheBlocks = streamCache.tailBlocks(startOffset);
- long nextStartOffset = startOffset;
- int nextMaxBytes = maxBytes;
- LinkedList<StreamRecordBatch> records = new LinkedList<>();
- for (Map.Entry<Long, CacheBlock> entry : streamCacheBlocks.entrySet()) {
- CacheBlock cacheBlock = entry.getValue();
- if (cacheBlock.lastOffset <= nextStartOffset || nextStartOffset < cacheBlock.firstOffset) {
- break;
- }
- nextMaxBytes = readFromCacheBlock(records, cacheBlock, nextStartOffset, Long.MAX_VALUE, nextMaxBytes);
- nextStartOffset = records.getLast().getLastOffset();
- if (nextMaxBytes <= 0) {
- return true;
- }
- }
- return nextMaxBytes <= 0;
- }
-
- public GetCacheResult get(long streamId, long startOffset, long endOffset, int maxBytes) {
- return get(TraceContext.DEFAULT, streamId, startOffset, endOffset, maxBytes);
- }
-
- /**
- * Get records from cache.
- * Note: the records is retained, the caller should release it.
- */
- @WithSpan
- public GetCacheResult get(TraceContext context,
- @SpanAttribute long streamId,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes) {
- context.currentContext();
- if (startOffset >= endOffset || maxBytes <= 0) {
- return GetCacheResult.empty();
- }
-
- readLock.lock();
- try {
- return get0(streamId, startOffset, endOffset, maxBytes);
- } finally {
- readLock.unlock();
- }
- }
-
- public GetCacheResult get0(long streamId, long startOffset, long endOffset, int maxBytes) {
- StreamCache streamCache = stream2cache.get(streamId);
- if (streamCache == null) {
- return GetCacheResult.empty();
- }
- NavigableMap<Long, CacheBlock> streamCacheBlocks = streamCache.tailBlocks(startOffset);
- long nextStartOffset = startOffset;
- int nextMaxBytes = maxBytes;
- List<ReadAheadRecord> readAheadRecords = new ArrayList<>();
- LinkedList<StreamRecordBatch> records = new LinkedList<>();
- for (Map.Entry<Long, CacheBlock> entry : streamCacheBlocks.entrySet()) {
- CacheBlock cacheBlock = entry.getValue();
- if (cacheBlock.lastOffset <= nextStartOffset || nextStartOffset < cacheBlock.firstOffset) {
- break;
- }
- if (cacheBlock.readAheadRecord != null) {
- readAheadRecords.add(cacheBlock.readAheadRecord);
- cacheBlock.readAheadRecord = null;
- }
- nextMaxBytes = readFromCacheBlock(records, cacheBlock, nextStartOffset, endOffset, nextMaxBytes);
- nextStartOffset = records.getLast().getLastOffset();
- boolean blockCompletedRead = nextStartOffset >= cacheBlock.lastOffset;
- CacheBlockKey cacheBlockKey = new CacheBlockKey(streamId, cacheBlock.firstOffset);
- if (blockCompletedRead) {
- active.remove(cacheBlockKey);
- inactive.put(cacheBlockKey, cacheBlock.size);
- } else {
- if (!active.touch(cacheBlockKey)) {
- inactive.touch(cacheBlockKey);
- }
- }
-
- if (nextStartOffset >= endOffset || nextMaxBytes <= 0) {
- break;
- }
-
- }
-
- records.forEach(StreamRecordBatch::retain);
- return GetCacheResult.of(records, readAheadRecords);
- }
-
- private int readFromCacheBlock(LinkedList<StreamRecordBatch> records, CacheBlock cacheBlock,
- long nextStartOffset, long endOffset, int nextMaxBytes) {
- boolean matched = false;
- StreamRecordBatchList streamRecordBatchList = new StreamRecordBatchList(cacheBlock.records);
- int startIndex = streamRecordBatchList.search(nextStartOffset);
- if (startIndex == -1) {
- // mismatched
- return nextMaxBytes;
- }
- for (int i = startIndex; i < cacheBlock.records.size(); i++) {
- StreamRecordBatch record = cacheBlock.records.get(i);
- if (record.getBaseOffset() <= nextStartOffset && record.getLastOffset() > nextStartOffset) {
- records.add(record);
- nextStartOffset = record.getLastOffset();
- nextMaxBytes -= record.size();
- matched = true;
- if (nextStartOffset >= endOffset || nextMaxBytes <= 0) {
- break;
- }
- } else if (matched) {
- break;
- }
- }
- return nextMaxBytes;
- }
-
- private void ensureCapacity(int size) {
- ensureCapacity0(size, false);
- }
-
- private int ensureCapacity0(int size, boolean forceEvict) {
- if (!forceEvict && (maxSize - this.size.get() >= size)) {
- return 0;
- }
- int evictBytes = 0;
- for (LRUCache<CacheBlockKey, Integer> lru : List.of(inactive, active)) {
- for (; ; ) {
- Map.Entry<CacheBlockKey, Integer> entry = lru.pop();
- if (entry == null) {
- break;
- }
- StreamCache streamCache = stream2cache.get(entry.getKey().streamId);
- if (streamCache == null) {
- LOGGER.error("[BUG] Stream cache not found for streamId: {}", entry.getKey().streamId);
- continue;
- }
- CacheBlock cacheBlock = streamCache.remove(entry.getKey().startOffset);
- if (cacheBlock == null) {
- LOGGER.error("[BUG] Cannot find stream cache block: {} {}", entry.getKey().streamId, entry.getKey().startOffset);
- } else {
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("[S3BlockCache] evict block, stream={}, {}-{}, total bytes: {} ", entry.getKey().streamId, cacheBlock.firstOffset, cacheBlock.lastOffset, cacheBlock.size);
- }
- cacheBlock.free();
- evictBytes += cacheBlock.size;
- cacheEvictListeners.forEach(listener -> listener.onCacheEvict(entry.getKey().streamId, cacheBlock.firstOffset, cacheBlock.lastOffset, cacheBlock.size));
- if (forceEvict) {
- if (evictBytes >= size) {
- return evictBytes;
- }
- } else if (maxSize - this.size.addAndGet(-cacheBlock.size) >= size) {
- return evictBytes;
- }
- }
- }
- }
- return evictBytes;
- }
-
- private void logCacheStatus() {
- try {
- readLock.lock();
- List<Long> sortedStreamIds = new ArrayList<>(stream2cache.keySet());
- sortedStreamIds.sort(Long::compareTo);
- for (Long streamId : sortedStreamIds) {
- StreamCache streamCache = stream2cache.get(streamId);
- if (streamCache == null) {
- continue;
- }
- for (Map.Entry<Long, CacheBlock> entry : streamCache.blocks().entrySet()) {
- CacheBlockKey key = new CacheBlockKey(streamId, entry.getValue().firstOffset);
- LOGGER.debug("[S3BlockCache] stream cache block, stream={}, {}-{}, inactive={}, active={}, total bytes: {} ",
- streamId, entry.getValue().firstOffset, entry.getValue().lastOffset, inactive.containsKey(key), active.containsKey(key), entry.getValue().size);
- }
- }
- } finally {
- readLock.unlock();
- }
- }
-
- private void put(long streamId, StreamCache streamCache, CacheBlock cacheBlock) {
- streamCache.put(cacheBlock);
- active.put(new CacheBlockKey(streamId, cacheBlock.firstOffset), cacheBlock.size);
- size.getAndAdd(cacheBlock.size);
- }
-
- @Override
- public int handle(int memoryRequired) {
- writeLock.lock();
- try {
- return ensureCapacity0(memoryRequired, true);
- } catch (Throwable e) {
- LOGGER.error("[UNEXPECTED] handle OOM failed", e);
- return 0;
- } finally {
- writeLock.unlock();
- }
- }
-
- public interface CacheEvictListener {
- void onCacheEvict(long streamId, long startOffset, long endOffset, int size);
- }
-
- static final class CacheBlockKey {
- private final long streamId;
- private final long startOffset;
-
- CacheBlockKey(long streamId, long startOffset) {
- this.streamId = streamId;
- this.startOffset = startOffset;
- }
-
- public long streamId() {
- return streamId;
- }
-
- public long startOffset() {
- return startOffset;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (CacheBlockKey) obj;
- return this.streamId == that.streamId &&
- this.startOffset == that.startOffset;
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(streamId, startOffset);
- }
-
- @Override
- public String toString() {
- return "CacheBlockKey[" +
- "streamId=" + streamId + ", " +
- "startOffset=" + startOffset + ']';
- }
-
- }
-
- public static class CacheBlock {
- List<StreamRecordBatch> records;
- long firstOffset;
- long lastOffset;
- int size;
- ReadAheadRecord readAheadRecord;
-
- public CacheBlock(List<StreamRecordBatch> records, ReadAheadRecord readAheadRecord) {
- this.records = records;
- this.firstOffset = records.get(0).getBaseOffset();
- this.lastOffset = records.get(records.size() - 1).getLastOffset();
- this.size = records.stream().mapToInt(StreamRecordBatch::size).sum();
- this.size += records.size() * OBJECT_OVERHEAD;
- this.readAheadRecord = readAheadRecord;
- }
-
- public void free() {
- records.forEach(StreamRecordBatch::release);
- records = null;
- }
-
- public long size() {
- return size;
- }
- }
-
- public static class GetCacheResult {
- private final List<StreamRecordBatch> records;
- private final List<ReadAheadRecord> readAheadRecords;
-
- private GetCacheResult(List<StreamRecordBatch> records, List<ReadAheadRecord> readAheadRecords) {
- this.records = records;
- this.readAheadRecords = readAheadRecords;
- }
-
- public static GetCacheResult empty() {
- return new GetCacheResult(Collections.emptyList(), Collections.emptyList());
- }
-
- public static GetCacheResult of(List<StreamRecordBatch> records, List<ReadAheadRecord> readAheadRecords) {
- return new GetCacheResult(records, readAheadRecords);
- }
-
- public List<StreamRecordBatch> getRecords() {
- return records;
- }
-
- public List<ReadAheadRecord> getReadAheadRecords() {
- return readAheadRecords;
- }
- }
-}
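A sketch of the BlockCache contract described by the javadoc above: put() hands ownership of the records' reference counts to the cache, while get() retains the returned records, so the caller releases them (streamId, startOffset, endOffset, and records are placeholders):

    BlockCache blockCache = new BlockCache(128L * 1024 * 1024);  // example 128 MiB budget
    blockCache.put(streamId, records);                           // the cache now owns the records' refcounts
    BlockCache.GetCacheResult rst = blockCache.get(streamId, startOffset, endOffset, 1024 * 1024);
    rst.getRecords().forEach(StreamRecordBatch::release);        // get() retains, so the caller must release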
diff --git a/s3stream/src/main/java/com/automq/stream/s3/cache/CacheAccessType.java b/s3stream/src/main/java/com/automq/stream/s3/cache/CacheAccessType.java
deleted file mode 100644
index 429b37220..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/cache/CacheAccessType.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3.cache;
-
-public enum CacheAccessType {
- DELTA_WAL_CACHE_HIT,
- BLOCK_CACHE_HIT,
- BLOCK_CACHE_MISS,
-}
diff --git a/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockReadAccumulator.java b/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockReadAccumulator.java
deleted file mode 100644
index 7cdfc954a..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockReadAccumulator.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3.cache;
-
-import com.automq.stream.s3.DataBlockIndex;
-import com.automq.stream.s3.ObjectReader;
-import com.automq.stream.s3.StreamDataBlock;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.function.BiConsumer;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Accumulates inflight data block read requests into a single real read request.
- */
-public class DataBlockReadAccumulator {
- private static final Logger LOGGER = LoggerFactory.getLogger(DataBlockReadAccumulator.class);
- private final Map<Pair<String, Long>, DataBlockRecords> inflightDataBlockReads = new ConcurrentHashMap<>();
-
- public List<ReserveResult> reserveDataBlock(List<Pair<ObjectReader, StreamDataBlock>> dataBlockPairList) {
- List<ReserveResult> reserveResults = new ArrayList<>();
- synchronized (inflightDataBlockReads) {
- for (Pair<ObjectReader, StreamDataBlock> pair : dataBlockPairList) {
- ObjectReader reader = pair.getLeft();
- DataBlockIndex blockIndex = pair.getRight().dataBlockIndex();
- Pair<String, Long> key = Pair.of(reader.objectKey(), blockIndex.startPosition());
- DataBlockRecords records = inflightDataBlockReads.get(key);
- CompletableFuture<DataBlockRecords> cf = new CompletableFuture<>();
- BiConsumer<DataBlockRecords, Throwable> listener = (rst, ex) -> {
- if (ex != null) {
- cf.completeExceptionally(ex);
- rst.release();
- } else {
- // consumer of DataBlockRecords should release it on completion
- cf.complete(rst);
- }
- };
- int reservedSize = 0;
- if (records == null) {
- records = new DataBlockRecords();
- records.registerListener(listener);
- inflightDataBlockReads.put(key, records);
- reservedSize = blockIndex.size();
- } else {
- records.registerListener(listener);
- }
- reserveResults.add(new ReserveResult(reservedSize, cf));
- }
- }
- return reserveResults;
- }
-
- public void readDataBlock(ObjectReader reader, DataBlockIndex blockIndex) {
- Pair<String, Long> key = Pair.of(reader.objectKey(), blockIndex.startPosition());
- synchronized (inflightDataBlockReads) {
- DataBlockRecords records = inflightDataBlockReads.get(key);
- if (records != null) {
- reader.read(blockIndex).whenComplete((dataBlock, ex) -> {
- try (dataBlock) {
- synchronized (inflightDataBlockReads) {
- inflightDataBlockReads.remove(key, records);
- }
- records.complete(dataBlock, ex);
- } finally {
- records.release();
- }
- });
- }
- }
- }
-
- public static final class ReserveResult {
- private final int reserveSize;
- private final CompletableFuture<DataBlockRecords> cf;
-
- public ReserveResult(int reserveSize, CompletableFuture<DataBlockRecords> cf) {
- this.reserveSize = reserveSize;
- this.cf = cf;
- }
-
- public int reserveSize() {
- return reserveSize;
- }
-
- public CompletableFuture<DataBlockRecords> cf() {
- return cf;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (ReserveResult) obj;
- return this.reserveSize == that.reserveSize &&
- Objects.equals(this.cf, that.cf);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(reserveSize, cf);
- }
-
- @Override
- public String toString() {
- return "ReserveResult[" +
- "reserveSize=" + reserveSize + ", " +
- "cf=" + cf + ']';
- }
-
- }
-}
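A sketch of the accumulate-then-read flow (reader and streamDataBlock are assumed to come from an ObjectReader lookup elsewhere; the release() call is package-private, so the real callers live in the same cache package). Only the first reservation for a block reports a non-zero reserveSize and issues the single real read; later reservations just attach to the same future:

    DataBlockReadAccumulator accumulator = new DataBlockReadAccumulator();
    List<DataBlockReadAccumulator.ReserveResult> reserved =
        accumulator.reserveDataBlock(List.of(Pair.of(reader, streamDataBlock)));
    if (reserved.get(0).reserveSize() > 0) {
        accumulator.readDataBlock(reader, streamDataBlock.dataBlockIndex()); // triggers the one real S3 read
    }
    reserved.get(0).cf().thenAccept(records -> {
        records.records().forEach(r -> { /* consume the StreamRecordBatch */ });
        records.release(); // the consumer releases, as noted in reserveDataBlock()
    });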
diff --git a/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockRecords.java b/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockRecords.java
deleted file mode 100644
index f1b0598e7..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/cache/DataBlockRecords.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3.cache;
-
-import com.automq.stream.s3.ObjectReader;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.utils.CloseableIterator;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.BiConsumer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DataBlockRecords {
- private static final Logger LOGGER = LoggerFactory.getLogger(DataBlockRecords.class);
- final AtomicInteger refCount = new AtomicInteger(1);
- private final List<BiConsumer<DataBlockRecords, Throwable>> listeners = new LinkedList<>();
- private List<StreamRecordBatch> records = Collections.emptyList();
-
- public void registerListener(BiConsumer<DataBlockRecords, Throwable> listener) {
- retain();
- listeners.add(listener);
- }
-
- public void complete(ObjectReader.DataBlockGroup dataBlockGroup, Throwable ex) {
- if (ex == null) {
- records = new ArrayList<>(dataBlockGroup.recordCount());
- try (CloseableIterator<StreamRecordBatch> it = dataBlockGroup.iterator()) {
- while (it.hasNext()) {
- records.add(it.next());
- }
- } catch (Throwable e) {
- LOGGER.error("parse data block fail", e);
- records.forEach(StreamRecordBatch::release);
- ex = e;
- }
- }
- Throwable finalEx = ex;
- listeners.forEach(listener -> {
- try {
- listener.accept(this, finalEx);
- } catch (Throwable e) {
- release();
- LOGGER.error("DataBlockRecords fail to notify listener {}", listener, e);
- }
- });
- }
-
- public List<StreamRecordBatch> records() {
- return Collections.unmodifiableList(records);
- }
-
- void retain() {
- refCount.incrementAndGet();
- }
-
- void release() {
- if (refCount.decrementAndGet() == 0) {
- records.forEach(StreamRecordBatch::release);
- }
- }
-}
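A reference-counting sketch for DataBlockRecords based on the methods above: registerListener() retains on behalf of the listener, complete() delivers the parsed records, and the listener (or its downstream consumer) calls release() when done. As with the previous sketch, this assumes same-package access to the package-private retain()/release():

    DataBlockRecords records = new DataBlockRecords();
    records.registerListener((rst, ex) -> {
        if (ex == null) {
            rst.records().forEach(r -> { /* consume */ });
        }
        rst.release(); // drop the reference taken by registerListener()
    });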
diff --git a/s3stream/src/main/java/com/automq/stream/s3/cache/DefaultS3BlockCache.java b/s3stream/src/main/java/com/automq/stream/s3/cache/DefaultS3BlockCache.java
deleted file mode 100644
index 6ea9d9f05..000000000
--- a/s3stream/src/main/java/com/automq/stream/s3/cache/DefaultS3BlockCache.java
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Copyright 2024, AutoMQ CO.,LTD.
- *
- * Use of this software is governed by the Business Source License
- * included in the file BSL.md
- *
- * As of the Change Date specified in that file, in accordance with
- * the Business Source License, use of this software will be governed
- * by the Apache License, Version 2.0
- */
-
-package com.automq.stream.s3.cache;
-
-import com.automq.stream.s3.Config;
-import com.automq.stream.s3.metrics.TimerUtil;
-import com.automq.stream.s3.metrics.stats.StorageOperationStats;
-import com.automq.stream.s3.model.StreamRecordBatch;
-import com.automq.stream.s3.objects.ObjectManager;
-import com.automq.stream.s3.operator.S3Operator;
-import com.automq.stream.s3.trace.context.TraceContext;
-import com.automq.stream.utils.FutureUtil;
-import com.automq.stream.utils.Threads;
-import io.opentelemetry.api.trace.Span;
-import io.opentelemetry.instrumentation.annotations.SpanAttribute;
-import io.opentelemetry.instrumentation.annotations.WithSpan;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.UUID;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.TimeUnit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.automq.stream.s3.metadata.ObjectUtils.NOOP_OFFSET;
-
-public class DefaultS3BlockCache implements S3BlockCache {
- private static final Logger LOGGER = LoggerFactory.getLogger(DefaultS3BlockCache.class);
- private final Map<ReadAheadTaskKey, ReadAheadTaskContext> inflightReadAheadTasks = new ConcurrentHashMap<>();
- private final Map<ReadTaskKey, ReadTaskContext> inflightReadStatusMap = new ConcurrentHashMap<>();
- private final BlockCache cache;
- private final ExecutorService mainExecutor;
- private final ReadAheadManager readAheadManager;
- private final StreamReader streamReader;
- private final InflightReadThrottle inflightReadThrottle;
-
- public DefaultS3BlockCache(Config config, ObjectManager objectManager, S3Operator s3Operator) {
- int blockSize = config.objectBlockSize();
-
- this.cache = new BlockCache(config.blockCacheSize());
- this.readAheadManager = new ReadAheadManager(blockSize, this.cache);
- this.mainExecutor = Threads.newFixedThreadPoolWithMonitor(
- 2,
- "s3-block-cache-main",
- false,
- LOGGER);
- this.inflightReadThrottle = new InflightReadThrottle();
- this.streamReader = new StreamReader(s3Operator, objectManager, cache, inflightReadAheadTasks, inflightReadThrottle);
- }
-
- public void shutdown() {
- this.mainExecutor.shutdown();
- this.streamReader.shutdown();
- this.inflightReadThrottle.shutdown();
-
- }
-
- @Override
- @WithSpan
- public CompletableFuture<ReadDataBlock> read(TraceContext traceContext,
- @SpanAttribute long streamId,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes) {
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read data, stream={}, {}-{}, total bytes: {}", streamId, startOffset, endOffset, maxBytes);
- }
- final TraceContext finalTraceContext = new TraceContext(traceContext);
- this.readAheadManager.updateReadProgress(streamId, startOffset);
- TimerUtil timerUtil = new TimerUtil();
- CompletableFuture<ReadDataBlock> readCf = new CompletableFuture<>();
- ReadAheadAgent agent = this.readAheadManager.getOrCreateReadAheadAgent(streamId, startOffset);
- UUID uuid = UUID.randomUUID();
- ReadTaskKey key = new ReadTaskKey(streamId, startOffset, endOffset, maxBytes, uuid);
- ReadTaskContext context = new ReadTaskContext(agent, ReadBlockCacheStatus.INIT);
- this.inflightReadStatusMap.put(key, context);
- // submit the read task to mainExecutor to avoid slowing down the caller thread.
- mainExecutor.execute(() -> {
- try {
- FutureUtil.propagate(read0(finalTraceContext, streamId, startOffset, endOffset, maxBytes, uuid, context).whenComplete((ret, ex) -> {
- if (ex != null) {
- LOGGER.error("read {} [{}, {}), maxBytes: {} from block cache fail", streamId, startOffset, endOffset, maxBytes, ex);
- this.inflightReadThrottle.release(uuid);
- this.inflightReadStatusMap.remove(key);
- return;
- }
- int totalReturnedSize = ret.getRecords().stream().mapToInt(StreamRecordBatch::size).sum();
- this.readAheadManager.updateReadResult(streamId, startOffset,
- ret.getRecords().get(ret.getRecords().size() - 1).getLastOffset(), totalReturnedSize);
-
- long timeElapsed = timerUtil.elapsedAs(TimeUnit.NANOSECONDS);
- boolean isCacheHit = ret.getCacheAccessType() == CacheAccessType.BLOCK_CACHE_HIT;
- StorageOperationStats.getInstance().readBlockCacheStats(isCacheHit).record(timeElapsed);
- Span.fromContext(finalTraceContext.currentContext()).setAttribute("cache_hit", isCacheHit);
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read data complete, cache hit: {}, stream={}, {}-{}, total bytes: {}",
- ret.getCacheAccessType() == CacheAccessType.BLOCK_CACHE_HIT, streamId, startOffset, endOffset, totalReturnedSize);
- }
- this.inflightReadThrottle.release(uuid);
- this.inflightReadStatusMap.remove(key);
- }), readCf);
- } catch (Exception e) {
- LOGGER.error("read {} [{}, {}), maxBytes: {} from block cache fail, {}", streamId, startOffset, endOffset, maxBytes, e);
- this.inflightReadThrottle.release(uuid);
- this.inflightReadStatusMap.remove(key);
- readCf.completeExceptionally(e);
- }
- });
- return readCf;
- }
-
- @WithSpan
- public CompletableFuture<ReadDataBlock> read0(TraceContext traceContext,
- @SpanAttribute long streamId,
- @SpanAttribute long startOffset,
- @SpanAttribute long endOffset,
- @SpanAttribute int maxBytes,
- UUID uuid, ReadTaskContext context) {
- ReadAheadAgent agent = context.agent;
-
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read0, stream={}, {}-{}, total bytes: {}, uuid: {} ", streamId, startOffset, endOffset, maxBytes, uuid);
- }
-
- if (startOffset >= endOffset || maxBytes <= 0) {
- return CompletableFuture.completedFuture(new ReadDataBlock(Collections.emptyList(), CacheAccessType.BLOCK_CACHE_MISS));
- }
-
- long nextStartOffset = startOffset;
- int nextMaxBytes = maxBytes;
-
- ReadAheadTaskContext inflightReadAheadTaskContext = inflightReadAheadTasks.get(new ReadAheadTaskKey(streamId, nextStartOffset));
- if (inflightReadAheadTaskContext != null) {
- CompletableFuture<ReadDataBlock> readCf = new CompletableFuture<>();
- context.setStatus(ReadBlockCacheStatus.WAIT_INFLIGHT_RA);
- inflightReadAheadTaskContext.cf.whenComplete((nil, ex) -> FutureUtil.exec(() -> FutureUtil.propagate(
- read0(traceContext, streamId, startOffset, endOffset, maxBytes, uuid, context), readCf), readCf, LOGGER, "read0"));
- return readCf;
- }
-
- // 1. get from cache
- context.setStatus(ReadBlockCacheStatus.GET_FROM_CACHE);
- BlockCache.GetCacheResult cacheRst = cache.get(traceContext, streamId, nextStartOffset, endOffset, nextMaxBytes);
- List<StreamRecordBatch> cacheRecords = cacheRst.getRecords();
- if (!cacheRecords.isEmpty()) {
- asyncReadAhead(streamId, agent, cacheRst.getReadAheadRecords());
- nextStartOffset = cacheRecords.get(cacheRecords.size() - 1).getLastOffset();
- nextMaxBytes -= Math.min(nextMaxBytes, cacheRecords.stream().mapToInt(StreamRecordBatch::size).sum());
- if (nextStartOffset >= endOffset || nextMaxBytes == 0) {
- // cache hit
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read data hit cache, stream={}, {}-{}, total bytes: {} ", streamId, startOffset, endOffset, maxBytes);
- }
- return CompletableFuture.completedFuture(new ReadDataBlock(cacheRecords, CacheAccessType.BLOCK_CACHE_HIT));
- } else {
- // cache partially hit
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read data partially hit cache, stream={}, {}-{}, total bytes: {} ", streamId, nextStartOffset, endOffset, nextMaxBytes);
- }
- return read0(traceContext, streamId, nextStartOffset, endOffset, nextMaxBytes, uuid, context).thenApply(rst -> {
- List<StreamRecordBatch> records = new ArrayList<>(cacheRecords);
- records.addAll(rst.getRecords());
- return new ReadDataBlock(records, CacheAccessType.BLOCK_CACHE_MISS);
- });
- }
- }
-
- // 2. get from s3
- context.setStatus(ReadBlockCacheStatus.GET_FROM_S3);
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] read data cache miss, stream={}, {}-{}, total bytes: {} ", streamId, startOffset, endOffset, maxBytes);
- }
- return streamReader.syncReadAhead(traceContext, streamId, startOffset, endOffset, maxBytes, agent, uuid)
- .thenCompose(rst -> {
- if (!rst.isEmpty()) {
- int remainBytes = maxBytes - rst.stream().mapToInt(StreamRecordBatch::size).sum();
- long lastOffset = rst.get(rst.size() - 1).getLastOffset();
- if (remainBytes > 0 && lastOffset < endOffset) {
- // retry read
- return read0(traceContext, streamId, lastOffset, endOffset, remainBytes, uuid, context).thenApply(rst2 -> {
- List<StreamRecordBatch> records = new ArrayList<>(rst);
- records.addAll(rst2.getRecords());
- return new ReadDataBlock(records, CacheAccessType.BLOCK_CACHE_MISS);
- });
- }
- }
- return CompletableFuture.completedFuture(new ReadDataBlock(rst, CacheAccessType.BLOCK_CACHE_MISS));
- });
- }
-
- private void asyncReadAhead(long streamId, ReadAheadAgent agent, List<ReadAheadRecord> readAheadRecords) {
- //TODO: read ahead only when there are enough inactive bytes to evict
- if (readAheadRecords.isEmpty()) {
- return;
- }
- ReadAheadRecord lastRecord = readAheadRecords.get(readAheadRecords.size() - 1);
- long nextRaOffset = lastRecord.nextRAOffset();
- int nextRaSize = agent.getNextReadAheadSize();
-
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("[S3BlockCache] async read ahead, stream={}, {}-{}, total bytes: {} ",
- streamId, nextRaOffset, NOOP_OFFSET, nextRaSize);
- }
-
- // check if next ra hits cache
- if (cache.checkRange(streamId, nextRaOffset, nextRaSize)) {
- return;
- }
-
- streamReader.asyncReadAhead(streamId, nextRaOffset, NOOP_OFFSET, nextRaSize, agent);
- }
-
- public enum ReadBlockCacheStatus {
- /* Status for read request */
- INIT,
- WAIT_INFLIGHT_RA,
- GET_FROM_CACHE,
- GET_FROM_S3,
-
- /* Status for read ahead request */
- WAIT_DATA_INDEX,
- WAIT_FETCH_DATA,
- WAIT_THROTTLE,
- }
-
- public static final class ReadAheadTaskKey {
- private final long streamId;
- private final long startOffset;
-
- public ReadAheadTaskKey(long streamId, long startOffset) {
- this.streamId = streamId;
- this.startOffset = startOffset;
- }
-
- public long streamId() {
- return streamId;
- }
-
- public long startOffset() {
- return startOffset;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
- if (obj == null || obj.getClass() != this.getClass())
- return false;
- var that = (ReadAheadTaskKey) obj;
- return this.streamId == that.streamId &&
- this.startOffset == that.startOffset;
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(streamId, startOffset);
- }
-
- @Override
- public String toString() {
- return "ReadAheadTaskKey[" +
- "streamId=" + streamId + ", " +
- "startOffset=" + startOffset + ']';
- }
-
- }
-
- public static class ReadAheadTaskContext {
- final CompletableFuture<Void> cf;
- ReadBlockCacheStatus status;
-
- public ReadAheadTaskContext(CompletableFuture