Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce CGO calls when scanning rows #1291

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cifuzz.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Upload Crash
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v4
if: failure()
with:
name: ${{ matrix.sanitizer }}-artifacts
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ jobs:
GO: ${{ matrix.go }}
steps:
- if: startsWith(matrix.os, 'macos')
run: brew update
run: |
brew update
# Handle issues with the upgrade to icu4c v76.
[[ -e "$(brew --prefix)/opt/icu4c" ]] || \
brew reinstall icu4c

- uses: actions/setup-go@v2
with:
Expand Down
115 changes: 93 additions & 22 deletions sqlite3.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,53 @@ static int sqlite3_system_errno(sqlite3 *db) {
return 0;
}
#endif

#define GO_SQLITE3_DECL_DATE (1 << 7)
#define GO_SQLITE3_DECL_BOOL (1 << 6)
#define GO_SQLITE3_DECL_MASK (GO_SQLITE3_DECL_DATE | GO_SQLITE3_DECL_BOOL)
#define GO_SQLITE3_TYPE_MASK (GO_SQLITE3_DECL_BOOL - 1)

// _sqlite3_column_decltypes stores the declared column type in the typs array.
// This function must always be called before _sqlite3_column_types since it
// overwrites the datatype.
static void _sqlite3_column_decltypes(sqlite3_stmt* stmt, uint8_t *typs, int ntyps) {
Copy link
Collaborator

@rittneje rittneje Nov 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it somewhat confusing that this ends up using one integer to contain both the derived declared type and also the real column type.

How much savings would there be relative to today if instead of doing this, we just added a function like static int _sqlite3_column_derived_decltype(sqlite3_stmt* stmt,int iCol)? Then that could return values from your "enum" (0, GO_SQLITE3_DECL_BOOL, or GO_SQLITE3_DECL_DATE), and would not need any bitmasking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it somewhat confusing that this ends up using one integer to contain both the derived declared type and also the real column type.

Yeah, it's a bit tricky. I originally used two slices to store the derived declared type and real type separately, but switching to using one slice and a the bit mask since it saves an alloc.

How much savings would there be relative to today if instead of doing this, we just added a function like static int _sqlite3_column_derived_decltype(sqlite3_stmt* stmt,int iCol)? Then that could return values from your "enum" (0, GO_SQLITE3_DECL_BOOL, or GO_SQLITE3_DECL_DATE), and would not need any bitmasking.

Most of the performance improvement here comes from batching the CGO calls needed to get the column's declared and real type (basically, the performance improvement comes from reducing the number of CGO calls - see BenchmarkStmt10Cols). So going back to having to call a CGO function for each column would negate most of the performance improvements here.

for (int i = 0; i < ntyps; i++) {
const char *typ = sqlite3_column_decltype(stmt, i);
if (typ == NULL) {
typs[i] = 0;
continue;
}
switch (typ[0]) {
case 'b':
case 'B':
if (!sqlite3_stricmp(typ, "boolean")) {
typs[i] = GO_SQLITE3_DECL_BOOL;
}
break;
case 'd':
case 'D':
if (!sqlite3_stricmp(typ, "date") || !sqlite3_stricmp(typ, "datetime")) {
typs[i] = GO_SQLITE3_DECL_DATE;
}
break;
case 't':
case 'T':
if (!sqlite3_stricmp(typ, "timestamp")) {
typs[i] = GO_SQLITE3_DECL_DATE;
}
break;
default:
typs[i] = 0;
}
}
}

static void _sqlite3_column_types(sqlite3_stmt *stmt, uint8_t *typs, int ntyps) {
for (int i = 0; i < ntyps; i++) {
typs[i] &= GO_SQLITE3_DECL_MASK; // clear lower bits
typs[i] |= (uint8_t)sqlite3_column_type(stmt, i);
}
}
*/
import "C"
import (
Expand Down Expand Up @@ -239,12 +286,6 @@ var SQLiteTimestampFormats = []string{
"2006-01-02",
}

const (
columnDate string = "date"
columnDatetime string = "datetime"
columnTimestamp string = "timestamp"
)

// This variable can be replaced with -ldflags like below:
// go build -ldflags="-X 'github.com/mattn/go-sqlite3.driverName=my-sqlite3'"
var driverName = "sqlite3"
Expand Down Expand Up @@ -390,12 +431,31 @@ type SQLiteResult struct {
changes int64
}

// A columnType is a compact representation of sqlite3 columns datatype and
// declared type. The first two bits store the declared type and the remaining
// six bits store the sqlite3 datatype.
type columnType uint8

// declType returns the declared type, which is currently GO_SQLITE3_DECL_DATE
// or GO_SQLITE3_DECL_BOOL, since those are the only two types that we need for
// converting values.
func (c columnType) declType() int {
return int(c) & C.GO_SQLITE3_DECL_MASK
}

// dataType returns the sqlite3 datatype code of the column, which is the
// result of sqlite3_column_type.
func (c columnType) dataType() int {
return int(c) & C.GO_SQLITE3_TYPE_MASK
}

// SQLiteRows implements driver.Rows.
type SQLiteRows struct {
s *SQLiteStmt
nc int
cols []string
decltype []string
coltype []columnType
cls bool
closed bool
ctx context.Context // no better alternative to pass context into Next() method
Expand Down Expand Up @@ -2146,7 +2206,10 @@ func (rc *SQLiteRows) Columns() []string {
return rc.cols
}

func (rc *SQLiteRows) declTypes() []string {
// DeclTypes return column types.
func (rc *SQLiteRows) DeclTypes() []string {
rc.s.mu.Lock()
defer rc.s.mu.Unlock()
if rc.s.s != nil && rc.decltype == nil {
rc.decltype = make([]string, rc.nc)
for i := 0; i < rc.nc; i++ {
Expand All @@ -2156,13 +2219,6 @@ func (rc *SQLiteRows) declTypes() []string {
return rc.decltype
}

// DeclTypes return column types.
func (rc *SQLiteRows) DeclTypes() []string {
rc.s.mu.Lock()
defer rc.s.mu.Unlock()
return rc.declTypes()
}

// Next move cursor to next. Attempts to honor context timeout from QueryContext call.
func (rc *SQLiteRows) Next(dest []driver.Value) error {
rc.s.mu.Lock()
Expand Down Expand Up @@ -2195,6 +2251,13 @@ func (rc *SQLiteRows) Next(dest []driver.Value) error {
}
}

func (rc *SQLiteRows) colTypePtr() *C.uint8_t {
if len(rc.coltype) == 0 {
return nil
}
return (*C.uint8_t)(unsafe.Pointer(&rc.coltype[0]))
}

// nextSyncLocked moves cursor to next; must be called with locked mutex.
func (rc *SQLiteRows) nextSyncLocked(dest []driver.Value) error {
rv := C._sqlite3_step_internal(rc.s.s)
Expand All @@ -2208,15 +2271,24 @@ func (rc *SQLiteRows) nextSyncLocked(dest []driver.Value) error {
}
return nil
}
if len(dest) == 0 {
return nil
}

rc.declTypes()
if rc.coltype == nil {
rc.coltype = make([]columnType, rc.nc)
C._sqlite3_column_decltypes(rc.s.s, rc.colTypePtr(), C.int(rc.nc))
}
// Must call this each time since sqlite3 is loosely
// typed and the column types can vary between rows.
C._sqlite3_column_types(rc.s.s, rc.colTypePtr(), C.int(rc.nc))

for i := range dest {
switch C.sqlite3_column_type(rc.s.s, C.int(i)) {
switch rc.coltype[i].dataType() {
case C.SQLITE_INTEGER:
val := int64(C.sqlite3_column_int64(rc.s.s, C.int(i)))
switch rc.decltype[i] {
case columnTimestamp, columnDatetime, columnDate:
switch rc.coltype[i].declType() {
case C.GO_SQLITE3_DECL_DATE:
var t time.Time
// Assume a millisecond unix timestamp if it's 13 digits -- too
// large to be a reasonable timestamp in seconds.
Expand All @@ -2231,7 +2303,7 @@ func (rc *SQLiteRows) nextSyncLocked(dest []driver.Value) error {
t = t.In(rc.s.c.loc)
}
dest[i] = t
case "boolean":
case C.GO_SQLITE3_DECL_BOOL:
dest[i] = val > 0
default:
dest[i] = val
Expand All @@ -2255,8 +2327,7 @@ func (rc *SQLiteRows) nextSyncLocked(dest []driver.Value) error {
n := int(C.sqlite3_column_bytes(rc.s.s, C.int(i)))
s := C.GoStringN((*C.char)(unsafe.Pointer(C.sqlite3_column_text(rc.s.s, C.int(i)))), C.int(n))

switch rc.decltype[i] {
case columnTimestamp, columnDatetime, columnDate:
if rc.coltype[i].declType() == C.GO_SQLITE3_DECL_DATE {
var t time.Time
s = strings.TrimSuffix(s, "Z")
for _, format := range SQLiteTimestampFormats {
Expand All @@ -2273,7 +2344,7 @@ func (rc *SQLiteRows) nextSyncLocked(dest []driver.Value) error {
t = t.In(rc.s.c.loc)
}
dest[i] = t
default:
} else {
dest[i] = s
}
}
Expand Down
79 changes: 72 additions & 7 deletions sqlite3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2030,7 +2030,7 @@ func BenchmarkCustomFunctions(b *testing.B) {
}

func TestSuite(t *testing.T) {
initializeTestDB(t)
initializeTestDB(t, false)
defer freeTestDB()

for _, test := range tests {
Expand All @@ -2039,7 +2039,7 @@ func TestSuite(t *testing.T) {
}

func BenchmarkSuite(b *testing.B) {
initializeTestDB(b)
initializeTestDB(b, true)
defer freeTestDB()

for _, benchmark := range benchmarks {
Expand Down Expand Up @@ -2068,8 +2068,13 @@ type TestDB struct {

var db *TestDB

func initializeTestDB(t testing.TB) {
tempFilename := TempFilename(t)
func initializeTestDB(t testing.TB, memory bool) {
var tempFilename string
if memory {
tempFilename = ":memory:"
} else {
tempFilename = TempFilename(t)
}
d, err := sql.Open("sqlite3", tempFilename+"?_busy_timeout=99999")
if err != nil {
os.Remove(tempFilename)
Expand All @@ -2084,9 +2089,11 @@ func freeTestDB() {
if err != nil {
panic(err)
}
err = os.Remove(db.tempFilename)
if err != nil {
panic(err)
if db.tempFilename != "" && db.tempFilename != ":memory:" {
err := os.Remove(db.tempFilename)
if err != nil {
panic(err)
}
}
}

Expand All @@ -2111,6 +2118,7 @@ var benchmarks = []testing.InternalBenchmark{
{Name: "BenchmarkStmt", F: benchmarkStmt},
{Name: "BenchmarkRows", F: benchmarkRows},
{Name: "BenchmarkStmtRows", F: benchmarkStmtRows},
{Name: "BenchmarkStmt10Cols", F: benchmarkStmt10Cols},
}

func (db *TestDB) mustExec(sql string, args ...any) sql.Result {
Expand Down Expand Up @@ -2568,3 +2576,60 @@ func benchmarkStmtRows(b *testing.B) {
}
}
}

func benchmarkStmt10Cols(b *testing.B) {
db.once.Do(makeBench)

const createTableStmt = `
DROP TABLE IF EXISTS bench_cols;
VACUUM;
CREATE TABLE bench_cols (
r0 INTEGER NOT NULL,
r1 INTEGER NOT NULL,
r2 INTEGER NOT NULL,
r3 INTEGER NOT NULL,
r4 INTEGER NOT NULL,
r5 INTEGER NOT NULL,
r6 INTEGER NOT NULL,
r7 INTEGER NOT NULL,
r8 INTEGER NOT NULL,
r9 INTEGER NOT NULL
);`
if _, err := db.Exec(createTableStmt); err != nil {
b.Fatal(err)
}
for i := int64(0); i < 4; i++ {
_, err := db.Exec("INSERT INTO bench_cols VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
i, i, i, i, i, i, i, i, i, i)
if err != nil {
b.Fatal(err)
}
}

stmt, err := db.Prepare("SELECT * FROM bench_cols;")
if err != nil {
b.Fatal(err)
}
defer stmt.Close()

b.ResetTimer()
var (
v0, v1, v2, v3, v4 int64
v5, v6, v7, v8, v9 int64
)
for i := 0; i < b.N; i++ {
rows, err := stmt.Query()
if err != nil {
b.Fatal(err)
}
for rows.Next() {
err := rows.Scan(&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9)
if err != nil {
b.Fatal(err)
}
}
if err := rows.Err(); err != nil {
b.Fatal(err)
}
}
}
Loading