diff --git a/.cargo/config b/.cargo/config.toml similarity index 100% rename from .cargo/config rename to .cargo/config.toml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index eba99f4..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -name: Bug report -about: An issue with rust connectorx or python connectorx -title: '' -labels: 'bug' -assignees: '' ---- - -#### What language are you using? - -Replace this text with the **Rust** or **Python**. - -#### What version are you using? - -Replace this text with the version. - -#### What database are you using? - -e.g. PostgreSQL, MySQL - -#### What dataframe are you using? - -e.g. Pandas, Arrow - -#### Can you describe your bug? - -Give a high level description of the bug. - -#### What are the steps to reproduce the behavior? - -If possible, please include a **minimal simple** example including: - -##### Database setup if the error only happens on specific data or data type - -Table schema and example data - -##### Example query / code - -``` -your -code -goes -here -``` - -#### What is the error? - -Show the error result here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 2ef8f68..0000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: Feature request -about: Suggest a new feature for connectorx -title: '' -labels: 'feature' -assignees: '' ---- - - -#### Describe your feature request - -Please describe the behavior you want and the motivation. diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md deleted file mode 100644 index a437c9b..0000000 --- a/.github/ISSUE_TEMPLATE/question.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: Question -about: Ask a question about connectorx -title: '' -labels: '' -assignees: '' ---- - -#### Try Discussion? 
- -You can ask questions in [discussions](https://github.com/sfu-db/connector-x/discussions/categories/q-a). - -#### Other - -Feel free to ask here if you think it's more suitable. diff --git a/.github/config/db1.json b/.github/config/db1.json deleted file mode 100644 index 648955c..0000000 --- a/.github/config/db1.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "driver": "org.postgresql.Driver", - "url": "jdbc:postgresql://postgres:5432/postgres", - "username": "postgres", - "password": "postgres", - "costParams": { - "join": 10.0, - "agg": 20.0, - "sort": 20.0, - "trans": 3.0 - }, - "dialect": "postgres", - "cardEstType": "postgres", - "partitionType": "postgres" -} \ No newline at end of file diff --git a/.github/config/db2.json b/.github/config/db2.json deleted file mode 100644 index 648955c..0000000 --- a/.github/config/db2.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "driver": "org.postgresql.Driver", - "url": "jdbc:postgresql://postgres:5432/postgres", - "username": "postgres", - "password": "postgres", - "costParams": { - "join": 10.0, - "agg": 20.0, - "sort": 20.0, - "trans": 3.0 - }, - "dialect": "postgres", - "cardEstType": "postgres", - "partitionType": "postgres" -} \ No newline at end of file diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index ebd950d..0000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: benchmark - -on: - workflow_dispatch: - push: - branches: - - prerelease - - main - pull_request: - branches: - - main - -jobs: - benchmark: - runs-on: [self-hosted, linux, x64, connectorx-benchmark] - steps: - - uses: actions/checkout@v2 - - - name: Install tools - run: | - apt-get update - apt-get install -y curl postgresql-client build-essential python3-dev python3-pip pkg-config libssl-dev git sqlite3 libsqlite3-dev mysql-client libmysqlclient-dev libkrb5-dev libclang-dev - env: - DEBIAN_FRONTEND: noninteractive - - - name: Install Rust - uses: actions-rs/toolchain@v1 - 
with: - toolchain: stable - components: rustfmt - default: true - - - name: Install other dependent tools - run: | - pip3 install poetry - if [ ! -f "$HOME/.cargo/bin/just" ]; then curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.cargo/bin; fi - - - name: Install python dependencies - run: just bootstrap-python - - - name: Run benchmarks - run: just benchmark-report - env: - POSTGRES_URL: ${{ secrets.POSTGRES_URL }} - MYSQL_URL: ${{ secrets.MYSQL_URL }} - TPCH_TABLE: lineitem - - - name: Ignore git safe directory error - run: git config --global --add safe.directory /tmp/github-runner-connectorx/connector-x/connector-x - - - name: Show benchmark result for pull request - if: ${{ github.event_name == 'pull_request'}} - uses: rhysd/github-action-benchmark@v1 - with: - name: "ConnectorX TPC-H Scale@1 Benchmarks" - tool: "pytest" - output-file-path: benchmark.json - github-token: ${{ secrets.GITHUB_TOKEN }} - auto-push: false - save-data-file: false - fail-threshold: "200%" - comment-always: true - fail-on-alert: true - - - name: Store benchmark result for push operator - if: ${{ github.event_name == 'push'}} - uses: rhysd/github-action-benchmark@v1 - with: - name: "ConnectorX TPC-H Scale@1 Benchmarks" - tool: "pytest" - output-file-path: benchmark.json - github-token: ${{ secrets.GITHUB_TOKEN }} - auto-push: true - alert-threshold: "100%" - fail-threshold: "200%" - comment-always: true - fail-on-alert: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index a425409..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,259 +0,0 @@ -name: ci - -on: - pull_request: - branches: - - main - - prerelease - push: - branches: - - main - -jobs: - # rust: - # runs-on: ubuntu-latest - # container: ubuntu:20.04 - # services: - # # Label used to access the service container - # postgres: - # # Docker Hub image - # image: postgres - # env: - # POSTGRES_PASSWORD: postgres - # # Set health 
checks to wait until postgres has started - # options: >- - # --health-cmd pg_isready - # --health-interval 10s - # --health-timeout 5s - # --health-retries 5 - # # mysql - # mysql: - # image: ghcr.io/wangxiaoying/mysql:latest - # env: - # MYSQL_DATABASE: mysql - # MYSQL_ROOT_PASSWORD: mysql - # LANG: C.UTF-8 - # ports: - # - 3306:3306 - # options: >- - # --health-cmd "mysqladmin ping" - # --health-interval 10s - # --health-timeout 10s - # --health-retries 5 - # mssql: - # image: mcr.microsoft.com/mssql/server:2019-latest - # env: - # ACCEPT_EULA: y - # SA_PASSWORD: mssql!Password - # ports: - # - 1433:1433 - # options: >- - # --health-cmd "/opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P \"$SA_PASSWORD\" -Q 'SELECT 1' || exit 1" - # --health-interval 10s - # --health-timeout 5s - # --health-retries 20 - # steps: - # - uses: actions/checkout@v2 - - # - name: Install tools - # run: | - # apt-get update - # apt-get install -y curl postgresql-client build-essential pkg-config libssl-dev git sqlite3 libsqlite3-dev mysql-client python3 python3-pip libicu66 libkrb5-dev libclang-dev - # pip3 install mssql-cli - # pip3 install cli-helpers==2.2.0 - # ln -s /usr/bin/python3 /usr/bin/python - # echo "Cache Version ${{ secrets.CACHE_VERSION }}" - # env: - # DEBIAN_FRONTEND: noninteractive - - # - name: Install Rust - # uses: actions-rs/toolchain@v1 - # with: - # toolchain: stable - # components: rustfmt - # default: true - - # - uses: actions/cache@v2 - # with: - # path: | - # ~/.cargo/bin/ - # ~/.cargo/registry/index/ - # ~/.cargo/registry/cache/ - # ~/.cargo/git/db/ - # target/ - # key: ${{ runner.os }}-cargo-${{ secrets.CACHE_VERSION }}-${{ hashFiles('**/Cargo.lock') }} - - # - name: Install other dependent tools - # run: | - # if [ ! 
-f "$HOME/.cargo/bin/just" ]; then curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.cargo/bin; fi - - # - name: Seed the database - # run: just seed-db - # env: - # POSTGRES_URL: "postgresql://postgres:postgres@postgres:5432/postgres" - # SQLITE_URL: "sqlite:///tmp/test.db" - # MYSQL_HOST: mysql - # MYSQL_PORT: 3306 - # MYSQL_DB: mysql - # MYSQL_USER: root - # MYSQL_PASSWORD: mysql - # MSSQL_HOST: mssql - # MSSQL_PORT: 1433 - # MSSQL_DB: tempdb - # MSSQL_USER: sa - # MSSQL_PASSWORD: mssql!Password - - # - name: Is the code formatted? - # uses: actions-rs/cargo@v1 - # with: - # command: fmt - # args: --all -- --check -q - - # - name: Clippy linting - # uses: actions-rs/cargo@v1 - # with: - # command: clippy - # args: --features all - - # - name: Check each feature gate - # run: just test-feature-gate - - # - name: Run tests - # run: cargo clean && just test - # env: - # POSTGRES_URL: "postgresql://postgres:postgres@postgres:5432/postgres" - # SQLITE_URL: "sqlite:///tmp/test.db" - # MYSQL_URL: "mysql://root:mysql@mysql:3306/mysql" - # MSSQL_URL: "mssql://sa:mssql!Password@mssql:1433/tempdb" - - # - name: Test build docs - # uses: actions-rs/cargo@v1 - # with: - # command: doc - # args: --no-deps --features all - - python: - runs-on: ubuntu-latest - container: ubuntu:20.04 - services: - # Label used to access the service container - postgres: - # Docker Hub image - image: postgres - env: - POSTGRES_PASSWORD: postgres - # Set health checks to wait until postgres has started - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - # mysql - mysql: - image: ghcr.io/wangxiaoying/mysql:latest - env: - MYSQL_DATABASE: mysql - MYSQL_ROOT_PASSWORD: mysql - LANG: C.UTF-8 - ports: - - 3306:3306 - options: >- - --health-cmd "mysqladmin ping" - --health-interval 10s - --health-timeout 10s - --health-retries 5 - mssql: - image: mcr.microsoft.com/mssql/server:2019-latest - env: - 
ACCEPT_EULA: y - SA_PASSWORD: mssql!Password - ports: - - 1433:1433 - options: >- - --health-cmd "/opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P \"$SA_PASSWORD\" -Q 'SELECT 1' || exit 1" - --health-interval 10s - --health-timeout 5s - --health-retries 20 - - steps: - - uses: actions/checkout@v2 - - - name: Install tools - run: | - apt-get update - apt-get install -y curl postgresql-client build-essential python3-dev python3-pip pkg-config libssl-dev git sqlite3 libsqlite3-dev mysql-client libmysqlclient-dev python3 python3-pip libicu66 libkrb5-dev libclang-dev - pip3 install mssql-cli - pip3 install cli-helpers==2.2.0 - ln -s /usr/bin/python3 /usr/bin/python - env: - DEBIAN_FRONTEND: noninteractive - - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt - default: true - - - uses: actions/setup-java@v3 - with: - distribution: "temurin" - java-version: "17" - - - uses: actions/cache@v2 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-python-${{ secrets.CACHE_VERSION }}-${{ hashFiles('**/Cargo.lock') }} - - - name: Install other dependent tools - run: | - pip3 install poetry - if [ ! 
-f "$HOME/.cargo/bin/just" ]; then curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.cargo/bin; fi - - - name: Seed the database - run: just seed-db - env: - POSTGRES_URL: "postgresql://postgres:postgres@postgres:5432/postgres" - SQLITE_URL: "sqlite:///tmp/test.db" - MYSQL_HOST: mysql - MYSQL_PORT: 3306 - MYSQL_DB: mysql - MYSQL_USER: root - MYSQL_PASSWORD: mysql - MSSQL_HOST: mssql - MSSQL_PORT: 1433 - MSSQL_DB: tempdb - MSSQL_USER: sa - MSSQL_PASSWORD: mssql!Password - - - name: Clippy linting - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --features all --all - - - name: Cache venv - uses: actions/cache@v2 - with: - path: ~/.cache/pypoetry/virtualenvs - key: ${{ runner.os }}-venv-${{ secrets.CACHE_VERSION }}-${{ hashFiles('connectorx-python/poetry.lock') }} - - - name: Install python dependencies - run: just bootstrap-python - - - name: Test python - run: just test-python - env: - POSTGRES_URL: "postgresql://postgres:postgres@postgres:5432/postgres" - SQLITE_URL: "sqlite:///tmp/test.db" - MYSQL_URL: "mysql://root:mysql@mysql:3306/mysql" - MSSQL_URL: "mssql://sa:mssql!Password@mssql:1433/tempdb" - DB1: "postgresql://postgres:postgres@postgres:5432/postgres" - DB2: "postgresql://postgres:postgres@postgres:5432/postgres" - FED_CONFIG_PATH: ${{ github.workspace }}/.github/config - SQLITE3_STATIC: 1 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index d475ee9..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: docs - -on: - push: - branches: - - main - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt - default: true - - - name: Intall Python - uses: actions/setup-python@v1 - with: - python-version: 3.7 - - - name: Install dependencies - run: | - pip install -r docs/requirements.txt - sudo apt-get update - 
sudo apt-get install -y libkrb5-dev libclang-dev - - - name: Build the book - run: | - jupyter-book build docs - - - name: Build Rust Docs - uses: actions-rs/cargo@v1 - with: - command: doc - args: --no-deps --features all - - - name: Move the rust doc into jupyter book - run: mv target/doc ./docs/_build/html/rust-docs - - - name: Add .nojekyll - run: touch ./docs/_build/html/.nojekyll - - - name: Deploy Docs 🚀 - uses: JamesIves/github-pages-deploy-action@4.0.0 - with: - branch: gh-pages # The branch the action should deploy to. - folder: ./docs/_build/html # The folder the action should deploy. - clean-exclude: dev diff --git a/.github/workflows/import-test.yml b/.github/workflows/import-test.yml deleted file mode 100644 index 20aa1d7..0000000 --- a/.github/workflows/import-test.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: import-test - -on: - workflow_dispatch: - inputs: - indexUrl: - description: "Index Url" - required: true - default: "https://test.pypi.org/simple/" - version: - description: "version" - required: false - default: "" - -jobs: - check: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, macos-10.15, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] - steps: - - uses: actions/checkout@v2 - - - name: Setup python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - name: Check which python we use - run: | - echo $(which python) - - - name: Install ConnectorX - POSIX - if: ${{ matrix.os != 'windows-latest' }} - run: | - pip install numpy importlib-metadata - - version='${{ github.event.inputs.version }}' - if [ -z $version ] - then - pip install --index-url ${{ github.event.inputs.indexUrl }} connectorx - else - pip install --index-url ${{ github.event.inputs.indexUrl }} connectorx==$version - fi - - - name: Install ConnectorX - Windows - if: ${{ matrix.os == 'windows-latest' }} - run: | - pip install numpy importlib-metadata - - $version = '${{ 
github.event.inputs.version }}' - if ($version) { - pip install --index-url ${{ github.event.inputs.indexUrl }} connectorx==$version - } else { - pip install --index-url ${{ github.event.inputs.indexUrl }} connectorx - } - - - name: Import - run: python -c "from connectorx import read_sql" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 3d7564d..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,263 +0,0 @@ -name: release - -on: - push: - branches: - - prerelease - - release - -jobs: - linux: - runs-on: ubuntu-latest - container: quay.io/pypa/manylinux_2_28_x86_64 - strategy: - matrix: - python-version: [[38, "3.8"], [39, "3.9"], [310, "3.10"], [311, "3.11"]] - steps: - - uses: actions/checkout@v2 - - - name: Set python version - run: | - echo "/opt/python/cp${{ matrix.python-version[0] }}-cp${{ matrix.python-version[0] }}/bin" >> $GITHUB_PATH - - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt - target: aarch64-unknown-linux-gnu - default: true - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - uses: Gr1N/setup-poetry@v8 - - - name: Install tools - run: | - yum install -y epel-release - yum install -y mysql-devel postgresql-devel freetds-devel krb5-libs clang-devel - - - name: Setup project - run: | - just bootstrap-python - - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml -i python --release --manylinux 2_28 --features integrated-auth-gssapi - env: - SQLITE3_STATIC: 1 - - - name: Copy j4rs dependencies into dist - run: | - cp -rf connectorx-python/target/release/jassets connectorx-python/connectorx/dependencies - - # rebuild the wheel to incorporate j4rs dependencies - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml 
-i python --release --manylinux 2_28 --features integrated-auth-gssapi - env: - SQLITE3_STATIC: 1 - - # - uses: PyO3/maturin-action@v1 - # with: - # maturin-version: v0.14.15 - # command: build - # args: -m connectorx-python/Cargo.toml --target aarch64-unknown-linux-gnu -i python --release --manylinux 2_28 --features integrated-auth-gssapi - # env: - # SQLITE3_STATIC: 1 - - - uses: actions/upload-artifact@v3 - with: - name: "ubuntu-latest-${{ matrix.python-version[1] }}" - path: connectorx-python/target/wheels/*.whl - - win-and-mac: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: ["windows-latest", "macos-11"] - python-version: ["3.8", "3.9", "3.10", "3.11"] - include: - - os: "macos-11" - features: "--features integrated-auth-gssapi" - steps: - - uses: actions/checkout@v2 - - - uses: ankane/setup-mysql@v1 - with: - mysql-version: 8 - - - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt - default: true - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - uses: Gr1N/setup-poetry@v8 - - - name: Setup project - run: | - just bootstrap-python - - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml -i python --release ${{ matrix.features }} - env: - SQLITE3_STATIC: 1 - - - name: Copy j4rs dependencies into dist - run: | - cp -r connectorx-python/target/release/jassets connectorx-python/connectorx/dependencies - - # rebuild the wheel to incorporate j4rs dependencies - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml -i python --release ${{ matrix.features }} - env: - SQLITE3_STATIC: 1 - - - uses: actions/upload-artifact@v2 - with: - name: "${{ matrix.os }}-${{ matrix.python-version }}" - 
path: connectorx-python/target/wheels/*.whl - - apple-arm: - runs-on: macos-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - steps: - - uses: actions/checkout@v2 - - - uses: ankane/setup-mysql@v1 - with: - mysql-version: 8 - - - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt - target: aarch64-apple-darwin - default: true - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - uses: Gr1N/setup-poetry@v8 - - - name: Setup project - run: | - just bootstrap-python - - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml --target aarch64-apple-darwin -i python --release --features integrated-auth-gssapi - env: - SQLITE3_STATIC: 1 - - - name: Copy j4rs dependencies into dist - run: | - cp -rf connectorx-python/target/aarch64-apple-darwin/release/jassets connectorx-python/connectorx/dependencies - - # rebuild the wheel to incorporate j4rs dependencies - - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - maturin-version: v0.14.15 - command: build - args: -m connectorx-python/Cargo.toml --target aarch64-apple-darwin -i python --release --features integrated-auth-gssapi - env: - SQLITE3_STATIC: 1 - - - uses: actions/upload-artifact@v2 - with: - name: "macos-${{ matrix.python-version }}" - path: connectorx-python/target/wheels/*.whl - - verify: - runs-on: ${{ matrix.os }} - needs: [win-and-mac, linux, apple-arm] - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - os: [macos-11, ubuntu-latest, windows-latest] - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - uses: actions/download-artifact@v3 - with: - name: "${{ matrix.os }}-${{ matrix.python-version 
}}" - - - run: | - pip install *.whl - python -c "import connectorx" - - upload: - runs-on: ubuntu-latest - needs: [verify] - steps: - - name: Download all artifacts - uses: actions/download-artifact@v3 - - - name: Setup environment - run: | - tree . - echo "/home/runner/.local/bin" >> $GITHUB_PATH - - - name: Install Twine - run: pip install twine - - - name: Upload to PyPI site - if: github.ref == 'refs/heads/release' - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: | - for file in $(ls **/*) - do - twine upload --non-interactive -u __token__ -p $PYPI_TOKEN $file || continue - done - - - name: Upload to PyPI test site - if: github.ref == 'refs/heads/prerelease' - env: - PYPI_TEST_TOKEN: ${{ secrets.PYPI_TEST_TOKEN }} - run: | - for file in $(ls **/*) - do - twine upload --non-interactive --repository-url https://test.pypi.org/legacy/ -u __token__ -p $PYPI_TEST_TOKEN $file --verbose || continue - done diff --git a/.gitignore b/.gitignore index e9b003d..f777a9e 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,5 @@ benchmark.json docs/_build connectorx/examples/test.rs *.duckdb -federated-query/ +.envrc +.direnv/ diff --git a/Benchmark.md b/Benchmark.md deleted file mode 100644 index 1a57f1a..0000000 --- a/Benchmark.md +++ /dev/null @@ -1,158 +0,0 @@ -# Benchmark Setup - -## Postgres (Docker) - -1. Download PostgreSQL from docker -``` -docker pull postgres -``` - -2. Create a directory for mount point (Optional) -``` -mkdir -p $YOUR_DOCKER_DIR/docker/volumes/postgres -``` - -3. Run PostgreSQL: -``` -# With local mount point -docker run --rm --name pg-connector -e POSTGRES_USER=postgres -e POSTGRES_DB=tpch -e POSTGRES_PASSWORD=postgres -d -p 5432:5432 -v $YOUR_DOCKER_DIR/docker/volumes/postgres:/var/lib/postgresql/data postgres -c shared_buffers=1024MB - -# Without local mount point -docker run --rm --name pg-connector -e POSTGRES_USER=postgres -e POSTGRES_DB=tpch -e POSTGRES_PASSWORD=postgres -d -p 5432:5432 -c shared_buffers=1024MB -``` - -## TPC-H - -1. 
Download TPC-H toolkit and compile: -``` -git clone https://github.com/gregrahn/tpch-kit.git -cd tpch-kit/dbgen && make MACHINE=LINUX DATABASE=POSTGRESQL -``` - -2. Generate `LINEITEM` table with scale factor 10 -``` -# Generate all tables -./dbgen -s 10 - -# Alternatively you can only generate LINEITEM table using -T option -./dbgen -s 10 -T L -``` - -3. Create table and load schema -``` -createdb -h localhost -U postgres tpch -psql -h localhost -U postgres -d tpch < dss.ddl -``` - -4. Load data into PostgreSQL -``` -psql -h localhost -U postgres -d tpch -c "\copy LINEITEM FROM '$YOUR_TPCH_DIR/tpch-kit/dbgen/lineitem.tbl' DELIMITER '|' ENCODING 'LATIN1';" -``` - -5. Create index for `LINEITEM` on `l_orderkey` -``` -psql -h localhost -U postgres -d tpch -c "CREATE INDEX lineitem_l_orderkey_idx ON LINEITEM USING btree (l_orderkey);" -``` - -## Redshift: Upload TPC-H -> Note: For Redshift, AWS has already hosted TPC-H data in public s3. We borrow the uploading script from [amazon-redshift-utils](https://github.com/awslabs/amazon-redshift-utils/blob/master/src/CloudDataWarehouseBenchmark/Cloud-DWB-Derived-from-TPCH/3TB/ddl.sql). We only modified `LINEITEM`'s sortkey from `(l_shipdate,l_orderkey)` to `(l_orderkey)`. - -1. Make the following changes in the COPY commands of `script/benchmarks/tpch-reshift.sql`: - - 1. Change `credentials` accordingly from Redshift. - 2. (Optional) Change TPC-H data size in `from` s3 string. Currently it is 10GB (equivilant to TPC-H scale factor 10). It can be change to 3TB. - -2. 
Run modified `tpch-reshift.sql` for Redshift: -``` -psql -h -U -d -p -f tpch-reshift.sql -``` - -# Benchmark result on AWS r5.4xlarge - -We load the lineitem table of TPC-H @ scale=10 into a r5.4xlarge EC2 machine on AWS for each database, and then run ConnectorX to download data from the database -on another r5.4xlarge machine, with the following command: - -```python -import connectorx as cx - -cx.read_sql("connection string", "SELECT * FROM lineitem", partition_on="l_orderkey", partition_num=4) -``` - -Here are the baselines we compare againt: -* Pandas -* Modin -* Dask -* Turbodbc - -Since Modin and Dask support parallel execution, we use the same number of cores (4) to run them. For Turbodbc, we use the result NumPy arrays to construct the final Pandas.DataFrame for a fair comparison. - -## Postgres (db.m6g.4xlarge RDS) - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **13x** less time compared with Pandas. - -## MySQL (db.m6g.4xlarge RDS) - - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **8x** less time compared with Pandas. - - -## SQLite (r5.4xlarge EC2 same instance) - -**Turbodbc does not support read_sql on SQLite** - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **2x** less memory and **5x** less time compared with Pandas. - - -## Oracle (db.r5.4xlarge RDS) - -**Modin and Turbodbc does not support read_sql on Oracle** - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **3x** less time compared with Pandas. - - - -## Mssql (r5.4xlarge docker in another EC2 instance) - -**Modin does not support read_sql on Mssql** - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **14x** less time compared with Pandas. diff --git a/Cargo.lock b/Cargo.lock index c640e98..d865bd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,52 +19,38 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", "once_cell", "version_check", ] [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "const-random", - "getrandom 0.2.10", + "getrandom 0.2.12", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "1.0.5" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - [[package]] name = "allocator-api2" version = "0.2.16" @@ -97,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = 
"1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "argminmax" @@ -116,12 +102,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - [[package]] name = "arrayvec" version = "0.7.4" @@ -130,19 +110,15 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash 0.8.3", "arrow-arith", "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-csv", "arrow-data", - "arrow-ipc", - "arrow-json", "arrow-ord", "arrow-row", "arrow-schema", @@ -152,9 +128,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,26 +143,25 @@ dependencies = [ [[package]] name = "arrow-array" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" +checksum = 
"d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "chrono-tz", "half 2.3.1", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "num", ] [[package]] name = "arrow-buffer" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half 2.3.1", @@ -195,46 +170,27 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "base64 0.21.7", "chrono", - "comfy-table", "half 2.3.1", "lexical-core", "num", ] -[[package]] -name = "arrow-csv" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - [[package]] name = "arrow-data" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -252,45 +208,11 @@ dependencies = [ "serde", ] -[[package]] -name = "arrow-ipc" -version = "46.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", -] - -[[package]] -name = "arrow-json" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "indexmap 2.0.0", - "lexical-core", - "num", - "serde", - "serde_json", -] - [[package]] name = "arrow-ord" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,34 +225,32 @@ dependencies = [ [[package]] name = "arrow-row" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "half 2.3.1", - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] name = "arrow-schema" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" -dependencies = [ - "bitflags 2.4.0", -] +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" [[package]] name = "arrow-select" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ + "ahash 0.8.7", "arrow-array", "arrow-buffer", "arrow-data", @@ -340,9 +260,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "46.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -351,7 +271,7 @@ dependencies = [ "arrow-select", "num", "regex", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -360,7 +280,7 @@ version = "0.17.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59c468daea140b747d781a1da9f7db5f0a8e6636d4af20cc539e43d05b0604fa" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow-format", "bytemuck", "chrono", @@ -369,7 +289,7 @@ dependencies = [ "ethnum", "foreign_vec", "futures", - "getrandom 0.2.10", + "getrandom 0.2.12", "hash_hasher", "lexical-core", "lz4", @@ -394,24 +314,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "async-compression" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - [[package]] name = "async-lock" version = "2.8.0" @@ -493,18 +395,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -569,9 +471,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.4" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bb8" @@ -618,7 +520,7 @@ version = "0.59.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", "cexpr", "clang-sys", "clap", @@ -637,15 +539,15 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.3.2" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" [[package]] name = "bitvec" @@ -659,29 +561,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -693,68 +572,26 @@ dependencies = [ [[package]] name = "borsh" -version = "0.10.3" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" +checksum = "f58b559fd6448c6e2fd0adb5720cd98a2506594cafa4737ff98c396f3e82f667" dependencies = [ "borsh-derive", - "hashbrown 0.13.2", + "cfg_aliases", ] [[package]] name = "borsh-derive" -version = "0.10.3" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" +checksum = "7aadb5b6ccbd078890f6d7003694e33816e6b784358f18e15e7e6d9f065a57cd" dependencies = [ - "borsh-derive-internal", - "borsh-schema-derive-internal", + "once_cell", "proc-macro-crate", - "proc-macro2", - "syn 1.0.109", -] - -[[package]] -name = "borsh-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" -dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", -] - -[[package]] -name = "borsh-schema-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "brotli" -version = "3.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.3.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "syn 2.0.48", + "syn_derive", ] [[package]] @@ -765,9 +602,9 @@ checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytecheck" @@ -793,9 +630,9 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" dependencies = [ "bytemuck_derive", ] @@ -808,14 +645,14 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -823,27 +660,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = 
"0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "cast" version = "0.3.0" @@ -860,12 +676,6 @@ dependencies = [ "libc", ] -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - [[package]] name = "cexpr" version = "0.6.0" @@ -881,11 +691,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "android-tzdata", "iana-time-zone", @@ -893,36 +709,14 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets", -] - -[[package]] -name = "chrono-tz" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", + "windows-targets 0.52.0", ] [[package]] name = "clang-sys" -version = "1.6.1" +version 
= "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" dependencies = [ "glob", "libc", @@ -937,7 +731,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags 1.3.2", + "bitflags 1.2.1", "strsim 0.8.0", "textwrap", "unicode-width", @@ -955,21 +749,21 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.0.1" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" dependencies = [ "crossterm", - "strum 0.24.1", - "strum_macros 0.24.3", + "strum", + "strum_macros", "unicode-width", ] [[package]] name = "concurrent-queue" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" +checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" dependencies = [ "crossbeam-utils", ] @@ -985,8 +779,8 @@ dependencies = [ ] [[package]] -name = "connectorx" -version = "0.3.3-alpha.1" +name = "connector_arrow" +version = "0.0.1" dependencies = [ "anyhow", "arrow", @@ -996,7 +790,6 @@ dependencies = [ "chrono", "criterion", "csv", - "datafusion", "env_logger", "fallible-streaming-iterator", "fehler", @@ -1004,12 +797,10 @@ dependencies = [ "gcp-bigquery-client", "hex", "iai", - "itertools 0.10.5", - "j4rs", + "itertools", "log", "mysql_common", "native-tls", - "ndarray", "num-traits", "openssl", "oracle", @@ -1040,49 +831,31 @@ dependencies = [ "uuid 0.8.2", ] -[[package]] -name = "connectorx-cpp" -version = "0.3.3-alpha.1" -dependencies = [ - "arrow", - "connectorx", - "libc", - "openssl", -] - 
[[package]] name = "const-random" -version = "0.1.15" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" dependencies = [ "const-random-macro", - "proc-macro-hack", ] [[package]] name = "const-random-macro" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", "once_cell", - "proc-macro-hack", "tiny-keccak", ] -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -1090,9 +863,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpp_demangle" @@ -1105,9 +878,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = 
"53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] @@ -1132,7 +905,7 @@ dependencies = [ "clap", "criterion-plot", "csv", - "itertools 0.10.5", + "itertools", "lazy_static", "num-traits", "oorandom", @@ -1154,16 +927,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" dependencies = [ "cast", - "itertools 0.10.5", + "itertools", ] [[package]] name = "crossbeam" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" dependencies = [ - "cfg-if", "crossbeam-channel", "crossbeam-deque", "crossbeam-epoch", @@ -1173,70 +945,57 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - 
"scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crossterm" -version = "0.26.1" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.2", "crossterm_winapi", "libc", - "mio", "parking_lot 0.12.1", - "signal-hook", - "signal-hook-mio", "winapi", ] @@ -1267,9 +1026,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -1279,9 +1038,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] @@ -1315,194 +1074,10 @@ name = "darling_macro" version = "0.13.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.0", - "lock_api", - "once_cell", - "parking_lot_core 0.9.8", -] - -[[package]] -name = "datafusion" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a4e4fc25698a14c90b34dda647ba10a5a966dc04b036d22e77fb1048663375d" -dependencies = [ - "ahash 0.8.3", - "arrow", - "arrow-array", - "arrow-schema", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-sql", - "flate2", - "futures", - "glob", - "half 2.3.1", - "hashbrown 0.14.0", - "indexmap 2.0.0", - "itertools 0.11.0", - "log", - "num_cpus", - "object_store", - "parking_lot 0.12.1", - "parquet", - "percent-encoding", - "pin-project-lite", - "rand 0.8.5", - "sqlparser 0.37.0", - "tempfile", - "tokio", - "tokio-util 0.7.8", - "url", - "uuid 1.4.1", - "xz2", - "zstd", -] - -[[package]] -name = "datafusion-common" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c23ad0229ea4a85bf76b236d8e75edf539881fdb02ce4e2394f9a76de6055206" -dependencies = [ - "arrow", - "arrow-array", - "async-compression", - "bytes", - "bzip2", - "chrono", - "flate2", - "futures", - "num_cpus", - "object_store", - "parquet", - "sqlparser 0.37.0", - "tokio", - "tokio-util 0.7.8", - "xz2", - "zstd", -] - -[[package]] -name = "datafusion-execution" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9b37d2fc1a213baf34e0a57c85b8e6648f1a95152798fd6738163ee96c19203f" -dependencies = [ - "arrow", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "hashbrown 0.14.0", - "log", - "object_store", - "parking_lot 0.12.1", - "rand 0.8.5", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6ea9844395f537730a145e5d87f61fecd37c2bc9d54e1dc89b35590d867345d" -dependencies = [ - "ahash 0.8.3", - "arrow", - "datafusion-common", - "sqlparser 0.37.0", - "strum 0.25.0", - "strum_macros 0.25.2", -] - -[[package]] -name = "datafusion-optimizer" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8a30e0f79c5d59ba14d3d70f2500e87e0ff70236ad5e47f9444428f054fd2be" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown 0.14.0", - "itertools 0.11.0", - "log", - "regex-syntax 0.7.5", -] - -[[package]] -name = "datafusion-physical-expr" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "766c567082c9bbdcb784feec8fe40c7049cedaeb3a18d54f563f75fe0dc1932c" -dependencies = [ - "ahash 0.8.3", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "base64 0.21.4", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "half 2.3.1", - "hashbrown 0.14.0", - "hex", - "indexmap 2.0.0", - "itertools 0.11.0", - "libc", - "log", - "md-5", - "paste", - "petgraph", - "rand 0.8.5", - "regex", - "sha2", - "unicode-segmentation", - "uuid 1.4.1", -] - -[[package]] -name = "datafusion-sql" -version = "31.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "811fd084cf2d78aa0c76b74320977c7084ad0383690612528b580795764b4dd0" -dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-expr", - "log", - 
"sqlparser 0.37.0", +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", ] [[package]] @@ -1516,22 +1091,23 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ + "powerfmt", "serde", ] [[package]] name = "derive_utils" -version = "0.13.2" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abcad25e9720609ccb3dcdb795d845e37d8ce34183330a9f48b03a1a71c8e21" +checksum = "61bb5a1014ce6dfc2a378578509abe775a5aa06bff584a547555d9efdb81b926" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -1545,43 +1121,11 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - -[[package]] -name = "dunce" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" - [[package]] name = "dyn-clone" -version = "1.0.13" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555" +checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" [[package]] name = "either" @@ -1671,27 +1215,27 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "enumflags2" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c041f5090df68b32bcd905365fd51769c8b9d553fe87fde0b683534f10c01bd2" +checksum = "5998b4f30320c9d93aed72f63af821bfdac50465b75428fce77b48ec482c3939" dependencies = [ "enumflags2_derive", ] [[package]] name = "enumflags2_derive" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9a1f9f7d83e59740248a6e14ecf93929ade55027844dfcea78beafccc15745" +checksum = "f95e2801cd355d4a1a3e3953ce6ee5ae9603a5c833455343a8bfe3f44d418246" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -1715,30 +1259,19 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] name = "ethnum" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8ff382b2fa527fb7fb06eeebfc5bbb3f17e3cc6b9d70b006c41daa8824adac" +checksum = 
"b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" [[package]] name = "event-listener" @@ -1752,6 +1285,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + [[package]] name = "fallible-streaming-iterator" version = "0.1.9" @@ -1766,9 +1305,9 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "fastrand" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fehler" @@ -1796,27 +1335,11 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "libz-sys", @@ -1852,9 +1375,9 @@ checksum = 
"ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -1884,7 +1407,7 @@ checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -1896,7 +1419,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -1908,15 +1431,9 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.33", + "syn 2.0.48", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "funty" version = "2.0.0" @@ -1925,9 +1442,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -1940,9 +1457,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -1950,15 +1467,15 @@ dependencies = [ [[package]] name = 
"futures-core" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -1967,38 +1484,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.28" +version = 
"0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -2056,9 +1573,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "js-sys", @@ -2069,9 +1586,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "glob" @@ -2081,9 +1598,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -2091,10 +1608,10 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap 2.2.1", "slab", "tokio", - "tokio-util 0.7.8", + "tokio-util 0.7.10", "tracing", ] @@ -2127,25 +1644,16 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash 0.7.6", -] - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.3", + "ahash 0.7.7", ] [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "allocator-api2", "rayon", ] @@ -2156,7 +1664,7 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" dependencies = [ - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -2176,9 +1684,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hex" @@ -2197,18 +1705,18 @@ dependencies = [ [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "http" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" dependencies = [ "bytes", "fnv", @@ -2217,9 +1725,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", @@ -2246,9 +1754,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -2261,7 +1769,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 0.5.5", "tokio", "tower-service", "tracing", @@ -2285,14 +1793,14 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http", "hyper", - "rustls 0.21.7", + "rustls 0.21.10", "tokio", "tokio-rustls 0.24.1", ] @@ -2305,16 +1813,16 @@ checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -2334,9 +1842,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -2354,12 +1862,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +checksum = "433de089bd45971eecf4668ee0ee8f4cec17db4f8bd8f7bc3197a6ce37aa7d9b" dependencies = [ "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.14.3", ] [[package]] @@ -2368,7 +1876,7 @@ version = "0.10.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3886428c6400486522cf44b8626e7b94ad794c14390290f2a274dcf728a58f" dependencies = [ - "ahash 0.7.6", + "ahash 0.7.7", "atty", "indexmap 1.9.3", "itoa", @@ -2389,27 +1897,20 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "io-enum" -version = "1.1.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5305557fa27b460072ae15ce07617e999f5879f14d376c8449f0bfb9f9d8e91e" +checksum = "53b53d712d99a73eec59ee5e4fe6057f8052142d38eeafbbffcb06b36d738a6e" dependencies = [ "derive_utils", - "syn 2.0.33", ] [[package]] name = "ipnet" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "itertools" @@ -2420,64 +1921,17 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.11.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" - -[[package]] -name = "j4rs" -version = "0.15.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76cc9c1648a1cc940ac10c19f56e50bee15344590e10f220899d955db5f87ac2" -dependencies = [ - "cesu8", - "dirs", - "dunce", - "fs_extra", - "glob", - "java-locator", - "jni-sys", - "lazy_static", - "libc", - "libloading", - "log", - "serde", - "serde_json", - "sha2", -] - -[[package]] -name = "java-locator" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2" -dependencies = [ - "glob", - "lazy_static", -] - -[[package]] -name = "jni-sys" -version = "0.3.0" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" dependencies = [ "libc", ] @@ -2578,9 +2032,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.148" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libgssapi" @@ -2588,7 +2042,7 
@@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "724dbcd1f871da9c67983537a47ac510c278656f6392418ad67c7a52720e54b2" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", "bytes", "lazy_static", "libgssapi-sys", @@ -2606,25 +2060,25 @@ dependencies = [ [[package]] name = "libloading" -version = "0.7.4" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" dependencies = [ "cfg-if", - "winapi", + "windows-sys 0.48.0", ] [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libsqlite3-sys" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" +checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716" dependencies = [ "cc", "pkg-config", @@ -2633,9 +2087,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" dependencies = [ "cc", "pkg-config", @@ -2644,15 +2098,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.7" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" 
[[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -2693,33 +2147,13 @@ dependencies = [ "libc", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "matrixmultiply" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" -dependencies = [ - "autocfg", - "rawpointer", -] - [[package]] name = "md-5" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ + "cfg-if", "digest", ] @@ -2737,9 +2171,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.6.3" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memmap2" @@ -2761,9 +2195,9 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.9.0" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" dependencies = [ 
"autocfg", ] @@ -2791,14 +2225,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", - "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -2844,7 +2277,7 @@ dependencies = [ "percent-encoding", "serde", "serde_json", - "socket2 0.4.9", + "socket2 0.4.10", "twox-hash", "url", ] @@ -2858,7 +2291,7 @@ dependencies = [ "base64 0.13.1", "bigdecimal", "bindgen", - "bitflags 1.3.2", + "bitflags 1.2.1", "bitvec", "byteorder", "bytes", @@ -2884,7 +2317,7 @@ dependencies = [ "subprocess", "thiserror", "time", - "uuid 1.4.1", + "uuid 1.7.0", ] [[package]] @@ -2914,29 +2347,17 @@ dependencies = [ "tempfile", ] -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - [[package]] name = "nix" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa9b4819da1bc61c0ea48b63b7bc8604064dd43013e7cc325df098d49cd7c18a" +checksum = "f5e06129fb611568ef4e868c14b326274959aa70ff7776e9d55323531c374945" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", "cc", "cfg-if", "libc", + "memoffset", ] [[package]] @@ -3046,9 +2467,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ 
"autocfg", "libm", @@ -3060,7 +2481,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.2", + "hermit-abi 0.3.4", "libc", ] @@ -3075,39 +2496,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d359e231e5451f4f9fa889d56e3ce34f8724f1a61db2107739359717cf2bbf08" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "humantime", - "itertools 0.10.5", - "parking_lot 0.12.1", - "percent-encoding", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" @@ -3117,11 +2517,11 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.57" +version = "0.10.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.2", "cfg-if", "foreign-types", "libc", @@ -3138,7 +2538,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -3147,24 +2547,14 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-src" -version = "300.1.3+3.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2c101a165fff9935e34def4669595ab1c7847943c42be86e21503e482be107" -dependencies = [ - "cc", -] - [[package]] name = "openssl-sys" -version = "0.9.93" +version = "0.9.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" dependencies = [ "cc", "libc", - "openssl-src", "pkg-config", "vcpkg", ] @@ -3208,15 +2598,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - [[package]] name = "owning_ref" version = "0.4.1" @@ -3244,7 +2625,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.8", + "parking_lot_core 0.9.9", ] [[package]] @@ -3263,58 +2644,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "smallvec", - "windows-targets", -] - -[[package]] -name = "parquet" -version = "46.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad2cba786ae07da4d73371a88b9e0f9d3ffac1a9badc83922e0e15814f5c5fa" -dependencies = [ - "ahash 0.8.3", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.21.4", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "hashbrown 0.14.0", - "lz4", - "num", - "num-bigint", - "object_store", - "paste", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", -] - -[[package]] -name = "parse-zoneinfo" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" -dependencies = [ - "regex", + "windows-targets 0.48.5", ] [[package]] @@ -3340,19 +2678,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" - -[[package]] -name = "petgraph" -version = "0.6.4" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap 2.0.0", -] +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" @@ -3363,26 +2691,6 @@ dependencies = [ "phf_shared", ] -[[package]] -name = "phf_codegen" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - 
[[package]] name = "phf_shared" version = "0.11.2" @@ -3406,9 +2714,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "planus" @@ -3453,7 +2761,7 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1362d4a136c0ebacb40d88a37ba361738b222fd8a2ee9340a3d8642f698c52b" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", "polars-core", "polars-io", "polars-lazy", @@ -3470,7 +2778,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f967c901fa5da4ca7f64e813d1268488ba97e9b3004cefc579ff851c197a1138" dependencies = [ "arrow2", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "multiversion", "num-traits", "polars-error", @@ -3484,14 +2792,14 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b24f92fc5b167f668ff85ab9607dfa72e2c09664cacef59297ee8601dee60126" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow2", - "bitflags 2.4.0", + "bitflags 2.4.2", "chrono", "comfy-table", "either", - "hashbrown 0.14.0", - "indexmap 2.0.0", + "hashbrown 0.14.3", + "indexmap 2.2.1", "num-traits", "once_cell", "polars-arrow", @@ -3525,7 +2833,7 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92cab0df9f2a35702fa5aec99edfaabf9ae8e9cdd0acf69e143ad2d132f34f9c" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow2", "async-trait", "bytes", @@ -3556,8 +2864,8 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c33762ec2a55e01c9f8776b34db86257c70a0a3b3929bd4eb91a52aacf61456" dependencies = [ - "ahash 0.8.3", - "bitflags 2.4.0", 
+ "ahash 0.8.7", + "bitflags 2.4.2", "glob", "once_cell", "polars-arrow", @@ -3582,7 +2890,7 @@ dependencies = [ "argminmax", "arrow2", "either", - "indexmap 2.0.0", + "indexmap 2.2.1", "memchr", "polars-arrow", "polars-core", @@ -3598,7 +2906,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f2bc9a12da9ed043fb0cb51dbcb87b365e4845b7ab6399d7a81e838460c6974" dependencies = [ "enum_dispatch", - "hashbrown 0.14.0", + "hashbrown 0.14.3", "num-traits", "polars-arrow", "polars-core", @@ -3618,7 +2926,7 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb67b014f0295e8e9dbb84404a91d666d477b3bc248a2ed51bc442833b16da35" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "arrow2", "once_cell", "polars-arrow", @@ -3630,7 +2938,7 @@ dependencies = [ "rayon", "regex", "smartstring", - "strum_macros 0.25.2", + "strum_macros", "version_check", ] @@ -3685,8 +2993,8 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c326708a370d71dc6e11a8f4bbc10a8479e1c314dc048ba73543b815cd0bf339" dependencies = [ - "ahash 0.8.3", - "hashbrown 0.14.0", + "ahash 0.8.7", + "hashbrown 0.14.3", "num-traits", "once_cell", "polars-error", @@ -3703,7 +3011,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7915b33ed60abc46040cbcaa25ffa1c7ec240668e0477c4f3070786f5916d451" dependencies = [ "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "futures-util", "log", "tokio", @@ -3742,10 +3050,10 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" dependencies = [ - "base64 0.21.4", + "base64 0.21.7", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "hmac", "md-5", "memchr", @@ -3762,13 +3070,19 @@ checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" dependencies = [ "bytes", "chrono", - 
"fallible-iterator", + "fallible-iterator 0.2.0", "postgres-protocol", "serde", "serde_json", "uuid 0.8.2", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pprof" version = "0.5.1" @@ -3801,24 +3115,41 @@ checksum = "be91bcc43e73799dc46a6c194a55e7aae1d86cc867c860fd4a436019af21bd8c" [[package]] name = "proc-macro-crate" -version = "0.1.5" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ - "toml", + "proc-macro-error-attr", + "proc-macro2", + "quote", + "version_check", ] [[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" +name = "proc-macro-error-attr" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -3854,9 +3185,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -3904,13 +3235,13 @@ dependencies = [ [[package]] name = "r2d2_sqlite" -version = "0.22.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f31323d6161385f385046738df520e0e8694fa74852d35891fc0be08348ddc" +checksum = "4dc290b669d30e20751e813517bbe13662d020419c5c8818ff10b6e8bb7777f6" dependencies = [ "r2d2", "rusqlite", - "uuid 1.4.1", + "uuid 1.7.0", ] [[package]] @@ -3978,7 +3309,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", ] [[package]] @@ -4000,17 +3331,11 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -4018,14 +3343,12 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -4034,50 +3357,39 @@ version = 
"0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", ] [[package]] name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_users" -version = "0.4.3" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "getrandom 0.2.10", - "redox_syscall 0.2.16", - "thiserror", + "bitflags 1.2.1", ] [[package]] name = "regex" -version = "1.9.5" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] name = "regex-automata" -version = "0.3.8" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -4088,26 +3400,26 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = 
"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rend" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" +checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" dependencies = [ "bytecheck", ] [[package]] name = "reqwest" -version = "0.11.20" +version = "0.11.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" dependencies = [ - "base64 0.21.4", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -4116,7 +3428,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls 0.24.1", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -4124,11 +3436,12 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.7", - "rustls-pemfile 1.0.3", + "rustls 0.21.10", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", + "system-configuration", "tokio", "tokio-rustls 0.24.1", "tower-service", @@ -4142,9 +3455,9 @@ dependencies = [ [[package]] name = "rgb" -version = "0.8.36" +version = "0.8.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" dependencies = [ "bytemuck", ] @@ -4158,34 +3471,49 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", - "untrusted", + "spin 0.5.2", + "untrusted 0.7.1", "web-sys", "winapi", ] +[[package]] +name = "ring" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +dependencies = [ + "cc", + "getrandom 0.2.12", + "libc", + "spin 
0.9.8", + "untrusted 0.9.0", + "windows-sys 0.48.0", +] + [[package]] name = "rkyv" -version = "0.7.42" +version = "0.7.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" +checksum = "527a97cdfef66f65998b5f3b637c26f5a5ec09cc52a3f9932313ac645f4190f5" dependencies = [ "bitvec", "bytecheck", + "bytes", "hashbrown 0.12.3", "ptr_meta", "rend", "rkyv_derive", "seahash", "tinyvec", - "uuid 1.4.1", + "uuid 1.7.0", ] [[package]] name = "rkyv_derive" -version = "0.7.42" +version = "0.7.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e06b915b5c230a17d7a736d1e2e63ee753c256a8614ef3f5147b13a4f5541d" +checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" dependencies = [ "proc-macro2", "quote", @@ -4194,13 +3522,13 @@ dependencies = [ [[package]] name = "rusqlite" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +checksum = "a78046161564f5e7cd9008aff3b2990b3850dc8e0349119b98e8f251e099f24d" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.2", "chrono", - "fallible-iterator", + "fallible-iterator 0.3.0", "fallible-streaming-iterator", "hashlink", "libsqlite3-sys", @@ -4209,9 +3537,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.32.0" +version = "1.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd" +checksum = "06676aec5ccb8fc1da723cc8c0f9a46549f21ebb8753d3915c6c41db1e7f1dc4" dependencies = [ "arrayvec", "borsh", @@ -4226,9 +3554,9 @@ dependencies = [ [[package]] name = "rust_decimal_macros" -version = "1.32.0" +version = "1.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86444b802de0b10ac5e563b5ddb43b541b9705de4e01a50e82194d2b183c1835" 
+checksum = "2e43721f4ef7060ebc2c3ede757733209564ca8207f47674181bcd425dd76945" dependencies = [ "quote", "rust_decimal", @@ -4257,15 +3585,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.13" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -4275,19 +3603,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" dependencies = [ "log", - "ring", + "ring 0.16.20", "sct", "webpki", ] [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring", + "ring 0.17.7", "rustls-webpki", "sct", ] @@ -4299,7 +3627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile 1.0.3", + "rustls-pemfile 1.0.4", "schannel", "security-framework", ] @@ -4315,21 +3643,21 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64 0.21.4", + "base64 0.21.7", ] [[package]] name = "rustls-webpki" -version = "0.101.5" +version = "0.101.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + "ring 0.17.7", + "untrusted 0.9.0", ] [[package]] @@ -4340,9 +3668,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "same-file" @@ -4361,11 +3689,11 @@ checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -4385,12 +3713,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring", - "untrusted", + "ring 0.17.7", + "untrusted 0.9.0", ] [[package]] @@ -4401,11 +3729,11 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.9.2" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" 
+checksum = "23a2ac85147a3a11d77ecf1bc7166ec0b92febfa4461c37944e180f319ece467" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", "core-foundation", "core-foundation-sys", "libc", @@ -4424,21 +3752,15 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" - -[[package]] -name = "seq-macro" -version = "0.3.5" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] @@ -4455,20 +3777,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "serde_json" -version = "1.0.106" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", @@ -4489,9 +3811,9 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures", @@ -4500,9 +3822,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.7" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", @@ -4511,39 +3833,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" - -[[package]] -name = "signal-hook" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" -dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-mio" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" -dependencies = [ - "libc", - "mio", - "signal-hook", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simdutf8" @@ -4568,9 +3860,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = 
"e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "smartstring" @@ -4583,39 +3875,11 @@ dependencies = [ "version_check", ] -[[package]] -name = "snafu" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" -dependencies = [ - "doc-comment", - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "snap" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" - [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", @@ -4623,12 +3887,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -4637,6 +3901,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + 
[[package]] name = "sqlparser" version = "0.36.1" @@ -4653,18 +3923,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075" dependencies = [ "log", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", ] [[package]] @@ -4714,45 +3972,23 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - [[package]] name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" -dependencies = [ - "strum_macros 0.25.2", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] [[package]] name = "strum_macros" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -4807,20 +4043,32 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.33" +version = "2.0.48" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9caece70c63bfba29ec2fed841a09851b14a235c60010fa4de58089b6c025668" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "sysinfo" -version = "0.29.10" +version = "0.29.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a18d114d420ada3a891e6bc8e96a2023402203296a47cdd65083377dad18ba5" +checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" dependencies = [ "cfg-if", "core-foundation-sys", @@ -4830,6 +4078,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.2.1", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tap" version = "1.0.1" @@ -4838,28 +4107,28 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "target-features" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f6b473c37f9add4cf1df5b4d66a8ef58ab6c895f1a3b3f949cf3e21230140e" +checksum = "cfb5fa503293557c5158bd215fdc225695e567a77e453f5d4452a50a193969bd" [[package]] name = 
"tempfile" -version = "3.8.0" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "rustix", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "termcolor" -version = "1.2.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] @@ -4875,33 +4144,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.48" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.48" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", + "syn 2.0.48", ] [[package]] @@ -4938,14 +4196,15 @@ dependencies = [ [[package]] name = "time" -version = "0.3.28" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" dependencies = [ "deranged", "itoa", "libc", "num_threads", + "powerfmt", "serde", "time-core", "time-macros", @@ -4953,15 +4212,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" dependencies = [ "time-core", ] @@ -5002,9 +4261,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.32.0" +version = "1.35.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" dependencies = [ "backtrace", "bytes", @@ -5013,20 +4272,20 @@ dependencies = [ "num_cpus", "parking_lot 0.12.1", "pin-project-lite", - "socket2 0.5.4", + "socket2 0.5.5", "tokio-macros", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] @@ -5041,9 +4300,9 @@ dependencies = [ [[package]] name = "tokio-openssl" 
-version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08f9ffb7809f1b20c1b398d92acf4cc719874b3b2b2d9ea2f09b4a80350878a" +checksum = "6ffab79df67727f6acf57f1ff743091873c24c579b1e2ce4d8f53e47ded4d63d" dependencies = [ "futures-util", "openssl", @@ -5060,7 +4319,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "futures-channel", "futures-util", "log", @@ -5071,9 +4330,9 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.8.5", - "socket2 0.5.4", + "socket2 0.5.5", "tokio", - "tokio-util 0.7.8", + "tokio-util 0.7.10", "whoami", ] @@ -5094,7 +4353,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.7", + "rustls 0.21.10", "tokio", ] @@ -5126,9 +4385,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -5139,12 +4398,20 @@ dependencies = [ ] [[package]] -name = "toml" -version = "0.5.11" +name = "toml_datetime" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" + +[[package]] +name = "toml_edit" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" dependencies = [ - "serde", + "indexmap 2.2.1", + "toml_datetime", + "winnow", ] [[package]] @@ -5155,11 +4422,10 @@ checksum = 
"b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -5168,29 +4434,29 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.33", + "syn 2.0.48", ] [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "twox-hash" @@ -5205,15 +4471,15 @@ dependencies = [ [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -5230,17 +4496,11 @@ dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "untrusted" @@ -5248,11 +4508,17 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -5271,17 +4537,17 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", "md5 0.7.0", ] [[package]] name = "uuid" -version = "1.4.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = 
"f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.12", "rand 0.8.5", ] @@ -5412,19 +4678,19 @@ dependencies = [ [[package]] name = "webpki" -version = "0.22.1" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e" +checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring", - "untrusted", + "ring 0.17.7", + "untrusted 0.9.0", ] [[package]] name = "webpki-roots" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" +checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" [[package]] name = "which" @@ -5466,9 +4732,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -5485,7 +4751,7 @@ version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f820cd208ce9c6b050812dc2d724ba98c6c1e9db5ce9b3f58d925ae5723a5e6" dependencies = [ - "bitflags 1.3.2", + "bitflags 1.2.1", "byteorder", "md5 0.6.1", "rand 0.7.3", @@ -5493,12 +4759,12 @@ dependencies = [ ] [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets", + 
"windows-targets 0.52.0", ] [[package]] @@ -5507,7 +4773,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -5516,13 +4791,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -5531,42 +4821,93 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "winnow" +version = "0.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1931d78a9c73861da0134f453bb1f790ce49b2e30eba8410b4b79bac72b46a2d" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.50.0" @@ -5574,7 +4915,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ "cfg-if", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -5588,18 +4929,9 @@ dependencies = [ [[package]] name = "xxhash-rust" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" - -[[package]] -name = "xz2" -version = "0.1.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] +checksum = "53be06678ed9e83edb1745eb72efc0bbcd7b5c3c35711a860906aed827a13d61" [[package]] name = "yup-oauth2" @@ -5614,7 +4946,7 @@ dependencies = [ "http", "hyper", "hyper-rustls 0.23.2", - "itertools 0.10.5", + "itertools", "log", "percent-encoding", "rustls 0.20.9", @@ -5628,6 +4960,26 @@ dependencies = [ "url", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "zstd" version = "0.12.4" @@ -5649,11 +5001,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index 0bbd499..b74c6db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,3 @@ [workspace] -default-members = ["connectorx"] -members = ["connectorx", "connectorx-cpp"] +members = ["connector_arrow"] resolver = "2" - -[profile.release] -debug = true -lto = true - -[workspace.dependencies] -arrow = {version = "46", features = ["prettyprint", "ffi"]} -arrow2 = {version = "0.17", default-features = false} diff --git a/Justfile b/Justfile index 07ac506..73e8e33 100644 --- a/Justfile +++ b/Justfile @@ -1,4 +1,20 @@ -set dotenv-load := true +export POSTGRES_URL := "postgres://root:root@localhost:5432/dummy" +export SQLITE_URL := "sqlite://sqlite.db" + +export MYSQL_HOST := "localhost" +export MYSQL_PORT := "3306" +export MYSQL_USER := "root" +export MYSQL_PASSWORD := "root" +export MYSQL_DB := "dummy" + +export MSSQL_HOST := "localhost" +export MSSQL_USER := "sa" +export MSSQL_PASSWORD := "Wordpass123##" +export MSSQL_DB := "db" + + +default: + just --list build-release: cargo build --release --features all @@ -6,125 +22,16 @@ build-release: build-debug: cargo build --features all -build-cpp +ARGS="": - cd 
connectorx-cpp && cargo build {{ARGS}} - -build-cpp-release +ARGS="": - cd connectorx-cpp && cargo build --release {{ARGS}} - -test +ARGS="": - cargo test --features all {{ARGS}} -- --nocapture +test: + cargo nextest run --features=src_csv,src_postgres,src_dummy,src_sqlite,dst_arrow,dst_arrow2 test-feature-gate: - cargo c --features src_postgres - cargo c --features src_mysql - cargo c --features src_mssql - cargo c --features src_sqlite - cargo c --features src_oracle - cargo c --features src_csv - cargo c --features src_dummy - cargo c --features dst_arrow - cargo c --features dst_arrow2 - -bootstrap-python: - cd connectorx-python && poetry install - -setup-java: - cd federated-query/rewriter && mvn package -Dmaven.test.skip=true - cp -f ./federated-query/rewriter/target/federated-rewriter-1.0-SNAPSHOT-jar-with-dependencies.jar connectorx-python/connectorx/dependencies/federated-rewriter.jar - -setup-python: - cd connectorx-python && poetry run maturin develop --release - -test-python +opts="": setup-python - cd connectorx-python && poetry run pytest connectorx/tests -v -s {{opts}} - -test-python-s +opts="": - cd connectorx-python && poetry run pytest connectorx/tests -v -s {{opts}} - -test-fed file="3.sql": - cd connectorx && cargo run --features src_postgres --features src_mysql --features dst_arrow --features federation --example federated_test "../federated-query/test-queries/{{file}}" - -test-datafusion: - cd connectorx && cargo run --features src_postgres --features src_mysql --features dst_arrow --features federation --example test - -seed-db: - #!/bin/bash - psql $POSTGRES_URL -f scripts/postgres.sql - sqlite3 ${SQLITE_URL#sqlite://} < scripts/sqlite.sql - mysql --protocol tcp -h$MYSQL_HOST -P$MYSQL_PORT -u$MYSQL_USER -p$MYSQL_PASSWORD $MYSQL_DB < scripts/mysql.sql - mssql-cli -S$MSSQL_HOST -U$MSSQL_USER -P$MSSQL_PASSWORD -d$MSSQL_DB -i scripts/mssql.sql - -# dbs not included in ci -seed-db-more: - mysql --protocol tcp -h$CLICKHOUSE_HOST 
-P$CLICKHOUSE_PORT -u$CLICKHOUSE_USER -p$CLICKHOUSE_PASSWORD $CLICKHOUSE_DB < scripts/clickhouse.sql - psql $REDSHIFT_URL -f scripts/redshift.sql - ORACLE_URL_SCRIPT=`echo ${ORACLE_URL#oracle://} | sed "s/:/\//"` - cat scripts/oracle.sql | sqlplus $ORACLE_URL_SCRIPT - mysql --protocol tcp -h$MARIADB_HOST -P$MARIADB_PORT -u$MARIADB_USER -p$MARIADB_PASSWORD $MARIADB_DB < scripts/mysql.sql - -# benches -flame-tpch conn="POSTGRES_URL": - cd connectorx-python && PYO3_PYTHON=$HOME/.pyenv/versions/3.8.6/bin/python3.8 PYTHONPATH=$HOME/.pyenv/versions/conn/lib/python3.8/site-packages LD_LIBRARY_PATH=$HOME/.pyenv/versions/3.8.6/lib/ cargo run --no-default-features --features executable --features fptr --features nbstr --features dsts --features srcs --release --example flame_tpch {{conn}} - -build-tpch: - cd connectorx-python && cargo build --no-default-features --features executable --features fptr --release --example tpch - -cachegrind-tpch: build-tpch - valgrind --tool=cachegrind target/release/examples/tpch - -python-tpch name +ARGS="": setup-python - #!/bin/bash - export PYTHONPATH=$PWD/connectorx-python - cd connectorx-python && \ - poetry run python ../benchmarks/tpch-{{name}}.py {{ARGS}} - -python-tpch-ext name +ARGS="": - cd connectorx-python && poetry run python ../benchmarks/tpch-{{name}}.py {{ARGS}} - -python-ddos name +ARGS="": setup-python - #!/bin/bash - export PYTHONPATH=$PWD/connectorx-python - cd connectorx-python && \ - poetry run python ../benchmarks/ddos-{{name}}.py {{ARGS}} - -python-ddos-ext name +ARGS="": - cd connectorx-python && poetry run python ../benchmarks/ddos-{{name}}.py {{ARGS}} - - -python-shell: - cd connectorx-python && \ - poetry run ipython - -benchmark-report: setup-python - cd connectorx-python && \ - poetry run pytest connectorx/tests/benchmarks.py --benchmark-json ../benchmark.json - -# releases -build-python-wheel: - # need to get the j4rs dependency first - cd connectorx-python && maturin build --release -i python - # copy files - 
cp -rf connectorx-python/target/release/jassets connectorx-python/connectorx/dependencies - # build final wheel - cd connectorx-python && maturin build --release -i python - -bench-fed path: - just python-tpch fed --file {{path}}/q2.sql - just python-tpch-ext fed --file {{path}}/q3.sql - just python-tpch-ext fed --file {{path}}/q4.sql - just python-tpch-ext fed --file {{path}}/q5.sql - just python-tpch-ext fed --file {{path}}/q7.sql - just python-tpch-ext fed --file {{path}}/q8.sql - just python-tpch-ext fed --file {{path}}/q9.sql - just python-tpch-ext fed --file {{path}}/q10.sql - just python-tpch-ext fed --file {{path}}/q11.sql - just python-tpch-ext fed --file {{path}}/q12.sql - just python-tpch-ext fed --file {{path}}/q13.sql - just python-tpch-ext fed --file {{path}}/q14.sql - just python-tpch-ext fed --file {{path}}/q16.sql - just python-tpch-ext fed --file {{path}}/q17.sql - just python-tpch-ext fed --file {{path}}/q18.sql - just python-tpch-ext fed --file {{path}}/q19.sql - just python-tpch-ext fed --file {{path}}/q20.sql - just python-tpch-ext fed --file {{path}}/q22.sql + cargo check --features src_postgres + cargo check --features src_mysql + cargo check --features src_mssql + cargo check --features src_sqlite + cargo check --features src_oracle + cargo check --features src_csv + cargo check --features src_dummy + cargo check --features dst_arrow + cargo check --features dst_arrow2 diff --git a/README.md b/README.md index bd760ff..4e79992 100644 --- a/README.md +++ b/README.md @@ -1,87 +1,34 @@ -# ConnectorX [![status][ci_badge]][ci_page] [![discussions][discussion_badge]][discussion_page] [![Downloads][download_badge]][download_page] +# Connector Arrow -[ci_badge]: https://github.com/sfu-db/connector-x/workflows/ci/badge.svg -[ci_page]: https://github.com/sfu-db/connector-x/actions -[discussion_badge]: https://img.shields.io/badge/Forum-Github%20Discussions-blue -[discussion_page]: https://github.com/sfu-db/connector-x/discussions -[download_badge]: 
https://pepy.tech/badge/connectorx -[download_page]: https://pepy.tech/project/connectorx +Load data from many data sources into Apache Arrow, the fastest way. -Load data from to , the fastest way. +Fork of [ConnectorX](https://github.com/sfu-db/connector-x). -ConnectorX enables you to load data from databases into Python in the fastest and most memory efficient way. +[Documentation](https://docs.rs/connector_arrow) -What you need is one line of code: - -```python -import connectorx as cx - -cx.read_sql("postgresql://username:password@server:port/database", "SELECT * FROM lineitem") -``` - -Optionally, you can accelerate the data loading using parallelism by specifying a partition column. - -```python -import connectorx as cx - -cx.read_sql("postgresql://username:password@server:port/database", "SELECT * FROM lineitem", partition_on="l_orderkey", partition_num=10) -``` - -The function will partition the query by **evenly** splitting the specified column to the amount of partitions. -ConnectorX will assign one thread for each partition to load and write data in parallel. -Currently, we support partitioning on **numerical** columns (**cannot contain NULL**) for **SPJA** queries. - -# Installation - -```bash -pip install connectorx -``` - -Check out [here](https://sfu-db.github.io/connector-x/install.html#build-from-source-code) to see how to build python wheel from source. - -# Performance - -We compared different solutions in Python that provides the `read_sql` function, by loading a 10x TPC-H lineitem table (8.6GB) from Postgres into a DataFrame, with 4 cores parallelism. - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses up to **3x** less memory and **21x** less time (**3x** less memory and **13x** less time compared with Pandas.). More on [here](https://github.com/sfu-db/connector-x/blob/main/Benchmark.md#benchmark-result-on-aws-r54xlarge). - -## How does ConnectorX achieve a lightning speed while keeping the memory footprint low? +## How does Connector Arrow achieve a lightning speed while keeping the memory footprint low? We observe that existing solutions more or less do data copy multiple times when downloading the data. Additionally, implementing a data intensive application in Python brings additional cost. -ConnectorX is written in Rust and follows "zero-copy" principle. -This allows it to make full use of the CPU by becoming cache and branch predictor friendly. Moreover, the architecture of ConnectorX ensures the data will be copied exactly once, directly from the source to the destination. +Connector Arrow is written in Rust and follows "zero-copy" principle. +This allows it to make full use of the CPU by becoming cache and branch predictor friendly. Moreover, the architecture of Connector Arrow ensures the data will be copied exactly once, directly from the source to the destination. -## How does ConnectorX download the data? +## How does Connector Arrow download the data? -Upon receiving the query, e.g. `SELECT * FROM lineitem`, ConnectorX will first issue a `LIMIT 1` query `SELECT * FROM lineitem LIMIT 1` to get the schema of the result set. +Upon receiving the query, e.g. `SELECT * FROM lineitem`, Connector Arrow will first issue a `LIMIT 1` query `SELECT * FROM lineitem LIMIT 1` to get the schema of the result set. -Then, if `partition_on` is specified, ConnectorX will issue `SELECT MIN($partition_on), MAX($partition_on) FROM (SELECT * FROM lineitem)` to know the range of the partition column. 
+Then, if `partition_on` is specified, Connector Arrow will issue `SELECT MIN($partition_on), MAX($partition_on) FROM (SELECT * FROM lineitem)` to know the range of the partition column. After that, the original query is split into partitions based on the min/max information, e.g. `SELECT * FROM (SELECT * FROM lineitem) WHERE $partition_on > 0 AND $partition_on < 10000`. -ConnectorX will then run a count query to get the partition size (e.g. `SELECT COUNT(*) FROM (SELECT * FROM lineitem) WHERE $partition_on > 0 AND $partition_on < 10000`). If the partition +Connector Arrow will then run a count query to get the partition size (e.g. `SELECT COUNT(*) FROM (SELECT * FROM lineitem) WHERE $partition_on > 0 AND $partition_on < 10000`). If the partition is not specified, the count query will be `SELECT COUNT(*) FROM (SELECT * FROM lineitem)`. -Finally, ConnectorX will use the schema info as well as the count info to allocate memory and download data by executing the queries normally. +Finally, Connector Arrow will use the schema info as well as the count info to allocate memory and download data by executing the queries normally. Once the downloading begins, there will be one thread for each partition so that the data are downloaded in parallel at the partition level. The thread will issue the query of the corresponding partition to the database and then write the returned data to the destination row-wise or column-wise (depends on the database) in a streaming fashion. -# Supported Sources & Destinations - -Example connection string, supported protocols and data types for each data source can be found [here](https://sfu-db.github.io/connector-x/databases.html). - -For more planned data sources, please check out our [discussion](https://github.com/sfu-db/connector-x/discussions/61). - ## Sources - [x] Postgres - [x] Mysql @@ -93,63 +40,7 @@ For more planned data sources, please check out our [discussion](https://github. 
- [x] Azure SQL Database (through mssql protocol) - [x] Oracle - [x] Big Query -- [ ] ODBC (WIP) -- [ ] ... ## Destinations -- [x] Pandas -- [x] PyArrow -- [x] Modin (through Pandas) -- [x] Dask (through Pandas) -- [x] Polars (through PyArrow) - -# Documentation - -Doc: https://sfu-db.github.io/connector-x/intro.html -Rust docs: [stable](https://docs.rs/connectorx) [nightly](https://sfu-db.github.io/connector-x/connectorx/) - -# Next Plan - -Checkout our [discussion][discussion_page] to participate in deciding our next plan! - -# Historical Benchmark Results - -https://sfu-db.github.io/connector-x/dev/bench/ - -# Developer's Guide -Please see [Developer's Guide](https://github.com/sfu-db/connector-x/blob/main/CONTRIBUTING.md) for information about developing ConnectorX. - -# Supports - -You are always welcomed to: -1. Ask questions & propose new ideas in our github [discussion][discussion_page]. -2. Ask questions in stackoverflow. Make sure to have #connectorx attached. - -# Organizations and Projects using ConnectorX - -[](https://github.com/pola-rs/polars) -[](https://dataprep.ai/) -[](https://modin.readthedocs.io) - -To add your project/organization here, reply our post [here](https://github.com/sfu-db/connector-x/discussions/146) - -# Citing ConnectorX - -If you use ConnectorX, please consider citing the following paper: - -Xiaoying Wang, Weiyuan Wu, Jinze Wu, Yizhou Chen, Nick Zrymiak, Changbo Qu, Lampros Flokas, George Chow, Jiannan Wang, Tianzheng Wang, Eugene Wu, Qingqing Zhou. [ConnectorX: Accelerating Data Loading From Databases to Dataframes.](https://www.vldb.org/pvldb/vol15/p2994-wang.pdf) _VLDB 2022_. 
- -BibTeX entry: - -```bibtex -@article{connectorx2022, - author = {Xiaoying Wang and Weiyuan Wu and Jinze Wu and Yizhou Chen and Nick Zrymiak and Changbo Qu and Lampros Flokas and George Chow and Jiannan Wang and Tianzheng Wang and Eugene Wu and Qingqing Zhou}, - title = {ConnectorX: Accelerating Data Loading From Databases to Dataframes}, - journal = {Proc. {VLDB} Endow.}, - volume = {15}, - number = {11}, - pages = {2994--3003}, - year = {2022}, - url = {https://www.vldb.org/pvldb/vol15/p2994-wang.pdf}, -} -``` +- [x] [arrow](https://crates.io/crates/arrow) +- [x] [arrow2](https://crates.io/crates/arrow2) diff --git a/assets/Technical_Report__ConnectorX.pdf b/assets/Technical_Report__ConnectorX.pdf deleted file mode 100644 index 2734c9b..0000000 Binary files a/assets/Technical_Report__ConnectorX.pdf and /dev/null differ diff --git a/assets/cache_overview.jpg b/assets/cache_overview.jpg deleted file mode 100644 index 4fd4330..0000000 Binary files a/assets/cache_overview.jpg and /dev/null differ diff --git a/assets/cache_workflow.jpeg b/assets/cache_workflow.jpeg deleted file mode 100644 index 5d702c1..0000000 Binary files a/assets/cache_workflow.jpeg and /dev/null differ diff --git a/assets/cx_cache.jpeg b/assets/cx_cache.jpeg deleted file mode 100644 index 7f302b2..0000000 Binary files a/assets/cx_cache.jpeg and /dev/null differ diff --git a/assets/destinations.gif b/assets/destinations.gif deleted file mode 100644 index dbdf7d2..0000000 Binary files a/assets/destinations.gif and /dev/null differ diff --git a/assets/mssql-mem.png b/assets/mssql-mem.png deleted file mode 100644 index 2e026a2..0000000 Binary files a/assets/mssql-mem.png and /dev/null differ diff --git a/assets/mssql-time.png b/assets/mssql-time.png deleted file mode 100644 index 9aee47a..0000000 Binary files a/assets/mssql-time.png and /dev/null differ diff --git a/assets/mysql-mem.png b/assets/mysql-mem.png deleted file mode 100644 index 6e82eb2..0000000 Binary files a/assets/mysql-mem.png and 
/dev/null differ diff --git a/assets/mysql-time.png b/assets/mysql-time.png deleted file mode 100644 index 426b135..0000000 Binary files a/assets/mysql-time.png and /dev/null differ diff --git a/assets/oracle-mem.png b/assets/oracle-mem.png deleted file mode 100644 index 6fd0dfa..0000000 Binary files a/assets/oracle-mem.png and /dev/null differ diff --git a/assets/oracle-time.png b/assets/oracle-time.png deleted file mode 100644 index 9cc6f14..0000000 Binary files a/assets/oracle-time.png and /dev/null differ diff --git a/assets/pg-mem.png b/assets/pg-mem.png deleted file mode 100644 index dc0d55a..0000000 Binary files a/assets/pg-mem.png and /dev/null differ diff --git a/assets/pg-time.png b/assets/pg-time.png deleted file mode 100644 index bad81d1..0000000 Binary files a/assets/pg-time.png and /dev/null differ diff --git a/assets/sources.gif b/assets/sources.gif deleted file mode 100644 index 3fbc915..0000000 Binary files a/assets/sources.gif and /dev/null differ diff --git a/assets/sqlite-mem.png b/assets/sqlite-mem.png deleted file mode 100644 index 2be5714..0000000 Binary files a/assets/sqlite-mem.png and /dev/null differ diff --git a/assets/sqlite-time.png b/assets/sqlite-time.png deleted file mode 100644 index ba0493b..0000000 Binary files a/assets/sqlite-time.png and /dev/null differ diff --git a/benchmarks/ddos-cx.py b/benchmarks/ddos-cx.py deleted file mode 100644 index a66d687..0000000 --- a/benchmarks/ddos-cx.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Usage: - tpch-cx.py [--protocol=] [--conn=] [--ret=] - -Options: - --protocol= The protocol to use [default: binary]. - --conn= The connection url to use [default: POSTGRES_URL]. - --ret= The return type [default: pandas]. - -h --help Show this screen. - --version Show version. 
-""" -import os - -import connectorx as cx -from contexttimer import Timer -from docopt import docopt -import pandas as pd -import modin.pandas as mpd -import dask.dataframe as dd -import polars as pl -import pyarrow as pa - - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - conn = os.environ[args["--conn"]] - table = "DDOS" - part_num = int(args[""]) - - with Timer() as timer: - if part_num > 1: - df = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - partition_on="ID", - partition_num=int(args[""]), - protocol=args["--protocol"], - return_type=args["--ret"], - ) - else: - df = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - protocol=args["--protocol"], - return_type=args["--ret"], - ) - print("time in total:", timer.elapsed) - - print(df) - print([(c, df[c].dtype) for c in df.columns]) - print(df.info(memory_usage='deep')) diff --git a/benchmarks/ddos-dask.py b/benchmarks/ddos-dask.py deleted file mode 100644 index b4b52be..0000000 --- a/benchmarks/ddos-dask.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Usage: - tpch-dask.py [--conn=] [--table=] [--index=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --table=
The connection url to use [default: DDOS]. - --index= The connection url to use [default: id]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. - -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 -""" - -import os - -import dask.dataframe as dd -from contexttimer import Timer -from docopt import docopt -from dask.distributed import Client, LocalCluster -from sqlalchemy.engine.url import make_url - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - index_col = args["--index"] - conn = os.environ[args["--conn"]] - conn = make_url(conn) - table = args["--table"] - driver = args.get("--driver", None) - npartition = int(args[""]) - - cluster = LocalCluster(n_workers=npartition, scheduler_port=0, memory_limit="230G") - client = Client(cluster) - - # https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite - # 4 initial slashes is needed for Unix/Mac - if conn.drivername == "sqlite": - conn = f"sqlite:///{str(conn)[9:]}" - elif driver is not None: - conn = str(conn.set(drivername=driver)) - print(f"conn url: {conn}") - - with Timer() as timer: - df = dd.read_sql_table( - table, - str(conn), - index_col, - npartitions=npartition, - limits=(0, 7902474), - ).compute() - - print(f"[Total] {timer.elapsed:.2f}s") - - print(df) - print([(c, df[c].dtype) for c in df.columns]) diff --git a/benchmarks/ddos-modin.py b/benchmarks/ddos-modin.py deleted file mode 100644 index df6b542..0000000 --- a/benchmarks/ddos-modin.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Usage: - tpch-modin.py [--conn=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
- -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 -""" - -import os - -import modin.config as config -import modin.pandas as pd -from contexttimer import Timer -from docopt import docopt -from dask.distributed import Client, LocalCluster -from sqlalchemy.engine.url import make_url - -# modin adopts the fastest mysqlclient connector for mysql - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - conn = make_url(conn) - table = "DDOS" - driver = args.get("--driver", None) - - partitions = int(args[""]) - config.NPartitions.put(partitions) - - cluster = LocalCluster(n_workers=partitions, scheduler_port=0, memory_limit="230G") - client = Client(cluster) - - # https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite - # 4 initial slashes is needed for Unix/Mac - if conn.drivername == "sqlite": - conn = f"sqlite:///{str(conn)[9:]}" - elif driver is not None: - conn = str(conn.set(drivername=driver)) - print(f"conn url: {conn}") - - with Timer() as timer: - df = pd.read_sql( - f"SELECT * FROM {table}", - str(conn), - ) - print(f"[Total] {timer.elapsed:.2f}s") - - print(df) - print([(c, df[c].dtype) for c in df.columns]) diff --git a/benchmarks/ddos-pandas-chunk.py b/benchmarks/ddos-pandas-chunk.py deleted file mode 100644 index 3822ad1..0000000 --- a/benchmarks/ddos-pandas-chunk.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Usage: - tpch-pandas-chunk.py [--conn=] [--csize=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES]. - --csize= Chunk size [default: 1000]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
-""" - -import os -from contexttimer import Timer -from docopt import docopt -import pandas as pd -from sqlalchemy import create_engine -from sqlalchemy.engine.url import make_url -import time - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - chunksize = int(args["--csize"]) - driver = args.get("--driver", None) - conn = make_url(conn) - if driver is not None: - conn = conn.set(drivername=driver) - if conn.drivername == "sqlite": - conn = conn.set(database="/" + conn.database) - - print(f"chunksize: {chunksize}, conn url: {str(conn)}") - - with Timer() as timer: - engine = create_engine(conn) - conn = engine.connect().execution_options( - stream_results=True, max_row_buffer=chunksize) - dfs = [] - with Timer() as stream_timer: - for df in pd.read_sql("SELECT * FROM DDOS", conn, chunksize=chunksize): - dfs.append(df) - print(f"time iterate batches: {stream_timer.elapsed}") - df = pd.concat(dfs) - print(f"time in total: {timer.elapsed}s") - time.sleep(3) # capture peak memory - - conn.close() - print(df) - print(df.info(memory_usage="deep")) - # print(df._data.blocks) - - # print("======") - # print(len(dfs)) - # for d in dfs: - # print(d.info(memory_usage="deep")) - # print(d._data.blocks) - # break diff --git a/benchmarks/ddos-pandas.py b/benchmarks/ddos-pandas.py deleted file mode 100644 index 9a80b95..0000000 --- a/benchmarks/ddos-pandas.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Usage: - tpch-pandas.py [--conn=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
- -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 - -""" - -import os - -from contexttimer import Timer -from sqlalchemy import create_engine -from docopt import docopt -import pandas as pd -import sqlite3 -from clickhouse_driver import connect -from sqlalchemy.engine.url import make_url - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - table = "DDOS" - driver = args.get("--driver", None) - conn = os.environ[args["--conn"]] - conn = make_url(conn) - - if conn.drivername == "sqlite": - conn = sqlite3.connect(str(conn)[9:]) - elif driver == "clickhouse": - # clickhouse-driver uses native protocol: 9000 - conn = conn.set(drivername=driver, port=9000) - conn = connect(str(conn)) - else: # go with sqlalchemy - if driver is not None: - conn = conn.set(drivername=driver) - print(f"conn url: {str(conn)}") - engine = create_engine(conn) - conn = engine.connect() - - with Timer() as timer: - df = pd.read_sql( - f"SELECT * FROM {table}", - conn, - ) - print(f"[Total] {timer.elapsed:.2f}s") - conn.close() - - print(df) - print([(c, df[c].dtype) for c in df.columns]) diff --git a/benchmarks/ddos-turbodbc.py b/benchmarks/ddos-turbodbc.py deleted file mode 100644 index 933a17f..0000000 --- a/benchmarks/ddos-turbodbc.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Usage: - tpch-turbodbc.py [--driver=] [--ret=] - -Options: - --driver= ODBC driver to use [default: PostgreSQL]. - --ret= The return type [default: pandas-numpy]. - -h --help Show this screen. - --version Show version. 
- -""" - -import os - -from docopt import docopt -from turbodbc import connect, make_options -import pandas as pd -from contexttimer import Timer - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - table = "DDOS" - driver = args["--driver"] - ret = args["--ret"] - query = f"SELECT * FROM {table}" - - with Timer() as gtimer: - with Timer() as timer: - if driver == "MSSQL": - options = make_options(prefer_unicode=True) - connection = connect( - dsn=driver, uid=os.environ["MSSQL_USER"], pwd=os.environ["MSSQL_PASSWORD"], turbodbc_options=options) - else: - connection = connect(dsn=driver) - cursor = connection.cursor() - print(f"connect: {timer.elapsed}") - with Timer() as timer: - cursor.execute(query) - print(f"execute: {timer.elapsed}") - if ret == "pandas-numpy": - with Timer() as timer: - data = cursor.fetchallnumpy() - print(f"fetchallnumpy: {timer.elapsed}") - with Timer() as timer: - df = pd.DataFrame(data=data) - print(f"convert to pandas: {timer.elapsed}") - elif ret == "pandas-arrow": - with Timer() as timer: - data = cursor.fetchallarrow() - print(f"fetchallarrow: {timer.elapsed}") - with Timer() as timer: - # to be fair with other benchmarks, generate consolidate blocks and convert date - df = data.to_pandas(split_blocks=False, date_as_object=False) - print(f"convert to pandas: {timer.elapsed}") - else: - assert ret == "arrow" - with Timer() as timer: - df = cursor.fetchallarrow() - print(f"fetchallarrow: {timer.elapsed}") - - print(f"time in total: {gtimer.elapsed}") - print(df) - print([(c, df[c].dtype) for c in df.columns]) diff --git a/benchmarks/tpch-cx-aw.py b/benchmarks/tpch-cx-aw.py deleted file mode 100644 index 2df5924..0000000 --- a/benchmarks/tpch-cx-aw.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Usage: - tpch-cx-aw.py [--protocol=] [--conn=] [--ret=] - -Options: - --protocol= The protocol to use [default: binary]. - --conn= The connection url to use [default: POSTGRES_URL]. - --ret= The return type [default: pandas]. 
- -h --help Show this screen. - --version Show version. -""" -import os - -import connectorx as cx -from contexttimer import Timer -from docopt import docopt - - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - conn = os.environ[args["--conn"]] - table = os.environ["TPCH_TABLE"] - part_num = int(args[""]) - ret = args["--ret"] - - print(f"[CX-AW] conn: {conn}, part_num: {part_num}, return: {ret}") - - with Timer() as gtimer: - with Timer() as timer: - if part_num > 1: - data = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - partition_on="L_ORDERKEY", - partition_num=int(args[""]), - protocol=args["--protocol"], - return_type="arrow", - ) - else: - data = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - protocol=args["--protocol"], - return_type="arrow", - ) - print("got arrow:", timer.elapsed) - if ret == "pandas": - with Timer() as timer: - df = data.to_pandas(split_blocks=False, date_as_object=False) - print("convert to pandas:", timer.elapsed) - - print(f"time in total: {gtimer.elapsed}") - print(df) diff --git a/benchmarks/tpch-cx.py b/benchmarks/tpch-cx.py deleted file mode 100644 index fbd5a48..0000000 --- a/benchmarks/tpch-cx.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Usage: - tpch-cx.py [--protocol=] [--conn=] [--ret=] - -Options: - --protocol= The protocol to use [default: binary]. - --conn= The connection url to use [default: POSTGRES_URL]. - --ret= The return type [default: pandas]. - -h --help Show this screen. - --version Show version. 
-""" -import os - -import connectorx as cx -from contexttimer import Timer -from docopt import docopt -import pandas as pd -import modin.pandas as mpd -import dask.dataframe as dd -import polars as pl -import pyarrow as pa - - -def describe(df): - if isinstance(df, pd.DataFrame): - print(df.head()) - elif isinstance(df, mpd.DataFrame): - print(df.head()) - elif isinstance(df, pl.DataFrame): - print(df.head()) - elif isinstance(df, dd.DataFrame): - print(df.head()) - elif isinstance(df, pa.Table): - print(df.slice(0, 10).to_pandas()) - else: - raise ValueError("unknown type") - - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - conn = os.environ[args["--conn"]] - table = os.environ["TPCH_TABLE"] - part_num = int(args[""]) - - with Timer() as timer: - if part_num > 1: - df = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - partition_on="L_ORDERKEY", - partition_num=int(args[""]), - protocol=args["--protocol"], - return_type=args["--ret"], - ) - else: - df = cx.read_sql( - conn, - f"""SELECT * FROM {table}""", - protocol=args["--protocol"], - return_type=args["--ret"], - ) - print("time in total:", timer.elapsed) - - print(type(df), len(df)) - describe(df) \ No newline at end of file diff --git a/benchmarks/tpch-dask.py b/benchmarks/tpch-dask.py deleted file mode 100644 index af139a4..0000000 --- a/benchmarks/tpch-dask.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Usage: - tpch-dask.py [--conn=] [--index=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --index= The connection url to use [default: l_orderkey]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
- -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 -""" - -import os - -import dask.dataframe as dd -from contexttimer import Timer -from docopt import docopt -from dask.distributed import Client, LocalCluster -from sqlalchemy.engine.url import make_url - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - index_col = args["--index"] - conn = os.environ[args["--conn"]] - conn = make_url(conn) - table = os.environ["TPCH_TABLE"] - driver = args.get("--driver", None) - npartition = int(args[""]) - - cluster = LocalCluster(n_workers=npartition, scheduler_port=0, memory_limit="230G") - client = Client(cluster) - - # https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite - # 4 initial slashes is needed for Unix/Mac - if conn.drivername == "sqlite": - conn = f"sqlite:///{str(conn)[9:]}" - elif driver is not None: - conn = str(conn.set(drivername=driver)) - print(f"conn url: {conn}") - - with Timer() as timer: - df = dd.read_sql_table( - table, - conn, - index_col, - npartitions=npartition, - limits=(0, 60000000), - parse_dates=[ - "l_shipdate", - "l_commitdate", - "l_receiptdate", - "L_SHIPDATE", - "L_COMMITDATE", - "L_RECEIPTDATE", - ], - ).compute() - - print(f"[Total] {timer.elapsed:.2f}s") - - print(df.head()) - print(len(df)) - print(df.dtypes) diff --git a/benchmarks/tpch-fed.py b/benchmarks/tpch-fed.py deleted file mode 100644 index c4ccebc..0000000 --- a/benchmarks/tpch-fed.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Usage: - tpch-fed.py [--file=] [--dir=] [--runs=] [--print] - -Options: - --file= Query file. - --dir= Query path. - --runs= # runs [default: 1]. - --print Print query result. - -h --help Show this screen. - --version Show version. 
-""" - -import os -import sys -import time -import connectorx as cx -from contexttimer import Timer -from docopt import docopt -from pathlib import Path - -def run_query_from_file(query_file, doprint=False, ntries=0): - with open(query_file, "r") as f: - sql = f.read() - print(f"file: {query_file}") - - try: - with Timer() as timer: - df = cx.read_sql(db_map, sql, return_type="arrow") - print(f"time in total: {timer.elapsed:.2f}, {len(df)} rows, {len(df.columns)} cols") - if doprint: - print(df) - del df - # print(df.schema) - # print(df) - except RuntimeError as e: - print(e) - if ntries >= 5: - raise - print("retry in 10 seconds...") - sys.stdout.flush() - time.sleep(10) - run_query_from_file(query_file, ntries+1) - - sys.stdout.flush() - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - query_file = args["--file"] - - db_map = {} - db_conns = os.environ["FED_CONN"] - for conn in db_conns.split(','): - db_map[conn.split('=', 1)[0]] = conn.split('=', 1)[1] - - print(f"dbs: {db_map}") - - for i in range(int(args["--runs"])): - print(f"=============== run {i} ================") - print() - sys.stdout.flush() - if args["--file"]: - filename = args["--file"] - run_query_from_file(filename, args["--print"]) - elif args["--dir"]: - for filename in sorted(Path(args["--dir"]).glob("q*.sql")): - run_query_from_file(filename, args["--print"]) - time.sleep(2) - diff --git a/benchmarks/tpch-modin-exp.py b/benchmarks/tpch-modin-exp.py deleted file mode 100644 index a5ef407..0000000 --- a/benchmarks/tpch-modin-exp.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Usage: - tpch-modin-exp.py - -Options: - -h --help Show this screen. - --version Show version. 
-""" - -import os -os.environ["MODIN_ENGINE"] = "ray" -import ray -from contexttimer import Timer -from docopt import docopt - - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ["POSTGRES_URL"] - table = os.environ["POSTGRES_TABLE"] - - partitions = int(args[""]) - # ray.init(num_cpus=partitions, object_store_memory=10**10, _plasma_directory="/tmp") - ray.init(num_cpus=partitions, object_store_memory=10**10) - - import modin.config as config - import modin.experimental.pandas as pd - - config.NPartitions.put(partitions) - with Timer() as timer: - df = pd.read_sql( - f"{table}", # use table here, a bug exists in modin experimental read_sql for query - conn, - parse_dates=[ - "l_shipdate", - "l_commitdate", - "l_receiptdate", - ], - partition_column="l_orderkey", - lower_bound=0, - upper_bound=60000000, - max_sessions=partitions, - ) - print(f"[Total] {timer.elapsed:.2f}s") - - print(df.head()) diff --git a/benchmarks/tpch-modin.py b/benchmarks/tpch-modin.py deleted file mode 100644 index 69ac57f..0000000 --- a/benchmarks/tpch-modin.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Usage: - tpch-modin.py [--conn=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
- -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 -""" - -import os - -import modin.config as config -import modin.pandas as pd -from contexttimer import Timer -from docopt import docopt -from dask.distributed import Client, LocalCluster -from sqlalchemy.engine.url import make_url - -# modin adopts the fastest mysqlclient connector for mysql - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - conn = make_url(conn) - table = os.environ["TPCH_TABLE"] - driver = args.get("--driver", None) - - partitions = int(args[""]) - config.NPartitions.put(partitions) - - cluster = LocalCluster(n_workers=partitions, scheduler_port=0, memory_limit="230G") - client = Client(cluster) - - # https://docs.sqlalchemy.org/en/13/core/engines.html#sqlite - # 4 initial slashes is needed for Unix/Mac - if conn.drivername == "sqlite": - conn = f"sqlite:///{str(conn)[9:]}" - elif driver is not None: - conn = str(conn.set(drivername=driver)) - print(f"conn url: {conn}") - - with Timer() as timer: - df = pd.read_sql( - f"SELECT * FROM {table}", - str(conn), - parse_dates=[ - "l_shipdate", - "l_commitdate", - "l_receiptdate", - "L_SHIPDATE", - "L_COMMITDATE", - "L_RECEIPTDATE", - ], - ) - print(f"[Total] {timer.elapsed:.2f}s") - - print(df.head()) - print(len(df)) - print(df.dtypes) diff --git a/benchmarks/tpch-pandahouse.py b/benchmarks/tpch-pandahouse.py deleted file mode 100644 index 0a8ed6a..0000000 --- a/benchmarks/tpch-pandahouse.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Usage: - tpch-pandahouse.py [--index=] - -Options: - --index= The connection url to use [default: L_ORDERKEY]. - -h --help Show this screen. - --version Show version. 
-""" - -import os - -from contexttimer import Timer -from docopt import docopt -import pandas as pd -from pandahouse import read_clickhouse - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - index_col = args["--index"] - table = os.environ["TPCH_TABLE"] - - conn = { - "host": f"http://{os.environ['CLICKHOUSE_HOST']}:8123", # 8123 is default clickhouse http port - "database": os.environ["CLICKHOUSE_DB"], - "user": os.environ["CLICKHOUSE_USER"], - "password": os.environ["CLICKHOUSE_PASSWORD"], - } - print(conn) - - with Timer() as timer: - df = read_clickhouse(f'SELECT * FROM {conn["database"]}.{table}', index_col=index_col, connection=conn) - print(f"[Total] {timer.elapsed:.2f}s") - - print(df.head()) - print(df.tail()) - print(len(df)) \ No newline at end of file diff --git a/benchmarks/tpch-pandas-chunk.py b/benchmarks/tpch-pandas-chunk.py deleted file mode 100644 index d51c4e2..0000000 --- a/benchmarks/tpch-pandas-chunk.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Usage: - tpch-pandas-chunk.py [--conn=] [--csize=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES]. - --csize= Chunk size [default: 1000]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
-""" - -import os -from contexttimer import Timer -from docopt import docopt -import pandas as pd -from sqlalchemy import create_engine -from sqlalchemy.engine.url import make_url -import time - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - chunksize = int(args["--csize"]) - driver = args.get("--driver", None) - conn = make_url(conn) - if driver is not None: - conn = conn.set(drivername=driver) - if conn.drivername == "sqlite": - conn = conn.set(database="/" + conn.database) - print(f"chunksize: {chunksize}, conn url: {str(conn)}") - - with Timer() as timer: - engine = create_engine(conn) - conn = engine.connect().execution_options( - stream_results=True, max_row_buffer=chunksize) - dfs = [] - with Timer() as stream_timer: - for df in pd.read_sql("SELECT * FROM lineitem", - conn, parse_dates=[ - "l_shipdate", - "l_commitdate", - "l_receiptdate", - "L_SHIPDATE", - "L_COMMITDATE", - "L_RECEIPTDATE",], chunksize=chunksize): - dfs.append(df) - print(f"time iterate batches: {stream_timer.elapsed}") - df = pd.concat(dfs) - print(f"time in total: {timer.elapsed}s") - time.sleep(3) # capture peak memory - - conn.close() - print(df) - print(df.info(memory_usage="deep")) - # print(df._data.blocks) - - # print("======") - # print(len(dfs)) - # for d in dfs: - # print(d.info(memory_usage="deep")) - # print(d._data.blocks) - # break diff --git a/benchmarks/tpch-pandas.py b/benchmarks/tpch-pandas.py deleted file mode 100644 index 408790d..0000000 --- a/benchmarks/tpch-pandas.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Usage: - tpch-pandas.py [--conn=] [--driver=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - --driver= The driver to use using sqlalchemy: https://docs.sqlalchemy.org/en/14/core/engines.html. - -h --help Show this screen. - --version Show version. 
- -Drivers: - PostgreSQL: postgresql, postgresql+psycopg2 - MySQL: mysql, mysql+mysqldb, mysql+pymysql - Redshift: postgresql, redshift, redshift+psycopg2 - -""" - -import os - -from contexttimer import Timer -from sqlalchemy import create_engine -from docopt import docopt -import pandas as pd -import sqlite3 -from clickhouse_driver import connect -from sqlalchemy.engine.url import make_url - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - table = os.environ["TPCH_TABLE"] - driver = args.get("--driver", None) - conn = os.environ[args["--conn"]] - conn = make_url(conn) - - if conn.drivername == "sqlite": - conn = sqlite3.connect(str(conn)[9:]) - elif driver == "clickhouse": - # clickhouse-driver uses native protocol: 9000 - conn = conn.set(drivername=driver, port=9000) - conn = connect(str(conn)) - else: # go with sqlalchemy - if driver is not None: - conn = conn.set(drivername=driver) - print(f"conn url: {str(conn)}") - engine = create_engine(conn) - conn = engine.connect() - - with Timer() as timer: - df = pd.read_sql( - f"SELECT * FROM {table}", - conn, - parse_dates=[ - "l_shipdate", - "l_commitdate", - "l_receiptdate", - "L_SHIPDATE", - "L_COMMITDATE", - "L_RECEIPTDATE", - ], - ) - print(f"[Total] {timer.elapsed:.2f}s") - conn.close() - - print(df.head()) - print(df.tail()) - print(len(df)) - print(df.dtypes) diff --git a/benchmarks/tpch-presto.py b/benchmarks/tpch-presto.py deleted file mode 100644 index df348ea..0000000 --- a/benchmarks/tpch-presto.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -Usage: - tpch-cx.py [--protocol=] - -Options: - --protocol= The protocol to use [default: prestodb]. - -h --help Show this screen. - --version Show version. 
-""" -import os - -from docopt import docopt -import prestodb -from pyhive import presto -from sqlalchemy.engine import create_engine -import pandas as pd -from contexttimer import Timer - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - proto = args["--protocol"] - table = os.environ["TPCH_TABLE"] - - if proto == "prestodb": - conn = prestodb.dbapi.connect( - host=os.environ["PRESTO_HOST"], - port=int(os.environ["PRESTO_PORT"]), - user=os.environ["PRESTO_USER"], - catalog=os.environ["PRESTO_CATALOG"], - schema=os.environ["PRESTO_SCHEMA"], - ) - cur = conn.cursor() - with Timer() as timer: - cur.execute(f'SELECT * FROM {table}') - rows = cur.fetchall() - print(f"fetch all: {timer.elapsed:.2f}") - - with Timer() as timer: - df = pd.DataFrame(rows) - print(f"to df: {timer.elapsed:.2f}") - - elif proto == "pyhive-pd": - connection = presto.connect( - host=os.environ["PRESTO_HOST"], - port=int(os.environ["PRESTO_PORT"]), - username=os.environ["PRESTO_USER"], - catalog=os.environ["PRESTO_CATALOG"], - schema=os.environ["PRESTO_SCHEMA"], - ) - - with Timer() as timer: - df = pd.read_sql("select * from lineitem", connection) - print(f"Time in total: {timer.elapsed:.2f}") - elif proto == "pyhive": - connection = presto.connect( - host=os.environ["PRESTO_HOST"], - port=int(os.environ["PRESTO_PORT"]), - username=os.environ["PRESTO_USER"], - catalog=os.environ["PRESTO_CATALOG"], - schema=os.environ["PRESTO_SCHEMA"], - ) - cur = connection.cursor() - with Timer() as timer: - cur.execute(f'SELECT * FROM {table}') - rows = cur.fetchall() - print(f"fetch all: {timer.elapsed:.2f}") - - with Timer() as timer: - df = pd.DataFrame(rows) - print(f"to df: {timer.elapsed:.2f}") - elif proto == "sqlalchemy": - engine = create_engine(f'presto://{os.environ["PRESTO_USER"]}@{os.environ["PRESTO_HOST"]}:{os.environ["PRESTO_PORT"]}/{os.environ["PRESTO_CATALOG"]}/{os.environ["PRESTO_SCHEMA"]}') - conn = engine.connect() - with Timer() as timer: - df = 
pd.read_sql(f"SELECT * FROM {table}", conn) - print(f"Time in total: {timer.elapsed:.2f}") - - print(df.head()) - print(len(df)) diff --git a/benchmarks/tpch-pyarrow-p.py b/benchmarks/tpch-pyarrow-p.py deleted file mode 100644 index 0578ff7..0000000 --- a/benchmarks/tpch-pyarrow-p.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Usage: - tpch-pyarrow-p.py - -Options: - -h --help Show this screen. - --version Show version. -""" -import io -import itertools -import os -from multiprocessing import Pool -from typing import Any, List - -import numpy as np -import pyarrow as pa -from contexttimer import Timer -from docopt import docopt -from pyarrow import csv -from sqlalchemy import create_engine - - -def get_sqls(table: str, count: int) -> List[str]: - sqls = [] - split = np.linspace(0, 60000000, num=count + 1, endpoint=True, dtype=int) - for i in range(len(split) - 1): - - sqls.append( - f"""SELECT - l_orderkey, - l_partkey, - l_suppkey, - l_linenumber, - l_quantity::float8, - l_extendedprice::float8, - l_discount::float8, - l_tax::float8, - l_returnflag, - l_linestatus, - l_shipdate, - l_commitdate, - l_receiptdate, - l_shipinstruct, - l_shipmode, - l_comment - FROM {table} - WHERE l_orderkey > {split[i]} and l_orderkey <= {split[i+1]}""" - ) - return sqls - - -def func(id: int, conn: str, query: str) -> Any: - engine = create_engine(conn) - conn = engine.connect() - cur = conn.connection.cursor() - store = io.BytesIO() - - with Timer() as timer: - cur.copy_expert(f"COPY ({query}) TO STDOUT WITH CSV HEADER;", store) - print(f"[Copy {id}] {timer.elapsed:.2f}s") - - store.seek(0) - with Timer() as timer: - df = csv.read_csv(store, read_options=csv.ReadOptions(use_threads=False)) - print(f"[Read CSV {id}] {timer.elapsed:.2f}s") - - return df - - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ["POSTGRES_URL"] - table = os.environ["POSTGRES_TABLE"] - - queries = get_sqls(table, int(args[""])) - - print(f"number of threads: 
{len(queries)}\nsqls: {queries}") - - with Timer() as timer, Pool(len(queries)) as pool: - dfs = pool.starmap( - func, zip(range(len(queries)), itertools.repeat(conn), queries) - ) - - print(f"[All Jobs] {timer.elapsed:.2f}s") - - with Timer() as timer: - df = pa.concat_tables(dfs) - print(f"[Concat] {timer.elapsed:.2f}s") - - with Timer() as timer: - df = df.to_pandas() - print(f"[To Pandas] {timer.elapsed:.2f}s") - - print(df.head()) diff --git a/benchmarks/tpch-pyarrow.py b/benchmarks/tpch-pyarrow.py deleted file mode 100644 index 66121da..0000000 --- a/benchmarks/tpch-pyarrow.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Usage: - tpch-pyarrow.py - -Options: - -h --help Show this screen. - --version Show version. -""" -import io -import os - -from contexttimer import Timer -from pyarrow import csv -from sqlalchemy import create_engine -from docopt import docopt - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ["POSTGRES_URL"] - table = os.environ["POSTGRES_TABLE"] - - engine = create_engine(conn) - conn = engine.connect() - - cur = conn.connection.cursor() - store = io.BytesIO() - with Timer() as timer: - cur.copy_expert( - f"COPY (SELECT * FROM {table}) TO STDOUT WITH CSV HEADER;", store - ) - print(f"[Copy] {timer.elapsed:.2f}s") - - store.seek(0) - - with Timer() as timer: - df = csv.read_csv(store, read_options=csv.ReadOptions(use_threads=False)) - print(f"[Read CSV] {timer.elapsed:.2f}s") - - with Timer() as timer: - df = df.to_pandas() - print(f"[To Pandas] {timer.elapsed:.2f}s") - - conn.close() - print(df.head()) - # _, peak = tracemalloc.get_traced_memory() - # print(f"memory peak: {peak/10**9:.2f}G") diff --git a/benchmarks/tpch-queries-cx.py b/benchmarks/tpch-queries-cx.py deleted file mode 100644 index 2c3f83d..0000000 --- a/benchmarks/tpch-queries-cx.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Usage: - tpch-queries-cx.py [--conn=] [--ret=] [--part=] [--protocol=] [--force-parallel] - -Options: - --ret= The return type 
[default: pandas]. - --conn= The connection url to use [default: POSTGRES_URL]. - --part= The number of partitions to use [default: 1]. - --protocol= The protocol to use [default: binary]. - --force-parallel Force parallelism by setting variables - -h --help Show this screen. - --version Show version. - """ - -import os - -from pathlib import Path -from contexttimer import Timer -from docopt import docopt -import connectorx as cx - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - print(f"conn url: {conn}") - - ret = args["--ret"] - print(f"return type: {ret}") - - qid = args[""] - print(f"execute query id: {qid}") - - part = int(args["--part"]) - print(f"# partitions: {part}") - - # multi_access_plan = "force_parallel" if args["--force-parallel"] else "default" - # print(f"plan: {multi_access_plan}") - - if part > 1: - qdir = Path(f"{os.environ['TPCH_QUERIES']}_part", f"q{qid}.sql") - with open(qdir, "r") as f: - part_col = f.readline()[:-1] # first line is partition key, remove last '\n' - query = f.read() - else: - qdir = Path(os.environ["TPCH_QUERIES"], f"q{qid}.sql") - with open(qdir, "r") as f: - part_col = "" - query = f.read() - print(f"load query from: {qdir}") - print(f"query: {query}") - print(f"partition on : {part_col}") - query = query.replace("%", "%%") - - with Timer() as timer: - if ret == "pandas": - if part > 1: - # df = cx.read_sql(conn, query, partition_on=part_col, partition_num=part, protocol=args["--protocol"], multi_access_plan=multi_access_plan) - df = cx.read_sql(conn, query, partition_on=part_col, partition_num=part, protocol=args["--protocol"]) - else: - df = cx.read_sql(conn, query, protocol=args["--protocol"]) - elif ret == "arrow": - if part > 1: - # table = cx.read_sql(conn, query, return_type="arrow", partition_on=part_col, partition_num=part, protocol=args["--protocol"], multi_access_plan=multi_access_plan) - table = cx.read_sql(conn, query, return_type="arrow", 
partition_on=part_col, partition_num=part, protocol=args["--protocol"]) - else: - table = cx.read_sql(conn, query, return_type="arrow", protocol=args["--protocol"]) - print(f"get arrow table time: {timer.elapsed:.2f}s") - df = table.to_pandas(split_blocks=False, date_as_object=False) - print(f"[cx][QID: {qid} Total] {timer.elapsed:.2f}s") - - print(df) diff --git a/benchmarks/tpch-queries-pd.py b/benchmarks/tpch-queries-pd.py deleted file mode 100644 index e018f5c..0000000 --- a/benchmarks/tpch-queries-pd.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Usage: - tpch-queries-pd.py [--conn=] - -Options: - --conn= The connection url to use [default: POSTGRES_URL]. - -h --help Show this screen. - --version Show version. - """ - -import os - -from pathlib import Path -from contexttimer import Timer -from sqlalchemy import create_engine -from docopt import docopt -import pandas as pd - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ[args["--conn"]] - print(f"conn url: {conn}") - engine = create_engine(conn) - conn = engine.connect() - - qid = args[""] - print(f"execute query id: {qid}") - - qdir = Path(os.environ["TPCH_QUERIES"], f"q{qid}.sql") - print(f"load query from: {qdir}") - - with open(qdir, "r") as f: - query = f.read() - print(f"query: {query}") - query = query.replace("%", "%%") - - with Timer() as timer: - df = pd.read_sql(query, conn) - print(f"[pd][QID: {qid} Total] {timer.elapsed:.2f}s") - - conn.close() - print(df) - print(f"result size: {len(df)}x{len(df.columns)}") diff --git a/benchmarks/tpch-rust-arrow.py b/benchmarks/tpch-rust-arrow.py deleted file mode 100644 index 8d0a5d4..0000000 --- a/benchmarks/tpch-rust-arrow.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -Usage: - tpch-rust-arrow.py - -Options: - -h --help Show this screen. - --version Show version. 
-""" -import json -import os -import time -from typing import List - -import numpy as np -import pyarrow as pa -from connectorx import read_pg -from docopt import docopt - - -def get_sqls(table: str, count: int) -> List[str]: - sqls = [] - split = np.linspace(0, 60000000, num=count + 1, endpoint=True, dtype=int) - for i in range(len(split) - 1): - - sqls.append( - f"""select l_orderkey, - l_partkey, - l_suppkey, - l_linenumber, - l_quantity::float8, - l_extendedprice::float8, - l_discount::float8, - l_tax::float8, - l_returnflag, - l_linestatus, - l_shipdate, - l_commitdate, - l_receiptdate, - l_shipinstruct, - l_shipmode, - l_comment from {table} where l_orderkey > {split[i]} and l_orderkey <= {split[i+1]}""" - ) - return sqls - - -def field_to_json(field): - json = { - "name": field.name, - "nullable": field.nullable, - } - if isinstance(field.type, pa.ListType): - json = { - **json, - "type": {"name": "list"}, - "children": [field_to_json(field.type.value_field)], - } - elif field.type == pa.float64(): - json = { - **json, - "type": {"name": "floatingpoint", "precision": "DOUBLE"}, - "children": [], - } - elif field.type == pa.uint64(): - json = { - **json, - "type": {"name": "int", "bitWidth": 64, "isSigned": False}, - "children": [], - } - elif field.type == pa.string(): - json = { - **json, - "type": {"name": "utf8"}, - "children": [], - } - elif field.type == pa.date32(): - json = { - **json, - "type": {"name": "date", "unit": "DAY"}, - "children": [], - } - elif isinstance(field.type, pa.StructType): - json = { - **json, - "type": {"name": "struct"}, - "children": [ - field_to_json(field.type[i]) for i in range(field.type.num_fields) - ], - } - else: - raise NotImplementedError(field.type) - - return json - - -def schema_to_json(schema): - return { - "fields": [field_to_json(schema.field(name)) for name in schema.names], - "metadata": {}, - } - - -SCHEMA = pa.schema( - [ - pa.field("l_orderkey", pa.uint64(), False), - pa.field("l_partkey", pa.uint64(), 
False), - pa.field("l_suppkey", pa.uint64(), False), - pa.field("l_linenumber", pa.uint64(), False), - pa.field("l_quantity", pa.float64(), False), - pa.field("l_extendedprice", pa.float64(), False), - pa.field("l_discount", pa.float64(), False), - pa.field("l_tax", pa.float64(), False), - pa.field("l_returnflag", pa.string(), False), - pa.field("l_linestatus", pa.string(), False), - # pa.field("l_shipdate", pa.date32(), False), - # pa.field("l_commitdate", pa.date32(), False), - # pa.field("l_receiptdate", pa.date32(), False), - pa.field("l_shipdate", pa.string(), False), - pa.field("l_commitdate", pa.string(), False), - pa.field("l_receiptdate", pa.string(), False), - pa.field("l_shipinstruct", pa.string(), False), - pa.field("l_shipmode", pa.string(), False), - pa.field("l_comment", pa.string(), False), - ] -) - - -if __name__ == "__main__": - args = docopt(__doc__, version="1.0") - conn = os.environ["POSTGRES_URL"] - table = os.environ["POSTGRES_TABLE"] - - queries = get_sqls(table, int(args[""])) - - print(f"numer of threads: {int(args[''])}\nsqls: {queries}") - - then = time.time() - table = read_pg( - conn, - queries, - json.dumps(schema_to_json(SCHEMA)), - ) - print(f"finish read_pg:", time.time() - then) - - tb = pa.Table.from_arrays( - [ - pa.chunked_array([pa.Array._import_from_c(*ptr) for ptr in ptrs]) - for ptrs in table.values() - ], - names=list(table.keys()), - ) - print("finish concat:", time.time() - then) - - df = tb.to_pandas() - print("finish to_pandas:", time.time() - then) - print(df) diff --git a/benchmarks/tpch-turbodbc.py b/benchmarks/tpch-turbodbc.py deleted file mode 100644 index 745a6f8..0000000 --- a/benchmarks/tpch-turbodbc.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Usage: - tpch-turbodbc.py [--driver=] [--ret=] - -Options: - --driver= ODBC driver to use [default: PostgreSQL]. - --ret= The return type [default: pandas-numpy]. - -h --help Show this screen. - --version Show version. 
- -""" - -import os - -from docopt import docopt -from turbodbc import connect, make_options -import pandas as pd -from contexttimer import Timer - -if __name__ == "__main__": - args = docopt(__doc__, version="Naval Fate 2.0") - table = os.environ["TPCH_TABLE"] - driver = args["--driver"] - ret = args["--ret"] - query = f"SELECT * FROM {table}" - - with Timer() as gtimer: - with Timer() as timer: - if driver == "MSSQL": - options = make_options(prefer_unicode=True) - connection = connect( - dsn=driver, uid=os.environ["MSSQL_USER"], pwd=os.environ["MSSQL_PASSWORD"], turbodbc_options=options) - else: - connection = connect(dsn=driver) - cursor = connection.cursor() - print(f"connect: {timer.elapsed}") - with Timer() as timer: - cursor.execute(query) - print(f"execute: {timer.elapsed}") - if ret == "pandas-numpy": - with Timer() as timer: - data = cursor.fetchallnumpy() - print(f"fetchallnumpy: {timer.elapsed}") - with Timer() as timer: - df = pd.DataFrame(data=data) - print(f"convert to pandas: {timer.elapsed}") - elif ret == "pandas-arrow": - with Timer() as timer: - data = cursor.fetchallarrow() - print(f"fetchallarrow: {timer.elapsed}") - with Timer() as timer: - # to be fair with other benchmarks, generate consolidate blocks and convert date - df = data.to_pandas(split_blocks=False, date_as_object=False) - print(f"convert to pandas: {timer.elapsed}") - else: - assert ret == "arrow" - with Timer() as timer: - df = cursor.fetchallarrow() - print(f"fetchallarrow: {timer.elapsed}") - - print(f"time in total: {gtimer.elapsed}") - print(df) diff --git a/connectorx/Cargo.toml b/connector_arrow/Cargo.toml similarity index 79% rename from connectorx/Cargo.toml rename to connector_arrow/Cargo.toml index e64f2d2..46a4ecd 100644 --- a/connectorx/Cargo.toml +++ b/connector_arrow/Cargo.toml @@ -1,13 +1,17 @@ [package] -authors = ["SFU Database System Lab "] -description = "Load data from databases to dataframes, the fastest way." 
-documentation = "https://docs.rs/connectorx" +authors = ["Aljaž Mur Eržen"] +description = "Load data from databases to Apache Arrow, the fastest way." +documentation = "https://docs.rs/connector_arrow" edition = "2018" license = "MIT" -name = "connectorx" +name = "connector_arrow" readme = "../README.md" -repository = "https://github.com/sfu-db/connector-x" -version = "0.3.3-alpha.1" +repository = "https://github.com/aljazerzen/connector_arrow" +version = "0.0.1" + +[lib] +crate-type = ["cdylib", "rlib"] +name = "connector_arrow" [dependencies] anyhow = "1" @@ -22,45 +26,58 @@ owning_ref = "0.4" serde_json = "1" chrono = "0.4" -arrow = {workspace = true, optional = true} -arrow2 = {workspace = true, default-features = false, optional = true} -bb8 = {version = "0.7", optional = true} -bb8-tiberius = {version = "0.5", optional = true} -csv = {version = "1", optional = true} -fallible-streaming-iterator = {version = "0.1", optional = true} +# common optional futures = {version = "0.3", optional = true} -gcp-bigquery-client = {version = "0.13.0", optional = true} hex = {version = "0.4", optional = true} native-tls = {version = "0.2", optional = true} -ndarray = {version = "0.15", optional = true} -num-traits = {version = "0.2", optional = true} openssl = {version = "0.10", optional = true} -oracle = {version = "0.5", optional = true} -polars = {version = "0.32", optional = true, features=["dtype-u8", "dtype-u16"]} -postgres = {version = "0.19", features = ["with-chrono-0_4", "with-uuid-0_8", "with-serde_json-1"], optional = true} -postgres-native-tls = {version = "0.5", optional = true} -postgres-openssl = {version = "0.5", optional = true} -mysql_common = {version = "0.29", features = ["chrono"], optional = true} r2d2 = {version = "0.8", optional = true} -r2d2-oracle = {version = "0.6", features = ["chrono"], optional = true} -r2d2_mysql = {version = "23", optional = true} -r2d2_postgres = {version = "0.18.1", optional = true} -r2d2_sqlite = {version = "0.22.0", 
optional = true} -regex = {version = "1", optional = true} -rusqlite = {version = "0.29.0", features = ["column_decltype", "chrono", "bundled"], optional = true} +num-traits = {version = "0.2", optional = true} rust_decimal = {version = "1", features = ["db-postgres"], optional = true} rust_decimal_macros = {version = "1", optional = true} -tiberius = {version = "0.5", features = ["rust_decimal", "chrono", "integrated-auth-gssapi"], optional = true} tokio = {version = "1", features = ["rt", "rt-multi-thread", "net"], optional = true} tokio-util = {version = "0.6", optional = true} urlencoding = {version = "2.1", optional = true} uuid = {version = "0.8", optional = true} -j4rs = {version = "0.15", optional = true} -datafusion = {version = "31", optional = true} -[lib] -crate-type = ["cdylib", "rlib"] -name = "connectorx" +# dst_arrow +arrow = {version = "50", default-features = false, optional = true} + +# dst_arrow2 +arrow2 = {version = "0.17", default-features = false, optional = true} +polars = {version = "0.32", optional = true, features=["dtype-u8", "dtype-u16"]} + +# src_bigquery +gcp-bigquery-client = {version = "0.13.0", optional = true} + + +# src_csv +csv = {version = "1", optional = true} +regex = {version = "1", optional = true} + +# src_mssql +bb8 = {version = "0.7", optional = true} +bb8-tiberius = {version = "0.5", optional = true} +tiberius = {version = "0.5", features = ["rust_decimal", "chrono"], optional = true} + +# src_mysql +r2d2_mysql = {version = "23", optional = true} +mysql_common = {version = "0.29", features = ["chrono"], optional = true} + +# src_oracle +oracle = {version = "0.5", optional = true} +r2d2-oracle = {version = "0.6", features = ["chrono"], optional = true} + +# src_postgres +postgres = {version = "0.19", features = ["with-chrono-0_4", "with-uuid-0_8", "with-serde_json-1"], optional = true} +r2d2_postgres = {version = "0.18.1", optional = true} +postgres-native-tls = {version = "0.5", optional = true} +postgres-openssl = 
{version = "0.5", optional = true} + +# src_sqlite +rusqlite = {version = "0.30.0", features = ["column_decltype", "chrono", "bundled"], optional = true} +r2d2_sqlite = {version = "0.23.0", optional = true} +fallible-streaming-iterator = {version = "0.1", optional = true} [dev-dependencies] criterion = "0.3" @@ -69,7 +86,7 @@ iai = "0.1" pprof = {version = "0.5", features = ["flamegraph"]} [features] -all = ["src_sqlite", "src_postgres", "src_mysql", "src_mssql", "src_oracle", "src_bigquery", "src_csv", "src_dummy", "dst_arrow", "dst_arrow2", "federation", "fed_exec"] +all = ["src_sqlite", "src_postgres", "src_mysql", "src_mssql", "src_oracle", "src_bigquery", "src_csv", "src_dummy", "dst_arrow", "dst_arrow2"] branch = [] default = ["fptr"] dst_arrow = ["arrow"] @@ -97,8 +114,6 @@ src_postgres = [ "postgres-openssl", ] src_sqlite = ["rusqlite", "r2d2_sqlite", "fallible-streaming-iterator", "r2d2", "urlencoding"] -federation = ["j4rs"] -fed_exec = ["datafusion", "tokio"] integrated-auth-gssapi = ["tiberius/integrated-auth-gssapi"] [package.metadata.docs.rs] diff --git a/connectorx/examples/batch_test.rs b/connector_arrow/examples/batch_test.rs similarity index 100% rename from connectorx/examples/batch_test.rs rename to connector_arrow/examples/batch_test.rs diff --git a/connectorx/src/arrow_batch_iter.rs b/connector_arrow/src/arrow_batch_iter.rs similarity index 100% rename from connectorx/src/arrow_batch_iter.rs rename to connector_arrow/src/arrow_batch_iter.rs diff --git a/connector_arrow/src/constants.rs b/connector_arrow/src/constants.rs new file mode 100644 index 0000000..1b1bd01 --- /dev/null +++ b/connector_arrow/src/constants.rs @@ -0,0 +1,21 @@ +#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] +pub(crate) const SECONDS_IN_DAY: i64 = 86_400; + +#[allow(dead_code)] +const KILO: usize = 1 << 10; + +#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] +pub const RECORD_BATCH_SIZE: usize = 64 * KILO; + +#[cfg(any( + feature = "src_postgres", + 
feature = "src_mysql", + feature = "src_oracle", + feature = "src_mssql" +))] +pub const DB_BUFFER_SIZE: usize = 32; + +#[cfg(any(feature = "src_oracle"))] +pub const ORACLE_ARRAY_SIZE: u32 = KILO as u32; + +pub const CONNECTORX_PROTOCOL: &str = "cxprotocol"; diff --git a/connectorx/src/data_order.rs b/connector_arrow/src/data_order.rs similarity index 100% rename from connectorx/src/data_order.rs rename to connector_arrow/src/data_order.rs diff --git a/connectorx/src/destinations/arrow/arrow_assoc.rs b/connector_arrow/src/destinations/arrow/arrow_assoc.rs similarity index 100% rename from connectorx/src/destinations/arrow/arrow_assoc.rs rename to connector_arrow/src/destinations/arrow/arrow_assoc.rs diff --git a/connectorx/src/destinations/arrow/errors.rs b/connector_arrow/src/destinations/arrow/errors.rs similarity index 100% rename from connectorx/src/destinations/arrow/errors.rs rename to connector_arrow/src/destinations/arrow/errors.rs diff --git a/connectorx/src/destinations/arrow/funcs.rs b/connector_arrow/src/destinations/arrow/funcs.rs similarity index 100% rename from connectorx/src/destinations/arrow/funcs.rs rename to connector_arrow/src/destinations/arrow/funcs.rs diff --git a/connectorx/src/destinations/arrow/mod.rs b/connector_arrow/src/destinations/arrow/mod.rs similarity index 100% rename from connectorx/src/destinations/arrow/mod.rs rename to connector_arrow/src/destinations/arrow/mod.rs diff --git a/connectorx/src/destinations/arrow/typesystem.rs b/connector_arrow/src/destinations/arrow/typesystem.rs similarity index 100% rename from connectorx/src/destinations/arrow/typesystem.rs rename to connector_arrow/src/destinations/arrow/typesystem.rs diff --git a/connectorx/src/destinations/arrow2/arrow_assoc.rs b/connector_arrow/src/destinations/arrow2/arrow_assoc.rs similarity index 100% rename from connectorx/src/destinations/arrow2/arrow_assoc.rs rename to connector_arrow/src/destinations/arrow2/arrow_assoc.rs diff --git 
a/connectorx/src/destinations/arrow2/errors.rs b/connector_arrow/src/destinations/arrow2/errors.rs similarity index 100% rename from connectorx/src/destinations/arrow2/errors.rs rename to connector_arrow/src/destinations/arrow2/errors.rs diff --git a/connectorx/src/destinations/arrow2/funcs.rs b/connector_arrow/src/destinations/arrow2/funcs.rs similarity index 100% rename from connectorx/src/destinations/arrow2/funcs.rs rename to connector_arrow/src/destinations/arrow2/funcs.rs diff --git a/connectorx/src/destinations/arrow2/mod.rs b/connector_arrow/src/destinations/arrow2/mod.rs similarity index 100% rename from connectorx/src/destinations/arrow2/mod.rs rename to connector_arrow/src/destinations/arrow2/mod.rs diff --git a/connectorx/src/destinations/arrow2/typesystem.rs b/connector_arrow/src/destinations/arrow2/typesystem.rs similarity index 100% rename from connectorx/src/destinations/arrow2/typesystem.rs rename to connector_arrow/src/destinations/arrow2/typesystem.rs diff --git a/connectorx/src/destinations/arrowstream/arrow_assoc.rs b/connector_arrow/src/destinations/arrowstream/arrow_assoc.rs similarity index 100% rename from connectorx/src/destinations/arrowstream/arrow_assoc.rs rename to connector_arrow/src/destinations/arrowstream/arrow_assoc.rs diff --git a/connectorx/src/destinations/arrowstream/errors.rs b/connector_arrow/src/destinations/arrowstream/errors.rs similarity index 100% rename from connectorx/src/destinations/arrowstream/errors.rs rename to connector_arrow/src/destinations/arrowstream/errors.rs diff --git a/connectorx/src/destinations/arrowstream/funcs.rs b/connector_arrow/src/destinations/arrowstream/funcs.rs similarity index 100% rename from connectorx/src/destinations/arrowstream/funcs.rs rename to connector_arrow/src/destinations/arrowstream/funcs.rs diff --git a/connectorx/src/destinations/arrowstream/mod.rs b/connector_arrow/src/destinations/arrowstream/mod.rs similarity index 100% rename from 
connectorx/src/destinations/arrowstream/mod.rs rename to connector_arrow/src/destinations/arrowstream/mod.rs diff --git a/connectorx/src/destinations/arrowstream/typesystem.rs b/connector_arrow/src/destinations/arrowstream/typesystem.rs similarity index 100% rename from connectorx/src/destinations/arrowstream/typesystem.rs rename to connector_arrow/src/destinations/arrowstream/typesystem.rs diff --git a/connectorx/src/destinations/mod.rs b/connector_arrow/src/destinations/mod.rs similarity index 100% rename from connectorx/src/destinations/mod.rs rename to connector_arrow/src/destinations/mod.rs diff --git a/connectorx/src/dispatcher.rs b/connector_arrow/src/dispatcher.rs similarity index 100% rename from connectorx/src/dispatcher.rs rename to connector_arrow/src/dispatcher.rs diff --git a/connectorx/src/errors.rs b/connector_arrow/src/errors.rs similarity index 96% rename from connectorx/src/errors.rs rename to connector_arrow/src/errors.rs index 6168207..5c21358 100644 --- a/connectorx/src/errors.rs +++ b/connector_arrow/src/errors.rs @@ -20,14 +20,6 @@ pub enum ConnectorXOutError { #[error(transparent)] JsonError(#[from] serde_json::Error), - #[cfg(feature = "federation")] - #[error(transparent)] - J4RSError(#[from] j4rs::errors::J4RsError), - - #[cfg(feature = "fed_exec")] - #[error(transparent)] - DataFusionError(#[from] datafusion::error::DataFusionError), - #[error(transparent)] UrlParseError(#[from] url::ParseError), diff --git a/connectorx/src/get_arrow.rs b/connector_arrow/src/get_arrow.rs similarity index 100% rename from connectorx/src/get_arrow.rs rename to connector_arrow/src/get_arrow.rs diff --git a/connectorx/src/get_arrow2.rs b/connector_arrow/src/get_arrow2.rs similarity index 100% rename from connectorx/src/get_arrow2.rs rename to connector_arrow/src/get_arrow2.rs diff --git a/connectorx/src/lib.rs b/connector_arrow/src/lib.rs similarity index 98% rename from connectorx/src/lib.rs rename to connector_arrow/src/lib.rs index 84b043b..fdec527 
100644 --- a/connectorx/src/lib.rs +++ b/connector_arrow/src/lib.rs @@ -151,10 +151,6 @@ pub mod data_order; pub mod destinations; mod dispatcher; pub mod errors; -#[cfg(feature = "fed_exec")] -pub mod fed_dispatcher; -#[cfg(feature = "federation")] -pub mod fed_rewriter; #[cfg(feature = "dst_arrow")] pub mod get_arrow; #[cfg(feature = "dst_arrow2")] @@ -185,8 +181,6 @@ pub mod prelude { pub use crate::destinations::{Consume, Destination, DestinationPartition}; pub use crate::dispatcher::Dispatcher; pub use crate::errors::{ConnectorXError, ConnectorXOutError}; - #[cfg(feature = "federation")] - pub use crate::fed_rewriter::{rewrite_sql, FederatedDataSourceInfo, Plan}; #[cfg(feature = "dst_arrow")] pub use crate::get_arrow::{get_arrow, new_record_batch_iter}; #[cfg(feature = "dst_arrow2")] diff --git a/connectorx/src/macros.rs b/connector_arrow/src/macros.rs similarity index 100% rename from connectorx/src/macros.rs rename to connector_arrow/src/macros.rs diff --git a/connectorx/src/partition.rs b/connector_arrow/src/partition.rs similarity index 100% rename from connectorx/src/partition.rs rename to connector_arrow/src/partition.rs diff --git a/connectorx/src/source_router.rs b/connector_arrow/src/source_router.rs similarity index 100% rename from connectorx/src/source_router.rs rename to connector_arrow/src/source_router.rs diff --git a/connectorx/src/sources/bigquery/errors.rs b/connector_arrow/src/sources/bigquery/errors.rs similarity index 100% rename from connectorx/src/sources/bigquery/errors.rs rename to connector_arrow/src/sources/bigquery/errors.rs diff --git a/connectorx/src/sources/bigquery/mod.rs b/connector_arrow/src/sources/bigquery/mod.rs similarity index 100% rename from connectorx/src/sources/bigquery/mod.rs rename to connector_arrow/src/sources/bigquery/mod.rs diff --git a/connectorx/src/sources/bigquery/typesystem.rs b/connector_arrow/src/sources/bigquery/typesystem.rs similarity index 100% rename from 
connectorx/src/sources/bigquery/typesystem.rs rename to connector_arrow/src/sources/bigquery/typesystem.rs diff --git a/connectorx/src/sources/csv/errors.rs b/connector_arrow/src/sources/csv/errors.rs similarity index 100% rename from connectorx/src/sources/csv/errors.rs rename to connector_arrow/src/sources/csv/errors.rs diff --git a/connectorx/src/sources/csv/mod.rs b/connector_arrow/src/sources/csv/mod.rs similarity index 100% rename from connectorx/src/sources/csv/mod.rs rename to connector_arrow/src/sources/csv/mod.rs diff --git a/connectorx/src/sources/csv/typesystem.rs b/connector_arrow/src/sources/csv/typesystem.rs similarity index 100% rename from connectorx/src/sources/csv/typesystem.rs rename to connector_arrow/src/sources/csv/typesystem.rs diff --git a/connectorx/src/sources/dummy/mod.rs b/connector_arrow/src/sources/dummy/mod.rs similarity index 100% rename from connectorx/src/sources/dummy/mod.rs rename to connector_arrow/src/sources/dummy/mod.rs diff --git a/connectorx/src/sources/dummy/typesystem.rs b/connector_arrow/src/sources/dummy/typesystem.rs similarity index 100% rename from connectorx/src/sources/dummy/typesystem.rs rename to connector_arrow/src/sources/dummy/typesystem.rs diff --git a/connectorx/src/sources/mod.rs b/connector_arrow/src/sources/mod.rs similarity index 100% rename from connectorx/src/sources/mod.rs rename to connector_arrow/src/sources/mod.rs diff --git a/connectorx/src/sources/mssql/errors.rs b/connector_arrow/src/sources/mssql/errors.rs similarity index 100% rename from connectorx/src/sources/mssql/errors.rs rename to connector_arrow/src/sources/mssql/errors.rs diff --git a/connectorx/src/sources/mssql/mod.rs b/connector_arrow/src/sources/mssql/mod.rs similarity index 100% rename from connectorx/src/sources/mssql/mod.rs rename to connector_arrow/src/sources/mssql/mod.rs diff --git a/connectorx/src/sources/mssql/typesystem.rs b/connector_arrow/src/sources/mssql/typesystem.rs similarity index 100% rename from 
connectorx/src/sources/mssql/typesystem.rs rename to connector_arrow/src/sources/mssql/typesystem.rs diff --git a/connectorx/src/sources/mysql/errors.rs b/connector_arrow/src/sources/mysql/errors.rs similarity index 100% rename from connectorx/src/sources/mysql/errors.rs rename to connector_arrow/src/sources/mysql/errors.rs diff --git a/connectorx/src/sources/mysql/mod.rs b/connector_arrow/src/sources/mysql/mod.rs similarity index 100% rename from connectorx/src/sources/mysql/mod.rs rename to connector_arrow/src/sources/mysql/mod.rs diff --git a/connectorx/src/sources/mysql/typesystem.rs b/connector_arrow/src/sources/mysql/typesystem.rs similarity index 100% rename from connectorx/src/sources/mysql/typesystem.rs rename to connector_arrow/src/sources/mysql/typesystem.rs diff --git a/connectorx/src/sources/oracle/errors.rs b/connector_arrow/src/sources/oracle/errors.rs similarity index 100% rename from connectorx/src/sources/oracle/errors.rs rename to connector_arrow/src/sources/oracle/errors.rs diff --git a/connectorx/src/sources/oracle/mod.rs b/connector_arrow/src/sources/oracle/mod.rs similarity index 100% rename from connectorx/src/sources/oracle/mod.rs rename to connector_arrow/src/sources/oracle/mod.rs diff --git a/connectorx/src/sources/oracle/typesystem.rs b/connector_arrow/src/sources/oracle/typesystem.rs similarity index 100% rename from connectorx/src/sources/oracle/typesystem.rs rename to connector_arrow/src/sources/oracle/typesystem.rs diff --git a/connectorx/src/sources/postgres/connection.rs b/connector_arrow/src/sources/postgres/connection.rs similarity index 100% rename from connectorx/src/sources/postgres/connection.rs rename to connector_arrow/src/sources/postgres/connection.rs diff --git a/connectorx/src/sources/postgres/errors.rs b/connector_arrow/src/sources/postgres/errors.rs similarity index 100% rename from connectorx/src/sources/postgres/errors.rs rename to connector_arrow/src/sources/postgres/errors.rs diff --git 
a/connectorx/src/sources/postgres/mod.rs b/connector_arrow/src/sources/postgres/mod.rs similarity index 100% rename from connectorx/src/sources/postgres/mod.rs rename to connector_arrow/src/sources/postgres/mod.rs diff --git a/connectorx/src/sources/postgres/typesystem.rs b/connector_arrow/src/sources/postgres/typesystem.rs similarity index 100% rename from connectorx/src/sources/postgres/typesystem.rs rename to connector_arrow/src/sources/postgres/typesystem.rs diff --git a/connectorx/src/sources/sqlite/errors.rs b/connector_arrow/src/sources/sqlite/errors.rs similarity index 100% rename from connectorx/src/sources/sqlite/errors.rs rename to connector_arrow/src/sources/sqlite/errors.rs diff --git a/connectorx/src/sources/sqlite/mod.rs b/connector_arrow/src/sources/sqlite/mod.rs similarity index 100% rename from connectorx/src/sources/sqlite/mod.rs rename to connector_arrow/src/sources/sqlite/mod.rs diff --git a/connectorx/src/sources/sqlite/typesystem.rs b/connector_arrow/src/sources/sqlite/typesystem.rs similarity index 100% rename from connectorx/src/sources/sqlite/typesystem.rs rename to connector_arrow/src/sources/sqlite/typesystem.rs diff --git a/connectorx/src/sql.rs b/connector_arrow/src/sql.rs similarity index 100% rename from connectorx/src/sql.rs rename to connector_arrow/src/sql.rs diff --git a/connectorx/src/transports/bigquery_arrow.rs b/connector_arrow/src/transports/bigquery_arrow.rs similarity index 100% rename from connectorx/src/transports/bigquery_arrow.rs rename to connector_arrow/src/transports/bigquery_arrow.rs diff --git a/connectorx/src/transports/bigquery_arrow2.rs b/connector_arrow/src/transports/bigquery_arrow2.rs similarity index 100% rename from connectorx/src/transports/bigquery_arrow2.rs rename to connector_arrow/src/transports/bigquery_arrow2.rs diff --git a/connectorx/src/transports/bigquery_arrowstream.rs b/connector_arrow/src/transports/bigquery_arrowstream.rs similarity index 100% rename from 
connectorx/src/transports/bigquery_arrowstream.rs rename to connector_arrow/src/transports/bigquery_arrowstream.rs diff --git a/connectorx/src/transports/csv_arrow.rs b/connector_arrow/src/transports/csv_arrow.rs similarity index 100% rename from connectorx/src/transports/csv_arrow.rs rename to connector_arrow/src/transports/csv_arrow.rs diff --git a/connectorx/src/transports/dummy_arrow.rs b/connector_arrow/src/transports/dummy_arrow.rs similarity index 100% rename from connectorx/src/transports/dummy_arrow.rs rename to connector_arrow/src/transports/dummy_arrow.rs diff --git a/connectorx/src/transports/dummy_arrow2.rs b/connector_arrow/src/transports/dummy_arrow2.rs similarity index 100% rename from connectorx/src/transports/dummy_arrow2.rs rename to connector_arrow/src/transports/dummy_arrow2.rs diff --git a/connectorx/src/transports/dummy_arrowstream.rs b/connector_arrow/src/transports/dummy_arrowstream.rs similarity index 100% rename from connectorx/src/transports/dummy_arrowstream.rs rename to connector_arrow/src/transports/dummy_arrowstream.rs diff --git a/connectorx/src/transports/mod.rs b/connector_arrow/src/transports/mod.rs similarity index 100% rename from connectorx/src/transports/mod.rs rename to connector_arrow/src/transports/mod.rs diff --git a/connectorx/src/transports/mssql_arrow.rs b/connector_arrow/src/transports/mssql_arrow.rs similarity index 100% rename from connectorx/src/transports/mssql_arrow.rs rename to connector_arrow/src/transports/mssql_arrow.rs diff --git a/connectorx/src/transports/mssql_arrow2.rs b/connector_arrow/src/transports/mssql_arrow2.rs similarity index 100% rename from connectorx/src/transports/mssql_arrow2.rs rename to connector_arrow/src/transports/mssql_arrow2.rs diff --git a/connectorx/src/transports/mssql_arrowstream.rs b/connector_arrow/src/transports/mssql_arrowstream.rs similarity index 100% rename from connectorx/src/transports/mssql_arrowstream.rs rename to connector_arrow/src/transports/mssql_arrowstream.rs diff 
--git a/connectorx/src/transports/mysql_arrow.rs b/connector_arrow/src/transports/mysql_arrow.rs similarity index 100% rename from connectorx/src/transports/mysql_arrow.rs rename to connector_arrow/src/transports/mysql_arrow.rs diff --git a/connectorx/src/transports/mysql_arrow2.rs b/connector_arrow/src/transports/mysql_arrow2.rs similarity index 100% rename from connectorx/src/transports/mysql_arrow2.rs rename to connector_arrow/src/transports/mysql_arrow2.rs diff --git a/connectorx/src/transports/mysql_arrowstream.rs b/connector_arrow/src/transports/mysql_arrowstream.rs similarity index 100% rename from connectorx/src/transports/mysql_arrowstream.rs rename to connector_arrow/src/transports/mysql_arrowstream.rs diff --git a/connectorx/src/transports/oracle_arrow.rs b/connector_arrow/src/transports/oracle_arrow.rs similarity index 100% rename from connectorx/src/transports/oracle_arrow.rs rename to connector_arrow/src/transports/oracle_arrow.rs diff --git a/connectorx/src/transports/oracle_arrow2.rs b/connector_arrow/src/transports/oracle_arrow2.rs similarity index 100% rename from connectorx/src/transports/oracle_arrow2.rs rename to connector_arrow/src/transports/oracle_arrow2.rs diff --git a/connectorx/src/transports/oracle_arrowstream.rs b/connector_arrow/src/transports/oracle_arrowstream.rs similarity index 100% rename from connectorx/src/transports/oracle_arrowstream.rs rename to connector_arrow/src/transports/oracle_arrowstream.rs diff --git a/connectorx/src/transports/postgres_arrow.rs b/connector_arrow/src/transports/postgres_arrow.rs similarity index 100% rename from connectorx/src/transports/postgres_arrow.rs rename to connector_arrow/src/transports/postgres_arrow.rs diff --git a/connectorx/src/transports/postgres_arrow2.rs b/connector_arrow/src/transports/postgres_arrow2.rs similarity index 100% rename from connectorx/src/transports/postgres_arrow2.rs rename to connector_arrow/src/transports/postgres_arrow2.rs diff --git 
a/connectorx/src/transports/postgres_arrowstream.rs b/connector_arrow/src/transports/postgres_arrowstream.rs similarity index 100% rename from connectorx/src/transports/postgres_arrowstream.rs rename to connector_arrow/src/transports/postgres_arrowstream.rs diff --git a/connectorx/src/transports/sqlite_arrow.rs b/connector_arrow/src/transports/sqlite_arrow.rs similarity index 100% rename from connectorx/src/transports/sqlite_arrow.rs rename to connector_arrow/src/transports/sqlite_arrow.rs diff --git a/connectorx/src/transports/sqlite_arrow2.rs b/connector_arrow/src/transports/sqlite_arrow2.rs similarity index 100% rename from connectorx/src/transports/sqlite_arrow2.rs rename to connector_arrow/src/transports/sqlite_arrow2.rs diff --git a/connectorx/src/transports/sqlite_arrowstream.rs b/connector_arrow/src/transports/sqlite_arrowstream.rs similarity index 100% rename from connectorx/src/transports/sqlite_arrowstream.rs rename to connector_arrow/src/transports/sqlite_arrowstream.rs diff --git a/connectorx/src/typesystem.rs b/connector_arrow/src/typesystem.rs similarity index 100% rename from connectorx/src/typesystem.rs rename to connector_arrow/src/typesystem.rs diff --git a/connectorx/src/utils.rs b/connector_arrow/src/utils.rs similarity index 100% rename from connectorx/src/utils.rs rename to connector_arrow/src/utils.rs diff --git a/connectorx/tests/data/empty.csv b/connector_arrow/tests/data/empty.csv similarity index 100% rename from connectorx/tests/data/empty.csv rename to connector_arrow/tests/data/empty.csv diff --git a/connectorx/tests/data/infer_0.csv b/connector_arrow/tests/data/infer_0.csv similarity index 100% rename from connectorx/tests/data/infer_0.csv rename to connector_arrow/tests/data/infer_0.csv diff --git a/connectorx/tests/data/uint_0.csv b/connector_arrow/tests/data/uint_0.csv similarity index 100% rename from connectorx/tests/data/uint_0.csv rename to connector_arrow/tests/data/uint_0.csv diff --git a/connectorx/tests/data/uint_1.csv 
b/connector_arrow/tests/data/uint_1.csv similarity index 100% rename from connectorx/tests/data/uint_1.csv rename to connector_arrow/tests/data/uint_1.csv diff --git a/connectorx/tests/data/uspop_0.csv b/connector_arrow/tests/data/uspop_0.csv similarity index 100% rename from connectorx/tests/data/uspop_0.csv rename to connector_arrow/tests/data/uspop_0.csv diff --git a/connector_arrow/tests/it/main.rs b/connector_arrow/tests/it/main.rs new file mode 100644 index 0000000..b056947 --- /dev/null +++ b/connector_arrow/tests/it/main.rs @@ -0,0 +1,16 @@ +#[cfg(feature = "src_dummy")] +mod test_arrow; +#[cfg(feature = "src_bigquery")] +mod test_bigquery; +#[cfg(feature = "src_csv")] +mod test_csv; +#[cfg(feature = "src_mssql")] +mod test_mssql; +#[cfg(feature = "src_mysql")] +mod test_mysql; +#[cfg(feature = "src_oracle")] +mod test_oracle; +#[cfg(feature = "dst_arrow2")] +mod test_polars; +#[cfg(feature = "src_postgres")] +mod test_postgres; diff --git a/connectorx/tests/test_arrow.rs b/connector_arrow/tests/it/test_arrow.rs similarity index 99% rename from connectorx/tests/test_arrow.rs rename to connector_arrow/tests/it/test_arrow.rs index 31c93fa..7f668dc 100644 --- a/connectorx/tests/test_arrow.rs +++ b/connector_arrow/tests/it/test_arrow.rs @@ -2,7 +2,7 @@ use arrow::{ array::{BooleanArray, Float64Array, Int64Array, StringArray}, record_batch::RecordBatch, }; -use connectorx::{ +use connector_arrow::{ constants::RECORD_BATCH_SIZE, destinations::arrow::{ArrowDestination, ArrowTypeSystem}, prelude::*, @@ -18,7 +18,6 @@ use std::env; use url::Url; #[test] -#[should_panic] fn arrow_destination_col_major() { let mut dw = ArrowDestination::new(); let _ = dw @@ -32,7 +31,7 @@ fn arrow_destination_col_major() { ], DataOrder::ColumnMajor, ) - .unwrap(); + .unwrap_err(); } #[test] diff --git a/connectorx/tests/test_bigquery.rs b/connector_arrow/tests/it/test_bigquery.rs similarity index 98% rename from connectorx/tests/test_bigquery.rs rename to 
connector_arrow/tests/it/test_bigquery.rs index a8eb04e..da1a15b 100644 --- a/connectorx/tests/test_bigquery.rs +++ b/connector_arrow/tests/it/test_bigquery.rs @@ -1,4 +1,4 @@ -use connectorx::{ +use connector_arrow::{ destinations::arrow::ArrowDestination, prelude::*, sources::bigquery::BigQuerySource, sql::CXQuery, transports::BigQueryArrowTransport, }; diff --git a/connectorx/tests/test_csv.rs b/connector_arrow/tests/it/test_csv.rs similarity index 94% rename from connectorx/tests/test_csv.rs rename to connector_arrow/tests/it/test_csv.rs index 9da3749..22fe9a0 100644 --- a/connectorx/tests/test_csv.rs +++ b/connector_arrow/tests/it/test_csv.rs @@ -1,6 +1,6 @@ use arrow::array::Int64Array; -use connectorx::prelude::*; -use connectorx::{ +use connector_arrow::prelude::*; +use connector_arrow::{ destinations::arrow::{ArrowDestination, ArrowTypeSystem}, sources::{ csv::{CSVSource, CSVTypeSystem}, @@ -11,18 +11,14 @@ use connectorx::{ }; #[test] -#[should_panic] fn no_file() { let mut source = CSVSource::new(&[]); source.set_queries(&[CXQuery::naked("./a_fake_file.csv")]); - let partitions = source.partition().unwrap(); - for mut p in partitions { - p.result_rows().expect("run query"); - } + source.partition().err().unwrap(); } #[test] -#[should_panic] +#[ignore] // TODO: panic with division by zero fn empty_file() { let mut source = CSVSource::new(&[]); source.set_queries(&[CXQuery::naked("./tests/data/empty.csv")]); @@ -36,7 +32,7 @@ fn empty_file() { parser.fetch_next().unwrap(); - let _v: i64 = parser.produce().expect("produce from emtpy"); + Produce::::produce(&mut parser).unwrap_err(); } #[test] diff --git a/connectorx/tests/test_mssql.rs b/connector_arrow/tests/it/test_mssql.rs similarity index 99% rename from connectorx/tests/test_mssql.rs rename to connector_arrow/tests/it/test_mssql.rs index ba241af..eca2410 100644 --- a/connectorx/tests/test_mssql.rs +++ b/connector_arrow/tests/it/test_mssql.rs @@ -2,7 +2,7 @@ use arrow::{ array::{BooleanArray, 
Float64Array, Int64Array, StringArray}, record_batch::RecordBatch, }; -use connectorx::{ +use connector_arrow::{ destinations::arrow::ArrowDestination, prelude::*, sources::mssql::MsSQLSource, sql::CXQuery, transports::MsSQLArrowTransport, }; diff --git a/connectorx/tests/test_mysql.rs b/connector_arrow/tests/it/test_mysql.rs similarity index 99% rename from connectorx/tests/test_mysql.rs rename to connector_arrow/tests/it/test_mysql.rs index 4bc21bc..b075dfc 100644 --- a/connectorx/tests/test_mysql.rs +++ b/connector_arrow/tests/it/test_mysql.rs @@ -2,7 +2,7 @@ use arrow::{ array::{Float64Array, Int64Array, StringArray}, record_batch::RecordBatch, }; -use connectorx::{ +use connector_arrow::{ destinations::arrow::ArrowDestination, prelude::*, sources::mysql::{BinaryProtocol, MySQLSource, TextProtocol}, diff --git a/connectorx/tests/test_oracle.rs b/connector_arrow/tests/it/test_oracle.rs similarity index 94% rename from connectorx/tests/test_oracle.rs rename to connector_arrow/tests/it/test_oracle.rs index 05c46fd..f1cdd88 100644 --- a/connectorx/tests/test_oracle.rs +++ b/connector_arrow/tests/it/test_oracle.rs @@ -1,6 +1,6 @@ -use connectorx::prelude::*; -use connectorx::sources::oracle::OracleSource; -use connectorx::sql::CXQuery; +use connector_arrow::prelude::*; +use connector_arrow::sources::oracle::OracleSource; +use connector_arrow::sql::CXQuery; use std::env; #[test] diff --git a/connectorx/tests/test_polars.rs b/connector_arrow/tests/it/test_polars.rs similarity index 99% rename from connectorx/tests/test_polars.rs rename to connector_arrow/tests/it/test_polars.rs index 8fc63eb..ed743b7 100644 --- a/connectorx/tests/test_polars.rs +++ b/connector_arrow/tests/it/test_polars.rs @@ -1,4 +1,4 @@ -use connectorx::{ +use connector_arrow::{ constants::RECORD_BATCH_SIZE, destinations::arrow2::Arrow2Destination, prelude::*, diff --git a/connectorx/tests/test_postgres.rs b/connector_arrow/tests/it/test_postgres.rs similarity index 99% rename from 
connectorx/tests/test_postgres.rs rename to connector_arrow/tests/it/test_postgres.rs index 2beb6f1..82d29a3 100644 --- a/connectorx/tests/test_postgres.rs +++ b/connector_arrow/tests/it/test_postgres.rs @@ -2,7 +2,7 @@ use arrow::{ array::{BooleanArray, Float64Array, Int64Array, StringArray}, record_batch::RecordBatch, }; -use connectorx::{ +use connector_arrow::{ destinations::arrow::ArrowDestination, prelude::*, sources::postgres::{rewrite_tls_args, BinaryProtocol, CSVProtocol, PostgresSource}, diff --git a/connectorx-cpp/Cargo.toml b/connectorx-cpp/Cargo.toml deleted file mode 100644 index 05944b4..0000000 --- a/connectorx-cpp/Cargo.toml +++ /dev/null @@ -1,37 +0,0 @@ -[package] -name = "connectorx-cpp" -version = "0.3.3-alpha.1" -edition = "2021" -license = "MIT" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libc = "0.2" -connectorx = {path = "../connectorx", default-features = false} -arrow = {workspace = true} -openssl = {version = "0.10", features = ["vendored"]} - -[lib] -crate-type = ["cdylib"] -name = "connectorx_cpp" - -[features] -default = ["fptr", "nbstr", "dsts", "srcs", "federation"] -light = ["fptr", "nbstr", "dsts_light" , "srcs_light", "federation"] -srcs_light = ["connectorx/src_postgres"] -dsts_light = ["connectorx/dst_arrow"] - -dsts = ["connectorx/dst_arrow", "connectorx/dst_arrow2"] -fptr = ["connectorx/fptr"] -branch = ["connectorx/branch"] -federation = ["connectorx/federation"] -nbstr = [] -srcs = [ - "connectorx/src_postgres", - "connectorx/src_mysql", - "connectorx/src_sqlite", - "connectorx/src_mssql", - "connectorx/src_oracle", - "connectorx/src_bigquery", -] diff --git a/connectorx-cpp/src/lib.rs b/connectorx-cpp/src/lib.rs deleted file mode 100644 index 1147a97..0000000 --- a/connectorx-cpp/src/lib.rs +++ /dev/null @@ -1,365 +0,0 @@ -mod plan; - -use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; -use connectorx::prelude::*; -use libc::c_char; -use 
std::collections::HashMap; -use std::convert::TryFrom; -use std::env; -use std::ffi::{CStr, CString}; -use std::sync::Arc; - -#[repr(C)] -pub struct CXSlice { - ptr: *const T, - len: usize, - capacity: usize, -} - -impl CXSlice { - pub fn new_from_vec(v: Vec) -> Self { - // If `Vec::into_raw_parts` becomes stable, can directly change to: - // let (ptr, len, capacity) = v.into_raw_parts(); - // Self {ptr, len, capacity} - - let slice = Self { - ptr: v.as_ptr(), - len: v.len(), - capacity: v.capacity(), - }; - std::mem::forget(v); - slice - } -} - -#[repr(C)] -pub struct CXTable { - name: *const c_char, - columns: CXSlice<*const c_char>, -} - -#[repr(C)] -pub struct CXConnectionInfo { - name: *const c_char, - conn: *const c_char, - schema: CXSlice, - is_local: bool, - jdbc_url: *const c_char, - jdbc_driver: *const c_char, -} - -#[repr(C)] -pub struct CXFederatedPlan { - db_name: *const c_char, - db_alias: *const c_char, - sql: *const c_char, - cardinality: usize, -} - -#[cfg(feature = "federation")] -#[no_mangle] -pub unsafe extern "C" fn free_plans(res: *const CXSlice) { - let plans = get_vec::<_>((*res).ptr, (*res).len, (*res).capacity); - plans.into_iter().for_each(|plan| { - free_str(plan.db_name); - free_str(plan.db_alias); - free_str(plan.sql); - }); -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_rewrite( - conn_list: *const CXSlice, - query: *const c_char, -) -> CXSlice { - let mut db_map = HashMap::new(); - let conn_slice = unsafe { std::slice::from_raw_parts((*conn_list).ptr, (*conn_list).len) }; - for p in conn_slice { - let name = unsafe { CStr::from_ptr(p.name) }.to_str().unwrap(); - if p.conn.is_null() { - let mut table_map: HashMap> = HashMap::new(); - let table_slice = unsafe { std::slice::from_raw_parts(p.schema.ptr, p.schema.len) }; - for t in table_slice { - let table_name = unsafe { CStr::from_ptr(t.name) }.to_str().unwrap(); - // println!("raw table name: {:?}", table_name); - let column_slice = - unsafe { 
std::slice::from_raw_parts(t.columns.ptr, t.columns.len) }; - - let mut column_names = vec![]; - for &c in column_slice { - let column_name = unsafe { CStr::from_ptr(c).to_str().unwrap() }; - column_names.push(column_name.to_string()); - } - table_map.insert(table_name.to_string(), column_names); - } - let source_info = - FederatedDataSourceInfo::new_from_manual_schema(table_map, p.is_local); - db_map.insert(name.to_string(), source_info); - } else { - let conn = unsafe { CStr::from_ptr(p.conn) }.to_str().unwrap(); - let jdbc_url = match p.jdbc_url.is_null() { - true => "", - false => unsafe { CStr::from_ptr(p.jdbc_url) }.to_str().unwrap(), - }; - let jdbc_driver = match p.jdbc_driver.is_null() { - true => "", - false => unsafe { CStr::from_ptr(p.jdbc_driver) }.to_str().unwrap(), - }; - // println!("name: {:?}, conn: {:?}", name, conn); - let source_info = FederatedDataSourceInfo::new_from_conn_str( - SourceConn::try_from(conn).unwrap(), - p.is_local, - jdbc_url, - jdbc_driver, - ); - db_map.insert(name.to_string(), source_info); - } - } - - let query_str = unsafe { CStr::from_ptr(query) }.to_str().unwrap(); - let j4rs_base = match env::var("CX_LIB_PATH") { - Ok(val) => Some(val), - Err(_) => None, - }; - // println!("j4rs_base: {:?}", j4rs_base); - let fed_plan: Vec = rewrite_sql(query_str, &db_map, j4rs_base.as_deref()) - .unwrap() - .into_iter() - .map(|p| p.into()) - .collect(); - - CXSlice::<_>::new_from_vec(fed_plan) -} - -#[repr(C)] -pub struct CXArray { - array: *const FFI_ArrowArray, - schema: *const FFI_ArrowSchema, -} - -#[repr(C)] -pub struct CXResult { - data: CXSlice>, - header: CXSlice<*const c_char>, -} - -pub unsafe fn get_vec(ptr: *const T, len: usize, capacity: usize) -> Vec { - Vec::from_raw_parts(ptr as *mut T, len, capacity) -} - -pub unsafe fn free_str(ptr: *const c_char) { - let _ = CString::from_raw(ptr as *mut _); -} - -#[no_mangle] -pub unsafe extern "C" fn free_result(res: *const CXResult) { - let header = get_vec::<_>((*res).header.ptr, 
(*res).header.len, (*res).header.capacity); - header.into_iter().for_each(|col| free_str(col)); - - let rbs = get_vec::<_>((*res).data.ptr, (*res).data.len, (*res).data.capacity); - rbs.into_iter().for_each(|rb| { - get_vec::<_>(rb.ptr, rb.len, rb.capacity) - .into_iter() - .for_each(|a| { - // Otherwise memory leak - std::sync::Arc::from_raw(a.array); - std::sync::Arc::from_raw(a.schema); - }) - }); -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_scan(conn: *const c_char, query: *const c_char) -> CXResult { - let conn_str = unsafe { CStr::from_ptr(conn) }.to_str().unwrap(); - let query_str = unsafe { CStr::from_ptr(query) }.to_str().unwrap(); - let source_conn = SourceConn::try_from(conn_str).unwrap(); - let record_batches = get_arrow(&source_conn, None, &[CXQuery::from(query_str)]) - .unwrap() - .arrow() - .unwrap(); - - // arrow::util::pretty::print_batches(&record_batches[..]).unwrap(); - - let names: Vec<*const c_char> = record_batches[0] - .schema() - .fields() - .iter() - .map(|f| { - CString::new(f.name().as_str()) - .expect("new CString error") - .into_raw() as *const c_char - }) - .collect(); - - let mut result = vec![]; - for rb in record_batches { - let mut cols = vec![]; - - for array in rb.columns() { - let data = array.to_data(); - let array = Arc::new(FFI_ArrowArray::new(&data)); - let schema = Arc::new( - arrow::ffi::FFI_ArrowSchema::try_from(data.data_type()).expect("export schema c"), - ); - let array_ptr = Arc::into_raw(array); - let schema_ptr = Arc::into_raw(schema); - - let cx_array = CXArray { - array: array_ptr, - schema: schema_ptr, - }; - cols.push(cx_array); - } - - let cx_rb = CXSlice::::new_from_vec(cols); - result.push(cx_rb); - } - - let res = CXResult { - data: CXSlice::<_>::new_from_vec(result), - header: CXSlice::<_>::new_from_vec(names), - }; - - res -} - -#[repr(C)] -pub struct CXSchema { - types: CXSlice, - headers: CXSlice<*const c_char>, -} - -#[no_mangle] -pub unsafe extern "C" fn free_iter(iter: *mut Box) { - let _ 
= Box::from_raw(iter); -} - -#[no_mangle] -pub unsafe extern "C" fn free_schema(schema: *mut CXSchema) { - let res = Box::from_raw(schema); - - let header = get_vec::<_>(res.headers.ptr, res.headers.len, res.headers.capacity); - header.into_iter().for_each(|col| free_str(col)); - - get_vec::<_>(res.types.ptr, res.types.len, res.types.capacity) - .into_iter() - .for_each(|a| { - std::sync::Arc::from_raw(a.array); - std::sync::Arc::from_raw(a.schema); - }); -} - -#[no_mangle] -pub unsafe extern "C" fn free_record_batch(rb: *mut CXSlice) { - let slice = Box::from_raw(rb); - get_vec::<_>(slice.ptr, slice.len, slice.capacity) - .into_iter() - .for_each(|a| { - std::sync::Arc::from_raw(a.array); - std::sync::Arc::from_raw(a.schema); - }) -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_scan_iter( - conn: *const c_char, - queries: *const CXSlice<*const c_char>, - batch_size: usize, -) -> *mut Box { - let conn_str = unsafe { CStr::from_ptr(conn) }.to_str().unwrap(); - let source_conn = SourceConn::try_from(conn_str).unwrap(); - - let query_slice = unsafe { std::slice::from_raw_parts((*queries).ptr, (*queries).len) }; - - let mut query_vec = vec![]; - for &q in query_slice { - let query = unsafe { CStr::from_ptr(q).to_str().unwrap() }; - query_vec.push(CXQuery::from(query)); - } - - let arrow_iter: Box = - new_record_batch_iter(&source_conn, None, query_vec.as_slice(), batch_size); - - Box::into_raw(Box::new(arrow_iter)) -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_get_schema( - iter: *mut Box, -) -> *mut CXSchema { - let arrow_iter = unsafe { &*iter }; - let (empty_batch, names) = arrow_iter.get_schema(); - let mut cols = vec![]; - for array in empty_batch.columns() { - let data = array.to_data(); - let array = Arc::new(arrow::ffi::FFI_ArrowArray::new(&data)); - let schema = Arc::new( - arrow::ffi::FFI_ArrowSchema::try_from(data.data_type()).expect("export schema c"), - ); - let array_ptr = Arc::into_raw(array); - let schema_ptr = Arc::into_raw(schema); - 
let cx_array = CXArray { - array: array_ptr, - schema: schema_ptr, - }; - cols.push(cx_array); - } - - let names: Vec<*const c_char> = names - .iter() - .map(|name| { - CString::new(name.as_str()) - .expect("new CString error") - .into_raw() as *const c_char - }) - .collect(); - - let res = Box::new(CXSchema { - types: CXSlice::<_>::new_from_vec(cols), - headers: CXSlice::<_>::new_from_vec(names), - }); - - Box::into_raw(res) -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_prepare(iter: *mut Box) { - let arrow_iter = unsafe { &mut *iter }; - arrow_iter.prepare(); -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_iter_next( - iter: *mut Box, -) -> *mut CXSlice { - let arrow_iter = unsafe { &mut *iter }; - match arrow_iter.next_batch() { - Some(rb) => { - let mut cols = vec![]; - - for array in rb.columns() { - let data = array.to_data(); - let array = Arc::new(arrow::ffi::FFI_ArrowArray::new(&data)); - let schema = - Arc::new(FFI_ArrowSchema::try_from(data.data_type()).expect("export schema c")); - let array_ptr = Arc::into_raw(array); - let schema_ptr = Arc::into_raw(schema); - - let cx_array = CXArray { - array: array_ptr, - schema: schema_ptr, - }; - cols.push(cx_array); - } - - let cx_rb = Box::new(CXSlice::::new_from_vec(cols)); - Box::into_raw(cx_rb) - } - None => std::ptr::null_mut(), - } -} - -#[no_mangle] -pub unsafe extern "C" fn connectorx_set_thread_num(num: usize) { - set_global_num_thread(num); -} diff --git a/connectorx-cpp/src/plan.rs b/connectorx-cpp/src/plan.rs deleted file mode 100644 index b1baf33..0000000 --- a/connectorx-cpp/src/plan.rs +++ /dev/null @@ -1,22 +0,0 @@ -use crate::CXFederatedPlan; -use connectorx::fed_rewriter::Plan; -use libc::c_char; -use std::convert::Into; -use std::ffi::CString; - -impl Into for Plan { - fn into(self) -> CXFederatedPlan { - CXFederatedPlan { - db_name: CString::new(self.db_name.as_str()) - .expect("new CString error") - .into_raw() as *const c_char, - db_alias: 
CString::new(self.db_alias.as_str()) - .expect("new CString error") - .into_raw() as *const c_char, - sql: CString::new(self.sql.as_str()) - .expect("new CString error") - .into_raw() as *const c_char, - cardinality: self.cardinality, - } - } -} diff --git a/connectorx-python/.cargo/config b/connectorx-python/.cargo/config deleted file mode 100644 index 59c989e..0000000 --- a/connectorx-python/.cargo/config +++ /dev/null @@ -1,11 +0,0 @@ -[target.x86_64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -[target.aarch64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] \ No newline at end of file diff --git a/connectorx-python/Cargo.lock b/connectorx-python/Cargo.lock deleted file mode 100644 index 50171df..0000000 --- a/connectorx-python/Cargo.lock +++ /dev/null @@ -1,5977 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.10", - "once_cell", - "version_check", -] - -[[package]] -name = "ahash" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" -dependencies = [ - "cfg-if 1.0.0", - "const-random", - "getrandom 0.2.10", - "once_cell", - "version_check", 
-] - -[[package]] -name = "aho-corasick" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - -[[package]] -name = "anyhow" -version = "1.0.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" - -[[package]] -name = "argminmax" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202108b46429b765ef483f8a24d5c46f48c14acfdacc086dd4ab6dddf6bcdbd2" -dependencies = [ - 
"num-traits", -] - -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - -[[package]] -name = "arrayvec" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" - -[[package]] -name = "arrow" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6619cab21a0cdd8c9b9f1d9e09bfaa9b1974e5ef809a6566aef0b998caf38ace" -dependencies = [ - "ahash 0.8.3", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0dc95485623a76e00929bda8caa40c1f838190952365c4f43a7b9ae86d03e94" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "num", -] - -[[package]] -name = "arrow-array" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3267847f53d3042473cfd2c769afd8d74a6d7d201fc3a34f5cb84c0282ef47a7" -dependencies = [ - "ahash 0.8.3", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half 2.3.1", - "hashbrown 0.13.2", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f66553e66e120ac4b21570368ee9ebf35ff3f5399f872b0667699e145678f5" -dependencies = [ - "half 2.3.1", 
- "num", -] - -[[package]] -name = "arrow-cast" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e6f3579dbf0d97c683d451b2550062b0f0e62a3169bf74238b5f59f44ad6d8" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "chrono", - "comfy-table 6.2.0", - "lexical-core", - "num", -] - -[[package]] -name = "arrow-csv" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373579c4c1a8f5307d3125b7a89c700fcf8caf85821c77eb4baab3855ae0aba5" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61bc8df9912cca6642665fdf989d6fa0de2570f18a7f709bcf59d29de96d2097" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half 2.3.1", - "num", -] - -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - -[[package]] -name = "arrow-ipc" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0105dcf5f91daa7182d87b713ee0b32b3bfc88e0c48e7dc3e9d6f1277a07d1ae" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", -] - -[[package]] -name = "arrow-json" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73134fb5b5ec8770f8cbb214c2c487b2d350081e403ca4eeeb6f8f5e19846ac" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "indexmap 1.9.3", - "lexical-core", - 
"num", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-ord" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f25bc66e18d4c2aa1fe2f9bb03e2269da60e636213210385ae41a107f9965a" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "half 2.3.1", - "num", -] - -[[package]] -name = "arrow-row" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1095ff85ea4f5ff02d17b30b089de31b51a50be01c6b674f0a0509ab771232f1" -dependencies = [ - "ahash 0.8.3", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half 2.3.1", - "hashbrown 0.13.2", -] - -[[package]] -name = "arrow-schema" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25187bbef474151a2e4ddec67b9e34bda5cbfba292dc571392fa3a1f71ff5a82" -dependencies = [ - "bitflags 2.4.0", -] - -[[package]] -name = "arrow-select" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0d4ee884aec3aa05e41478e3cd312bf609de9babb5d187a43fb45931da4da4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num", -] - -[[package]] -name = "arrow-string" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d71c3ffe4c07e66ce8fdc6aed5b00e0e60c5144911879b10546f5b72d8fa1c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "regex", - "regex-syntax 0.7.5", -] - -[[package]] -name = "arrow2" -version = "0.17.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59c468daea140b747d781a1da9f7db5f0a8e6636d4af20cc539e43d05b0604fa" -dependencies = [ - "ahash 0.8.3", - "arrow-format", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", - "foreign_vec", - "futures", - "getrandom 0.2.10", - "hash_hasher", - 
"lexical-core", - "lz4", - "multiversion", - "num-traits", - "regex", - "regex-syntax 0.6.29", - "rustc_version", - "simdutf8", - "strength_reduce", - "zstd", -] - -[[package]] -name = "async-channel" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" -dependencies = [ - "concurrent-queue", - "event-listener", - "futures-core", -] - -[[package]] -name = "async-compression" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d495b6dc0184693324491a5ac05f559acc97bf937ab31d7a1c33dd0016be6d2b" -dependencies = [ - "bzip2", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - -[[package]] -name = "async-lock" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" -dependencies = [ - "event-listener", -] - -[[package]] -name = "async-native-tls" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9e7a929bd34c68a82d58a4de7f86fffdaf97fb2af850162a7bb19dd7269b33" -dependencies = [ - "async-std", - "native-tls", - "thiserror", - "url", -] - -[[package]] -name = "async-std" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" -dependencies = [ - "async-channel", - "async-lock", - "crossbeam-utils", - "futures-channel", - "futures-core", - "futures-io", - "memchr", - "once_cell", - "pin-project-lite", - "pin-utils", - "slab", - "wasm-bindgen-futures", -] - -[[package]] -name = "async-stream" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22068c0c19514942eefcfd4daf8976ef1aad84e61539f95cd200c35202f80af5" -dependencies = [ - 
"async-stream-impl 0.2.1", - "futures-core", -] - -[[package]] -name = "async-stream" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" -dependencies = [ - "async-stream-impl 0.3.5", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f9db3b38af870bf7e5cc649167533b493928e50744e2c30ae350230b414670" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "async-trait" -version = "0.1.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "asynchronous-codec" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4401f0a3622dad2e0763fa79e0eb328bc70fb7dccfdd645341f00d671247d6" -dependencies = [ - "bytes", - "futures-sink", - "futures-util", - "memchr", - "pin-project-lite", -] - -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = 
"1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "backtrace" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" -dependencies = [ - "addr2line", - "cc", - "cfg-if 1.0.0", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - -[[package]] -name = "base64" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" - -[[package]] -name = "bb8" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e9f4fa9768efd269499d8fba693260cfc670891cf6de3adc935588447a77cc8" -dependencies = [ - "async-trait", - "futures-channel", - "futures-util", - "parking_lot 0.11.2", - "tokio", -] - -[[package]] -name = "bb8-tiberius" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "648d5365b34a2a362d5b8790d3c1b230d263d2377e563c76cb79c10d326b917e" -dependencies = [ - "async-trait", - "bb8", - "futures", - "thiserror", - "tiberius", - "tokio", - "tokio-util 0.6.10", -] - -[[package]] -name = "bigdecimal" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6773ddc0eafc0e509fb60e48dff7f450f8e674a0686ae8605e8d9901bd5eefa" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "bindgen" -version = "0.59.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" -dependencies = [ - "bitflags 
1.3.2", - "cexpr", - "clang-sys", - "clap", - "env_logger", - "lazy_static", - "lazycell", - "log", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "which", -] - -[[package]] -name = "bitfield" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if 1.0.0", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "borsh" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" -dependencies = [ - "borsh-derive", - "hashbrown 0.13.2", -] - -[[package]] -name = "borsh-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" -dependencies = [ - "borsh-derive-internal", - "borsh-schema-derive-internal", - "proc-macro-crate", - "proc-macro2", - "syn 1.0.109", -] - -[[package]] -name = "borsh-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "borsh-schema-derive-internal" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "brotli" -version = "3.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bufstream" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" - -[[package]] -name = "built" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9c056b9ed43aee5e064b683aa1ec783e19c6acec7559e3ae931b7490472fbe" -dependencies = [ - 
"cargo-lock", - "chrono", -] - -[[package]] -name = "bumpalo" -version = "3.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" - -[[package]] -name = "bytecheck" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" -dependencies = [ - "bytecheck_derive", - "ptr_meta", - "simdutf8", -] - -[[package]] -name = "bytecheck_derive" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "bytemuck" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" -dependencies = [ - "bytemuck_derive", -] - -[[package]] -name = "bytemuck_derive" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "bytes" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "cargo-lock" -version = "8.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "031718ddb8f78aa5def78a09e90defe30151d1f6c672f937af4dd916429ed996" -dependencies = [ - "semver", - "serde", - "toml", - "url", -] - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "jobserver", - "libc", -] - -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d87d9d13be47a5b7c3907137f1290b0459a7f80efb26be8c52afb11963bccb02" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - 
"time 0.1.45", - "wasm-bindgen", - "windows-targets", -] - -[[package]] -name = "chrono-tz" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", -] - -[[package]] -name = "clang-sys" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "textwrap", - "unicode-width", - "vec_map", -] - -[[package]] -name = "cmake" -version = "0.1.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" -dependencies = [ - "cc", -] - -[[package]] -name = "comfy-table" -version = "6.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e959d788268e3bf9d35ace83e81b124190378e4c91c9067524675e33394b8ba" -dependencies = [ - "strum", - "strum_macros 0.24.3", - "unicode-width", -] - -[[package]] -name = "comfy-table" -version = "7.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" -dependencies = [ - "crossterm", - "strum", - "strum_macros 0.24.3", - "unicode-width", -] - -[[package]] -name = 
"concurrent-queue" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "connection-string" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4ecb0dc8c35d2c626e45ae70bbfcb1050b302f42bcdf025d913cc0c5a0b443" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "connectorx" -version = "0.3.3-alpha.1" -dependencies = [ - "anyhow", - "arrow", - "arrow2", - "bb8", - "bb8-tiberius", - "chrono", - "csv", - "datafusion", - "fallible-streaming-iterator", - "fehler", - "futures", - "gcp-bigquery-client", - "hex", - "itertools", - "j4rs", - "log", - "mysql_common", - "native-tls", - "num-traits", - "openssl", - "oracle", - "owning_ref", - "polars", - "postgres", - "postgres-native-tls", - "postgres-openssl", - "r2d2", - "r2d2-oracle", - "r2d2_mysql", - "r2d2_postgres", - "r2d2_sqlite", - "rayon", - "rusqlite", - "rust_decimal", - "rust_decimal_macros", - "serde_json", - "sqlparser 0.11.0", - "thiserror", - "tiberius", - "tokio", - "tokio-util 0.6.10", - "url", - "urlencoding", - "uuid 0.8.2", -] - -[[package]] -name = "connectorx-python" -version = "0.3.3-alpha.1" -dependencies = [ - "anyhow", - "arrow", - "arrow2", - "bitfield", - "built", - "bytes", - "chrono", - "connectorx", - "criterion", - "criterion-macro", - "dict_derive", - "env_logger", - "fehler", - "iai", - "itertools", - "lazy_static", - "libc", - "log", - "ndarray", - "numpy", - "openssl", - "postgres", - "postgres-native-tls", - "postgres-openssl", - "pprof", - "pyo3", - "pyo3-built", - "rayon", - "rust_decimal", - "serde_json", - "sqlparser 0.11.0", - "thiserror", - "tokio", - "tokio-util 0.6.10", - "url", - "urlencoding", - "uuid 0.8.2", -] - -[[package]] -name = "const-random" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" -dependencies = [ - "getrandom 0.2.10", - "once_cell", - "proc-macro-hack", - "tiny-keccak", -] - -[[package]] -name = "constant_time_eq" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" - -[[package]] -name = "core-foundation" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" - -[[package]] -name = "cpp_demangle" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "cpufeatures" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "criterion" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" -dependencies = [ - "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-macro" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8421c08c2e60050bb24ebfb7232bdd2fcf44fa74c5777b00a71daa7d332a8164" -dependencies = [ - "proc-macro2", - "quote", -] - -[[package]] -name = "criterion-plot" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" -dependencies = [ - "autocfg", - "cfg-if 1.0.0", - 
"crossbeam-utils", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-queue" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "crossterm" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" -dependencies = [ - "bitflags 1.3.2", - "crossterm_winapi", - "libc", - "mio", - "parking_lot 0.12.1", - "signal-hook", - "signal-hook-mio", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" -dependencies = [ - "winapi", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "darling" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if 1.0.0", - "hashbrown 0.14.0", - "lock_api", - "once_cell", - "parking_lot_core 0.9.8", -] - -[[package]] -name = "datafusion" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9992c267436551d40b52d65289b144712e7b0ebdc62c8c859fd1574e5f73efbb" -dependencies = [ - "ahash 0.8.3", - "arrow", - "arrow-array", - "arrow-schema", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-row", - "datafusion-sql", - "flate2", - "futures", - "glob", - "hashbrown 0.13.2", - "indexmap 1.9.3", - "itertools", - "lazy_static", - "log", - "num_cpus", - "object_store", - "parking_lot 0.12.1", - "parquet", - 
"percent-encoding", - "pin-project-lite", - "rand 0.8.5", - "smallvec", - "sqlparser 0.34.0", - "tempfile", - "tokio", - "tokio-stream", - "tokio-util 0.7.8", - "url", - "uuid 1.4.1", - "xz2", - "zstd", -] - -[[package]] -name = "datafusion-common" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3be97f7a7c720cdbb71e9eeabf814fa6ad8102b9022390f6cac74d3b4af6392" -dependencies = [ - "arrow", - "arrow-array", - "chrono", - "num_cpus", - "object_store", - "parquet", - "sqlparser 0.34.0", -] - -[[package]] -name = "datafusion-execution" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77c4b14b809b0e4c5bb101b6834504f06cdbb0d3c643400c61d0d844b33264e" -dependencies = [ - "dashmap", - "datafusion-common", - "datafusion-expr", - "hashbrown 0.13.2", - "log", - "object_store", - "parking_lot 0.12.1", - "rand 0.8.5", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ec7409bd45cf4fae6395d7d1024c8a97e543cadc88363e405d2aad5330e5e7" -dependencies = [ - "ahash 0.8.3", - "arrow", - "datafusion-common", - "lazy_static", - "sqlparser 0.34.0", - "strum", - "strum_macros 0.24.3", -] - -[[package]] -name = "datafusion-optimizer" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b537c93f87989c212db92a448a0f5eb4f0995e27199bb7687ae94f8b64a7a8" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown 0.13.2", - "itertools", - "log", - "regex-syntax 0.7.5", -] - -[[package]] -name = "datafusion-physical-expr" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60ee3f53340fdef36ee54d9e12d446ae2718b1d0196ac581f791d34808ec876" -dependencies = [ - "ahash 0.8.3", - "arrow", - "arrow-array", - 
"arrow-buffer", - "arrow-schema", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-row", - "half 2.3.1", - "hashbrown 0.13.2", - "indexmap 1.9.3", - "itertools", - "lazy_static", - "libc", - "md-5", - "paste 1.0.14", - "petgraph 0.6.4", - "rand 0.8.5", - "regex", - "sha2", - "unicode-segmentation", - "uuid 1.4.1", -] - -[[package]] -name = "datafusion-row" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d58fc64058aa3bcb00077a0d19474a0d584d31dec8c7ac3406868f485f659af9" -dependencies = [ - "arrow", - "datafusion-common", - "paste 1.0.14", - "rand 0.8.5", -] - -[[package]] -name = "datafusion-sql" -version = "26.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1531f0314151a34bf6c0a83c7261525688b7c729876f53e7896b8f4ca8f57d07" -dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-expr", - "log", - "sqlparser 0.34.0", -] - -[[package]] -name = "debugid" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6ee87af31d84ef885378aebca32be3d682b0e0dc119d5b4860a2c5bb5046730" -dependencies = [ - "uuid 0.8.2", -] - -[[package]] -name = "deranged" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" -dependencies = [ - "serde", -] - -[[package]] -name = "derive_utils" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abcad25e9720609ccb3dcdb795d845e37d8ce34183330a9f48b03a1a71c8e21" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "dict_derive" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6207f46b33b2bf00858b0edb03d188d31a46fedfde4aa53a27d69fe25acd80cf" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - 
-[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - -[[package]] -name = "dunce" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" - -[[package]] -name = "dyn-clone" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555" - -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "encoding" -version = "0.2.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" -dependencies = [ - "encoding-index-japanese", - "encoding-index-korean", - "encoding-index-simpchinese", - "encoding-index-singlebyte", - "encoding-index-tradchinese", -] - -[[package]] -name = "encoding-index-japanese" -version = "1.20141219.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-korean" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-simpchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-singlebyte" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding-index-tradchinese" -version = "1.20141219.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" -dependencies = [ - "encoding_index_tests", -] - -[[package]] -name = "encoding_index_tests" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" - -[[package]] -name = "encoding_rs" -version = "0.8.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "enum_dispatch" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e" -dependencies = [ - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.31", -] - 
-[[package]] -name = "enumflags2" -version = "0.7.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c041f5090df68b32bcd905365fd51769c8b9d553fe87fde0b683534f10c01bd2" -dependencies = [ - "enumflags2_derive", -] - -[[package]] -name = "enumflags2_derive" -version = "0.7.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9a1f9f7d83e59740248a6e14ecf93929ade55027844dfcea78beafccc15745" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "ethnum" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8ff382b2fa527fb7fb06eeebfc5bbb3f17e3cc6b9d70b006c41daa8824adac" - -[[package]] -name = "event-listener" -version = "2.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - -[[package]] -name = "fast-float" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" - -[[package]] -name = "fastrand" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" - -[[package]] -name = "fehler" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5729fe49ba028cd550747b6e62cd3d841beccab5390aa398538c31a2d983635" -dependencies = [ - "fehler-macros", -] - -[[package]] -name = "fehler-macros" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb5acb1045ebbfa222e2c50679e392a71dd77030b78fb0189f2d9c5974400f9" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "finl_unicode" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" - -[[package]] -name = "fixedbitset" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" -dependencies = [ - "crc32fast", - "libz-sys", - "miniz_oxide", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - -[[package]] -name = "form_urlencoded" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "frunk" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a351b59e12f97b4176ee78497dff72e4276fb1ceb13e19056aca7fa0206287" -dependencies = [ - "frunk_core", - "frunk_derives", - "frunk_proc_macros", -] - -[[package]] -name = "frunk_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af2469fab0bd07e64ccf0ad57a1438f63160c69b2e57f04a439653d68eb558d6" - -[[package]] -name = 
"frunk_derives" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" -dependencies = [ - "frunk_proc_macro_helpers", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "frunk_proc_macro_helpers" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b54add839292b743aeda6ebedbd8b11e93404f902c56223e51b9ec18a13d2c" -dependencies = [ - "frunk_core", - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "frunk_proc_macros" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71b85a1d4a9a6b300b41c05e8e13ef2feca03e0334127f29eca9506a7fe13a93" -dependencies = [ - "frunk_core", - "frunk_proc_macro_helpers", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "futures" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" - -[[package]] -name = "futures-executor" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" - -[[package]] -name = "futures-macro" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "futures-sink" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" - -[[package]] -name = "futures-task" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" - -[[package]] -name = "futures-util" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "gcp-bigquery-client" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ab5966c98f6d4e71e247cda6a6d8497bc8a1df3a4ba9ee548087842cffc21d" -dependencies = [ - "async-stream 0.3.5", - "hyper", - "hyper-rustls 0.23.2", - "log", - "reqwest", - "serde", - "serde_json", - "thiserror", - "time 0.3.28", - "tokio", - 
"tokio-stream", - "url", - "yup-oauth2", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if 1.0.0", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if 1.0.0", - "js-sys", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", -] - -[[package]] -name = "gimli" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "h2" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap 1.9.3", - "slab", - "tokio", - "tokio-util 0.7.8", - "tracing", -] - -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if 1.0.0", - "crunchy", - "num-traits", -] - -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash 0.7.6", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.6", -] - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.3", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" -dependencies = [ - "ahash 0.8.3", - "allocator-api2", - "rayon", -] - -[[package]] -name = "hashlink" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" -dependencies = [ - "hashbrown 0.11.2", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - 
-[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "home" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "http" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - 
-[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "hyper" -version = "0.14.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.4.9", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" -dependencies = [ - "http", - "hyper", - "log", - "rustls 0.20.9", - "rustls-native-certs", - "tokio", - "tokio-rustls 0.23.4", -] - -[[package]] -name = "hyper-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" -dependencies = [ - "futures-util", - "http", - "hyper", - "rustls 0.21.7", - "tokio", - "tokio-rustls 0.24.1", -] - -[[package]] -name = "iai" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" - -[[package]] -name = "iana-time-zone" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", -] - -[[package]] -name = "indoc" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn 1.0.109", - "unindent", -] - -[[package]] -name = "inferno" -version = "0.10.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3886428c6400486522cf44b8626e7b94ad794c14390290f2a274dcf728a58f" -dependencies = [ - "ahash 0.7.6", - "atty", - "indexmap 1.9.3", - "itoa", - "lazy_static", - "log", - "num-format", - "quick-xml", - "rgb", - "str_stack", -] - -[[package]] -name 
= "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "io-enum" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5305557fa27b460072ae15ce07617e999f5879f14d376c8449f0bfb9f9d8e91e" -dependencies = [ - "derive_utils", - "syn 2.0.31", -] - -[[package]] -name = "ipnet" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" - -[[package]] -name = "j4rs" -version = "0.15.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76cc9c1648a1cc940ac10c19f56e50bee15344590e10f220899d955db5f87ac2" -dependencies = [ - "cesu8", - "dirs", - "dunce", - "fs_extra", - "glob", - "java-locator", - "jni-sys", - "lazy_static", - "libc", - "libloading", - "log", - "serde", - "serde_json", - "sha2", -] - -[[package]] -name = "java-locator" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90003f2fd9c52f212c21d8520f1128da0080bad6fff16b68fe6e7f2f0c3780c2" -dependencies = [ - "glob", - "lazy_static", -] - -[[package]] -name = "jni-sys" -version = "0.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" - -[[package]] -name = "jobserver" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "lexical" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" -dependencies = [ - "lexical-core", -] - -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.147" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" - -[[package]] -name = "libgssapi" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "724dbcd1f871da9c67983537a47ac510c278656f6392418ad67c7a52720e54b2" -dependencies = [ - "bitflags 1.3.2", - "bytes", - "lazy_static", - "libgssapi-sys", - "parking_lot 0.11.2", -] - -[[package]] -name = "libgssapi-sys" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd7d65e409c889f6c9d81ff079371d0d8fd88d7dca702ff187ef96fb0450fb7" -dependencies = [ - "bindgen", -] - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if 1.0.0", - 
"winapi", -] - -[[package]] -name = "libm" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" - -[[package]] -name = "libsqlite3-sys" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "libz-sys" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "linux-raw-sys" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" - -[[package]] -name = "lock_api" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" - -[[package]] -name = "lru" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" -dependencies = [ - "hashbrown 0.12.3", -] - -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "matrixmultiply" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "md-5" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" -dependencies = [ - "digest", -] - -[[package]] -name = "md5" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6bcd6433cff03a4bfc3d9834d504467db1f1cf6d0ea765d37d330249ed629d" - -[[package]] -name = "md5" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" - -[[package]] -name = "memchr" -version = "2.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" - -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - -[[package]] -name = "memmap2" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" -dependencies = [ - "libc", -] - -[[package]] -name = "memoffset" -version = "0.9.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", -] - -[[package]] -name = "mio" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" -dependencies = [ - "libc", - "log", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", -] - -[[package]] -name = "multimap" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" - -[[package]] -name = "multiversion" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c7b9d7fe61760ce5ea19532ead98541f6b4c495d87247aff9826445cf6872a" -dependencies = [ - "multiversion-macros", - "target-features", -] - -[[package]] -name = "multiversion-macros" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a83d8500ed06d68877e9de1dde76c1dbb83885dcdbda4ef44ccbc3fbda2ac8" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "target-features", -] - -[[package]] -name = "mysql" -version = "23.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05f11339ca5c251941805d51362a07823605a80586ced92914ab7de84fba813f" -dependencies = [ - "bufstream", - "bytes", - "crossbeam", - "flate2", - "io-enum", - "libc", - "lru", - "mysql_common", - "named_pipe", - "native-tls", - "once_cell", - "pem", - "percent-encoding", - "serde", - "serde_json", - "socket2 0.4.9", - "twox-hash", - "url", -] - -[[package]] -name = "mysql_common" -version = "0.29.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9006c95034ccf7b903d955f210469119f6c3477fc9c9e7a7845ce38a3e665c2a" -dependencies = [ - "base64 0.13.1", - "bigdecimal", - "bindgen", - "bitflags 1.3.2", - "bitvec", - "byteorder", - "bytes", - "cc", - "chrono", - "cmake", - "crc32fast", - "flate2", - "frunk", - "lazy_static", - "lexical", - "num-bigint", - "num-traits", - "rand 0.8.5", - "regex", - "rust_decimal", - "saturating", - "serde", - "serde_json", - "sha1", - "sha2", - "smallvec", - "subprocess", - "thiserror", - "time 0.3.28", - "uuid 1.4.1", -] - -[[package]] -name = "named_pipe" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad9c443cce91fc3e12f017290db75dde490d685cdaaf508d7159d7cf41f0eb2b" -dependencies = [ - "winapi", -] - -[[package]] -name = "native-tls" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" -dependencies = [ - "lazy_static", - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - -[[package]] -name = "nix" -version = "0.20.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa9b4819da1bc61c0ea48b63b7bc8604064dd43013e7cc325df098d49cd7c18a" -dependencies = [ - "bitflags 1.3.2", - "cc", - "cfg-if 1.0.0", - "libc", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "now" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" -dependencies = [ - "chrono", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-format" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" -dependencies = [ - "arrayvec", - "itoa", -] - -[[package]] -name = 
"num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.2", - "libc", -] - -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] - -[[package]] -name = "numpy" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f3a190dd1aa88ee0de91e59e970d5b85cfa079a9ff6531b69f811ccd0c2a6e1" -dependencies = [ - "cfg-if 0.1.10", - "libc", - "ndarray", - "num-complex", - "num-traits", - "pyo3", -] - -[[package]] -name = "object" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" -dependencies = [ - "memchr", -] - -[[package]] -name = "object_store" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "itertools", - "parking_lot 0.12.1", - "percent-encoding", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - -[[package]] -name = "openssl" -version = "0.10.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" -dependencies = [ - "bitflags 2.4.0", - "cfg-if 1.0.0", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - -[[package]] -name = "openssl-src" -version = "300.1.3+3.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2c101a165fff9935e34def4669595ab1c7847943c42be86e21503e482be107" -dependencies = [ - "cc", -] - -[[package]] -name = "openssl-sys" -version = "0.9.93" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" -dependencies = [ - "cc", - "libc", - "openssl-src", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "opentls" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f561874f8d6ecfb674fc08863414040c93cc90c0b6963fe679895fab8b65560" -dependencies = [ - "futures-util", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "url", -] - -[[package]] -name = "oracle" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe80334af1fbaea016fbef0af77f5fa32452362e29a039389b8c93737585003" -dependencies = [ - "cc", - "chrono", - "lazy_static", - "oracle_procmacro", - "paste 1.0.14", -] - -[[package]] -name = "oracle_procmacro" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad247f3421d57de56a0d0408d3249d4b1048a522be2013656d92f022c3d8af27" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - -[[package]] -name = "owning_ref" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff55baddef9e4ad00f88b6c743a2a8062d4c6ade126c2a528644b8e444d52ce" -dependencies = [ - "stable_deref_trait", -] - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core 0.9.8", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if 1.0.0", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" -dependencies = [ - "cfg-if 1.0.0", - "libc", - "redox_syscall 0.3.5", - "smallvec", - "windows-targets", -] - -[[package]] -name = "parquet" -version = "40.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6a656fcc17e641657c955742c689732684e096f790ff30865d9f8dcc39f7c4a" -dependencies = [ - "ahash 0.8.3", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.21.3", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "hashbrown 0.13.2", - "lz4", - "num", - "num-bigint", - "object_store", - "paste 1.0.14", - "seq-macro", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", -] - -[[package]] -name = "parse-zoneinfo" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" -dependencies = [ - "regex", -] - -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - -[[package]] -name = "pem" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" -dependencies = [ - "base64 0.13.1", -] - -[[package]] -name = "percent-encoding" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" - -[[package]] -name = "petgraph" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" -dependencies = [ - "fixedbitset 0.2.0", - "indexmap 1.9.3", -] - -[[package]] -name = "petgraph" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap 2.0.0", -] - -[[package]] -name = "phf" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = 
"phf_generator" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_shared" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" - -[[package]] -name = "planus" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" -dependencies = [ - "array-init-cursor", -] - -[[package]] -name = "plotters" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" - -[[package]] -name = "plotters-svg" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "polars" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1362d4a136c0ebacb40d88a37ba361738b222fd8a2ee9340a3d8642f698c52b" -dependencies = [ - "getrandom 0.2.10", - "polars-core", - "polars-io", - "polars-lazy", - "polars-ops", - "polars-sql", - "polars-time", - "version_check", -] - -[[package]] -name = "polars-arrow" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f967c901fa5da4ca7f64e813d1268488ba97e9b3004cefc579ff851c197a1138" -dependencies = [ - "arrow2", - "hashbrown 0.14.0", - "multiversion", - "num-traits", - "polars-error", - "thiserror", - "version_check", -] - -[[package]] -name = "polars-core" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24f92fc5b167f668ff85ab9607dfa72e2c09664cacef59297ee8601dee60126" -dependencies = [ - "ahash 0.8.3", - "arrow2", - "bitflags 2.4.0", - "chrono", - "comfy-table 7.0.1", - "either", - "hashbrown 0.14.0", - "indexmap 2.0.0", - "num-traits", - "once_cell", - "polars-arrow", - "polars-error", - "polars-row", - "polars-utils", - "rand 0.8.5", - "rand_distr", - "rayon", - "regex", - "smartstring", - "thiserror", - "version_check", - "xxhash-rust", -] - -[[package]] -name = "polars-error" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d09c3a7337e53b38c37b57999038440fa39c6801b9ba48afaecd8e16f7ac0a" -dependencies = [ - "arrow2", - "regex", - "thiserror", -] - -[[package]] -name = "polars-io" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92cab0df9f2a35702fa5aec99edfaabf9ae8e9cdd0acf69e143ad2d132f34f9c" -dependencies = [ - "ahash 0.8.3", - "arrow2", - "async-trait", - "bytes", - "chrono", - "fast-float", - "futures", - "home", - "lexical", - 
"lexical-core", - "memchr", - "memmap2 0.7.1", - "num-traits", - "once_cell", - "polars-arrow", - "polars-core", - "polars-error", - "polars-time", - "polars-utils", - "rayon", - "regex", - "simdutf8", - "tokio", -] - -[[package]] -name = "polars-lazy" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c33762ec2a55e01c9f8776b34db86257c70a0a3b3929bd4eb91a52aacf61456" -dependencies = [ - "ahash 0.8.3", - "bitflags 2.4.0", - "glob", - "once_cell", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-pipe", - "polars-plan", - "polars-time", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-ops" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e825575c96302d2daedfc205a0062180033c92c55bcd6aafc4e109d4d8849ed0" -dependencies = [ - "argminmax", - "arrow2", - "either", - "indexmap 2.0.0", - "memchr", - "polars-arrow", - "polars-core", - "polars-utils", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-pipe" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2bc9a12da9ed043fb0cb51dbcb87b365e4845b7ab6399d7a81e838460c6974" -dependencies = [ - "enum_dispatch", - "hashbrown 0.14.0", - "num-traits", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-plan", - "polars-row", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-plan" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb67b014f0295e8e9dbb84404a91d666d477b3bc248a2ed51bc442833b16da35" -dependencies = [ - "ahash 0.8.3", - "arrow2", - "once_cell", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-time", - "polars-utils", - "rayon", - "regex", - "smartstring", - "strum_macros 0.25.2", - "version_check", -] - -[[package]] -name = "polars-row" 
-version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27f54c1956027bf6301948fb4f2837cf6d6b638d8dd1edf3aaeaa19906a986be" -dependencies = [ - "arrow2", - "polars-error", - "polars-utils", -] - -[[package]] -name = "polars-sql" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfcb15cf8eebd25ea1724109d0153817cd484c6326290585f0736b4e7fcf2f4" -dependencies = [ - "polars-arrow", - "polars-core", - "polars-lazy", - "polars-plan", - "serde", - "serde_json", - "sqlparser 0.36.1", -] - -[[package]] -name = "polars-time" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f42d2632f5971c9575041d33cbcfb1f996900c40bbf58bc6eb0a0c5efbecea" -dependencies = [ - "arrow2", - "atoi", - "chrono", - "now", - "once_cell", - "polars-arrow", - "polars-core", - "polars-ops", - "polars-utils", - "regex", - "smartstring", -] - -[[package]] -name = "polars-utils" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c326708a370d71dc6e11a8f4bbc10a8479e1c314dc048ba73543b815cd0bf339" -dependencies = [ - "ahash 0.8.3", - "hashbrown 0.14.0", - "num-traits", - "once_cell", - "polars-error", - "rayon", - "smartstring", - "sysinfo", - "version_check", -] - -[[package]] -name = "postgres" -version = "0.19.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7915b33ed60abc46040cbcaa25ffa1c7ec240668e0477c4f3070786f5916d451" -dependencies = [ - "bytes", - "fallible-iterator", - "futures-util", - "log", - "tokio", - "tokio-postgres", -] - -[[package]] -name = "postgres-native-tls" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d442770e2b1e244bb5eb03b31c79b65bb2568f413b899eaba850fa945a65954" -dependencies = [ - "futures", - "native-tls", - "tokio", - "tokio-native-tls", - "tokio-postgres", -] - -[[package]] -name = "postgres-openssl" -version 
= "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de0ea6504e07ca78355a6fb88ad0f36cafe9e696cbc6717f16a207f3a60be72" -dependencies = [ - "futures", - "openssl", - "tokio", - "tokio-openssl", - "tokio-postgres", -] - -[[package]] -name = "postgres-protocol" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" -dependencies = [ - "base64 0.21.3", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "md-5", - "memchr", - "rand 0.8.5", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-types" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" -dependencies = [ - "bytes", - "chrono", - "fallible-iterator", - "postgres-protocol", - "serde", - "serde_json", - "uuid 0.8.2", -] - -[[package]] -name = "pprof" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc842ca3fb958643d1696cfdada75410482480c11a7129463924fff5ab18d405" -dependencies = [ - "backtrace", - "criterion", - "inferno", - "lazy_static", - "libc", - "log", - "nix", - "parking_lot 0.11.2", - "prost", - "prost-build", - "prost-derive", - "symbolic-demangle", - "tempfile", - "thiserror", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "pretty-hex" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be91bcc43e73799dc46a6c194a55e7aae1d86cc867c860fd4a436019af21bd8c" - -[[package]] -name = "proc-macro-crate" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" 
-dependencies = [ - "toml", -] - -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - -[[package]] -name = "proc-macro2" -version = "1.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603" -dependencies = [ - "bytes", - "heck 0.3.3", - "itertools", - "log", - "multimap", - "petgraph 0.5.1", - "prost", - "prost-types", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prost-types" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b" -dependencies = [ - "bytes", - "prost", -] - -[[package]] -name = "ptr_meta" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" -dependencies = [ - "ptr_meta_derive", -] - -[[package]] -name = "ptr_meta_derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "pyo3" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41d50a7271e08c7c8a54cd24af5d62f73ee3a6f6a314215281ebdec421d5752" -dependencies = [ - "cfg-if 1.0.0", - "indoc", - "libc", - "parking_lot 0.11.2", - "paste 0.1.18", - "pyo3-build-config", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "779239fc40b8e18bc8416d3a37d280ca9b9fb04bda54b98037bb6748595c2410" -dependencies = [ - "once_cell", -] - -[[package]] -name = "pyo3-built" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be6d574e0f8cab2cdd1eeeb640cbf845c974519fa9e9b62fa9c08ecece0ca5de" - -[[package]] -name = "pyo3-macros" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b247e8c664be87998d8628e86f282c25066165f1f8dda66100c48202fdb93a" -dependencies = [ - "pyo3-macros-backend", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a8c2812c412e00e641d99eeb79dd478317d981d938aa60325dfa7157b607095" -dependencies = [ - "proc-macro2", - "pyo3-build-config", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "quick-xml" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b" -dependencies = [ - "memchr", -] - -[[package]] -name = "quote" -version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" -dependencies = [ - "proc-macro2", -] - -[[package]] 
-name = "r2d2" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" -dependencies = [ - "log", - "parking_lot 0.12.1", - "scheduled-thread-pool", -] - -[[package]] -name = "r2d2-oracle" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca5358dca54423e557b30e7b5a6d950d3a442ab4a56cc916965030cead8b02b" -dependencies = [ - "oracle", - "r2d2", -] - -[[package]] -name = "r2d2_mysql" -version = "23.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9733d738ce65959a744f387bae69aa690a867e18d48e5486b171c47bc7b0c575" -dependencies = [ - "mysql", - "r2d2", -] - -[[package]] -name = "r2d2_postgres" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7029c56be658cb54f321e0bee597810ee16796b735fa2559d7056bf06b12230b" -dependencies = [ - "postgres", - "r2d2", -] - -[[package]] -name = "r2d2_sqlite" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fdc8e4da70586127893be32b7adf21326a4c6b1aba907611edf467d13ffe895" -dependencies = [ - "r2d2", - "rusqlite", -] - -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - 
"rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.10", -] - -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" 
-version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom 0.2.10", - "redox_syscall 0.2.16", - "thiserror", -] - -[[package]] -name = "regex" -version = "1.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax 0.7.5", -] - -[[package]] -name = "regex-automata" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax 0.7.5", -] - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - -[[package]] -name = "rend" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" -dependencies = [ - "bytecheck", -] - -[[package]] -name = "reqwest" -version = "0.11.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" -dependencies = [ - "base64 0.21.3", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-rustls 0.24.1", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.21.7", - "rustls-pemfile 1.0.3", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-rustls 0.24.1", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "webpki-roots", - "winreg", -] - -[[package]] -name = "rgb" -version = "0.8.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin", - "untrusted", - "web-sys", - "winapi", -] - -[[package]] -name = "rkyv" -version = "0.7.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" -dependencies = [ - "bitvec", - "bytecheck", - "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", - "seahash", - "tinyvec", - "uuid 1.4.1", -] - -[[package]] -name = "rkyv_derive" -version = "0.7.42" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e06b915b5c230a17d7a736d1e2e63ee753c256a8614ef3f5147b13a4f5541d" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "rusqlite" -version = "0.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" -dependencies = [ - "bitflags 1.3.2", - "chrono", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "memchr", - "smallvec", -] - -[[package]] -name = "rust_decimal" -version = "1.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd" -dependencies = [ - "arrayvec", - "borsh", - "bytes", - "num-traits", - "postgres", - "rand 0.8.5", - "rkyv", - "serde", - "serde_json", -] - -[[package]] -name = "rust_decimal_macros" -version = "1.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86444b802de0b10ac5e563b5ddb43b541b9705de4e01a50e82194d2b183c1835" -dependencies = [ - "quote", - "rust_decimal", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "0.38.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453" -dependencies = [ - "bitflags 2.4.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - -[[package]] -name = "rustls" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" -dependencies = [ - "log", - "ring", - "sct", - "webpki", -] - -[[package]] -name = "rustls" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" -dependencies = [ - "log", - "ring", - "rustls-webpki", - "sct", -] - -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.3", - "schannel", - "security-framework", -] - -[[package]] -name = "rustls-pemfile" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee86d63972a7c661d1536fefe8c3c8407321c3df668891286de28abcd087360" -dependencies = [ - "base64 0.13.1", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" -dependencies = [ - "base64 0.21.3", -] - -[[package]] -name = "rustls-webpki" -version = "0.101.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - -[[package]] -name = "ryu" -version = "1.0.15" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "saturating" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" - -[[package]] -name = "schannel" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot 0.12.1", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "sct" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "seahash" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" - -[[package]] -name = "security-framework" -version = "2.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "core-foundation-sys", - 
"libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "semver" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" -dependencies = [ - "serde", -] - -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - -[[package]] -name = "serde" -version = "1.0.188" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half 1.8.2", - "serde", -] - -[[package]] -name = "serde_derive" -version = "1.0.188" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "serde_json" -version = "1.0.105" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - 
"ryu", - "serde", -] - -[[package]] -name = "sha1" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" -dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" -dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", -] - -[[package]] -name = "shlex" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" - -[[package]] -name = "signal-hook" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" -dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-mio" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" -dependencies = [ - "libc", - "mio", - "signal-hook", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - -[[package]] -name = "simdutf8" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" - -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "slab" -version = "0.4.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" - -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - -[[package]] -name = "snafu" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" -dependencies = [ - "doc-comment", - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "snap" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" - -[[package]] -name = "socket2" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "socket2" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - -[[package]] -name = "sqlparser" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10e1ce16b71375ad72d28d111131069ce0d5f8603f4f86d8acd3456b41b57a51" -dependencies = [ - "log", -] - -[[package]] -name = "sqlparser" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3706eefb17039056234df6b566b0014f303f867f2656108334a55b8096f59" -dependencies = [ - "log", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" -dependencies = [ - "log", -] - -[[package]] -name = "sqlparser_derive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "str_stack" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" - -[[package]] -name = "strength_reduce" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" - -[[package]] -name = "stringprep" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6" -dependencies = [ - "finl_unicode", - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" -dependencies = [ - "strum_macros 0.24.3", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "strum_macros" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.31", -] - -[[package]] -name = "subprocess" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - -[[package]] -name = "symbolic-common" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540" -dependencies = [ - "debugid", - "memmap2 0.5.10", - "stable_deref_trait", - "uuid 0.8.2", -] - -[[package]] -name = "symbolic-demangle" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4564ca7b4e6eb14105aa8bbbce26e080f6b5d9c4373e67167ab31f7b86443750" -dependencies = [ - "cpp_demangle", - "rustc-demangle", - "symbolic-common", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sysinfo" -version = "0.29.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a18d114d420ada3a891e6bc8e96a2023402203296a47cdd65083377dad18ba5" -dependencies = [ - "cfg-if 1.0.0", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "winapi", -] - -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - -[[package]] -name = "target-features" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f6b473c37f9add4cf1df5b4d66a8ef58ab6c895f1a3b3f949cf3e21230140e" - -[[package]] -name = "tempfile" -version = "3.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" -dependencies = [ - "cfg-if 1.0.0", - "fastrand", - "redox_syscall 0.3.5", - "rustix", - "windows-sys", -] - 
-[[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "thiserror" -version = "1.0.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiberius" -version = "0.5.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08c782c165a53700c17e4b15a1f6facc21e40a6a80402c518e0f3a2c3fcedd4" -dependencies = [ - "async-native-tls", - "async-stream 0.2.1", - "async-trait", - "asynchronous-codec", - "byteorder", - "bytes", - "chrono", - "connection-string", - "encoding", - "enumflags2", - "futures", - "futures-sink", - "futures-util", - "libgssapi", - "num-traits", - "once_cell", - "opentls", - "pin-project-lite", - "pretty-hex", - "rust_decimal", - "thiserror", - "tracing", - "uuid 0.8.2", - "winauth", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" -dependencies = [ - "deranged", - "itoa", - "libc", - "num_threads", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" - -[[package]] -name = "time-macros" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" -dependencies = [ - "time-core", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "parking_lot 0.12.1", - "pin-project-lite", - "socket2 0.5.3", - "tokio-macros", - "windows-sys", -] - -[[package]] -name = "tokio-macros" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - -[[package]] -name = "tokio-openssl" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08f9ffb7809f1b20c1b398d92acf4cc719874b3b2b2d9ea2f09b4a80350878a" -dependencies = [ - "futures-util", - "openssl", - "openssl-sys", - "tokio", -] - -[[package]] -name = "tokio-postgres" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" -dependencies = [ - "async-trait", - "byteorder", - "bytes", - "fallible-iterator", - "futures-channel", - "futures-util", - "log", - "parking_lot 0.12.1", - "percent-encoding", - "phf", - "pin-project-lite", - "postgres-protocol", - "postgres-types", - "rand 0.8.5", - "socket2 0.5.3", - "tokio", - "tokio-util 0.7.8", - "whoami", -] - -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.9", - "tokio", - "webpki", -] - -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.7", - "tokio", -] - -[[package]] -name = "tokio-stream" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" -dependencies = [ - "bytes", - "futures-core", - "futures-io", - "futures-sink", - "log", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", - "tracing", -] - -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - -[[package]] -name = "tower-service" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" - -[[package]] -name = "tracing" -version = "0.1.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" -dependencies = [ - "cfg-if 1.0.0", - "log", - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" -dependencies = 
[ - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "tracing-core" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" -dependencies = [ - "once_cell", -] - -[[package]] -name = "try-lock" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" - -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if 1.0.0", - "rand 0.8.5", - "static_assertions", -] - -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - -[[package]] -name = "unicode-bidi" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" - -[[package]] -name = "unicode-ident" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" - -[[package]] -name = "unicode-normalization" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "unindent" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" - -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - -[[package]] -name = "url" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.10", - "md5 0.7.0", -] - -[[package]] -name = "uuid" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" -dependencies = [ - "getrandom 0.2.10", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "walkdir" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" -dependencies = [ - "cfg-if 1.0.0", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn 1.0.109", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395" -dependencies = [ - "cfg-if 1.0.0", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2" - -[[package]] -name = "web-sys" -version = "0.3.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "webpki" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "webpki-roots" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" - -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - -[[package]] -name = "whoami" -version = "1.4.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50" -dependencies = [ - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "winauth" -version = "0.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f820cd208ce9c6b050812dc2d724ba98c6c1e9db5ce9b3f58d925ae5723a5e6" -dependencies = [ - "bitflags 1.3.2", - "byteorder", - "md5 0.6.1", - "rand 0.7.3", - "winapi", -] - -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys", -] - -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] - -[[package]] -name = "xxhash-rust" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "735a71d46c4d68d71d4b24d03fdc2b98e38cea81730595801db779c04fe80d70" - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "yup-oauth2" -version = "7.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98748970d2ddf05253e6525810d989740334aa7509457864048a829902db76f3" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.13.1", - "futures", - "http", - "hyper", - "hyper-rustls 0.23.2", - "itertools", - "log", - "percent-encoding", - "rustls 0.20.9", - "rustls-pemfile 0.3.0", - "seahash", - "serde", - "serde_json", - "time 0.3.28", - "tokio", - "tower-service", - "url", -] - -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" -dependencies = [ - "cc", 
- "libc", - "pkg-config", -] diff --git a/connectorx-python/Cargo.toml b/connectorx-python/Cargo.toml deleted file mode 100644 index 0b010af..0000000 --- a/connectorx-python/Cargo.toml +++ /dev/null @@ -1,79 +0,0 @@ -[package] -authors = ["Weiyuan Wu "] -edition = "2018" -name = "connectorx-python" -version = "0.3.3-alpha.1" -license = "MIT" -readme = "README.md" - -[workspace] -# prevents package from thinking it's in the workspace - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow = "1" -arrow = { version = "46" } -arrow2 = {version = "0.17", default-features = false} -bitfield = "0.13" -bytes = "1.4" -chrono = "0.4" -connectorx = {path = "../connectorx", default-features = false} -dict_derive = "0.4" -env_logger = "0.9" -fehler = "1" -itertools = "0.10" -lazy_static = "1.4.0" -libc = "0.2" -log = "0.4" -ndarray = "0.15" -numpy = "0.15" -openssl = {version = "0.10", features = ["vendored"]} -postgres = {version = "0.19", features = ["with-chrono-0_4", "with-uuid-0_8", "with-serde_json-1"]} -postgres-native-tls = {version = "0.5"} -postgres-openssl = {version = "0.5.0"} -pyo3 = {version = "0.15", default-features = false, features = ["macros"]} -pyo3-built = "0.4" -rust_decimal = {version = "1", features = ["db-postgres"]} -serde_json = "1" -sqlparser = "0.37" -thiserror = "1" -tokio = {version = "1", features = ["rt", "rt-multi-thread", "net"]} -tokio-util = "0.6" -url = "2" -urlencoding = "2.1" -uuid = "0.8" - -[build-dependencies] -built = {version = "0.5", features = ["chrono"]} - -[dev-dependencies] -criterion = "0.3" -criterion-macro = "0.3" -iai = "0.1" -pprof = {version = "0.5", features = ["flamegraph", "criterion", "protobuf"]} -rayon = "1" - -[lib] -crate-type = ["cdylib"] -name = "connectorx" - -[features] -branch = ["connectorx/branch"] -default = ["extension", "fptr", "nbstr", "dsts", "srcs", "federation", "fed_exec"] -dsts = ["connectorx/dst_arrow", "connectorx/dst_arrow2"] 
-executable = ["pyo3/auto-initialize"] -extension = ["pyo3/extension-module"] -fptr = ["connectorx/fptr"] -federation = ["connectorx/federation"] -fed_exec = ["connectorx/fed_exec"] -nbstr = [] -srcs = [ - "connectorx/src_postgres", - "connectorx/src_mysql", - "connectorx/src_sqlite", - "connectorx/src_mssql", - "connectorx/src_oracle", - "connectorx/src_bigquery", -] -integrated-auth-gssapi = ["connectorx/integrated-auth-gssapi"] diff --git a/connectorx-python/LICENSE b/connectorx-python/LICENSE deleted file mode 120000 index ea5b606..0000000 --- a/connectorx-python/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/connectorx-python/README.md b/connectorx-python/README.md deleted file mode 120000 index 32d46ee..0000000 --- a/connectorx-python/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/connectorx-python/build.rs b/connectorx-python/build.rs deleted file mode 100644 index 648d8e1..0000000 --- a/connectorx-python/build.rs +++ /dev/null @@ -1,13 +0,0 @@ -// https://github.com/PyO3/pyo3-built/issues/21 - -fn main() { - // let src = std::env::var("CARGO_MANIFEST_DIR").unwrap(); - // println!("src: {}", src); - // let dst = std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("built.rs"); - // let mut opts = built::Options::default(); - // println!("out: {:?}", dst); - // opts.set_dependencies(true).set_compiler(true).set_env(true); - - // built::write_built_file_with_opts(&opts, std::path::Path::new(&src), &dst) - // .expect("Failed to acquire build-time information"); -} diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py deleted file mode 100644 index 64e82d6..0000000 --- a/connectorx-python/connectorx/__init__.py +++ /dev/null @@ -1,379 +0,0 @@ -from typing import Optional, Tuple, Union, List, Dict, Any - -from .connectorx import ( - read_sql as _read_sql, - partition_sql as _partition_sql, - read_sql2 as _read_sql2, - 
get_meta as _get_meta, -) - -try: - from importlib.metadata import version - - __version__ = version(__name__) -except: - try: - from importlib_metadata import version - - __version__ = version(__name__) - except: - pass - -import os - -dir_path = os.path.dirname(os.path.realpath(__file__)) -# check whether it is in development env or installed -if ( - not os.path.basename(os.path.abspath(os.path.join(dir_path, ".."))) - == "connectorx-python" -): - if "J4RS_BASE_PATH" not in os.environ: - os.environ["J4RS_BASE_PATH"] = os.path.join(dir_path, "dependencies") -if "CX_REWRITER_PATH" not in os.environ: - os.environ["CX_REWRITER_PATH"] = os.path.join( - dir_path, "dependencies/federated-rewriter.jar" - ) - - -def rewrite_conn(conn: str, protocol: Optional[str] = None): - if not protocol: - # note: redshift/clickhouse are not compatible with the 'binary' protocol, and use other database - # drivers to connect. set a compatible protocol and masquerade as the appropriate backend. - backend, connection_details = conn.split(":", 1) if conn else ("", "") - if "redshift" in backend: - conn = f"postgresql:{connection_details}" - protocol = "cursor" - elif "clickhouse" in backend: - conn = f"mysql:{connection_details}" - protocol = "text" - else: - protocol = "binary" - return conn, protocol - - -def get_meta( - conn: str, - query: str, - protocol: Optional[str] = None, -): - """ - Get metadata (header) of the given query (only for pandas) - - Parameters - ========== - conn - the connection string. - query - a SQL query or a list of SQL queries. - protocol - backend-specific transfer protocol directive; defaults to 'binary' (except for redshift - connection strings, where 'cursor' will be used instead). 
- - """ - conn, protocol = rewrite_conn(conn, protocol) - result = _get_meta(conn, protocol, query) - df = reconstruct_pandas(result) - return df - - -def partition_sql( - conn: str, - query: str, - partition_on: str, - partition_num: int, - partition_range: Optional[Tuple[int, int]] = None, -): - """ - Partition the sql query - - Parameters - ========== - conn - the connection string. - query - a SQL query or a list of SQL queries. - partition_on - the column on which to partition the result. - partition_num - how many partitions to generate. - partition_range - the value range of the partition column. - """ - partition_query = { - "query": query, - "column": partition_on, - "min": partition_range[0] if partition_range else None, - "max": partition_range[1] if partition_range else None, - "num": partition_num, - } - return _partition_sql(conn, partition_query) - - -def read_sql_pandas( - sql: Union[List[str], str], - con: Union[str, Dict[str, str]], - index_col: Optional[str] = None, - protocol: Optional[str] = None, - partition_on: Optional[str] = None, - partition_range: Optional[Tuple[int, int]] = None, - partition_num: Optional[int] = None, -): - """ - Run the SQL query, download the data from database into a dataframe. - First several parameters are in the same name and order with `pandas.read_sql`. 
- - Parameters - ========== - Please refer to `read_sql` - - Examples - ======== - Read a DataFrame from a SQL query using a single thread: - - >>> # from pandas import read_sql - >>> from connectorx import read_sql_pandas as read_sql - >>> postgres_url = "postgresql://username:password@server:port/database" - >>> query = "SELECT * FROM lineitem" - >>> read_sql(query, postgres_url) - - """ - return read_sql( - con, - sql, - return_type="pandas", - protocol=protocol, - partition_on=partition_on, - partition_range=partition_range, - partition_num=partition_num, - index_col=index_col, - ) - - -def read_sql( - conn: Union[str, Dict[str, str]], - query: Union[List[str], str], - *, - return_type: str = "pandas", - protocol: Optional[str] = None, - partition_on: Optional[str] = None, - partition_range: Optional[Tuple[int, int]] = None, - partition_num: Optional[int] = None, - index_col: Optional[str] = None, -): - """ - Run the SQL query, download the data from database into a dataframe. - - Parameters - ========== - conn - the connection string, or dict of connection string mapping for federated query. - query - a SQL query or a list of SQL queries. - return_type - the return type of this function; one of "arrow(2)", "pandas", "modin", "dask" or "polars(2)". - protocol - backend-specific transfer protocol directive; defaults to 'binary' (except for redshift - connection strings, where 'cursor' will be used instead). - partition_on - the column on which to partition the result. - partition_range - the value range of the partition column. - partition_num - how many partitions to generate. - index_col - the index column to set; only applicable for return type "pandas", "modin", "dask". 
- - Examples - ======== - Read a DataFrame from a SQL query using a single thread: - - >>> postgres_url = "postgresql://username:password@server:port/database" - >>> query = "SELECT * FROM lineitem" - >>> read_sql(postgres_url, query) - - Read a DataFrame in parallel using 10 threads by automatically partitioning the provided SQL on the partition column: - - >>> postgres_url = "postgresql://username:password@server:port/database" - >>> query = "SELECT * FROM lineitem" - >>> read_sql(postgres_url, query, partition_on="partition_col", partition_num=10) - - Read a DataFrame in parallel using 2 threads by explicitly providing two SQL queries: - - >>> postgres_url = "postgresql://username:password@server:port/database" - >>> queries = ["SELECT * FROM lineitem WHERE partition_col <= 10", "SELECT * FROM lineitem WHERE partition_col > 10"] - >>> read_sql(postgres_url, queries) - - """ - if isinstance(query, list) and len(query) == 1: - query = query[0] - - if isinstance(conn, dict): - assert partition_on is None and isinstance( - query, str - ), "Federated query does not support query partitioning for now" - assert ( - protocol is None - ), "Federated query does not support specifying protocol for now" - result = _read_sql2(query, conn) - df = reconstruct_arrow(result) - if return_type == "pandas": - df = df.to_pandas(date_as_object=False, split_blocks=False) - if return_type == "polars": - try: - import polars as pl - except ModuleNotFoundError: - raise ValueError("You need to install polars first") - - try: - # api change for polars >= 0.8.* - df = pl.from_arrow(df) - except AttributeError: - df = pl.DataFrame.from_arrow(df) - return df - - if isinstance(query, str): - if partition_on is None: - queries = [query] - partition_query = None - else: - partition_query = { - "query": query, - "column": partition_on, - "min": partition_range[0] if partition_range else None, - "max": partition_range[1] if partition_range else None, - "num": partition_num, - } - queries = None - 
elif isinstance(query, list): - queries = query - partition_query = None - - if partition_on is not None: - raise ValueError("Partition on multiple queries is not supported.") - else: - raise ValueError("query must be either str or a list of str") - - conn, protocol = rewrite_conn(conn, protocol) - - if return_type in {"modin", "dask", "pandas"}: - try: - import pandas - except ModuleNotFoundError: - raise ValueError("You need to install pandas first") - - result = _read_sql( - conn, - "pandas", - queries=queries, - protocol=protocol, - partition_query=partition_query, - ) - df = reconstruct_pandas(result) - - if index_col is not None: - df.set_index(index_col, inplace=True) - - if return_type == "modin": - try: - import modin.pandas as mpd - except ModuleNotFoundError: - raise ValueError("You need to install modin first") - - df = mpd.DataFrame(df) - elif return_type == "dask": - try: - import dask.dataframe as dd - except ModuleNotFoundError: - raise ValueError("You need to install dask first") - - df = dd.from_pandas(df, npartitions=1) - - elif return_type in {"arrow", "arrow2", "polars", "polars2"}: - try: - import pyarrow - except ModuleNotFoundError: - raise ValueError("You need to install pyarrow first") - - result = _read_sql( - conn, - "arrow2" if return_type in {"arrow2", "polars", "polars2"} else "arrow", - queries=queries, - protocol=protocol, - partition_query=partition_query, - ) - df = reconstruct_arrow(result) - if return_type in {"polars", "polars2"}: - try: - import polars as pl - except ModuleNotFoundError: - raise ValueError("You need to install polars first") - - try: - df = pl.DataFrame.from_arrow(df) - except AttributeError: - # api change for polars >= 0.8.* - df = pl.from_arrow(df) - else: - raise ValueError(return_type) - - return df - - -def reconstruct_arrow(result: Tuple[List[str], List[List[Tuple[int, int]]]]): - import pyarrow as pa - - names, ptrs = result - if len(names) == 0: - return pa.Table.from_arrays([]) - - rbs = [] - for 
chunk in ptrs: - rb = pa.RecordBatch.from_arrays( - [pa.Array._import_from_c(*col_ptr) for col_ptr in chunk], names - ) - rbs.append(rb) - return pa.Table.from_batches(rbs) - - -def reconstruct_pandas(df_infos: Dict[str, Any]): - import pandas as pd - - data = df_infos["data"] - headers = df_infos["headers"] - block_infos = df_infos["block_infos"] - - nrows = data[0][0].shape[-1] if isinstance(data[0], tuple) else data[0].shape[-1] - blocks = [] - for binfo, block_data in zip(block_infos, data): - if binfo.dt == 0: # NumpyArray - blocks.append( - pd.core.internals.make_block(block_data, placement=binfo.cids) - ) - elif binfo.dt == 1: # IntegerArray - blocks.append( - pd.core.internals.make_block( - pd.core.arrays.IntegerArray(block_data[0], block_data[1]), - placement=binfo.cids[0], - ) - ) - elif binfo.dt == 2: # BooleanArray - blocks.append( - pd.core.internals.make_block( - pd.core.arrays.BooleanArray(block_data[0], block_data[1]), - placement=binfo.cids[0], - ) - ) - elif binfo.dt == 3: # DatetimeArray - blocks.append( - pd.core.internals.make_block( - pd.core.arrays.DatetimeArray(block_data), placement=binfo.cids - ) - ) - else: - raise ValueError(f"unknown dt: {binfo.dt}") - - block_manager = pd.core.internals.BlockManager( - blocks, [pd.Index(headers), pd.RangeIndex(start=0, stop=nrows, step=1)] - ) - df = pd.DataFrame(block_manager) - return df diff --git a/connectorx-python/connectorx/tests/__init__.py b/connectorx-python/connectorx/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/connectorx-python/connectorx/tests/benchmarks.py b/connectorx-python/connectorx/tests/benchmarks.py deleted file mode 100644 index 778d61e..0000000 --- a/connectorx-python/connectorx/tests/benchmarks.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -This file is skipped during normal test because the file name is not started with benchmarks -""" -import os - -from .. 
import read_sql - - -def read_sql_impl(conn: str, table: str): - read_sql( - conn, - f"""SELECT * FROM {table}""", - partition_on="L_ORDERKEY", - partition_num=10, - ) - - -def bench_mysql(benchmark): - benchmark(read_sql_impl, os.environ["MYSQL_URL"], os.environ["TPCH_TABLE"]) - - -def bench_postgres(benchmark): - benchmark(read_sql_impl, - os.environ["POSTGRES_URL"], os.environ["TPCH_TABLE"]) diff --git a/connectorx-python/connectorx/tests/test_arrow.py b/connectorx-python/connectorx/tests/test_arrow.py deleted file mode 100644 index d784d4f..0000000 --- a/connectorx-python/connectorx/tests/test_arrow.py +++ /dev/null @@ -1,193 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal -import datetime - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -def test_arrow(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="arrow", - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="float64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="object" - ), - }, - ) - - df = df.to_pandas() - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_arrow2(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="arrow2", - ) - expected = pd.DataFrame( - 
index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int32"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="float64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="object" - ), - }, - ) - - df = df.to_pandas() - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_arrow2_type(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_json, test_jsonb, test_f4array, test_f8array, test_narray, test_i2array, test_i4array, test_i8array, test_enum, test_ltree, test_name FROM test_types" - df = read_sql(postgres_url, query, return_type="arrow2") - df = df.to_pandas(date_as_object=False) - df.sort_values(by="test_int16", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - ["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ns]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01+00:00", - "2000-02-28 16:00:10+00:00", - "2038-01-18 15:59:59+00:00", - None, - ], - dtype="datetime64[ns, UTC]", - ), - "test_int16": pd.Series([0, 1, 2, 3], dtype="int32"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="float64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float32" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], 
dtype="object"), - "test_char": pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - [ - datetime.time(8, 12, 40), - None, - datetime.time(23, 0, 10), - datetime.time(18, 30), - ], - dtype="object", - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_json": pd.Series( - [ - '{"customer":"John Doe","items":{"product":"Beer","qty":6}}', - '{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}', - '{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}', - None, - ], - dtype="object", - ), - "test_jsonb": pd.Series( - [ - '{"product":"Beer","qty":6}', - '{"product":"Diaper","qty":24}', - '{"product":"Toy Car","qty":1}', - None, - ], - dtype="object", - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 32767], None], dtype="object" - ), - "test_i4array": pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_ltree": pd.Series( - ["A.B.C.D", "A.B.E", "A", None], dtype="object" - ), - "test_name": pd.Series( - ["0", "21", "someName", 
"101203203-1212323-22131235"] - ) - - }, - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_bigquery.py b/connectorx-python/connectorx/tests/test_bigquery.py deleted file mode 100644 index c5007a3..0000000 --- a/connectorx-python/connectorx/tests/test_bigquery.py +++ /dev/null @@ -1,312 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def bigquery_url() -> str: - conn = os.environ["BIGQUERY_URL"] - return conn - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_without_partition(bigquery_url: str) -> None: - query = "select * from `dataprep-bigquery.dataprep.test_table` order by test_int" - df = read_sql(bigquery_url, query) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "test_string": pd.Series( - ["str1", "str2", None, "str05", None], dtype="object" - ), - "test_float": pd.Series([1.10, 2.20, -4.44, None, None], dtype="float64"), - "test_bool": pd.Series([True, False, False, None, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_with_partition(bigquery_url: str) -> None: - query = "select * from `dataprep-bigquery.dataprep.test_table` order by test_int" - df = read_sql( - bigquery_url, - query, - partition_on="test_int", - partition_num=3, - partition_range=[0, 2500], - ) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "test_string": pd.Series( - ["str1", "str2", None, "str05", None], dtype="object" - ), - 
"test_float": pd.Series([1.10, 2.20, -4.44, None, None], dtype="float64"), - "test_bool": pd.Series([True, False, False, None, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_with_partition_without_partition_range(bigquery_url: str) -> None: - query = "select * from `dataprep-bigquery.dataprep.test_table` order by test_int" - df = read_sql(bigquery_url, query, partition_on="test_int", partition_num=3) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "test_string": pd.Series( - ["str1", "str2", None, "str05", None], dtype="object" - ), - "test_float": pd.Series([1.10, 2.20, -4.44, None, None], dtype="float64"), - "test_bool": pd.Series([True, False, False, None, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_manual_partition(bigquery_url: str) -> None: - queries = [ - "select * from `dataprep-bigquery.dataprep.test_table` where test_int < 2 order by test_int", - "select * from `dataprep-bigquery.dataprep.test_table` where test_int >= 2 order by test_int", - ] - df = read_sql(bigquery_url, query=queries) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "test_string": pd.Series( - ["str1", "str2", None, "str05", None], dtype="object" - ), - "test_float": pd.Series([1.10, 2.20, -4.44, None, None], dtype="float64"), - "test_bool": pd.Series([True, False, False, None, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - 
-@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_some_empty_partition(bigquery_url: str) -> None: - query = "select * from `dataprep-bigquery.dataprep.test_table` where test_int=1" - df = read_sql(bigquery_url, query, partition_on="test_int", partition_num=3) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(1), - data={ - "test_int": pd.Series([1], dtype="Int64"), - "test_string": pd.Series( - ["str1"], dtype="object" - ), - "test_float": pd.Series([1.10], dtype="float64"), - "test_bool": pd.Series([True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_join(bigquery_url: str) -> None: - query = "SELECT T.test_int, T.test_string, S.test_str FROM `dataprep-bigquery.dataprep.test_table` T INNER JOIN `dataprep-bigquery.dataprep.test_types` S ON T.test_int = S.test_int" - df = read_sql( - bigquery_url, - query - ) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(2), - data={ - "test_int": pd.Series([1, 2], dtype="Int64"), - "test_string": pd.Series( - [ - "str1", - "str2", - ], - dtype="object" - ), - "test_str": pd.Series( - [ - "😁😂😜", - "こんにちはЗдра́в", - ], - dtype="object" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_join_with_partition(bigquery_url: str) -> None: - query = "SELECT T.test_int, T.test_string, S.test_str FROM `dataprep-bigquery.dataprep.test_table` T INNER JOIN `dataprep-bigquery.dataprep.test_types` S ON T.test_int = S.test_int" - df = read_sql( - bigquery_url, - 
query, - partition_on="test_int", - partition_num=3, - ) - df = df.sort_values("test_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(2), - data={ - "test_int": pd.Series([1, 2], dtype="Int64"), - "test_string": pd.Series( - [ - "str1", - "str2", - ], - dtype="object" - ), - "test_str": pd.Series( - [ - "😁😂😜", - "こんにちはЗдра́в", - ], - dtype="object" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_aggregation1(bigquery_url: str) -> None: - query = "SELECT test_bool, SUM(test_int) as sum_int, SUM(test_float) as sum_float FROM `dataprep-bigquery.dataprep.test_table` GROUP BY test_bool" - df = read_sql(bigquery_url, query) - df = df.sort_values("sum_int").reset_index(drop=True) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "sum_int": pd.Series([5, 6, 2334], dtype="Int64"), - "sum_float": pd.Series([None, -2.24, 1.10], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_aggregation2(bigquery_url: str) -> None: - query = "select MAX(test_int) as max_int, MIN(test_int) min_int from `dataprep-bigquery.dataprep.test_table`" - df = read_sql(bigquery_url, query) - expected = pd.DataFrame( - index=range(1), - data={ - "max_int": pd.Series([2333], dtype="Int64"), - "min_int": pd.Series([1], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_aggregation1_with_partition(bigquery_url: str) -> None: - query = "SELECT test_bool, 
SUM(test_int) as sum_int, SUM(test_float) as sum_float FROM `dataprep-bigquery.dataprep.test_table` GROUP BY test_bool" - df = read_sql(bigquery_url, query, partition_on="sum_int", partition_num=2) - df.sort_values(by="sum_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "sum_int": pd.Series([5, 6, 2334], dtype="Int64"), - "sum_float": pd.Series([None, -2.24, 1.10], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_aggregation2_with_partition(bigquery_url: str) -> None: - query = "select MAX(test_int) as max_int, MIN(test_int) min_int from `dataprep-bigquery.dataprep.test_table`" - df = read_sql(bigquery_url, query, partition_on="max_int", partition_num=2) - expected = pd.DataFrame( - index=range(1), - data={ - "max_int": pd.Series([2333], dtype="Int64"), - "min_int": pd.Series([1], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("BIGQUERY_URL"), - reason="Test bigquery only when `BIGQUERY_URL` is set", -) -def test_bigquery_types(bigquery_url: str) -> None: - query = "select * from `dataprep-bigquery.dataprep.test_types`" - df = read_sql(bigquery_url, query) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, None], dtype="Int64"), - "test_numeric": pd.Series([1.23, 234.56, None], dtype="float"), - "test_bool": pd.Series([True, None, False], dtype="boolean"), - "test_date": pd.Series( - ["1937-01-28", "2053-07-25", None], dtype="datetime64[ns]" - ), - "test_time": pd.Series(["00:00:00", "12:59:59", None], dtype="object"), - "test_datetime": pd.Series( - [None, "2053-07-25 12:59:59", "1937-01-28 00:00:00"], - 
dtype="datetime64[ns]", - ), - "test_timestamp": pd.Series( - ["1970-01-01 00:00:01.000", None, "2004-02-29 09:00:01.300"], - dtype="datetime64[ns]", - ), - "test_str": pd.Series(["😁😂😜", "こんにちはЗдра́в", None], dtype="object"), - "test_bytes": pd.Series( - ["8J+YgfCfmILwn5ic", "44GT44KT44Gr44Gh44Gv0JfQtNGA0LDMgdCy", None], - dtype="object", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_clickhouse.py b/connectorx-python/connectorx/tests/test_clickhouse.py deleted file mode 100644 index 630ef37..0000000 --- a/connectorx-python/connectorx/tests/test_clickhouse.py +++ /dev/null @@ -1,83 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def clickhouse_url() -> str: - conn = os.environ["CLICKHOUSE_URL"] - return conn - - -@pytest.mark.skipif( - not os.environ.get("CLICKHOUSE_URL"), - reason="Do not test Clickhouse unless `CLICKHOUSE_URL` is set", -) -def test_clickhouse_without_partition(clickhouse_url: str) -> None: - query = "select * from test_table limit 3" - # clickhouse does not support binary protocol - df = read_sql(clickhouse_url, query, protocol="text") - # result from clickhouse might have different order each time - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="float64"), - "test_str": pd.Series(["abc", "defg", "hijkl"], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("CLICKHOUSE_URL"), - reason="Do not test Clickhouse unless `CLICKHOUSE_URL` is set", -) -def test_clickhouse_with_partition(clickhouse_url: str) -> None: - query = "select * from test_table" - df = read_sql( - clickhouse_url, query, partition_on="test_int", partition_num=3, protocol="text" - ) - 
df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="float64"), - "test_str": pd.Series( - ["abc", "defg", "hijkl", "mnopqr", "st", "u"], dtype="object" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("CLICKHOUSE_URL"), - reason="Do not test Clickhouse unless `CLICKHOUSE_URL` is set", -) -def test_clickhouse_types(clickhouse_url: str) -> None: - query = "select * from test_types" - df = read_sql(clickhouse_url, query, protocol="text") - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="Int64"), - "test_float": pd.Series([2.3, 3.3, 4.3], dtype="float64"), - "test_date": pd.Series( - ["1999-07-25", "1979-04-07", "1999-09-22"], dtype="datetime64[ns]" - ), - "test_datetime": pd.Series( - ["1999-07-25 23:14:07", "1979-04-07 03:04:37", "1999-07-25 20:21:14"], - dtype="datetime64[ns]", - ), - "test_decimal": pd.Series(["2.22", "3.33", "4.44"], dtype="object"), - "test_varchar": pd.Series(["こんにちは", "Ha好ち😁ðy", "b"], dtype="object"), - "test_char": pd.Series(["0123456789", "abcdefghij", "321"], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_dask.py b/connectorx-python/connectorx/tests/test_dask.py deleted file mode 100644 index a36838f..0000000 --- a/connectorx-python/connectorx/tests/test_dask.py +++ /dev/null @@ -1,42 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -def test_dask(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="dask", - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df = df.compute() - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_federation.py b/connectorx-python/connectorx/tests/test_federation.py deleted file mode 100644 index 36203d7..0000000 --- a/connectorx-python/connectorx/tests/test_federation.py +++ /dev/null @@ -1,59 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def db1_url() -> str: - conn = os.environ["DB1"] - return conn - - -@pytest.fixture(scope="module") # type: ignore -def db2_url() -> str: - conn = os.environ["DB2"] - return conn - - -@pytest.mark.skipif( - not (os.environ.get("DB1") and os.environ.get("DB2")), - reason="Do not test federated queries is set unless both `DB1` and `DB2` are set", -) -def test_fed_spj(db1_url: str, db2_url: str) -> None: - query = "SELECT T.test_int, T.test_bool, S.test_language FROM db1.test_table T INNER JOIN db2.test_str S ON T.test_int = S.id" - df = read_sql({"db1": db1_url, "db2": db2_url}, query) - expected = pd.DataFrame( - index=range(5), - data={ - "TEST_INT": pd.Series([0, 1, 2, 3, 4], dtype="int64"), - "TEST_BOOL": pd.Series([None, True, False, False, None], dtype="object"), - "TEST_LANGUAGE": pd.Series( - ["English", "中文", "日本語", "русский", "Emoji"], dtype="object" - ), - }, - ) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not (os.environ.get("DB1") and os.environ.get("DB2")), - reason="Do not test federated queries is set unless both `DB1` and `DB2` are set", -) -def test_fed_spja(db1_url: str, db2_url: str) -> None: - query = "select test_bool, AVG(test_float) as avg_float, SUM(test_int) as sum_int from db1.test_table as a, db2.test_str as b where a.test_int = b.id AND test_nullint is not NULL GROUP BY test_bool ORDER BY sum_int" - df = read_sql({"db1": db1_url, "db2": db2_url}, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([True, False, None], dtype="object"), - "AVG_FLOAT": pd.Series([None, 3, 5.45], dtype="float64"), - "SUM_INT": pd.Series([1, 3, 4], dtype="int64"), - }, - ) - df.sort_values(by="SUM_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_meta.py 
b/connectorx-python/connectorx/tests/test_meta.py deleted file mode 100644 index 5484845..0000000 --- a/connectorx-python/connectorx/tests/test_meta.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. import get_meta - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - -def test_get_meta(postgres_url: str) -> None: - query = "SELECT * FROM test_table limit 10" - df = get_meta( - postgres_url, - query, - ) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="Int64"), - "test_nullint": pd.Series([], dtype="Int64"), - "test_str": pd.Series( - [], dtype="object" - ), - "test_float": pd.Series([], dtype="float64"), - "test_bool": pd.Series( - [], dtype="boolean" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) \ No newline at end of file diff --git a/connectorx-python/connectorx/tests/test_modin.py b/connectorx-python/connectorx/tests/test_modin.py deleted file mode 100644 index 285bab5..0000000 --- a/connectorx-python/connectorx/tests/test_modin.py +++ /dev/null @@ -1,42 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -def test_modin(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="modin", - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df = df._to_pandas() - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_mssql.py b/connectorx-python/connectorx/tests/test_mssql.py deleted file mode 100644 index ee04d00..0000000 --- a/connectorx-python/connectorx/tests/test_mssql.py +++ /dev/null @@ -1,498 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def mssql_url() -> str: - conn = os.environ["MSSQL_URL"] - # conn = os.environ["AZURE_MSSQL_URL"] - return conn - - -@pytest.mark.xfail -def test_on_non_select(mssql_url: str) -> None: - query = "CREATE TABLE non_select(id INTEGER NOT NULL)" - df = read_sql(mssql_url, query) - - -def test_aggregation(mssql_url: str) -> None: - query = ( - "SELECT test_bool, SUM(test_float) as sum FROM test_table GROUP BY test_bool" - ) - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "sum": pd.Series([10.9, 5.2, -10.0], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_aggregation(mssql_url: str) -> None: - query = ( - "SELECT test_bool, SUM(test_int) AS test_int FROM test_table GROUP BY test_bool" - ) - df = read_sql(mssql_url, query, partition_on="test_int", partition_num=2) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "test_int": pd.Series([4, 5, 1315], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_aggregation2(mssql_url: str) -> None: - query = "select DISTINCT(test_bool) from test_table" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_aggregation2(mssql_url: str) -> None: - query = "select MAX(test_int) as max, MIN(test_int) as min from test_table" - df = read_sql(mssql_url, query, partition_on="max", partition_num=2) - expected = pd.DataFrame( - index=range(1), - data={ - "max": pd.Series([1314], dtype="Int64"), - "min": pd.Series([0], dtype="Int64"), - }, - ) - 
assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_udf(mssql_url: str) -> None: - query = ( - "SELECT dbo.increment(test_int) AS test_int FROM test_table ORDER BY test_int" - ) - df = read_sql(mssql_url, query, partition_on="test_int", partition_num=2) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 1315], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_manual_partition(mssql_url: str) -> None: - - queries = [ - "SELECT * FROM test_table WHERE test_int < 2", - "SELECT * FROM test_table WHERE test_int >= 2", - ] - - df = read_sql(mssql_url, query=queries) - - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_without_partition(mssql_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "a", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_limit_without_partition(mssql_url: str) -> None: - 
query = "SELECT top 3 * FROM test_table" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 0], dtype="int64"), - "test_nullint": pd.Series([3, None, 5], dtype="Int64"), - "test_str": pd.Series(["str1", "str2", "a"], dtype="object"), - "test_float": pd.Series([None, 2.2, 3.1], dtype="float64"), - "test_bool": pd.Series([True, False, None], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_limit_large_without_partition(mssql_url: str) -> None: - query = "SELECT top 10 * FROM test_table" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "a", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_with_partition(mssql_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_limit_with_partition(mssql_url: str) -> None: - query = "SELECT 
top 3 * FROM test_table" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([0, 1, 2], dtype="int64"), - "test_nullint": pd.Series([5, 3, None], dtype="Int64"), - "test_str": pd.Series(["a", "str1", "str2"], dtype="object"), - "test_float": pd.Series([3.1, None, 2.20], dtype="float64"), - "test_bool": pd.Series([None, True, False], dtype="boolean"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_limit_large_with_partition(mssql_url: str) -> None: - query = "SELECT top 10 * FROM test_table" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_with_partition_without_partition_range(mssql_url: str) -> None: - query = "SELECT * FROM test_table where test_float > 3" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_num=3, - ) - - expected = pd.DataFrame( - index=range(2), - data={ - "test_int": pd.Series([0, 4], dtype="int64"), - "test_nullint": pd.Series([5, 9], dtype="Int64"), - "test_str": pd.Series(["a", "c"], dtype="object"), - "test_float": pd.Series([3.1, 7.8], dtype="float64"), - "test_bool": pd.Series([None, None], dtype="boolean"), - }, - ) - 
df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_with_partition_and_selection(mssql_url: str) -> None: - query = "SELECT * FROM test_table WHERE 1 = 3 OR 2 = 2" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_with_partition_and_projection(mssql_url: str) -> None: - query = "SELECT test_int, test_float, test_str FROM test_table" - df = read_sql( - mssql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="int64"), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_with_partition_and_spja(mssql_url: str) -> None: - query = """ - SELECT test_bool, AVG(test_float) AS avg, SUM(test_int) AS sum - FROM test_table AS a, test_str AS b - WHERE a.test_int = b.id AND test_nullint IS NOT NULL - GROUP BY test_bool - ORDER BY sum - """ - df = read_sql(mssql_url, query, partition_on="sum", partition_num=2) - expected = pd.DataFrame( - index=range(3), - data={ 
- "test_bool": pd.Series([True, False, None], dtype="boolean"), - "avg": pd.Series([None, 3, 5.45], dtype="float64"), - "sum": pd.Series([1, 3, 4], dtype="Int64"), - }, - ) - df = df.sort_values("sum").reset_index(drop=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result(mssql_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="int64"), - "test_nullint": pd.Series([], dtype="Int64"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="float64"), - "test_bool": pd.Series([], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_partition(mssql_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(mssql_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="int64"), - "test_nullint": pd.Series([], dtype="Int64"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="float64"), - "test_bool": pd.Series([], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_some_partition(mssql_url: str) -> None: - query = "SELECT * FROM test_table where test_int < 1" - df = read_sql(mssql_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([0], dtype="int64"), - "test_nullint": pd.Series([5], dtype="Int64"), - "test_str": pd.Series(["a"], dtype="object"), - "test_float": pd.Series([3.1], dtype="float"), - "test_bool": pd.Series([None], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_types(mssql_url: str) -> None: - query = "SELECT * FROM test_types" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - 
"test_int1": pd.Series([0, 255, None], dtype="Int64"), - "test_int2": pd.Series([-32768, 32767, None], dtype="Int64"), - "test_int4": pd.Series([-2147483648, 2147483647, None], dtype="Int64"), - "test_int8": pd.Series( - [-9223372036854775808, 9223372036854775807, None], dtype="Int64" - ), - "test_float24": pd.Series([None, 1.18e-38, 3.40e38], dtype="float"), - "test_float53": pd.Series([None, -2.23e-308, 1.79e308], dtype="float"), - "test_floatn": pd.Series([None, 0, 123.1234567], dtype="float"), - "test_date": pd.Series( - ["1999-07-25", None, "2021-01-28"], dtype="datetime64[ns]" - ), - "test_time": pd.Series(["00:00:00", "23:59:59", None], dtype="object"), - "test_datetime": pd.Series( - [None, "2020-12-31 23:59:59", "2021-01-28 10:30:30"], - dtype="datetime64[ns]", - ), - "test_smalldatetime": pd.Series( - ["1990-01-01 10:00:00", None, "2079-06-05 23:00:00"], - dtype="datetime64[ns]", - ), - "test_naivedatetime": pd.Series( - ["1753-01-01 12:00:00", "2038-12-31 01:00:00", None], - dtype="datetime64[ns]", - ), - "test_naivedatetime2": pd.Series( - ["1900-01-01 12:00:00.12345", None, "2027-03-18 14:30:30.54321"], - dtype="datetime64[ns]", - ), - "test_new_decimal": pd.Series([1.1, 2.2, None], dtype="float"), - "test_decimal": pd.Series([1, 2, None], dtype="float"), - "test_varchar": pd.Series([None, "varchar2", "varchar3"], dtype="object"), - "test_char": pd.Series([None, "char2 ", "char3 "], dtype="object"), - "test_varbinary": pd.Series([None, b"1234", b""], dtype="object"), - "test_binary": pd.Series( - [None, b"12\x00\x00\x00", b"\x00\x00\x00\x00\x00"], dtype="object" - ), - "test_nchar": pd.Series(["1234", None, "12 "], dtype="object"), - "test_text": pd.Series(["text", "t", None], dtype="object"), - "test_ntext": pd.Series(["ntext", "nt", None], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - None, - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - ], - dtype="object", - ), - "test_money": pd.Series( - [None, 
922337203685477.5807, -922337203685477.5808], dtype="float" - ), - "test_smallmoney": pd.Series( - [None, 214748.3647, -214748.3648], dtype="float" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_unicode(mssql_url: str) -> None: - query = "SELECT test_hello FROM test_str where 1 <= id and id <= 4" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - index=range(4), - data={ - "test_hello": pd.Series( - ["你好", "こんにちは", "Здра́вствуйте", "😁😂😜"], dtype="object" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_cte(mssql_url: str) -> None: - query = "with test_cte (test_int, test_str) as (select test_int, test_str from test_table where test_float > 0) select test_int, test_str from test_cte" - df = read_sql(mssql_url, query, partition_on="test_int", partition_num=3) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int": pd.Series([0, 2, 3, 4], dtype="int64"), - "test_str": pd.Series(["a", "str2", "b", "c"], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mssql_offset(mssql_url: str) -> None: - query = "SELECT * FROM (SELECT * FROM test_table) AS _ ORDER BY(SELECT NULL) OFFSET 0 ROWS FETCH NEXT 1 ROWS ONLY" - df = read_sql(mssql_url, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([1], dtype="int64"), - "test_nullint": pd.Series([3], dtype="Int64"), - "test_str": pd.Series(["str1"], dtype="object"), - "test_float": pd.Series([None], dtype="float"), - "test_bool": pd.Series([True], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_mysql.py b/connectorx-python/connectorx/tests/test_mysql.py deleted file mode 100644 index 9376bf5..0000000 --- a/connectorx-python/connectorx/tests/test_mysql.py +++ /dev/null @@ -1,470 +0,0 @@ -import os - -import pandas as pd -import 
pytest -from pandas.testing import assert_frame_equal - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def mysql_url() -> str: - conn = os.environ["MYSQL_URL"] - # conn = os.environ["MARIADB_URL"] - return conn - - -def test_mysql_without_partition(mysql_url: str) -> None: - query = "select * from test_table limit 3" - df = read_sql(mysql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3], dtype="float64"), - "test_enum": pd.Series(["odd", "even", "odd"], dtype="object"), - "test_null": pd.Series([None, None, None], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_with_partition(mysql_url: str) -> None: - query = "select * from test_table" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_range=(0, 10), - partition_num=6, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_without_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(mysql_url, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, 
check_names=True) - - -def test_mysql_limit_without_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table limit 3" - df = read_sql(mysql_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3], dtype="float64"), - "test_enum": pd.Series(["odd", "even", "odd"], dtype="object"), - "test_null": pd.Series([None, None, None], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_limit_large_without_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table limit 10" - df = read_sql(mysql_url, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_with_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_limit_with_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table limit 3" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - 
partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3], dtype="float64"), - "test_enum": pd.Series(["odd", "even", "odd"], dtype="object"), - "test_null": pd.Series([None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_limit_large_with_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table limit 10" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_with_partition_without_partition_range(mysql_url: str) -> None: - query = "SELECT * FROM test_table where test_float > 3" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_num=3, - ) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int": pd.Series([3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series(["odd", "even", "odd", "even"], dtype="object"), - "test_null": pd.Series([None, None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_manual_partition(mysql_url: str) -> None: - queries = [ - "SELECT * FROM test_table WHERE test_int < 2", - 
"SELECT * FROM test_table WHERE test_int >= 2", - ] - df = read_sql(mysql_url, query=queries) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 6], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], dtype="float64"), - "test_enum": pd.Series( - ["odd", "even", "odd", "even", "odd", "even"], dtype="object" - ), - "test_null": pd.Series([None, None, None, None, None, None], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_selection_and_projection(mysql_url: str) -> None: - query = "SELECT test_int FROM test_table WHERE test_float < 5" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_num=3, - ) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int": pd.Series([1, 2, 3, 4], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_join(mysql_url: str) -> None: - query = "SELECT T.test_int, T.test_float, S.test_str FROM test_table T INNER JOIN test_table_extra S ON T.test_int = S.test_int" - df = read_sql( - mysql_url, - query, - partition_on="test_int", - partition_num=3, - ) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 3], dtype="Int64"), - "test_float": pd.Series([1.1, 2.2, 3.3], dtype="float64"), - "test_str": pd.Series( - [ - "Ha好ち😁ðy̆", - "こんにちは", - "русский", - ], - dtype="object", - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_aggregate(mysql_url: str) -> None: - query = "select AVG(test_float) as avg_float, SUM(T.test_int) as sum_int, SUM(test_null) as sum_null from test_table as T INNER JOIN test_table_extra as S where T.test_int = S.test_int GROUP BY test_enum ORDER BY sum_int" - df = 
read_sql(mysql_url, query) - expected = pd.DataFrame( - index=range(2), - data={ - "avg_float": pd.Series([2.2, 2.2], dtype="float64"), - "sum_int": pd.Series([2.0, 4.0], dtype="float64"), - "sum_null": pd.Series([None, None], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_types_binary(mysql_url: str) -> None: - query = "select * from test_types" - df = read_sql(mysql_url, query, protocol="binary") - expected = pd.DataFrame( - index=range(3), - data={ - "test_timestamp": pd.Series( - ["1970-01-01 00:00:01", "2038-01-19 00:00:00", None], - dtype="datetime64[ns]", - ), - "test_date": pd.Series( - [None, "1970-01-01", "2038-01-19"], dtype="datetime64[ns]" - ), - "test_time": pd.Series(["00:00:00", None, "23:59:59"], dtype="object"), - "test_datetime": pd.Series( - ["1970-01-01 00:00:01", "2038-01-19 00:0:00", None], - dtype="datetime64[ns]", - ), - "test_new_decimal": pd.Series([1.1, None, 3.3], dtype="float"), - "test_decimal": pd.Series([1, 2, None], dtype="float"), - "test_varchar": pd.Series([None, "varchar2", "varchar3"], dtype="object"), - "test_char": pd.Series(["char1", None, "char3"], dtype="object"), - "test_tiny": pd.Series([-128, 127, None], dtype="Int64"), - "test_short": pd.Series([-32768, 32767, None], dtype="Int64"), - "test_int24": pd.Series([-8388608, 8388607, None], dtype="Int64"), - "test_long": pd.Series([-2147483648, 2147483647, None], dtype="Int64"), - "test_longlong": pd.Series( - [-9223372036854775808, 9223372036854775807, None], dtype="Int64" - ), - "test_tiny_unsigned": pd.Series([None, 255, 0], dtype="Int64"), - "test_short_unsigned": pd.Series([None, 65535, 0], dtype="Int64"), - "test_int24_unsigned": pd.Series([None, 16777215, 0], dtype="Int64"), - "test_long_unsigned": pd.Series([None, 4294967295, 0], dtype="Int64"), - "test_longlong_unsigned": pd.Series( - [None, 18446744070000001024.0, 0.0], dtype="float" - ), - "test_long_notnull": pd.Series([1, 2147483647, -2147483648], 
dtype="int64"), - "test_short_unsigned_notnull": pd.Series([1, 65535, 0], dtype="int64"), - "test_float": pd.Series([None, -1.1e-38, 3.4e38], dtype="float"), - "test_double": pd.Series([-2.2e-308, None, 1.7e308], dtype="float"), - "test_double_notnull": pd.Series([1.2345, -1.1e-3, 1.7e30], dtype="float"), - "test_year": pd.Series([1901, 2155, None], dtype="Int64"), - "test_tinyblob": pd.Series( - [None, b"tinyblob2", b"tinyblob3"], dtype="object" - ), - "test_blob": pd.Series( - [None, b"blobblobblobblob2", b"blobblobblobblob3"], dtype="object" - ), - "test_mediumblob": pd.Series( - [None, b"mediumblob2", b"mediumblob3"], dtype="object" - ), - "test_longblob": pd.Series( - [None, b"longblob2", b"longblob3"], dtype="object" - ), - "test_enum": pd.Series(["apple", None, "mango"], dtype="object"), - "test_json": pd.Series( - ['{"age":1,"name":"piggy"}', '{"age":2,"name":"kitty"}', None], - # mariadb - # [b'{"name": "piggy", "age": 1}', b'{"name": "kitty", "age": 2}', None], - dtype="object", - ), - "test_mediumtext": pd.Series( - [None, b"", b"medium text!!!!"], dtype="object" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_types_text(mysql_url: str) -> None: - query = "select * from test_types" - df = read_sql(mysql_url, query, protocol="text") - expected = pd.DataFrame( - index=range(3), - data={ - "test_timestamp": pd.Series( - ["1970-01-01 00:00:01", "2038-01-19 00:00:00", None], - dtype="datetime64[ns]", - ), - "test_date": pd.Series( - [None, "1970-01-01", "2038-01-19"], dtype="datetime64[ns]" - ), - "test_time": pd.Series(["00:00:00", None, "23:59:59"], dtype="object"), - "test_datetime": pd.Series( - ["1970-01-01 00:00:01", "2038-01-19 00:00:00", None], - dtype="datetime64[ns]", - ), - "test_new_decimal": pd.Series([1.1, None, 3.3], dtype="float"), - "test_decimal": pd.Series([1, 2, None], dtype="float"), - "test_varchar": pd.Series([None, "varchar2", "varchar3"], dtype="object"), - "test_char": pd.Series(["char1", None, 
"char3"], dtype="object"), - "test_tiny": pd.Series([-128, 127, None], dtype="Int64"), - "test_short": pd.Series([-32768, 32767, None], dtype="Int64"), - "test_int24": pd.Series([-8388608, 8388607, None], dtype="Int64"), - "test_long": pd.Series([-2147483648, 2147483647, None], dtype="Int64"), - "test_longlong": pd.Series( - [-9223372036854775808, 9223372036854775807, None], dtype="Int64" - ), - "test_tiny_unsigned": pd.Series([None, 255, 0], dtype="Int64"), - "test_short_unsigned": pd.Series([None, 65535, 0], dtype="Int64"), - "test_int24_unsigned": pd.Series([None, 16777215, 0], dtype="Int64"), - "test_long_unsigned": pd.Series([None, 4294967295, 0], dtype="Int64"), - "test_longlong_unsigned": pd.Series( - [None, 18446744070000001024.0, 0.0], dtype="float" - ), - "test_long_notnull": pd.Series([1, 2147483647, -2147483648], dtype="int64"), - "test_short_unsigned_notnull": pd.Series([1, 65535, 0], dtype="int64"), - "test_float": pd.Series([None, -1.1e-38, 3.4e38], dtype="float"), - "test_double": pd.Series([-2.2e-308, None, 1.7e308], dtype="float"), - "test_double_notnull": pd.Series([1.2345, -1.1e-3, 1.7e30], dtype="float"), - "test_year": pd.Series([1901, 2155, None], dtype="Int64"), - "test_tinyblob": pd.Series( - [None, b"tinyblob2", b"tinyblob3"], dtype="object" - ), - "test_blob": pd.Series( - [None, b"blobblobblobblob2", b"blobblobblobblob3"], dtype="object" - ), - "test_mediumblob": pd.Series( - [None, b"mediumblob2", b"mediumblob3"], dtype="object" - ), - "test_longblob": pd.Series( - [None, b"longblob2", b"longblob3"], dtype="object" - ), - "test_enum": pd.Series(["apple", None, "mango"], dtype="object"), - "test_json": pd.Series( - ['{"age":1,"name":"piggy"}', '{"age":2,"name":"kitty"}', None], - # mariadb - # [b'{"name": "piggy", "age": 1}', b'{"name": "kitty", "age": 2}', None], - dtype="object", - ), - "test_mediumtext": pd.Series( - [None, b"", b"medium text!!!!"], dtype="object" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - 
-def test_empty_result(mysql_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(mysql_url, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="Int64"), - "test_float": pd.Series([], dtype="float64"), - "test_enum": pd.Series([], dtype="object"), - "test_null": pd.Series([], dtype="Int64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(mysql_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="Int64"), - "test_float": pd.Series([], dtype="float64"), - "test_enum": pd.Series([], dtype="object"), - "test_null": pd.Series([], dtype="Int64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_some_partition(mysql_url: str) -> None: - query = "SELECT * FROM test_table where test_int = 6" - df = read_sql(mysql_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - index=range(1), - data={ - "test_int": pd.Series([6], dtype="Int64"), - "test_float": pd.Series([6.6], dtype="float64"), - "test_enum": pd.Series(["even"], dtype="object"), - "test_null": pd.Series([None], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_mysql_cte(mysql_url: str) -> None: - query = "with test_cte (test_int, test_enum) as (select test_int, test_enum from test_table where test_float > 2) select test_int, test_enum from test_cte" - df = read_sql(mysql_url, query, partition_on="test_int", partition_num=3) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([2, 3, 4, 5, 6], dtype="Int64"), - "test_enum": pd.Series( - ["even", "odd", "even", "odd", "even"], dtype="object" - ), - }, - ) - assert_frame_equal(df, 
expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_oracle.py b/connectorx-python/connectorx/tests/test_oracle.py deleted file mode 100644 index 59e489c..0000000 --- a/connectorx-python/connectorx/tests/test_oracle.py +++ /dev/null @@ -1,443 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def oracle_url() -> str: - conn = os.environ["ORACLE_URL"] - return conn - -@pytest.mark.xfail -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_on_non_select(oracle_url: str) -> None: - query = "CREATE TABLE non_select(id INTEGER NOT NULL)" - read_sql(oracle_url, query) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_complex_join(oracle_url: str) -> None: - query = "SELECT a.test_int, b.test_date, c.test_num_int FROM test_table a left join test_types b on a.test_int = b.test_num_int cross join (select test_num_int from test_types) c where c.test_num_int < 3" - df = read_sql(oracle_url, query) - df = df.sort_values("TEST_INT").reset_index(drop=True) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 5, 2333], dtype="Int64"), - "TEST_DATE": pd.Series( - ["2019-05-21", None, None, "2020-05-21", "2020-05-21", None], - dtype="datetime64[ns]", - ), - "TEST_NUM_INT": pd.Series([1, 1, 1, 1, 1, 1], dtype="Int64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_oracle_complex_join(oracle_url: str) -> None: - query = "SELECT a.test_int, b.test_date, c.test_num_int FROM test_table a left join test_types b on a.test_int = b.test_num_int cross join (select test_num_int from test_types) c where c.test_num_int < 3" - df = read_sql(oracle_url, query) - df = df.sort_values("TEST_INT").reset_index(drop=True) - expected = 
pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 5, 2333], dtype="Int64"), - "TEST_DATE": pd.Series( - ["2019-05-21", None, None, "2020-05-21", "2020-05-21", None], - dtype="datetime64[ns]", - ), - "TEST_NUM_INT": pd.Series([1, 1, 1, 1, 1, 1], dtype="Int64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_complex_join(oracle_url: str) -> None: - query = "SELECT a.test_int, b.test_date, c.test_num_int FROM test_table a left join test_types b on a.test_int = b.test_num_int cross join (select test_num_int from test_types) c where c.test_num_int < 3" - df = read_sql(oracle_url, query) - df = df.sort_values("TEST_INT").reset_index(drop=True) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 5, 2333], dtype="Int64"), - "TEST_DATE": pd.Series( - ["2019-05-21", None, None, "2020-05-21", "2020-05-21", None], - dtype="datetime64[ns]", - ), - "TEST_NUM_INT": pd.Series([1, 1, 1, 1, 1, 1], dtype="Int64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_aggregation(oracle_url: str) -> None: - query = "select avg(test_int), test_char from test_table group by test_char" - df = read_sql(oracle_url, query) - df = df.sort_values("AVG(TEST_INT)").reset_index(drop=True) - expected = pd.DataFrame( - data={ - "AVG(TEST_INT)": pd.Series([1, 2, 5, 1168.5], dtype="float64"), - "TEST_CHAR": pd.Series(["str1 ", "str2 ", "str05", None], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_partition_on_aggregation(oracle_url: str) -> None: - query = "select sum(test_int) cid, test_char from test_table group by 
test_char" - df = read_sql(oracle_url, query, partition_on="cid", partition_num=3) - df = df.sort_values("CID").reset_index(drop=True) - expected = pd.DataFrame( - index=range(4), - data={ - "CID": pd.Series([1, 2, 5, 2337], dtype="float64"), - "TEST_CHAR": pd.Series(["str1 ", "str2 ", "str05", None], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_aggregation2(oracle_url: str) -> None: - query = "select DISTINCT(test_char) from test_table" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_CHAR": pd.Series(["str05", "str1 ", "str2 ", None], dtype="object"), - }, - ) - df.sort_values(by="TEST_CHAR", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_partition_on_aggregation2(oracle_url: str) -> None: - query = "select MAX(test_int) MAX, MIN(test_int) MIN from test_table" - df = read_sql(oracle_url, query, partition_on="MAX", partition_num=2) - expected = pd.DataFrame( - index=range(1), - data={ - "MAX": pd.Series([2333], dtype="float64"), - "MIN": pd.Series([1], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_manual_partition(oracle_url: str) -> None: - queries = [ - "SELECT * FROM test_table WHERE test_int < 2", - "SELECT * FROM test_table WHERE test_int >= 2", - ] - df = read_sql(oracle_url, query=queries) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "TEST_CHAR": pd.Series( - ["str1 ", "str2 ", None, "str05", None], dtype="object" - ), - "TEST_FLOAT": pd.Series([1.1, 2.2, -4.44, None, None], 
dtype="float64"), - }, - ) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_without_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 2333, 4, 5], dtype="Int64"), - "TEST_CHAR": pd.Series( - ["str1 ", "str2 ", None, None, "str05"], dtype="object" - ), - "TEST_FLOAT": pd.Series([1.1, 2.2, None, -4.44, None], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_limit_without_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table where rownum <= 3" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 2333], dtype="Int64"), - "TEST_CHAR": pd.Series(["str1 ", "str2 ", None], dtype="object"), - "TEST_FLOAT": pd.Series([1.1, 2.2, None], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_limit_large_without_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table where rownum < 10" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 2333, 4, 5], dtype="Int64"), - "TEST_CHAR": pd.Series( - ["str1 ", "str2 ", None, None, "str05"], dtype="object" - ), - "TEST_FLOAT": pd.Series([1.1, 2.2, None, -4.44, None], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def 
test_oracle_with_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - oracle_url, - query, - partition_on="test_int", - partition_range=(0, 5001), - partition_num=3, - ) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "TEST_CHAR": pd.Series( - ["str1 ", "str2 ", None, "str05", None], dtype="object" - ), - "TEST_FLOAT": pd.Series([1.1, 2.2, -4.44, None, None], dtype="float64"), - }, - ) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_with_partition_without_partition_range(oracle_url: str) -> None: - query = "SELECT * FROM test_table where test_float > 1" - df = read_sql( - oracle_url, - query, - partition_on="test_int", - partition_num=3, - ) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2], dtype="Int64"), - "TEST_CHAR": pd.Series(["str1 ", "str2 "], dtype="object"), - "TEST_FLOAT": pd.Series([1.1, 2.2], dtype="float64"), - }, - ) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_with_partition_and_selection(oracle_url: str) -> None: - query = "SELECT * FROM test_table WHERE 1 = 3 OR 2 = 2" - df = read_sql( - oracle_url, - query, - partition_on="test_int", - partition_range=(1, 2333), - partition_num=3, - ) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1, 2, 4, 5, 2333], dtype="Int64"), - "TEST_CHAR": pd.Series( - ["str1 ", "str2 ", None, "str05", None], dtype="object" - ), - "TEST_FLOAT": pd.Series([1.1, 2.2, -4.44, None, None], dtype="float64"), - }, - ) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - assert_frame_equal(df, 
expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_with_partition_and_spja(oracle_url: str) -> None: - query = "select test_table.test_int cid, SUM(test_types.test_num_float) sfloat from test_table, test_types where test_table.test_int=test_types.test_num_int group by test_table.test_int" - df = read_sql(oracle_url, query, partition_on="cid", partition_num=2) - expected = pd.DataFrame( - data={ - "CID": pd.Series([1, 5], dtype="Int64"), - "SFLOAT": pd.Series([2.3, -0.2], dtype="float64"), - }, - ) - df.sort_values(by="CID", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_types(oracle_url: str) -> None: - query = "SELECT * FROM test_types" - df = read_sql(oracle_url, query) - print(df) - expected = pd.DataFrame( - data={ - "TEST_NUM_INT": pd.Series([1, 5, 5, None], dtype="Int64"), - "TEST_INT": pd.Series([-10, 22, 22, 100], dtype="Int64"), - "TEST_NUM_FLOAT": pd.Series([2.3, -0.1, -0.1, None], dtype="float64"), - "TEST_FLOAT": pd.Series([2.34, 123.455, 123.455, None], dtype="float64"), - "TEST_BINARY_FLOAT": pd.Series( - [-3.456, 3.1415926535, 3.1415926535, None], dtype="float64" - ), - "TEST_BINARY_DOUBLE": pd.Series( - [9999.99991, -111111.2345, -111111.2345, None], dtype="float64" - ), - "TEST_CHAR": pd.Series(["char1", "char2", "char2", None], dtype="object"), - "TEST_VARCHAR": pd.Series( - ["varchar1", "varchar222", "varchar222", None], dtype="object" - ), - "TEST_NCHAR": pd.Series( - ["y123 ", "aab123", "aab123", None], dtype="object" - ), - "TEST_NVARCHAR": pd.Series( - ["aK>?KJ@#$%", ")>KDS)(F*&%J", ")>KDS)(F*&%J", None], dtype="object" - ), - "TEST_DATE": pd.Series( - ["2019-05-21", "2020-05-21", "2020-05-21", None], dtype="datetime64[ns]" - ), - "TEST_TIMESTAMP": pd.Series( - [ - 
"2019-05-21 01:02:33", - "2020-05-21 01:02:33", - "2020-05-21 01:02:33", - None, - ], - dtype="datetime64[ns]", - ), - "TEST_TIMESTAMPTZ": pd.Series( - [ - "1999-12-01 11:00:00", - "1899-12-01 11:00:00", - "1899-12-01 11:00:00", - None, - ], - dtype="datetime64[ns]", - ), - "TEST_CLOB": pd.Series( - ["13ab", "13ab", "13ab", None], dtype="object" - ), - "TEST_BLOB": pd.Series( - [ b'9\xaf', b'9\xaf', b'9\xaf', None], dtype="object" - ), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_empty_result(oracle_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([], dtype="Int64"), - "TEST_CHAR": pd.Series([], dtype="object"), - "TEST_FLOAT": pd.Series([], dtype="float64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_empty_result_on_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(oracle_url, query, partition_on="test_int", partition_num=3) - print(df) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([], dtype="Int64"), - "TEST_CHAR": pd.Series([], dtype="object"), - "TEST_FLOAT": pd.Series([], dtype="float64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_empty_result_on_some_partition(oracle_url: str) -> None: - query = "SELECT * FROM test_table where test_int < 2" - df = read_sql(oracle_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "TEST_INT": pd.Series([1], dtype="Int64"), - "TEST_CHAR": 
pd.Series(["str1 "], dtype="object"), - "TEST_FLOAT": pd.Series([1.1], dtype="float64"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_cte(oracle_url: str) -> None: - query = "with test_cte (test_int, test_str) as (select test_int, test_char from test_table where test_float > 0) select test_int, test_str from test_cte" - df = read_sql(oracle_url, query, partition_on="test_int", partition_num=3) - df.sort_values(by="TEST_INT", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(2), - data={ - "TEST_INT": pd.Series([1, 2], dtype="Int64"), - "TEST_STR": pd.Series(["str1 ", "str2 "], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - -@pytest.mark.skipif( - not os.environ.get("ORACLE_URL"), reason="Test oracle only when `ORACLE_URL` is set" -) -def test_oracle_round_function(oracle_url: str) -> None: - query = "SELECT round(v,2) TEST_ROUND FROM test_issue" - df = read_sql(oracle_url, query) - expected = pd.DataFrame( - data={ - "TEST_ROUND": pd.Series([1.11, 2.22, 3.33, None], dtype="float64"), - } - ) - assert_frame_equal(df, expected, check_names=True) \ No newline at end of file diff --git a/connectorx-python/connectorx/tests/test_partition.py b/connectorx-python/connectorx/tests/test_partition.py deleted file mode 100644 index 27e5638..0000000 --- a/connectorx-python/connectorx/tests/test_partition.py +++ /dev/null @@ -1,21 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import partition_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -def test_partition_sql(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - queires = partition_sql( - postgres_url, query, partition_on="test_int", partition_num=2 - ) - assert len(queires) == 2 diff --git a/connectorx-python/connectorx/tests/test_polars.py b/connectorx-python/connectorx/tests/test_polars.py deleted file mode 100644 index f748fcb..0000000 --- a/connectorx-python/connectorx/tests/test_polars.py +++ /dev/null @@ -1,38 +0,0 @@ -import os - -import pandas as pd -import pytest -import polars as pl - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -def test_modin(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - return_type="polars", - ) - - expected = pl.DataFrame( - { - "test_int": [0, 1, 2, 3, 4, 1314], - "test_nullint": [5, 3, None, 7, 9, 2], - "test_str": ["a", "str1", "str2", "b", "c", None], - "test_float": [3.1, None, 2.2, 3, 7.8, -10], - "test_bool": [None, True, False, False, None, True], - }, - ) - - df = df.sort('test_int') - assert df.frame_equal(expected, null_equal=True) diff --git a/connectorx-python/connectorx/tests/test_postgres.py b/connectorx-python/connectorx/tests/test_postgres.py deleted file mode 100644 index 4f636fb..0000000 --- a/connectorx-python/connectorx/tests/test_postgres.py +++ /dev/null @@ -1,1141 +0,0 @@ -import os - -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url() -> str: - conn = os.environ["POSTGRES_URL"] - return conn - - -@pytest.fixture(scope="module") # type: ignore -def postgres_url_tls() -> str: - conn = os.environ["POSTGRES_URL_TLS"] - return conn - - -@pytest.fixture(scope="module") # type: ignore -def postgres_rootcert() -> str: - cert = os.environ["POSTGRES_ROOTCERT"] - return cert - - -@pytest.fixture(scope="module") # type: ignore -def postgres_sslcert() -> str: - cert = os.environ["POSTGRES_SSLCERT"] - return cert - - -@pytest.fixture(scope="module") # type: ignore -def postgres_sslkey() -> str: - key = os.environ["POSTGRES_SSLKEY"] - return key - - -@pytest.mark.xfail -def test_on_non_select(postgres_url: str) -> None: - query = "CREATE TABLE non_select(id INTEGER NOT NULL)" - df = read_sql(postgres_url, query) - - -def test_aggregation(postgres_url: str) -> None: - query = "SELECT test_bool, SUM(test_float) FROM test_table GROUP BY test_bool" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "sum": pd.Series([10.9, 5.2, -10.0], dtype="float64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_aggregation(postgres_url: str) -> None: - query = ( - "SELECT test_bool, SUM(test_int) AS test_int FROM test_table GROUP BY test_bool" - ) - df = read_sql(postgres_url, query, partition_on="test_int", partition_num=2) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - "test_int": pd.Series([4, 5, 1315], dtype="Int64"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_aggregation2(postgres_url: str) -> None: - query = "select DISTINCT(test_bool) from test_table" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - 
index=range(3), - data={ - "test_bool": pd.Series([None, False, True], dtype="boolean"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_aggregation2(postgres_url: str) -> None: - query = "select MAX(test_int), MIN(test_int) from test_table" - df = read_sql(postgres_url, query, partition_on="max", partition_num=2) - expected = pd.DataFrame( - index=range(1), - data={ - "max": pd.Series([1314], dtype="Int64"), - "min": pd.Series([0], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_udf(postgres_url: str) -> None: - query = "select increment(test_int) as test_int from test_table ORDER BY test_int" - df = read_sql(postgres_url, query, partition_on="test_int", partition_num=2) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 3, 4, 5, 1315], dtype="Int64"), - }, - ) - df = df.sort_values("test_int").reset_index(drop=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_manual_partition(postgres_url: str) -> None: - - queries = [ - "SELECT * FROM test_table WHERE test_int < 2", - "SELECT * FROM test_table WHERE test_int >= 2", - ] - - df = read_sql(postgres_url, query=queries) - - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_without_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - index=range(6), - data={ - 
"test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "a", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_limit(postgres_url: str) -> None: - query = "SELECT * FROM test_table limit 3" - df = read_sql( - postgres_url, - query, - ) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([0, 1, 2], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None], dtype="Int64"), - "test_str": pd.Series(["a", "str1", "str2"], dtype="object"), - "test_float": pd.Series([3.1, None, 2.2], dtype="float64"), - "test_bool": pd.Series([None, True, False], dtype="boolean"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_limit_large(postgres_url: str) -> None: - query = "SELECT * FROM test_table limit 10" - df = read_sql( - postgres_url, - query, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_limit_with_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table limit 3" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), 
- partition_num=3, - ) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([0, 1, 2], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None], dtype="Int64"), - "test_str": pd.Series(["a", "str1", "str2"], dtype="object"), - "test_float": pd.Series([3.1, None, 2.2], dtype="float64"), - "test_bool": pd.Series([None, True, False], dtype="boolean"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_limit_large_with_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table limit 10" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - 
df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition_without_partition_range(postgres_url: str) -> None: - query = "SELECT * FROM test_table where test_float > 3" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_num=3, - ) - - expected = pd.DataFrame( - index=range(2), - data={ - "test_int": pd.Series([0, 4], dtype="Int64"), - "test_nullint": pd.Series([5, 9], dtype="Int64"), - "test_str": pd.Series(["a", "c"], dtype="object"), - "test_float": pd.Series([3.1, 7.8], dtype="float64"), - "test_bool": pd.Series([None, None], dtype="boolean"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition_and_selection(postgres_url: str) -> None: - query = "SELECT * FROM test_table WHERE 1 = 3 OR 2 = 2" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition_and_projection(postgres_url: str) -> None: - query = "SELECT test_int, test_nullint, test_str FROM test_table" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], 
dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition_and_join(postgres_url: str) -> None: - query = "SELECT T.test_int, T.test_bool, S.test_language FROM test_table T INNER JOIN test_str S ON T.test_int = S.id" - df = read_sql( - postgres_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(5), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4], dtype="Int64"), - "test_bool": pd.Series([None, True, False, False, None], dtype="boolean"), - "test_language": pd.Series( - ["English", "中文", "日本語", "русский", "Emoji"], dtype="object" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_partition_and_spja(postgres_url: str) -> None: - query = "select test_bool, AVG(test_float) as avg, SUM(test_int) as sum from test_table as a, test_str as b where a.test_int = b.id AND test_nullint is not NULL GROUP BY test_bool ORDER BY sum" - df = read_sql(postgres_url, query, partition_on="sum", partition_num=2) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([True, False, None], dtype="boolean"), - "avg": pd.Series([None, 3, 5.45], dtype="float64"), - "sum": pd.Series([1, 3, 4], dtype="Int64"), - }, - ) - df.sort_values(by="sum", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_on_utf8(postgres_url: str) -> None: - query = "SELECT * FROM test_str" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - index=range(9), - data={ - "id": pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype="Int64"), - "test_language": pd.Series( - [ - 
"English", - "中文", - "日本語", - "русский", - "Emoji", - "Latin1", - "Extra", - "Mixed", - "", - ], - dtype="object", - ), - "test_hello": pd.Series( - [ - "Hello", - "你好", - "こんにちは", - "Здра́вствуйте", - "😁😂😜", - "¥§¤®ð", - "y̆", - "Ha好ち😁ðy̆", - None, - ], - dtype="object", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_with_index_col(postgres_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(postgres_url, query, index_col="test_int") - expected = pd.DataFrame( - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "a", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - }, - ) - expected.set_index("test_int", inplace=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_types_binary(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree, test_lquery, test_ltxtquery FROM test_types" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - ["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ns]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 16:00:10", - "2038-01-18 15:59:59", - None, - ], - dtype="datetime64[ns]", - ), - 
"test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="Int64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float64" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], dtype="object"), - "test_char": pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - ["08:12:40", None, "23:00:10", "18:30:00"], dtype="object" - ), - "test_json": pd.Series( - [ - '{"customer":"John Doe","items":{"product":"Beer","qty":6}}', - '{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}', - '{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}', - None, - ], - dtype="object", - ), - "test_jsonb": pd.Series( - [ - '{"product":"Beer","qty":6}', - '{"product":"Diaper","qty":24}', - '{"product":"Toy Car","qty":1}', - None, - ], - dtype="object", - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [-1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_boolarray": pd.Series( - [[True, False], [], [True], None], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 
32767], None], dtype="object" - ), - "test_i4array": pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - "test_citext": pd.Series(["str_citext", "", "s", None], dtype="object"), - "test_ltree": pd.Series(["A.B.C.D", "A.B.E", "A", None], dtype="object"), - "test_lquery": pd.Series(["*.B.*", "A.*", "*", None], dtype="object"), - "test_ltxtquery": pd.Series( - ["A & B*", "A | B", "A@", None], dtype="object" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_types_csv(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types" - df = read_sql(postgres_url, query, protocol="csv") - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - ["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ns]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 16:00:10", - "2038-01-18 15:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="Int64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float64" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], dtype="object"), - "test_char": 
pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - ["08:12:40", None, "23:00:10", "18:30:00"], dtype="object" - ), - "test_json": pd.Series( - [ - '{"customer":"John Doe","items":{"product":"Beer","qty":6}}', - '{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}', - '{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}', - None, - ], - dtype="object", - ), - "test_jsonb": pd.Series( - [ - '{"product":"Beer","qty":6}', - '{"product":"Diaper","qty":24}', - '{"product":"Toy Car","qty":1}', - None, - ], - dtype="object", - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_boolarray": pd.Series( - [[True, False], [], [True], None], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 32767], None], dtype="object" - ), - "test_i4array": pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - "test_citext": pd.Series(["str_citext", None, "s", None], dtype="object"), - "test_ltree": pd.Series(["A.B.C.D", "A.B.E", "A", None], dtype="object"), - }, 
- ) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_types_cursor(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_json, test_jsonb, test_bytea, test_enum::text, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array, test_citext, test_ltree FROM test_types" - df = read_sql(postgres_url, query, protocol="cursor") - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - ["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ns]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 16:00:10", - "2038-01-18 15:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="Int64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float64" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], dtype="object"), - "test_char": pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - ["08:12:40", None, "23:00:10", "18:30:00"], dtype="object" - ), - "test_json": pd.Series( - [ - '{"customer":"John Doe","items":{"product":"Beer","qty":6}}', - '{"customer":"Lily Bush","items":{"product":"Diaper","qty":24}}', - 
'{"customer":"Josh William","items":{"product":"Toy Car","qty":1}}', - None, - ], - dtype="object", - ), - "test_jsonb": pd.Series( - [ - '{"product":"Beer","qty":6}', - '{"product":"Diaper","qty":24}', - '{"product":"Toy Car","qty":1}', - None, - ], - dtype="object", - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_boolarray": pd.Series( - [[True, False], [], [True], None], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 32767], None], dtype="object" - ), - "test_i4array": pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - "test_citext": pd.Series(["str_citext", "", "s", None], dtype="object"), - "test_ltree": pd.Series(["A.B.C.D", "A.B.E", "A", None], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_postgres_types_simple(postgres_url: str) -> None: - query = "SELECT test_date, test_timestamp, test_timestamptz, test_int16, test_int64, test_float32, test_numeric, test_bpchar, test_char, test_varchar, test_uuid, test_time, test_bytea, test_enum, test_f4array, test_f8array, test_narray, test_boolarray, test_i2array, test_i4array, test_i8array FROM test_types" - df = read_sql(postgres_url, query, protocol="simple") - expected = pd.DataFrame( - index=range(4), - data={ - "test_date": pd.Series( - 
["1970-01-01", "2000-02-28", "2038-01-18", None], dtype="datetime64[ns]" - ), - "test_timestamp": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 12:00:10", - "2038-01-18 23:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_timestamptz": pd.Series( - [ - "1970-01-01 00:00:01", - "2000-02-28 16:00:10", - "2038-01-18 15:59:59", - None, - ], - dtype="datetime64[ns]", - ), - "test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "test_int64": pd.Series( - [-9223372036854775808, 0, 9223372036854775807, None], dtype="Int64" - ), - "test_float32": pd.Series( - [None, 3.1415926535, 2.71, -1e-37], dtype="float64" - ), - "test_numeric": pd.Series([None, 521.34, 0.00, 0.00], dtype="float64"), - "test_bpchar": pd.Series(["a ", "bb ", "ccc ", None], dtype="object"), - "test_char": pd.Series(["a", "b", None, "d"], dtype="object"), - "test_varchar": pd.Series([None, "bb", "c", "defghijklm"], dtype="object"), - "test_uuid": pd.Series( - [ - "86b494cc-96b2-11eb-9298-3e22fbb9fe9d", - "86b49b84-96b2-11eb-9298-3e22fbb9fe9d", - "86b49c42-96b2-11eb-9298-3e22fbb9fe9d", - None, - ], - dtype="object", - ), - "test_time": pd.Series( - ["08:12:40", None, "23:00:10", "18:30:00"], dtype="object" - ), - "test_bytea": pd.Series( - [ - None, - b"\xd0\x97\xd0\xb4\xd1\x80\xd0\xb0\xcc\x81\xd0\xb2\xd1\x81\xd1\x82\xd0\xb2\xd1\x83\xd0\xb9\xd1\x82\xd0\xb5", - b"", - b"\xf0\x9f\x98\x9c", - ], - dtype="object", - ), - "test_enum": pd.Series( - ["happy", "very happy", "ecstatic", None], dtype="object" - ), - "test_f4array": pd.Series( - [[], None, [123.123], [-1e-37, 1e37]], dtype="object" - ), - "test_f8array": pd.Series( - [[], None, [1e-307, 1e308], [0.000234, -12.987654321]], dtype="object" - ), - "test_narray": pd.Series( - [[], None, [521.34], [0.12, 333.33, 22.22]], dtype="object" - ), - "test_boolarray": pd.Series( - [[True, False], [], [True], None], dtype="object" - ), - "test_i2array": pd.Series( - [[-1, 0, 1], [], [-32768, 32767], None], dtype="object" - ), - "test_i4array": 
pd.Series( - [[-1, 0, 1123], [], [-2147483648, 2147483647], None], dtype="object" - ), - "test_i8array": pd.Series( - [[-9223372036854775808, 9223372036854775807], [], [0], None], - dtype="object", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result(postgres_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="Int64"), - "test_nullint": pd.Series([], dtype="Int64"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="float64"), - "test_bool": pd.Series([], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(postgres_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="Int64"), - "test_nullint": pd.Series([], dtype="Int64"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="float64"), - "test_bool": pd.Series([], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_some_partition(postgres_url: str) -> None: - query = "SELECT * FROM test_table where test_int < 1" - df = read_sql(postgres_url, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([0], dtype="Int64"), - "test_nullint": pd.Series([5], dtype="Int64"), - "test_str": pd.Series(["a"], dtype="object"), - "test_float": pd.Series([3.1], dtype="float64"), - "test_bool": pd.Series([None], dtype="boolean"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_posix_regex(postgres_url: str) -> None: - query = "select test_int, case when test_str ~* 'str.*' then 'convert_str' end as converted_str from test_table" - 
df = read_sql(postgres_url, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="Int64"), - "converted_str": pd.Series( - ["convert_str", "convert_str", None, None, None, None], dtype="object" - ), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_json(postgres_url: str) -> None: - query = "select test_json->>'customer' as customer from test_types" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - data={ - "customer": pd.Series( - ["John Doe", "Lily Bush", "Josh William", None], dtype="object" - ), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_json(postgres_url: str) -> None: - query = "select test_int16, test_jsonb->>'qty' as qty from test_types" - df = read_sql(postgres_url, query, partition_on="test_int16", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "qty": pd.Series(["6", "24", "1", None], dtype="object"), - } - ) - df.sort_values(by="test_int16", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_cte(postgres_url: str) -> None: - query = "with test_cte (test_int, test_str) as (select test_int, test_str from test_table where test_float > 0) select test_int, test_str from test_cte" - df = read_sql(postgres_url, query, partition_on="test_int", partition_num=3) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int": pd.Series([0, 2, 3, 4], dtype="Int64"), - "test_str": pd.Series(["a", "str2", "b", "c"], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("POSTGRES_URL_TLS"), - reason="Do not test Postgres TLS unless `POSTGRES_URL_TLS` is set", -) -def test_postgres_tls(postgres_url_tls: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - 
f"{postgres_url_tls}?sslmode=require", - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_partition_on_decimal(postgres_url: str) -> None: - # partition column can not have None - query = "SELECT * FROM test_table where test_int<>1" - df = read_sql(postgres_url, query, partition_on="test_float", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([0, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series(["a", "str2", "b", "c", None], dtype="object"), - "test_float": pd.Series([3.1, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series([None, False, False, None, True], dtype="boolean"), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("POSTGRES_URL_TLS"), - reason="Do not test Postgres TLS unless `POSTGRES_URL_TLS` is set", -) -def test_postgres_tls_with_cert(postgres_url_tls: str, postgres_rootcert: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - f"{postgres_url_tls}?sslmode=require&sslrootcert={postgres_rootcert}", - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - 
"test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("POSTGRES_URL_TLS"), - reason="Do not test Postgres TLS unless `POSTGRES_URL_TLS` is set", -) -def test_postgres_tls_client_auth( - postgres_url_tls: str, - postgres_rootcert: str, - postgres_sslcert: str, - postgres_sslkey: str, -) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - f"{postgres_url_tls}?sslmode=require&sslrootcert={postgres_rootcert}&sslcert={postgres_sslcert}&sslkey={postgres_sslkey}", - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("POSTGRES_URL_TLS"), - reason="Do not test Postgres TLS unless `POSTGRES_URL_TLS` is set", -) -def test_postgres_tls_disable(postgres_url_tls: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - f"{postgres_url_tls}?sslmode=disable", - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ 
- "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif( - not os.environ.get("POSTGRES_URL_TLS"), - reason="Do not test Postgres TLS unless `POSTGRES_URL_TLS` is set", -) -@pytest.mark.xfail -def test_postgres_tls_fail(postgres_url_tls: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - f"{postgres_url_tls}?sslmode=require&sslrootcert=fake.cert", - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - ) - -def test_postgres_name_type(postgres_url: str) -> None: - # partition column can not have None - query = "SELECT test_name FROM test_types" - df = read_sql(postgres_url, query) - expected = pd.DataFrame( - data={ - "test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]), - }, - ) - assert_frame_equal(df, expected, check_names=True) \ No newline at end of file diff --git a/connectorx-python/connectorx/tests/test_redshift.py b/connectorx-python/connectorx/tests/test_redshift.py deleted file mode 100644 index 7c17f41..0000000 --- a/connectorx-python/connectorx/tests/test_redshift.py +++ /dev/null @@ -1,136 +0,0 @@ -import os - -import numpy as np -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. 
import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def redshift_url() -> str: - conn = os.environ["REDSHIFT_URL"] - return conn - - -@pytest.mark.skipif(not os.environ.get("REDSHIFT_URL"), reason="Do not test Redshift unless `REDSHIFT_URL` is set") -def test_redshift_without_partition(redshift_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql(redshift_url, query, protocol="cursor") - # result from redshift might have different order each time - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif(not os.environ.get("REDSHIFT_URL"), reason="Do not test Redshift unless `REDSHIFT_URL` is set") -def test_redshift_with_partition(redshift_url: str) -> None: - query = "SELECT * FROM test_table" - df = read_sql( - redshift_url, - query, - partition_on="test_int", - partition_range=(0, 2000), - partition_num=3, - protocol="cursor" - ) - # result from redshift might have different order each time - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["a", "str1", "str2", "b", "c", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - }, - ) - 
assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif(not os.environ.get("REDSHIFT_URL"), reason="Do not test Redshift unless `REDSHIFT_URL` is set") -def test_redshift_types(redshift_url: str) -> None: - query = "SELECT test_int16, test_char, test_time, test_datetime FROM test_types" - df = read_sql(redshift_url, query, protocol="cursor") - # result from redshift might have different order each time - df.sort_values(by="test_int16", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int16": pd.Series([0, 1, 2, 3], dtype="Int64"), - "test_char": pd.Series(["a", "b", "c", "d"], dtype="object"), - "test_time": pd.Series( - ["08:12:40", "10:03:00", "23:00:10", "18:30:00"], dtype="object" - ), - "test_datetime": pd.Series( - [ - np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - None, - np.datetime64("1987-01-01T11:00:00"), - ], dtype="datetime64[ns]" - ), - - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -@pytest.mark.skipif(not os.environ.get("REDSHIFT_URL"), reason="Do not test Redshift unless `REDSHIFT_URL` is set") -def test_read_sql_on_utf8(redshift_url: str) -> None: - query = "SELECT * FROM test_str" - df = read_sql(redshift_url, query, protocol="cursor") - # result from redshift might have different order each time - df.sort_values(by="id", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(8), - data={ - "id": pd.Series([0, 1, 2, 3, 4, 5, 6, 7], dtype="Int64"), - "test_language": pd.Series( - [ - "English", - "中文", - "日本語", - "русский", - "Emoji", - "Latin1", - "Extra", - "Mixed", - ], - dtype="object", - ), - "test_hello": pd.Series( - [ - "Hello", - "你好", - "こんにちは", - "Здра́вствуйте", - "😁😂😜", - "¥§¤®ð", - "y̆", - "Ha好ち😁ðy̆", - ], - dtype="object", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/connectorx/tests/test_sqlite.py 
b/connectorx-python/connectorx/tests/test_sqlite.py deleted file mode 100644 index f0e8a99..0000000 --- a/connectorx-python/connectorx/tests/test_sqlite.py +++ /dev/null @@ -1,393 +0,0 @@ -import os - -import numpy as np -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from .. import read_sql - - -@pytest.fixture(scope="module") # type: ignore -def sqlite_db() -> str: - conn = os.environ["SQLITE_URL"] - return conn - - -def test_sqlite_without_partition(sqlite_db: str) -> None: - query = "SELECT test_int, test_nullint, test_str, test_float, test_bool, test_date, test_time, test_datetime FROM test_table" - df = read_sql(sqlite_db, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "こんにちは", "b", "Ha好ち😁ðy̆", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - "test_date": pd.Series( - [ - np.datetime64("1996-03-13"), - np.datetime64("1996-01-30"), - np.datetime64("1996-02-28"), - np.datetime64("2020-01-12"), - np.datetime64("1996-04-20"), - None, - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series( - [ - "08:12:40", - "10:03:00", - "23:00:10", - "23:00:10", - "18:30:00", - "18:30:00", - ], - dtype="object", - ), - "test_datetime": pd.Series( - [ - np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - None, - np.datetime64("1987-01-01T11:00:00"), - None, - np.datetime64("2007-10-01T10:32:00"), - ], - dtype="datetime64[ns]", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_limit_without_partition(sqlite_db: str) -> None: - query = "SELECT test_int, test_nullint, test_str, test_float, test_bool, test_date, test_time, test_datetime FROM 
test_table limit 3" - df = read_sql(sqlite_db, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_int": pd.Series([1, 2, 0], dtype="Int64"), - "test_nullint": pd.Series([3, None, 5], dtype="Int64"), - "test_str": pd.Series(["str1", "str2", "こんにちは"], dtype="object"), - "test_float": pd.Series([None, 2.2, 3.1], dtype="float64"), - "test_bool": pd.Series([True, False, None], dtype="boolean"), - "test_date": pd.Series( - [ - np.datetime64("1996-03-13"), - np.datetime64("1996-01-30"), - np.datetime64("1996-02-28"), - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series( - ["08:12:40", "10:03:00", "23:00:10"], dtype="object" - ), - "test_datetime": pd.Series( - [ - np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - None, - ], - dtype="datetime64[ns]", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_limit_large_without_partition(sqlite_db: str) -> None: - query = "SELECT test_int, test_nullint, test_str, test_float, test_bool, test_date, test_time, test_datetime FROM test_table limit 10" - df = read_sql(sqlite_db, query) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([1, 2, 0, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([3, None, 5, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["str1", "str2", "こんにちは", "b", "Ha好ち😁ðy̆", None], dtype="object" - ), - "test_float": pd.Series([None, 2.2, 3.1, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [True, False, None, False, None, True], dtype="boolean" - ), - "test_date": pd.Series( - [ - np.datetime64("1996-03-13"), - np.datetime64("1996-01-30"), - np.datetime64("1996-02-28"), - np.datetime64("2020-01-12"), - np.datetime64("1996-04-20"), - None, - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series( - [ - "08:12:40", - "10:03:00", - "23:00:10", - "23:00:10", - "18:30:00", - "18:30:00", - ], - dtype="object", - ), - "test_datetime": pd.Series( - [ - 
np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - None, - np.datetime64("1987-01-01T11:00:00"), - None, - np.datetime64("2007-10-01T10:32:00"), - ], - dtype="datetime64[ns]", - ), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_with_partition(sqlite_db: str) -> None: - query = "SELECT test_int, test_nullint, test_str, test_float, test_bool, test_date, test_time, test_datetime FROM test_table" - df = read_sql( - sqlite_db, - query, - partition_on="test_int", - partition_num=3, - ) - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["こんにちは", "str1", "str2", "b", "Ha好ち😁ðy̆", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - "test_date": pd.Series( - [ - np.datetime64("1996-02-28"), - np.datetime64("1996-03-13"), - np.datetime64("1996-01-30"), - np.datetime64("2020-01-12"), - np.datetime64("1996-04-20"), - None, - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series( - [ - "23:00:10", - "08:12:40", - "10:03:00", - "23:00:10", - "18:30:00", - "18:30:00", - ], - dtype="object", - ), - "test_datetime": pd.Series( - [ - None, - np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - np.datetime64("1987-01-01T11:00:00"), - None, - np.datetime64("2007-10-01T10:32:00"), - ], - dtype="datetime64[ns]", - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_manual_partition(sqlite_db: str) -> None: - - queries = [ - "SELECT test_int, test_nullint, test_str, test_float, test_bool, test_date, test_time, test_datetime FROM test_table WHERE test_int < 2", - "SELECT test_int, test_nullint, test_str, 
test_float, test_bool, test_date, test_time, test_datetime FROM test_table WHERE test_int >= 2", - ] - - df = read_sql(sqlite_db, query=queries) - - expected = pd.DataFrame( - index=range(6), - data={ - "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), - "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), - "test_str": pd.Series( - ["こんにちは", "str1", "str2", "b", "Ha好ち😁ðy̆", None], dtype="object" - ), - "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), - "test_bool": pd.Series( - [None, True, False, False, None, True], dtype="boolean" - ), - "test_date": pd.Series( - [ - np.datetime64("1996-02-28"), - np.datetime64("1996-03-13"), - np.datetime64("1996-01-30"), - np.datetime64("2020-01-12"), - np.datetime64("1996-04-20"), - None, - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series( - [ - "23:00:10", - "08:12:40", - "10:03:00", - "23:00:10", - "18:30:00", - "18:30:00", - ], - dtype="object", - ), - "test_datetime": pd.Series( - [ - None, - np.datetime64("2007-01-01T10:00:19"), - np.datetime64("2005-01-01T22:03:00"), - np.datetime64("1987-01-01T11:00:00"), - None, - np.datetime64("2007-10-01T10:32:00"), - ], - dtype="datetime64[ns]", - ), - }, - ) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_without_partition_and_spa(sqlite_db: str) -> None: - query = """ - SELECT test_bool, AVG(test_float) AS avg, SUM(test_int) AS sum - FROM test_table - WHERE test_nullint IS NOT NULL - GROUP BY test_bool - ORDER BY sum - """ - df = read_sql(sqlite_db, query) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([False, None, True], dtype="boolean"), - "avg": pd.Series([3.00, 5.45, -10.00], dtype="float64"), - "sum": pd.Series([3, 4, 1315], dtype="Int64"), - }, - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_with_partition_and_spa(sqlite_db: str) -> None: - query = """ - SELECT 
test_bool, AVG(test_float) AS avg, SUM(test_int) AS sum - FROM test_table - WHERE test_nullint IS NOT NULL - GROUP BY test_bool - ORDER BY sum - """ - df = read_sql(sqlite_db, query, partition_on="sum", partition_num=2) - expected = pd.DataFrame( - index=range(3), - data={ - "test_bool": pd.Series([False, None, True], dtype="boolean"), - "avg": pd.Series([3.00, 5.45, -10.00], dtype="float64"), - "sum": pd.Series([3, 4, 1315], dtype="Int64"), - }, - ) - df = df.sort_values("sum").reset_index(drop=True) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result(sqlite_db: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(sqlite_db, query) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="object"), - "test_nullint": pd.Series([], dtype="object"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="object"), - "test_bool": pd.Series([], dtype="object"), - "test_date": pd.Series([], dtype="object"), - "test_time": pd.Series([], dtype="object"), - "test_datetime": pd.Series([], dtype="object"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_partition(sqlite_db: str) -> None: - query = "SELECT * FROM test_table where test_int < -100" - df = read_sql(sqlite_db, query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([], dtype="object"), - "test_nullint": pd.Series([], dtype="object"), - "test_str": pd.Series([], dtype="object"), - "test_float": pd.Series([], dtype="object"), - "test_bool": pd.Series([], dtype="object"), - "test_date": pd.Series([], dtype="object"), - "test_time": pd.Series([], dtype="object"), - "test_datetime": pd.Series([], dtype="object"), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_empty_result_on_some_partition(sqlite_db: str) -> None: - query = "SELECT * FROM test_table where test_int < 1" - df = read_sql(sqlite_db, 
query, partition_on="test_int", partition_num=3) - expected = pd.DataFrame( - data={ - "test_int": pd.Series([0], dtype="Int64"), - "test_nullint": pd.Series([5], dtype="Int64"), - "test_str": pd.Series(["こんにちは"], dtype="object"), - "test_float": pd.Series([3.1], dtype="float"), - "test_bool": pd.Series([None], dtype="boolean"), - "test_date": pd.Series( - [ - np.datetime64("1996-02-28"), - ], - dtype="datetime64[ns]", - ), - "test_time": pd.Series(["23:00:10"], dtype="object"), - "test_datetime": pd.Series( - [ - None, - ], - dtype="datetime64[ns]", - ), - } - ) - assert_frame_equal(df, expected, check_names=True) - - -def test_sqlite_cte(sqlite_db: str) -> None: - query = "with test_cte (test_int, test_str) as (select test_int, test_str from test_table where test_float > 0) select test_int, test_str from test_cte" - df = read_sql(sqlite_db, query, partition_on="test_int", partition_num=3) - df.sort_values(by="test_int", inplace=True, ignore_index=True) - expected = pd.DataFrame( - index=range(4), - data={ - "test_int": pd.Series([0, 2, 3, 4], dtype="Int64"), - "test_str": pd.Series(["こんにちは", "str2", "b", "Ha好ち😁ðy̆"], dtype="object"), - }, - ) - assert_frame_equal(df, expected, check_names=True) diff --git a/connectorx-python/examples/flame_tpch.rs b/connectorx-python/examples/flame_tpch.rs deleted file mode 100644 index 926df9a..0000000 --- a/connectorx-python/examples/flame_tpch.rs +++ /dev/null @@ -1,25 +0,0 @@ -mod tpch; - -use pprof::protos::Message; -use std::env; -use std::fs::File; -use std::io::Write; - -fn main() { - let args: Vec = env::args().collect(); - let guard = pprof::ProfilerGuard::new(10).unwrap(); - - tpch::run(10, &args[1]); - - if let Ok(report) = guard.report().build() { - let file = File::create("flamegraph.svg").unwrap(); - report.flamegraph(file).unwrap(); - - let mut file = File::create("profile.pb").unwrap(); - let profile = report.pprof().unwrap(); - - let mut content = Vec::new(); - profile.encode(&mut content).unwrap(); - 
file.write_all(&content).unwrap(); - }; -} diff --git a/connectorx-python/examples/tpch.rs b/connectorx-python/examples/tpch.rs deleted file mode 100644 index 69c9afa..0000000 --- a/connectorx-python/examples/tpch.rs +++ /dev/null @@ -1,28 +0,0 @@ -use connectorx_python::read_sql::{read_sql, PartitionQuery}; -use pyo3::Python; -use std::env; - -const QUERY: &'static str = r#" -SELECT - * -FROM LINEITEM"#; - -pub fn run(nq: usize, conn: &str) { - let conn = env::var(conn).unwrap(); - - Python::with_gil(|py| { - read_sql( - py, - &conn, - "pandas", - None, - None, - Some(PartitionQuery::new(QUERY, "L_ORDERKEY", None, None, nq)), - ) - .unwrap(); - }); -} - -fn main() { - run(1, "POSTGRES_URL"); -} diff --git a/connectorx-python/poetry.lock b/connectorx-python/poetry.lock deleted file mode 100644 index 6922983..0000000 --- a/connectorx-python/poetry.lock +++ /dev/null @@ -1,1229 +0,0 @@ -[[package]] -name = "appnope" -version = "0.1.3" -description = "Disable App Nap on macOS >= 10.9" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "attrs" -version = "22.2.0" -description = "Classes Without Boilerplate" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -cov = ["attrs", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs"] -docs = ["furo", "sphinx", "myst-parser", "zope.interface", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["attrs", "zope.interface"] -tests-no-zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] -tests_no_zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] - -[[package]] -name = "backcall" -version = "0.2.0" -description = "Specifications for callback functions passed in to an API" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "black" -version = "21.12b0" -description = "The uncompromising code formatter." -category = "dev" -optional = false -python-versions = ">=3.6.2" - -[package.dependencies] -click = ">=7.1.2" -mypy-extensions = ">=0.4.3" -pathspec = ">=0.9.0,<1" -platformdirs = ">=2" -tomli = ">=0.2.6,<2.0.0" -typing-extensions = [ - {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}, - {version = "!=3.10.0.1", markers = "python_version >= \"3.10\""}, -] - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -python2 = ["typed-ast (>=1.4.3)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "bleach" -version = "5.0.1" -description = "An easy safelist-based HTML-sanitizing tool." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -six = ">=1.9.0" -webencodings = "*" - -[package.extras] -css = ["tinycss2 (>=1.1.0,<1.2)"] -dev = ["build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "Sphinx (==4.3.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)", "black (==22.3.0)", "mypy (==0.961)"] - -[[package]] -name = "certifi" -version = "2022.12.7" -description = "Python package for providing Mozilla's CA Bundle." -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "cffi" -version = "1.15.1" -description = "Foreign Function Interface for Python calling C code." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -pycparser = "*" - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "dev" -optional = false -python-versions = ">=3.6.0" - -[package.extras] -unicode_backport = ["unicodedata2"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "cloudpickle" -version = "2.2.0" -description = "Extended pickling support for Python objects" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" - -[[package]] -name = "contexttimer" -version = "0.3.3" -description = "A timer context manager measuring the clock wall time of the code block it contains." 
-category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "cryptography" -version = "38.0.4" -description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -cffi = ">=1.12" - -[package.extras] -docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] -docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] -pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] -sdist = ["setuptools-rust (>=0.11.4)"] -ssh = ["bcrypt (>=3.1.5)"] -test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"] - -[[package]] -name = "dask" -version = "2021.12.0" -description = "Parallel PyData with Task Scheduling" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cloudpickle = ">=1.1.1" -fsspec = ">=0.6.0" -numpy = {version = ">=1.18", optional = true, markers = "extra == \"dataframe\""} -packaging = ">=20.0" -pandas = {version = ">=1.0", optional = true, markers = "extra == \"dataframe\""} -partd = ">=0.3.10" -pyyaml = "*" -toolz = ">=0.8.2" - -[package.extras] -array = ["numpy (>=1.18)"] -complete = ["bokeh (>=2.1.1)", "distributed (==2021.12.0)", "jinja2", "numpy (>=1.18)", "pandas (>=1.0)"] -dataframe = ["numpy (>=1.18)", "pandas (>=1.0)"] -diagnostics = ["bokeh (>=2.1.1)", "jinja2"] -distributed = ["distributed (==2021.12.0)"] -test = ["pytest", "pytest-rerunfailures", "pytest-xdist", "pre-commit"] - -[[package]] -name = "decorator" -version = "5.1.1" -description = "Decorators for Humans" -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "distributed" -version = "2021.12.0" -description = "Distributed scheduler for Dask" -category = "dev" -optional = false -python-versions = 
">=3.7" - -[package.dependencies] -click = ">=6.6" -cloudpickle = ">=1.5.0" -dask = "2021.12.0" -jinja2 = "*" -msgpack = ">=0.6.0" -psutil = ">=5.0" -pyyaml = "*" -sortedcontainers = "<2.0.0 || >2.0.0,<2.0.1 || >2.0.1" -tblib = ">=1.6.0" -toolz = ">=0.8.2" -tornado = {version = ">=6.0.3", markers = "python_version >= \"3.8\""} -zict = ">=0.1.3" - -[[package]] -name = "docopt" -version = "0.6.2" -description = "Pythonic argument parser, that will make you smile" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "docutils" -version = "0.19" -description = "Docutils -- Python Documentation Utilities" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "fsspec" -version = "2022.11.0" -description = "File-system specification" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -abfs = ["adlfs"] -adl = ["adlfs"] -arrow = ["pyarrow (>=1)"] -dask = ["dask", "distributed"] -dropbox = ["dropboxdrivefs", "requests", "dropbox"] -entrypoints = ["importlib-metadata"] -fuse = ["fusepy"] -gcs = ["gcsfs"] -git = ["pygit2"] -github = ["requests"] -gs = ["gcsfs"] -gui = ["panel"] -hdfs = ["pyarrow (>=1)"] -http = ["requests", "aiohttp (!=4.0.0a0,!=4.0.0a1)"] -libarchive = ["libarchive-c"] -oci = ["ocifs"] -s3 = ["s3fs"] -sftp = ["paramiko"] -smb = ["smbprotocol"] -ssh = ["paramiko"] -tqdm = ["tqdm"] - -[[package]] -name = "heapdict" -version = "1.0.1" -description = "a heap with decrease-key and increase-key operations" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "importlib-metadata" -version = "5.2.0" -description = "Read metadata from Python packages" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -zipp = ">=0.5" - 
-[package.extras] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "sphinx-lint", "jaraco.tidelift (>=1.4)"] -perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8", "importlib-resources (>=1.3)"] - -[[package]] -name = "importlib-resources" -version = "5.10.1" -description = "Read resources from Python packages" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] - -[[package]] -name = "iniconfig" -version = "1.1.1" -description = "iniconfig: brain-dead simple config-ini parsing" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "ipython" -version = "7.34.0" -description = "IPython: Productive Interactive Computing" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -appnope = {version = "*", markers = "sys_platform == \"darwin\""} -backcall = "*" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -decorator = "*" -jedi = ">=0.16" -matplotlib-inline = "*" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} -pickleshare = "*" -prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" -pygments = "*" -traitlets = ">=4.2" - -[package.extras] -all = ["Sphinx (>=1.3)", "ipykernel", "ipyparallel", "ipywidgets", "nbconvert", "nbformat", "nose (>=0.10.1)", "notebook", "numpy (>=1.17)", "pygments", "qtconsole", 
"requests", "testpath"] -doc = ["Sphinx (>=1.3)"] -kernel = ["ipykernel"] -nbconvert = ["nbconvert"] -nbformat = ["nbformat"] -notebook = ["notebook", "ipywidgets"] -parallel = ["ipyparallel"] -qtconsole = ["qtconsole"] -test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.17)"] - -[[package]] -name = "jaraco.classes" -version = "3.2.3" -description = "Utility functions for Python class constructs" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -more-itertools = "*" - -[package.extras] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] - -[[package]] -name = "jedi" -version = "0.18.2" -description = "An autocompletion tool for Python that can be used for text editors." -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -parso = ">=0.8.0,<0.9.0" - -[package.extras] -docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx-rtd-theme (==0.4.3)", "sphinx (==1.8.5)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] - -[[package]] -name = "jeepney" -version = "0.8.0" -description = "Low-level, pure Python DBus protocol wrapper." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest", "pytest-trio", "pytest-asyncio (>=0.17)", "testpath", "trio", "async-timeout"] -trio = ["trio", "async-generator"] - -[[package]] -name = "jinja2" -version = "3.1.2" -description = "A very fast and expressive template engine." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "keyring" -version = "23.13.1" -description = "Store and access your passwords safely." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} -importlib-resources = {version = "*", markers = "python_version < \"3.9\""} -"jaraco.classes" = "*" -jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} -pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""} -SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} - -[package.extras] -completion = ["shtab"] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] - -[[package]] -name = "locket" -version = "1.0.0" -description = "File-based locks for Python on Linux and Windows" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "markupsafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "matplotlib-inline" -version = "0.1.6" -description = "Inline Matplotlib backend for Jupyter" -category = "dev" -optional = false -python-versions = ">=3.5" - -[package.dependencies] -traitlets = "*" - -[[package]] -name = "maturin" -version = "1.2.3" -description = "Build and publish crates with pyo3, rust-cpython and cffi bindings as well as rust binaries as python packages" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} - -[package.extras] -zig = ["ziglang (>=0.10.0,<0.11.0)"] -patchelf = ["patchelf"] - -[[package]] -name = "modin" -version = "0.18.0" -description = "Modin: Make your pandas code run faster by changing one line of code." -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -dask = {version = ">=2.22.0", optional = true, markers = "extra == \"dask\""} -distributed = {version = ">=2.22.0", optional = true, markers = "extra == \"dask\""} -fsspec = "*" -numpy = ">=1.18.5" -packaging = "*" -pandas = "1.5.2" -psutil = "*" - -[package.extras] -all = ["dask (>=2.22.0)", "distributed (>=2.22.0)", "ray[default] (>=1.13.0)", "pyarrow", "unidist[mpi] (>=0.2.1)", "rpyc (==4.1.5)", "cloudpickle", "boto3", "modin-spreadsheet (>=0.1.0)"] -dask = ["dask (>=2.22.0)", "distributed (>=2.22.0)"] -ray = ["ray[default] (>=1.13.0)", "pyarrow"] -remote = ["rpyc (==4.1.5)", "cloudpickle", "boto3"] -spreadsheet = ["modin-spreadsheet (>=0.1.0)"] -sql = ["dfsql (>=0.4.2)", "pyparsing (<=2.4.7)"] -unidist = ["unidist[mpi] (>=0.2.1)"] - -[[package]] -name = "more-itertools" -version = "9.0.0" -description = "More routines for operating on iterables, beyond itertools" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "msgpack" -version = "1.0.4" -description = "MessagePack serializer" -category = "dev" -optional 
= false -python-versions = "*" - -[[package]] -name = "mypy-extensions" -version = "0.4.3" -description = "Experimental type system extensions for programs checked with the mypy typechecker." -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "numpy" -version = "1.24.1" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "packaging" -version = "22.0" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pandas" -version = "1.5.2" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, -] -python-dateutil = ">=2.8.1" -pytz = ">=2020.1" - -[package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] - -[[package]] -name = "parso" -version = "0.8.3" -description = "A Python Parser" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["docopt", "pytest (<6.0.0)"] - -[[package]] -name = "partd" -version = "1.3.0" -description = "Appendable key-value storage" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -locket = "*" -toolz = "*" - -[package.extras] -complete = ["numpy (>=1.9.0)", "pandas (>=0.19.0)", "pyzmq", "blosc"] - -[[package]] -name = "pathspec" -version = "0.10.3" -description = "Utility library for gitignore style pattern matching of file paths." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pexpect" -version = "4.8.0" -description = "Pexpect allows easy control of interactive console applications." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -ptyprocess = ">=0.5" - -[[package]] -name = "pickleshare" -version = "0.7.5" -description = "Tiny 'shelve'-like database with concurrency support" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "pkginfo" -version = "1.9.2" -description = "Query metadatdata from sdists / bdists / installed packages." -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -testing = ["pytest", "pytest-cov"] - -[[package]] -name = "platformdirs" -version = "2.6.2" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx-autodoc-typehints (>=1.19.5)", "sphinx (>=5.3)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest (>=7.2)"] - -[[package]] -name = "pluggy" -version = "1.0.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "polars" -version = "0.15.8" -description = "Blazingly fast DataFrame library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing_extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} - -[package.extras] -matplotlib = ["matplotlib"] -numpy = ["numpy (>=1.16.0)"] -pandas = ["pyarrow (>=4.0.0)", "pandas"] -fsspec = ["fsspec"] -xlsx2csv = ["xlsx2csv (>=0.8.0)"] -pyarrow = ["pyarrow (>=4.0.0)"] -deltalake = 
["deltalake"] -timezone = ["backports.zoneinfo", "tzdata"] -connectorx = ["connectorx"] -all = ["polars"] - -[[package]] -name = "prompt-toolkit" -version = "3.0.36" -description = "Library for building powerful interactive command lines in Python" -category = "dev" -optional = false -python-versions = ">=3.6.2" - -[package.dependencies] -wcwidth = "*" - -[[package]] -name = "psutil" -version = "5.9.4" -description = "Cross-platform lib for process and system monitoring in Python." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.extras] -test = ["ipaddress", "mock", "enum34", "pywin32", "wmi"] - -[[package]] -name = "ptyprocess" -version = "0.7.0" -description = "Run a subprocess in a pseudo terminal" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "py-cpuinfo" -version = "9.0.0" -description = "Get CPU info with pure Python" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "pyarrow" -version = "10.0.1" -description = "Python library for Apache Arrow" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = ">=1.16.6" - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pygments" -version = "2.13.0" -description = "Pygments is a syntax highlighting package written in Python." 
-category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pytest" -version = "6.2.5" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -toml = "*" - -[package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] - -[[package]] -name = "pytest-benchmark" -version = "3.4.1" -description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.dependencies] -py-cpuinfo = "*" -pytest = ">=3.8" - -[package.extras] -aspect = ["aspectlib"] -elasticsearch = ["elasticsearch"] -histogram = ["pygal", "pygaljs"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2022.7" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pywin32-ctypes" -version = "0.2.0" -description = "" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "pyyaml" -version = "6.0.1" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "readme-renderer" -version = "37.3" -description = "readme_renderer is a library 
for rendering \"readme\" descriptions for Warehouse" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -bleach = ">=2.1.0" -docutils = ">=0.13.1" -Pygments = ">=2.5.1" - -[package.extras] -md = ["cmarkgfm (>=0.8.0)"] - -[[package]] -name = "requests" -version = "2.28.1" -description = "Python HTTP for Humans." -category = "dev" -optional = false -python-versions = ">=3.7, <4" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<3" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "requests-toolbelt" -version = "0.10.1" -description = "A utility belt for advanced users of python-requests" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[package.dependencies] -requests = ">=2.0.1,<3.0.0" - -[[package]] -name = "rfc3986" -version = "2.0.0" -description = "Validating URI References per RFC 3986" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -idna2008 = ["idna"] - -[[package]] -name = "secretstorage" -version = "3.3.3" -description = "Python bindings to FreeDesktop.org Secret Service API" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -cryptography = ">=2.0" -jeepney = ">=0.6" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "sortedcontainers" -version = "2.4.0" -description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "tblib" -version = "1.7.0" -description = "Traceback serialization library." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -category = "dev" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "tomli" -version = "1.2.3" -description = "A lil' TOML parser" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "toolz" -version = "0.12.0" -description = "List processing tools and functional utilities" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "tornado" -version = "6.2" -description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "dev" -optional = false -python-versions = ">= 3.7" - -[[package]] -name = "tqdm" -version = "4.64.1" -description = "Fast, Extensible Progress Meter" -category = "dev" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "traitlets" -version = "5.8.0" -description = "Traitlets Python configuration system" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] - -[[package]] -name = "twine" -version = "3.8.0" -description = "Collection of utilities for publishing packages on PyPI" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -colorama = ">=0.4.3" -importlib-metadata = ">=3.6" -keyring = ">=15.1" -pkginfo = ">=1.8.1" -readme-renderer = ">=21.0" -requests = ">=2.20" 
-requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" -rfc3986 = ">=1.4.0" -tqdm = ">=4.14" -urllib3 = ">=1.26.0" - -[[package]] -name = "typing-extensions" -version = "4.4.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "urllib3" -version = "1.26.13" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "wcwidth" -version = "0.2.5" -description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "webencodings" -version = "0.5.1" -description = "Character encoding aliases for legacy web content" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "zict" -version = "2.2.0" -description = "Mutable mapping tools" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -heapdict = "*" - -[[package]] -name = "zipp" -version = "3.11.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] - -[extras] -all = ["dask", 
"pandas", "modin", "polars", "pyarrow"] -dask = ["dask", "pandas"] -modin = ["modin", "pandas"] -pandas = ["pandas"] -polars = ["pyarrow", "polars"] -pyarrow = ["pyarrow"] - -[metadata] -lock-version = "1.1" -python-versions = ">=3.8,<3.12" -content-hash = "88ecb2d7561052dfa32e9969765fb5972cd5f4c8eccea8b280debeda70ed7639" - -[metadata.files] -appnope = [] -atomicwrites = [] -attrs = [] -backcall = [ - {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, - {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, -] -black = [ - {file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"}, - {file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"}, -] -bleach = [] -certifi = [] -cffi = [] -charset-normalizer = [] -click = [] -cloudpickle = [] -colorama = [] -contexttimer = [ - {file = "contexttimer-0.3.3.tar.gz", hash = "sha256:35a1efd389af3f1ca509f33ff23e17d98b66c8fde5ba2a4eb8a8b7fa456598a5"}, -] -cryptography = [] -dask = [ - {file = "dask-2021.12.0-py3-none-any.whl", hash = "sha256:47041fe1874e64c395e7be772173999e50b5c61a577084158083b9ef4b4175b2"}, - {file = "dask-2021.12.0.tar.gz", hash = "sha256:90614c9d162713e4849532c86f2854e8d53468521285413403b6c496344c0109"}, -] -decorator = [] -distributed = [ - {file = "distributed-2021.12.0-py3-none-any.whl", hash = "sha256:ea8cdb56ecbf1f999c4e28a5c848ce231cb90d6919e42c13e89ceb0d86366d41"}, - {file = "distributed-2021.12.0.tar.gz", hash = "sha256:c6119a2cf1fb2d8ac60337915bb9a790af6530afcb5d7a809a3308323b874714"}, -] -docopt = [ - {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, -] -docutils = [] -fsspec = [] -heapdict = [ - {file = "HeapDict-1.0.1-py3-none-any.whl", hash = 
"sha256:6065f90933ab1bb7e50db403b90cab653c853690c5992e69294c2de2b253fc92"}, - {file = "HeapDict-1.0.1.tar.gz", hash = "sha256:8495f57b3e03d8e46d5f1b2cc62ca881aca392fd5cc048dc0aa2e1a6d23ecdb6"}, -] -idna = [] -importlib-metadata = [] -importlib-resources = [] -iniconfig = [ - {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, - {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, -] -ipython = [] -"jaraco.classes" = [] -jedi = [] -jeepney = [] -jinja2 = [] -keyring = [] -locket = [] -markupsafe = [] -matplotlib-inline = [] -maturin = [] -modin = [] -more-itertools = [] -msgpack = [] -mypy-extensions = [ - {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, - {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, -] -numpy = [] -packaging = [] -pandas = [] -parso = [] -partd = [] -pathspec = [] -pexpect = [ - {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, - {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, -] -pickleshare = [ - {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, - {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, -] -pkginfo = [] -platformdirs = [] -pluggy = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] -polars = [] -prompt-toolkit = [] -psutil = [] -ptyprocess = [ - 
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, - {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, -] -py = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] -py-cpuinfo = [] -pyarrow = [] -pycparser = [] -pygments = [] -pytest = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, -] -pytest-benchmark = [ - {file = "pytest-benchmark-3.4.1.tar.gz", hash = "sha256:40e263f912de5a81d891619032983557d62a3d85843f9a9f30b98baea0cd7b47"}, - {file = "pytest_benchmark-3.4.1-py2.py3-none-any.whl", hash = "sha256:36d2b08c4882f6f997fd3126a3d6dfd70f3249cde178ed8bbc0b73db7c20f809"}, -] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] -pytz = [] -pywin32-ctypes = [ - {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"}, - {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"}, -] -pyyaml = [] -readme-renderer = [] -requests = [] -requests-toolbelt = [] -rfc3986 = [] -secretstorage = [] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = 
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -sortedcontainers = [ - {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, - {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, -] -tblib = [ - {file = "tblib-1.7.0-py2.py3-none-any.whl", hash = "sha256:289fa7359e580950e7d9743eab36b0691f0310fce64dee7d9c31065b8f723e23"}, - {file = "tblib-1.7.0.tar.gz", hash = "sha256:059bd77306ea7b419d4f76016aef6d7027cc8a0785579b5aad198803435f882c"}, -] -toml = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] -tomli = [ - {file = "tomli-1.2.3-py3-none-any.whl", hash = "sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c"}, - {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"}, -] -toolz = [] -tornado = [] -tqdm = [] -traitlets = [] -twine = [] -typing-extensions = [] -urllib3 = [] -wcwidth = [ - {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, - {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, -] -webencodings = [ - {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, - {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, -] -zict = [] -zipp = [] diff --git a/connectorx-python/pyproject.toml b/connectorx-python/pyproject.toml deleted file mode 100644 index aebc2f8..0000000 --- a/connectorx-python/pyproject.toml +++ /dev/null @@ -1,81 
+0,0 @@ -[tool.poetry] -authors = ["SFU Database System Lab "] -classifiers = [ - "Development Status :: 4 - Beta", - "Topic :: Software Development :: Build Tools", - "Environment :: Console", - "Operating System :: OS Independent", - "Intended Audience :: Science/Research", - "Intended Audience :: Developers", - "Intended Audience :: Financial and Insurance Industry", - "Intended Audience :: Healthcare Industry", - "Topic :: Scientific/Engineering", - "Framework :: IPython", -] -description = "Load data from databases to dataframes, the fastest way." -keywords = ["read_sql"] -license = "MIT" -maintainers = ["Weiyuan Wu "] -name = "connectorx" -readme = "README.md" # Markdown files are supported -version = "0.3.3-alpha.1" - -[project] -name = "connectorx" # Target file name of maturin build -readme = "README.md" -license = { text = "MIT" } -requires-python = ">=3.8" - -[tool.poetry.dependencies] -dask = {version = "^2021", optional = true, extras = ["dataframe"]} -modin = {version = ">=0.10", optional = true} -numpy = ">=1.21.5" -pandas = {version = "^1", optional = true} -polars = {version = ">=0.8", optional = true} -pyarrow = {version = ">=4", optional = true} -python = ">=3.8,<3.12" - -[tool.poetry.extras] -all = ["dask", "pandas", "modin", "polars", "pyarrow"] -dask = ["dask", "pandas"] -modin = ["modin", "pandas"] -pandas = ["pandas"] -polars = ["pyarrow", "polars"] -pyarrow = ["pyarrow"] - -[tool.poetry.dev-dependencies] -black = "^21.4b0" -contexttimer = "^0.3.3" -dask = {extras = ["dataframe"], version = "^2021.7.0"} -docopt = "^0.6.2" -ipython = "^7.31.1" -maturin = ">=1.0,<2.0" -modin = {extras = ["dask"], version = ">=0.10.1"} -polars = ">=0.8" -pyarrow = ">=4" -pytest = "^6.2" -pytest-benchmark = "^3.4.1" -twine = "^3.4.1" - -[tool.pytest.ini_options] -minversion = "6.0" -python_functions = "test_* bench_*" -# python_files = check_*.py -# python_classes = Check -# addopts = "-ra -q" -# testpaths = [ -# "tests", -# "integration", -# ] - -[build-system] 
-build-backend = "maturin" -requires = ["maturin>=1.0,<2.0"] - -[tool.maturin] -include = [ - { path = "connectorx/*.so", format = "sdist"}, - { path = "connectorx/*.pyd", format = "sdist"}, - { path = "connectorx/dependencies/", format = "sdist"}, - { path = "LICENSE", format = "sdist"}, -] diff --git a/connectorx-python/src/arrow.rs b/connectorx-python/src/arrow.rs deleted file mode 100644 index 6521a2f..0000000 --- a/connectorx-python/src/arrow.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use arrow::record_batch::RecordBatch; -use connectorx::source_router::SourceConn; -use connectorx::{prelude::*, sql::CXQuery}; -use fehler::throws; -use libc::uintptr_t; -use pyo3::prelude::*; -use pyo3::{PyAny, Python}; -use std::convert::TryFrom; -use std::sync::Arc; - -#[throws(ConnectorXPythonError)] -pub fn write_arrow<'a>( - py: Python<'a>, - source_conn: &SourceConn, - origin_query: Option, - queries: &[CXQuery], -) -> &'a PyAny { - let destination = get_arrow(source_conn, origin_query, queries)?; - let rbs = destination.arrow()?; - let ptrs = to_ptrs(rbs); - let obj: PyObject = ptrs.into_py(py); - obj.into_ref(py) -} - -pub fn to_ptrs(rbs: Vec) -> (Vec, Vec>) { - if rbs.is_empty() { - return (vec![], vec![]); - } - - let mut result = vec![]; - let names = rbs[0] - .schema() - .fields() - .iter() - .map(|f| f.name().clone()) - .collect(); - - for rb in rbs.into_iter() { - let mut cols = vec![]; - - for array in rb.columns().into_iter() { - let data = array.to_data(); - let array_ptr = Arc::new(arrow::ffi::FFI_ArrowArray::new(&data)); - let schema_ptr = Arc::new( - arrow::ffi::FFI_ArrowSchema::try_from(data.data_type()).expect("export schema c"), - ); - cols.push(( - Arc::into_raw(array_ptr) as uintptr_t, - Arc::into_raw(schema_ptr) as uintptr_t, - )); - } - - result.push(cols); - } - (names, result) -} diff --git a/connectorx-python/src/arrow2.rs b/connectorx-python/src/arrow2.rs deleted file mode 100644 index 573c7c8..0000000 --- 
a/connectorx-python/src/arrow2.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use arrow2::{ - array::Array, - chunk::Chunk, - datatypes::{Field, Schema}, - ffi, -}; -use connectorx::source_router::SourceConn; -use connectorx::{prelude::*, sql::CXQuery}; -use fehler::throws; -use libc::uintptr_t; -use pyo3::prelude::*; -use pyo3::{PyAny, Python}; -use std::sync::Arc; - -#[throws(ConnectorXPythonError)] -pub fn write_arrow<'a>( - py: Python<'a>, - source_conn: &SourceConn, - origin_query: Option, - queries: &[CXQuery], -) -> &'a PyAny { - let destination = get_arrow2(source_conn, origin_query, queries)?; - let (rbs, schema) = destination.arrow()?; - let ptrs = to_ptrs(rbs, schema); - let obj: PyObject = ptrs.into_py(py); - obj.into_ref(py) -} - -fn to_ptrs( - rbs: Vec>>, - schema: Arc, -) -> (Vec, Vec>) { - if rbs.is_empty() { - return (vec![], vec![]); - } - - let mut result = vec![]; - let names = schema.fields.iter().map(|f| f.name.clone()).collect(); - - for rb in rbs.into_iter() { - let mut cols = vec![]; - - for array in rb.into_arrays() { - let schema_ptr = - ffi::export_field_to_c(&Field::new("", array.data_type().clone(), true)); - let array_ptr = ffi::export_array_to_c(array); - let array_ptr = Box::into_raw(Box::new(array_ptr)); - let schema_ptr = Box::into_raw(Box::new(schema_ptr)); - - cols.push((array_ptr as uintptr_t, schema_ptr as uintptr_t)); - } - - result.push(cols); - } - (names, result) -} diff --git a/connectorx-python/src/constants.rs b/connectorx-python/src/constants.rs deleted file mode 100644 index 7e2cc84..0000000 --- a/connectorx-python/src/constants.rs +++ /dev/null @@ -1,7 +0,0 @@ -// PyString buffer size in MB -pub const PYSTRING_BUFFER_SIZE: usize = 4; - -#[cfg(not(debug_assertions))] -pub const J4RS_BASE_PATH: &str = "./target/release"; -#[cfg(debug_assertions)] -pub const J4RS_BASE_PATH: &str = "./target/debug"; diff --git a/connectorx-python/src/errors.rs b/connectorx-python/src/errors.rs deleted file 
mode 100644 index a8754ef..0000000 --- a/connectorx-python/src/errors.rs +++ /dev/null @@ -1,66 +0,0 @@ -use pyo3::exceptions::PyRuntimeError; -use pyo3::PyErr; -use thiserror::Error; - -#[allow(unused)] -pub type Result = std::result::Result; - -/// Errors that can be raised from this library. -#[derive(Error, Debug)] -pub enum ConnectorXPythonError { - /// The required type does not same as the schema defined. - #[error("Unknown pandas data type: {0}.")] - UnknownPandasType(String), - - #[error("Python: {0}.")] - PythonError(String), - - #[error(transparent)] - NdArrayShapeError(#[from] ndarray::ShapeError), - - #[error(transparent)] - ConnectorXError(#[from] connectorx::errors::ConnectorXError), - - #[error(transparent)] - ConnectorXOutError(#[from] connectorx::errors::ConnectorXOutError), - - #[error(transparent)] - MsSQLSourceError(#[from] connectorx::sources::mssql::MsSQLSourceError), - - #[error(transparent)] - PostgresSourceError(#[from] connectorx::sources::postgres::PostgresSourceError), - - #[error(transparent)] - MySQLSourceError(#[from] connectorx::sources::mysql::MySQLSourceError), - - #[error(transparent)] - SQLiteSourceError(#[from] connectorx::sources::sqlite::SQLiteSourceError), - - #[error(transparent)] - OracleSourceError(#[from] connectorx::sources::oracle::OracleSourceError), - - #[error(transparent)] - BigQuerySourceError(#[from] connectorx::sources::bigquery::BigQuerySourceError), - - #[error(transparent)] - ArrowDestinationError(#[from] connectorx::destinations::arrow::ArrowDestinationError), - - #[error(transparent)] - Arrow2DestinationError(#[from] connectorx::destinations::arrow2::Arrow2DestinationError), - - /// Any other errors that are too trivial to be put here explicitly. 
- #[error(transparent)] - Other(#[from] anyhow::Error), -} - -impl From for PyErr { - fn from(e: ConnectorXPythonError) -> PyErr { - PyRuntimeError::new_err(format!("{}", e)) - } -} - -impl From for ConnectorXPythonError { - fn from(e: PyErr) -> ConnectorXPythonError { - ConnectorXPythonError::PythonError(format!("{}", e)) - } -} diff --git a/connectorx-python/src/lib.rs b/connectorx-python/src/lib.rs deleted file mode 100644 index 5487275..0000000 --- a/connectorx-python/src/lib.rs +++ /dev/null @@ -1,96 +0,0 @@ -pub mod arrow; -pub mod arrow2; -pub mod constants; -mod errors; -pub mod pandas; -pub mod read_sql; - -use crate::constants::J4RS_BASE_PATH; -use connectorx::fed_dispatcher::run; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; -use pyo3::{wrap_pyfunction, PyResult}; -use std::collections::HashMap; -use std::env; -use std::sync::Once; - -#[macro_use] -extern crate lazy_static; - -static START: Once = Once::new(); - -// https://github.com/PyO3/pyo3-built/issues/21 -// #[allow(dead_code)] -// mod build { -// include!(concat!(env!("OUT_DIR"), "/built.rs")); -// } - -#[pymodule] -fn connectorx(_: Python, m: &PyModule) -> PyResult<()> { - START.call_once(|| { - let _ = env_logger::try_init(); - }); - - m.add_wrapped(wrap_pyfunction!(read_sql))?; - m.add_wrapped(wrap_pyfunction!(read_sql2))?; - m.add_wrapped(wrap_pyfunction!(partition_sql))?; - m.add_wrapped(wrap_pyfunction!(get_meta))?; - m.add_class::()?; - Ok(()) -} - -#[pyfunction] -pub fn read_sql<'a>( - py: Python<'a>, - conn: &str, - return_type: &str, - protocol: Option<&str>, - queries: Option>, - partition_query: Option, -) -> PyResult<&'a PyAny> { - read_sql::read_sql(py, conn, return_type, protocol, queries, partition_query) -} - -#[pyfunction] -pub fn partition_sql( - conn: &str, - partition_query: read_sql::PyPartitionQuery, -) -> PyResult> { - let source_conn = connectorx::source_router::parse_source(conn, None) - .map_err(|e| crate::errors::ConnectorXPythonError::from(e))?; - let 
queries = connectorx::partition::partition(&partition_query.into(), &source_conn) - .map_err(|e| crate::errors::ConnectorXPythonError::from(e))?; - Ok(queries.into_iter().map(|q| q.to_string()).collect()) -} - -#[pyfunction] -pub fn read_sql2<'a>( - py: Python<'a>, - sql: &str, - db_map: HashMap, -) -> PyResult<&'a PyAny> { - let rbs = run( - sql.to_string(), - db_map, - Some( - env::var("J4RS_BASE_PATH") - .unwrap_or(J4RS_BASE_PATH.to_string()) - .as_str(), - ), - ) - .map_err(|e| PyRuntimeError::new_err(format!("{}", e)))?; - let ptrs = arrow::to_ptrs(rbs); - let obj: PyObject = ptrs.into_py(py); - Ok(obj.into_ref(py)) -} - -#[pyfunction] -pub fn get_meta<'a>( - py: Python<'a>, - conn: &str, - protocol: Option<&str>, - query: String, -) -> PyResult<&'a PyAny> { - pandas::get_meta::get_meta(py, conn, protocol.unwrap_or("binary"), query) - .map_err(|e| From::from(e)) -} diff --git a/connectorx-python/src/pandas/destination.rs b/connectorx-python/src/pandas/destination.rs deleted file mode 100644 index 82426f3..0000000 --- a/connectorx-python/src/pandas/destination.rs +++ /dev/null @@ -1,407 +0,0 @@ -use super::{ - pandas_columns::{ - ArrayBlock, BooleanBlock, BytesBlock, DateTimeBlock, Float64Block, HasPandasColumn, - Int64Block, PandasColumn, PandasColumnObject, PyBytes, StringBlock, - }, - pystring::PyString, - typesystem::{PandasArrayType, PandasBlockType, PandasTypeSystem}, -}; -use crate::errors::{ConnectorXPythonError, Result}; -use anyhow::anyhow; -use connectorx::prelude::*; -use fehler::{throw, throws}; -use itertools::Itertools; -use numpy::{PyArray1, PyArray2}; -use pyo3::{ - prelude::{pyclass, pymethods, PyResult}, - types::{IntoPyDict, PyList, PyTuple}, - FromPyObject, IntoPy, PyAny, PyObject, Python, -}; -use std::{ - collections::HashMap, - mem::transmute, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, -}; - -#[pyclass] -pub struct PandasBlockInfo { - dt: PandasBlockType, - #[pyo3(get, set)] - cids: Vec, // column ids -} - -#[pymethods] 
-impl PandasBlockInfo { - #[getter] - fn dt(&self) -> PyResult { - Ok(PandasArrayType::from(self.dt) as u32) - } -} - -pub struct PandasDestination<'py> { - py: Python<'py>, - nrow: usize, - schema: Vec, - names: Vec, - block_datas: Vec<&'py PyAny>, // either 2d array for normal blocks, or two 1d arrays for extension blocks - block_infos: Vec, -} - -impl<'a> PandasDestination<'a> { - pub fn new(py: Python<'a>) -> Self { - PandasDestination { - py, - nrow: 0, - schema: vec![], - names: vec![], - block_datas: vec![], - block_infos: vec![], - } - } - - pub fn result(self) -> Result<&'a PyAny> { - #[throws(ConnectorXPythonError)] - fn to_list>(py: Python<'_>, arr: Vec) -> &'_ PyList { - let list = PyList::empty(py); - for e in arr { - list.append(e.into_py(py))?; - } - list - } - let block_infos = to_list(self.py, self.block_infos)?; - let names = to_list(self.py, self.names)?; - let block_datas = to_list(self.py, self.block_datas)?; - let result = [ - ("data", block_datas), - ("headers", names), - ("block_infos", block_infos), - ] - .into_py_dict(self.py); - Ok(result) - } - - #[throws(ConnectorXPythonError)] - fn allocate_array( - &mut self, - dt: PandasBlockType, - placement: Vec, - ) { - // has to use `zeros` instead of `new` for String type initialization - let data = PyArray2::::zeros(self.py, [placement.len(), self.nrow], false); - let block_info = PandasBlockInfo { - dt, - cids: placement, - }; - - self.block_datas.push(data.into()); - self.block_infos.push(block_info); - } - - #[throws(ConnectorXPythonError)] - fn allocate_masked_array( - &mut self, - dt: PandasBlockType, - placement: Vec, - ) { - for pos in placement { - let block_info = PandasBlockInfo { - dt, - cids: vec![pos], - }; - let data = PyArray1::::zeros(self.py, self.nrow, false); - let mask = PyArray1::::zeros(self.py, self.nrow, false); - let obj = PyTuple::new(self.py, vec![data.as_ref(), mask.as_ref()]); - self.block_datas.push(obj.into()); - self.block_infos.push(block_info); - } - } -} - 
-impl<'a> Destination for PandasDestination<'a> { - const DATA_ORDERS: &'static [DataOrder] = &[DataOrder::RowMajor]; - type TypeSystem = PandasTypeSystem; - type Partition<'b> = PandasPartitionDestination<'b> where 'a: 'b; - type Error = ConnectorXPythonError; - - fn needs_count(&self) -> bool { - true - } - - #[throws(ConnectorXPythonError)] - fn allocate>( - &mut self, - nrows: usize, - names: &[S], - schema: &[PandasTypeSystem], - data_order: DataOrder, - ) { - if !matches!(data_order, DataOrder::RowMajor) { - throw!(ConnectorXError::UnsupportedDataOrder(data_order)) - } - self.nrow = nrows; - self.schema = schema.to_vec(); - self.names - .extend(names.iter().map(AsRef::as_ref).map(ToString::to_string)); - - let mut block_indices = HashMap::>::new(); - schema - .iter() - .enumerate() - .for_each(|(i, dt)| block_indices.entry((*dt).into()).or_default().push(i)); - - for (dt, placement) in block_indices { - match dt { - PandasBlockType::Boolean(true) => { - self.allocate_masked_array::(dt, placement)?; - } - PandasBlockType::Boolean(false) => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::Int64(true) => { - self.allocate_masked_array::(dt, placement)?; - } - PandasBlockType::Int64(false) => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::Float64 => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::BooleanArray => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::Float64Array => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::Int64Array => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::String => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::DateTime => { - self.allocate_array::(dt, placement)?; - } - PandasBlockType::Bytes => { - self.allocate_array::(dt, placement)?; - } - }; - } - } - - #[throws(ConnectorXPythonError)] - fn partition(&mut self, counts: usize) -> Vec> { - let mut partitioned_columns: Vec>> = - 
(0..self.schema.len()).map(|_| Vec::new()).collect(); - - for (idx, block) in self.block_infos.iter().enumerate() { - let buf = self.block_datas[idx]; - match block.dt { - PandasBlockType::Boolean(_) => { - let bblock = BooleanBlock::extract(buf)?; - - let bcols = bblock.split()?; - for (&cid, bcol) in block.cids.iter().zip_eq(bcols) { - partitioned_columns[cid] = bcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::Float64 => { - let fblock = Float64Block::extract(buf)?; - let fcols = fblock.split()?; - for (&cid, fcol) in block.cids.iter().zip_eq(fcols) { - partitioned_columns[cid] = fcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::BooleanArray => { - let bblock = ArrayBlock::::extract(buf)?; - let bcols = bblock.split()?; - for (&cid, bcol) in block.cids.iter().zip_eq(bcols) { - partitioned_columns[cid] = bcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::Float64Array => { - let fblock = ArrayBlock::::extract(buf)?; - let fcols = fblock.split()?; - for (&cid, fcol) in block.cids.iter().zip_eq(fcols) { - partitioned_columns[cid] = fcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::Int64Array => { - let fblock = ArrayBlock::::extract(buf)?; - let fcols = fblock.split()?; - for (&cid, fcol) in block.cids.iter().zip_eq(fcols) { - partitioned_columns[cid] = fcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::Int64(_) => { - let ublock = Int64Block::extract(buf)?; - let ucols = ublock.split()?; - for (&cid, ucol) in block.cids.iter().zip_eq(ucols) { - partitioned_columns[cid] = ucol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::String => { - let sblock = StringBlock::extract(buf)?; - let scols = sblock.split()?; - for 
(&cid, scol) in block.cids.iter().zip_eq(scols) { - partitioned_columns[cid] = scol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::Bytes => { - let bblock = BytesBlock::extract(buf)?; - let bcols = bblock.split()?; - for (&cid, bcol) in block.cids.iter().zip_eq(bcols) { - partitioned_columns[cid] = bcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - PandasBlockType::DateTime => { - let dblock = DateTimeBlock::extract(buf)?; - let dcols = dblock.split()?; - for (&cid, dcol) in block.cids.iter().zip_eq(dcols) { - partitioned_columns[cid] = dcol - .partition(counts) - .into_iter() - .map(|c| Box::new(c) as _) - .collect() - } - } - } - } - - let mut par_destinations = vec![]; - let glob_row = Arc::new(AtomicUsize::new(0)); - for _ in 0..counts { - let mut columns = Vec::with_capacity(partitioned_columns.len()); - for (i, partitions) in partitioned_columns.iter_mut().enumerate() { - columns.push( - partitions - .pop() - .ok_or_else(|| anyhow!("empty partition for {}th column", i))?, - ); - } - - par_destinations.push(PandasPartitionDestination::new( - columns, - &self.schema[..], - Arc::clone(&glob_row), - )); - } - - par_destinations - } - - fn schema(&self) -> &[Self::TypeSystem] { - self.schema.as_ref() - } -} -pub struct PandasPartitionDestination<'a> { - columns: Vec>, - schema: &'a [PandasTypeSystem], - seq: usize, - glob_row: Arc, - cur_row: usize, -} - -impl<'a> PandasPartitionDestination<'a> { - fn new( - columns: Vec>, - schema: &'a [PandasTypeSystem], - glob_row: Arc, - ) -> Self { - Self { - columns, - schema, - seq: 0, - glob_row, - cur_row: 0, - } - } - - fn loc(&mut self) -> (usize, usize) { - let (row, col) = ( - self.cur_row + self.seq / self.ncols(), - self.seq % self.ncols(), - ); - self.seq += 1; - (row, col) - } -} - -impl<'a> DestinationPartition<'a> for PandasPartitionDestination<'a> { - type TypeSystem = PandasTypeSystem; - type Error = 
ConnectorXPythonError; - - fn ncols(&self) -> usize { - self.schema.len() - } - - fn finalize(&mut self) -> Result<()> { - for col in &mut self.columns { - col.finalize()?; - } - Ok(()) - } - - #[throws(ConnectorXPythonError)] - fn aquire_row(&mut self, n: usize) -> usize { - if n == 0 { - return self.cur_row; - } - self.cur_row = self.glob_row.fetch_add(n, Ordering::Relaxed); - self.seq = 0; - self.cur_row - } -} - -impl<'a, T> Consume for PandasPartitionDestination<'a> -where - T: HasPandasColumn + TypeAssoc + std::fmt::Debug, -{ - type Error = ConnectorXPythonError; - - fn consume(&mut self, value: T) -> Result<()> { - let (row, col) = self.loc(); - - self.schema[col].check::()?; - // How do we check type id for borrowed types? - // assert!(self.columns[col].typecheck(TypeId::of::())); - - let (column, _): (&mut T::PandasColumn<'a>, *const ()) = - unsafe { transmute(&*self.columns[col]) }; - column.write(value, row) - } -} diff --git a/connectorx-python/src/pandas/get_meta.rs b/connectorx-python/src/pandas/get_meta.rs deleted file mode 100644 index bc5e7de..0000000 --- a/connectorx-python/src/pandas/get_meta.rs +++ /dev/null @@ -1,230 +0,0 @@ -use super::{ - destination::PandasDestination, - transports::{ - BigQueryPandasTransport, MsSQLPandasTransport, MysqlPandasTransport, OraclePandasTransport, - PostgresPandasTransport, SqlitePandasTransport, - }, -}; -use crate::errors::ConnectorXPythonError; -use connectorx::source_router::{SourceConn, SourceType}; -use connectorx::{ - prelude::*, - sources::{ - bigquery::BigQuerySource, - mssql::MsSQLSource, - mysql::{BinaryProtocol as MySQLBinaryProtocol, MySQLSource, TextProtocol}, - postgres::{ - rewrite_tls_args, BinaryProtocol as PgBinaryProtocol, CSVProtocol, CursorProtocol, - PostgresSource, SimpleProtocol, - }, - sqlite::SQLiteSource, - }, - sql::CXQuery, -}; -use fehler::throws; -use log::debug; -use postgres::NoTls; -use postgres_openssl::MakeTlsConnector; -use pyo3::prelude::*; -use std::convert::TryFrom; -use 
std::sync::Arc; - -#[throws(ConnectorXPythonError)] -pub fn get_meta<'a>(py: Python<'a>, conn: &str, protocol: &str, query: String) -> &'a PyAny { - let source_conn = SourceConn::try_from(conn)?; - let mut destination = PandasDestination::new(py); - let queries = &[CXQuery::Naked(query)]; - - match source_conn.ty { - SourceType::Postgres => { - debug!("Protocol: {}", protocol); - let (config, tls) = rewrite_tls_args(&source_conn.conn)?; - match (protocol, tls) { - ("csv", Some(tls_conn)) => { - let sb = - PostgresSource::::new(config, tls_conn, 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("csv", None) => { - let sb = PostgresSource::::new(config, NoTls, 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("binary", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, tls_conn, 1, - )?; - let mut dispatcher = - Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new(sb, &mut destination, queries, None); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("binary", None) => { - let sb = PostgresSource::::new(config, NoTls, 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("cursor", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, tls_conn, 1, - )?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("cursor", None) => { - let sb = PostgresSource::::new(config, NoTls, 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - 
>::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("simple", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, tls_conn, 1, - )?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - ("simple", None) => { - let sb = PostgresSource::::new(config, NoTls, 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - _ => unimplemented!("{} protocol not supported", protocol), - } - } - SourceType::SQLite => { - // remove the first "sqlite://" manually since url.path is not correct for windows - let path = &source_conn.conn.as_str()[9..]; - let source = SQLiteSource::new(path, 1)?; - let mut dispatcher = Dispatcher::<_, _, SqlitePandasTransport>::new( - source, - &mut destination, - queries, - None, - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - SourceType::MySQL => { - debug!("Protocol: {}", protocol); - match protocol { - "binary" => { - let source = MySQLSource::::new(&source_conn.conn[..], 1)?; - let mut dispatcher = Dispatcher::< - _, - _, - MysqlPandasTransport, - >::new( - source, &mut destination, queries, None - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - "text" => { - let source = MySQLSource::::new(&source_conn.conn[..], 1)?; - let mut dispatcher = - Dispatcher::<_, _, MysqlPandasTransport>::new( - source, - &mut destination, - queries, - None, - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - _ => unimplemented!("{} protocol not supported", protocol), - } - } - SourceType::MsSQL => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = MsSQLSource::new(rt, &source_conn.conn[..], 1)?; - let mut 
dispatcher = Dispatcher::<_, _, MsSQLPandasTransport>::new( - source, - &mut destination, - queries, - None, - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - SourceType::Oracle => { - let source = OracleSource::new(&source_conn.conn[..], 1)?; - let mut dispatcher = Dispatcher::<_, _, OraclePandasTransport>::new( - source, - &mut destination, - queries, - None, - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - SourceType::BigQuery => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = BigQuerySource::new(rt, &source_conn.conn[..])?; - let mut dispatcher = Dispatcher::<_, _, BigQueryPandasTransport>::new( - source, - &mut destination, - queries, - None, - ); - debug!("Running dispatcher"); - dispatcher.get_meta()?; - } - _ => unimplemented!("{:?} not implemented!", source_conn.ty), - } - - destination.result()? -} diff --git a/connectorx-python/src/pandas/mod.rs b/connectorx-python/src/pandas/mod.rs deleted file mode 100644 index be2e419..0000000 --- a/connectorx-python/src/pandas/mod.rs +++ /dev/null @@ -1,237 +0,0 @@ -mod destination; -pub mod get_meta; -mod pandas_columns; -mod pystring; -mod transports; -mod typesystem; - -pub use self::destination::{PandasBlockInfo, PandasDestination, PandasPartitionDestination}; -pub use self::transports::{ - BigQueryPandasTransport, MsSQLPandasTransport, MysqlPandasTransport, OraclePandasTransport, - PostgresPandasTransport, SqlitePandasTransport, -}; -pub use self::typesystem::{PandasDType, PandasTypeSystem}; -use crate::errors::ConnectorXPythonError; -use connectorx::source_router::{SourceConn, SourceType}; -use connectorx::sources::oracle::OracleSource; -use connectorx::{ - prelude::*, - sources::{ - mysql::{BinaryProtocol as MySQLBinaryProtocol, TextProtocol}, - postgres::{ - rewrite_tls_args, BinaryProtocol as PgBinaryProtocol, CSVProtocol, CursorProtocol, - SimpleProtocol, - }, - }, - sql::CXQuery, -}; -use fehler::throws; -use 
log::debug; -use postgres::NoTls; -use postgres_openssl::MakeTlsConnector; -use pyo3::{PyAny, Python}; -use std::sync::Arc; - -#[throws(ConnectorXPythonError)] -pub fn write_pandas<'a>( - py: Python<'a>, - source_conn: &SourceConn, - origin_query: Option, - queries: &[CXQuery], -) -> &'a PyAny { - let mut destination = PandasDestination::new(py); - let protocol = source_conn.proto.as_str(); - debug!("Protocol: {}", protocol); - - // TODO: unlock gil if possible - match source_conn.ty { - SourceType::Postgres => { - let (config, tls) = rewrite_tls_args(&source_conn.conn)?; - match (protocol, tls) { - ("csv", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("csv", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let dispatcher = - Dispatcher::<_, _, PostgresPandasTransport>::new( - sb, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - ("binary", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = - Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new(sb, &mut destination, queries, origin_query); - dispatcher.run()?; - } - ("binary", None) => { - let sb = PostgresSource::::new( - config, - NoTls, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("cursor", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("cursor", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let 
dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("simple", Some(tls_conn)) => { - let sb = PostgresSource::::new( - config, - tls_conn, - queries.len(), - )?; - let dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - ("simple", None) => { - let sb = - PostgresSource::::new(config, NoTls, queries.len())?; - let dispatcher = Dispatcher::< - _, - _, - PostgresPandasTransport, - >::new( - sb, &mut destination, queries, origin_query - ); - dispatcher.run()?; - } - _ => unimplemented!("{} protocol not supported", protocol), - } - } - SourceType::SQLite => { - // remove the first "sqlite://" manually since url.path is not correct for windows - let path = &source_conn.conn.as_str()[9..]; - let source = SQLiteSource::new(path, queries.len())?; - let dispatcher = Dispatcher::<_, _, SqlitePandasTransport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - SourceType::MySQL => match protocol { - "binary" => { - let source = - MySQLSource::::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MysqlPandasTransport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - "text" => { - let source = - MySQLSource::::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MysqlPandasTransport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - _ => unimplemented!("{} protocol not supported", protocol), - }, - SourceType::MsSQL => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = MsSQLSource::new(rt, &source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, MsSQLPandasTransport>::new( - source, - &mut destination, - queries, - 
origin_query, - ); - dispatcher.run()?; - } - SourceType::Oracle => { - let source = OracleSource::new(&source_conn.conn[..], queries.len())?; - let dispatcher = Dispatcher::<_, _, OraclePandasTransport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - SourceType::BigQuery => { - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - let source = BigQuerySource::new(rt, &source_conn.conn[..])?; - let dispatcher = Dispatcher::<_, _, BigQueryPandasTransport>::new( - source, - &mut destination, - queries, - origin_query, - ); - dispatcher.run()?; - } - _ => unimplemented!("{:?} not implemented!", source_conn.ty), - } - - destination.result()? -} diff --git a/connectorx-python/src/pandas/pandas_columns/array.rs b/connectorx-python/src/pandas/pandas_columns/array.rs deleted file mode 100644 index f084a1a..0000000 --- a/connectorx-python/src/pandas/pandas_columns/array.rs +++ /dev/null @@ -1,266 +0,0 @@ -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject, GIL_MUTEX}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use ndarray::{ArrayViewMut2, Axis, Ix2}; -use numpy::{npyffi::NPY_TYPES, Element, PyArray, PyArrayDescr}; -use pyo3::{FromPyObject, Py, PyAny, PyResult, Python, ToPyObject}; -use std::any::TypeId; -use std::marker::PhantomData; - -#[derive(Clone)] -#[repr(transparent)] -pub struct PyList(Py); - -// In order to put it into a numpy array -unsafe impl Element for PyList { - const DATA_TYPE: numpy::DataType = numpy::DataType::Object; - fn is_same_type(dtype: &PyArrayDescr) -> bool { - unsafe { *dtype.as_dtype_ptr() }.type_num == NPY_TYPES::NPY_OBJECT as i32 - } -} - -pub struct ArrayBlock<'a, V> { - data: ArrayViewMut2<'a, PyList>, - buf_size_mb: usize, - _value_type: PhantomData, -} - -impl<'a, V> FromPyObject<'a> for ArrayBlock<'a, V> { - fn extract(ob: &'a PyAny) -> PyResult { - check_dtype(ob, "object")?; - let array = 
ob.downcast::>()?; - let data = unsafe { array.as_array_mut() }; - Ok(ArrayBlock:: { - data, - buf_size_mb: 16, // in MB - _value_type: PhantomData, - }) - } -} - -impl<'a, V> ArrayBlock<'a, V> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec> { - let mut ret = vec![]; - let mut view = self.data; - - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(ArrayColumn:: { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted FloatArray data"))? - .as_mut_ptr(), - lengths: vec![], - row_idx: vec![], - buffer: Vec::with_capacity(self.buf_size_mb * (1 << 17) * 11 / 10), // allocate a little bit more memory to avoid Vec growth - buf_size: self.buf_size_mb * (1 << 17), - }) - } - ret - } -} - -pub struct ArrayColumn { - data: *mut PyList, - buffer: Vec, - lengths: Vec, // usize::MAX if the string is None - row_idx: Vec, - buf_size: usize, -} - -unsafe impl Send for ArrayColumn {} -unsafe impl Sync for ArrayColumn {} - -impl PandasColumnObject for ArrayColumn -where - V: Send + ToPyObject, -{ - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::() || id == TypeId::of::>() - } - - fn typename(&self) -> &'static str { - std::any::type_name::() - } - - #[throws(ConnectorXPythonError)] - fn finalize(&mut self) { - self.flush()?; - } -} - -impl PandasColumn> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Vec, row: usize) { - self.lengths.push(val.len()); - self.buffer.extend_from_slice(&val[..]); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn>> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - match val { - Some(v) => { - self.lengths.push(v.len()); - self.buffer.extend_from_slice(&v[..]); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} 
-impl PandasColumn> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Vec, row: usize) { - self.lengths.push(val.len()); - self.buffer.extend_from_slice(&val[..]); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn>> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - match val { - Some(v) => { - self.lengths.push(v.len()); - self.buffer.extend_from_slice(&v[..]); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl PandasColumn> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Vec, row: usize) { - self.lengths.push(val.len()); - self.buffer.extend_from_slice(&val[..]); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn>> for ArrayColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - match val { - Some(v) => { - self.lengths.push(v.len()); - self.buffer.extend_from_slice(&v[..]); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl HasPandasColumn for Vec { - type PandasColumn<'a> = ArrayColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = ArrayColumn; -} - -impl HasPandasColumn for Vec { - type PandasColumn<'a> = ArrayColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = ArrayColumn; -} - -impl HasPandasColumn for Vec { - type PandasColumn<'a> = ArrayColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = ArrayColumn; -} -impl ArrayColumn -where - V: Send + ToPyObject, -{ - pub fn partition(self, counts: usize) -> Vec> { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(ArrayColumn { - data: self.data, - lengths: vec![], - row_idx: vec![], - buffer: Vec::with_capacity(self.buf_size), - buf_size: 
self.buf_size, - }); - } - partitions - } - - #[throws(ConnectorXPythonError)] - pub fn flush(&mut self) { - let nvecs = self.lengths.len(); - - if nvecs > 0 { - let py = unsafe { Python::assume_gil_acquired() }; - - { - // allocation in python is not thread safe - let _guard = GIL_MUTEX - .lock() - .map_err(|e| anyhow!("mutex poisoned {}", e))?; - let mut start = 0; - for (i, &len) in self.lengths.iter().enumerate() { - if len != usize::MAX { - let end = start + len; - unsafe { - // allocate and write in the same time - *self.data.add(self.row_idx[i]) = PyList( - pyo3::types::PyList::new(py, &self.buffer[start..end]).into(), - ); - }; - start = end; - } else { - unsafe { - let n = Py::from_borrowed_ptr(py, pyo3::ffi::Py_None()); - *self.data.add(self.row_idx[i]) = PyList(n); - } - } - } - } - - self.buffer.truncate(0); - self.lengths.truncate(0); - self.row_idx.truncate(0); - } - } - - #[throws(ConnectorXPythonError)] - pub fn try_flush(&mut self) { - if self.buffer.len() >= self.buf_size { - self.flush()?; - } - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/boolean.rs b/connectorx-python/src/pandas/pandas_columns/boolean.rs deleted file mode 100644 index aab01a3..0000000 --- a/connectorx-python/src/pandas/pandas_columns/boolean.rs +++ /dev/null @@ -1,143 +0,0 @@ -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use ndarray::{ArrayViewMut1, ArrayViewMut2, Axis, Ix2}; -use numpy::{PyArray, PyArray1}; -use pyo3::{types::PyTuple, FromPyObject, PyAny, PyResult}; -use std::any::TypeId; - -// Boolean -pub enum BooleanBlock<'a> { - NumPy(ArrayViewMut2<'a, bool>), - Extention(ArrayViewMut1<'a, bool>, ArrayViewMut1<'a, bool>), -} -impl<'a> FromPyObject<'a> for BooleanBlock<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - if let Ok(array) = ob.downcast::>() { - // if numpy array - check_dtype(ob, "bool")?; - let data = unsafe { 
array.as_array_mut() }; - Ok(BooleanBlock::NumPy(data)) - } else { - // if extension array - let tuple = ob.downcast::()?; - let data = tuple.get_item(0)?; - let mask = tuple.get_item(1)?; - check_dtype(data, "bool")?; - check_dtype(mask, "bool")?; - - Ok(BooleanBlock::Extention( - unsafe { data.downcast::>()?.as_array_mut() }, - unsafe { mask.downcast::>()?.as_array_mut() }, - )) - } - } -} - -impl<'a> BooleanBlock<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - match self { - BooleanBlock::Extention(data, mask) => ret.push(BooleanColumn { - data: data - .into_slice() - .ok_or_else(|| anyhow!("get None for Boolean data"))? - .as_mut_ptr(), - mask: Some( - mask.into_slice() - .ok_or_else(|| anyhow!("get None for Boolean mask"))? - .as_mut_ptr(), - ), - }), - BooleanBlock::NumPy(mut view) => { - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(BooleanColumn { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted Boolean data"))? 
- .as_mut_ptr(), - mask: None, - }) - } - } - } - ret - } -} - -pub struct BooleanColumn { - data: *mut bool, - mask: Option<*mut bool>, -} - -unsafe impl Send for BooleanColumn {} -unsafe impl Sync for BooleanColumn {} - -impl PandasColumnObject for BooleanColumn { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::() || id == TypeId::of::>() - } - fn typename(&self) -> &'static str { - std::any::type_name::() - } -} - -impl PandasColumn for BooleanColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: bool, row: usize) { - unsafe { *self.data.add(row) = val }; - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = false }; - } - } -} - -impl PandasColumn> for BooleanColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option, row: usize) { - match val { - Some(val) => { - unsafe { *self.data.add(row) = val }; - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = false }; - } - } - None => { - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = true }; - } else { - panic!("Writing null u64 to not null pandas array") - } - } - } - } -} - -impl HasPandasColumn for bool { - type PandasColumn<'a> = BooleanColumn; -} - -impl HasPandasColumn for Option { - type PandasColumn<'a> = BooleanColumn; -} - -impl BooleanColumn { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(BooleanColumn { - data: self.data, - mask: self.mask, - }); - } - - partitions - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/bytes.rs b/connectorx-python/src/pandas/pandas_columns/bytes.rs deleted file mode 100644 index 6890df9..0000000 --- a/connectorx-python/src/pandas/pandas_columns/bytes.rs +++ /dev/null @@ -1,222 +0,0 @@ -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject, GIL_MUTEX}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use 
ndarray::{ArrayViewMut2, Axis, Ix2}; -use numpy::{npyffi::NPY_TYPES, Element, PyArray, PyArrayDescr}; -use pyo3::{FromPyObject, Py, PyAny, PyResult, Python}; -use std::any::TypeId; - -#[derive(Clone)] -#[repr(transparent)] -pub struct PyBytes(Py); - -// In order to put it into a numpy array -unsafe impl Element for PyBytes { - const DATA_TYPE: numpy::DataType = numpy::DataType::Object; - fn is_same_type(dtype: &PyArrayDescr) -> bool { - unsafe { *dtype.as_dtype_ptr() }.type_num == NPY_TYPES::NPY_OBJECT as i32 - } -} - -pub struct BytesBlock<'a> { - data: ArrayViewMut2<'a, PyBytes>, - buf_size_mb: usize, -} - -impl<'a> FromPyObject<'a> for BytesBlock<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - check_dtype(ob, "object")?; - let array = ob.downcast::>()?; - let data = unsafe { array.as_array_mut() }; - Ok(BytesBlock { - data, - buf_size_mb: 16, // in MB - }) - } -} - -impl<'a> BytesBlock<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - let mut view = self.data; - - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(BytesColumn { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted String data"))? 
- .as_mut_ptr(), - bytes_lengths: vec![], - row_idx: vec![], - bytes_buf: Vec::with_capacity(self.buf_size_mb * (1 << 20) * 11 / 10), // allocate a little bit more memory to avoid Vec growth - buf_size: self.buf_size_mb * (1 << 20), - }) - } - ret - } -} - -pub struct BytesColumn { - data: *mut PyBytes, - bytes_buf: Vec, - bytes_lengths: Vec, // usize::MAX if the string is None - row_idx: Vec, - buf_size: usize, -} - -unsafe impl Send for BytesColumn {} -unsafe impl Sync for BytesColumn {} - -impl PandasColumnObject for BytesColumn { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::<&'static [u8]>() || id == TypeId::of::>() - } - fn typename(&self) -> &'static str { - std::any::type_name::<&'static [u8]>() - } - #[throws(ConnectorXPythonError)] - fn finalize(&mut self) { - self.flush()?; - } -} - -impl PandasColumn> for BytesColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Vec, row: usize) { - self.bytes_lengths.push(val.len()); - self.bytes_buf.extend_from_slice(&val[..]); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl<'r> PandasColumn<&'r [u8]> for BytesColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: &'r [u8], row: usize) { - self.bytes_lengths.push(val.len()); - self.bytes_buf.extend_from_slice(val); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn>> for BytesColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - match val { - Some(b) => { - self.bytes_lengths.push(b.len()); - self.bytes_buf.extend_from_slice(&b[..]); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.bytes_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl<'r> PandasColumn> for BytesColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option<&'r [u8]>, row: usize) { - match val { - Some(b) => { - self.bytes_lengths.push(b.len()); - self.bytes_buf.extend_from_slice(b); - self.row_idx.push(row); - 
self.try_flush()?; - } - None => { - self.bytes_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl HasPandasColumn for Vec { - type PandasColumn<'a> = BytesColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = BytesColumn; -} - -impl<'r> HasPandasColumn for &'r [u8] { - type PandasColumn<'a> = BytesColumn; -} - -impl<'r> HasPandasColumn for Option<&'r [u8]> { - type PandasColumn<'a> = BytesColumn; -} - -impl BytesColumn { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(BytesColumn { - data: self.data, - bytes_lengths: vec![], - row_idx: vec![], - bytes_buf: Vec::with_capacity(self.buf_size), - buf_size: self.buf_size, - }); - } - - partitions - } - - #[throws(ConnectorXPythonError)] - pub fn flush(&mut self) { - let nstrings = self.bytes_lengths.len(); - - if nstrings > 0 { - let py = unsafe { Python::assume_gil_acquired() }; - - { - // allocation in python is not thread safe - let _guard = GIL_MUTEX - .lock() - .map_err(|e| anyhow!("mutex poisoned {}", e))?; - let mut start = 0; - for (i, &len) in self.bytes_lengths.iter().enumerate() { - if len != usize::MAX { - let end = start + len; - unsafe { - // allocate and write in the same time - *self.data.add(self.row_idx[i]) = PyBytes( - pyo3::types::PyBytes::new(py, &self.bytes_buf[start..end]).into(), - ); - }; - start = end; - } else { - unsafe { - let b = Py::from_borrowed_ptr(py, pyo3::ffi::Py_None()); - *self.data.add(self.row_idx[i]) = PyBytes(b); - } - } - } - } - - self.bytes_buf.truncate(0); - self.bytes_lengths.truncate(0); - self.row_idx.truncate(0); - } - } - - #[throws(ConnectorXPythonError)] - pub fn try_flush(&mut self) { - if self.bytes_buf.len() >= self.buf_size { - self.flush()?; - } - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/datetime.rs b/connectorx-python/src/pandas/pandas_columns/datetime.rs deleted file mode 100644 index 1c927d9..0000000 --- 
a/connectorx-python/src/pandas/pandas_columns/datetime.rs +++ /dev/null @@ -1,100 +0,0 @@ -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use chrono::{DateTime, Utc}; -use fehler::throws; -use ndarray::{ArrayViewMut2, Axis, Ix2}; -use numpy::PyArray; -use pyo3::{FromPyObject, PyAny, PyResult}; -use std::any::TypeId; - -// datetime64 is represented in int64 in numpy -// https://github.com/numpy/numpy/blob/master/numpy/core/include/numpy/npy_common.h#L1104 -pub struct DateTimeBlock<'a> { - data: ArrayViewMut2<'a, i64>, -} - -impl<'a> FromPyObject<'a> for DateTimeBlock<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - check_dtype(ob, "int64")?; - let array = ob.downcast::>()?; - let data = unsafe { array.as_array_mut() }; - Ok(DateTimeBlock { data }) - } -} - -impl<'a> DateTimeBlock<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - let mut view = self.data; - - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(DateTimeColumn { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted DateTime data"))? 
- .as_mut_ptr(), - }) - } - ret - } -} - -pub struct DateTimeColumn { - data: *mut i64, -} - -unsafe impl Send for DateTimeColumn {} -unsafe impl Sync for DateTimeColumn {} - -impl PandasColumnObject for DateTimeColumn { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::>() || id == TypeId::of::>>() - } - - fn typename(&self) -> &'static str { - std::any::type_name::>() - } -} - -impl PandasColumn> for DateTimeColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: DateTime, row: usize) { - unsafe { *self.data.add(row) = val.timestamp_nanos() }; - } -} - -impl PandasColumn>> for DateTimeColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - // numpy use i64::MIN as NaT - unsafe { - *self.data.add(row) = val.map(|t| t.timestamp_nanos()).unwrap_or(i64::MIN); - }; - } -} - -impl HasPandasColumn for DateTime { - type PandasColumn<'a> = DateTimeColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = DateTimeColumn; -} - -impl DateTimeColumn { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(DateTimeColumn { data: self.data }); - } - - partitions - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/float64.rs b/connectorx-python/src/pandas/pandas_columns/float64.rs deleted file mode 100644 index 7d437bd..0000000 --- a/connectorx-python/src/pandas/pandas_columns/float64.rs +++ /dev/null @@ -1,96 +0,0 @@ -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use ndarray::{ArrayViewMut2, Axis, Ix2}; -use numpy::PyArray; -use pyo3::{FromPyObject, PyAny, PyResult}; -use std::any::TypeId; - -// Float -pub struct Float64Block<'a> { - data: ArrayViewMut2<'a, f64>, -} - -impl<'a> FromPyObject<'a> for Float64Block<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - check_dtype(ob, "float64")?; - let array 
= ob.downcast::>()?; - let data = unsafe { array.as_array_mut() }; - Ok(Float64Block { data }) - } -} - -impl<'a> Float64Block<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - let mut view = self.data; - - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(Float64Column { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted Float64 data"))? - .as_mut_ptr(), - }) - } - ret - } -} - -pub struct Float64Column { - data: *mut f64, -} - -unsafe impl Send for Float64Column {} -unsafe impl Sync for Float64Column {} - -impl<'a> PandasColumnObject for Float64Column { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::() || id == TypeId::of::>() - } - - fn typename(&self) -> &'static str { - std::any::type_name::() - } -} - -impl<'a> PandasColumn for Float64Column { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: f64, row: usize) { - unsafe { *self.data.add(row) = val }; - } -} - -impl<'a> PandasColumn> for Float64Column { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option, row: usize) { - match val { - None => unsafe { *self.data.add(row) = f64::NAN }, - Some(val) => unsafe { *self.data.add(row) = val }, - } - } -} - -impl HasPandasColumn for f64 { - type PandasColumn<'a> = Float64Column; -} - -impl HasPandasColumn for Option { - type PandasColumn<'a> = Float64Column; -} - -impl Float64Column { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - for _ in 0..counts { - partitions.push(Float64Column { data: self.data }); - } - partitions - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/int64.rs b/connectorx-python/src/pandas/pandas_columns/int64.rs deleted file mode 100644 index cfdbfd5..0000000 --- a/connectorx-python/src/pandas/pandas_columns/int64.rs +++ /dev/null @@ -1,142 +0,0 @@ -use super::{check_dtype, 
HasPandasColumn, PandasColumn, PandasColumnObject}; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use ndarray::{ArrayViewMut1, ArrayViewMut2, Axis, Ix2}; -use numpy::{PyArray, PyArray1}; -use pyo3::{types::PyTuple, FromPyObject, PyAny, PyResult}; -use std::any::TypeId; - -pub enum Int64Block<'a> { - NumPy(ArrayViewMut2<'a, i64>), - Extention(ArrayViewMut1<'a, i64>, ArrayViewMut1<'a, bool>), -} -impl<'a> FromPyObject<'a> for Int64Block<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - if let Ok(array) = ob.downcast::>() { - check_dtype(ob, "int64")?; - let data = unsafe { array.as_array_mut() }; - Ok(Int64Block::NumPy(data)) - } else { - let tuple = ob.downcast::()?; - let data = tuple.get_item(0)?; - let mask = tuple.get_item(1)?; - check_dtype(data, "int64")?; - check_dtype(mask, "bool")?; - - Ok(Int64Block::Extention( - unsafe { data.downcast::>()?.as_array_mut() }, - unsafe { mask.downcast::>()?.as_array_mut() }, - )) - } - } -} - -impl<'a> Int64Block<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - match self { - Int64Block::Extention(data, mask) => ret.push(Int64Column { - data: data - .into_slice() - .ok_or_else(|| anyhow!("get None for Int64 data"))? - .as_mut_ptr(), - mask: Some( - mask.into_slice() - .ok_or_else(|| anyhow!("get None for Int64 mask"))? - .as_mut_ptr(), - ), - }), - Int64Block::NumPy(mut view) => { - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(Int64Column { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted Int64 data"))? 
- .as_mut_ptr(), - mask: None, - }) - } - } - } - ret - } -} - -// for uint64 and Int64 -pub struct Int64Column { - data: *mut i64, - mask: Option<*mut bool>, -} - -unsafe impl Send for Int64Column {} -unsafe impl Sync for Int64Column {} - -impl PandasColumnObject for Int64Column { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::() || id == TypeId::of::>() - } - - fn typename(&self) -> &'static str { - std::any::type_name::() - } -} - -impl PandasColumn for Int64Column { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: i64, row: usize) { - unsafe { *self.data.add(row) = val }; - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = false }; - } - } -} - -impl PandasColumn> for Int64Column { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option, row: usize) { - match val { - Some(val) => { - unsafe { *self.data.add(row) = val }; - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = false }; - } - } - None => { - if let Some(mask) = self.mask.as_mut() { - unsafe { *mask.add(row) = true }; - } else { - panic!("Writing null i64 to not null pandas array") - } - } - } - } -} - -impl HasPandasColumn for i64 { - type PandasColumn<'a> = Int64Column; -} - -impl HasPandasColumn for Option { - type PandasColumn<'a> = Int64Column; -} - -impl Int64Column { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(Int64Column { - data: self.data, - mask: self.mask, - }); - } - - partitions - } -} diff --git a/connectorx-python/src/pandas/pandas_columns/mod.rs b/connectorx-python/src/pandas/pandas_columns/mod.rs deleted file mode 100644 index 82ababc..0000000 --- a/connectorx-python/src/pandas/pandas_columns/mod.rs +++ /dev/null @@ -1,58 +0,0 @@ -mod array; -mod boolean; -mod bytes; -mod datetime; -mod float64; -mod int64; -mod string; -// TODO: use macro for integers - -use crate::errors::Result; -pub use 
crate::pandas::pandas_columns::array::{ArrayBlock, ArrayColumn, PyList}; -pub use crate::pandas::pandas_columns::bytes::{BytesBlock, BytesColumn, PyBytes}; -pub use boolean::{BooleanBlock, BooleanColumn}; -pub use datetime::{DateTimeBlock, DateTimeColumn}; -use fehler::throw; -pub use float64::{Float64Block, Float64Column}; -pub use int64::{Int64Block, Int64Column}; -use pyo3::{exceptions::PyRuntimeError, PyAny, PyResult}; -use std::any::TypeId; -use std::sync::Mutex; -pub use string::{StringBlock, StringColumn}; - -// A global GIL lock for Python object allocations like string, bytes and list -lazy_static! { - static ref GIL_MUTEX: Mutex<()> = Mutex::new(()); -} - -pub trait PandasColumnObject: Send { - fn typecheck(&self, _: TypeId) -> bool; - fn typename(&self) -> &'static str; - fn finalize(&mut self) -> Result<()> { - Ok(()) - } -} - -pub trait PandasColumn: Sized + PandasColumnObject { - fn write(&mut self, val: V, row: usize) -> Result<()>; -} - -// Indicates a type has an associated pandas column -pub trait HasPandasColumn: Sized { - type PandasColumn<'a>: PandasColumn; -} - -pub fn check_dtype(ob: &PyAny, expected_dtype: &str) -> PyResult<()> { - let dtype = ob.getattr("dtype")?.str()?; - let dtype = dtype.to_str()?; - if dtype != expected_dtype { - throw!(PyRuntimeError::new_err(format!( - "expecting ndarray to be '{}' found '{}' at {}:{}", - expected_dtype, - dtype, - file!(), - line!() - ))); - } - Ok(()) -} diff --git a/connectorx-python/src/pandas/pandas_columns/string.rs b/connectorx-python/src/pandas/pandas_columns/string.rs deleted file mode 100644 index 5f38a00..0000000 --- a/connectorx-python/src/pandas/pandas_columns/string.rs +++ /dev/null @@ -1,328 +0,0 @@ -use super::super::pystring::{PyString, StringInfo}; -use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject, GIL_MUTEX}; -use crate::constants::PYSTRING_BUFFER_SIZE; -use crate::errors::ConnectorXPythonError; -use anyhow::anyhow; -use fehler::throws; -use 
itertools::Itertools; -use ndarray::{ArrayViewMut2, Axis, Ix2}; -use numpy::PyArray; -use pyo3::{FromPyObject, PyAny, PyResult, Python}; -use std::any::TypeId; - -pub struct StringBlock<'a> { - data: ArrayViewMut2<'a, PyString>, - buf_size_mb: usize, -} - -impl<'a> FromPyObject<'a> for StringBlock<'a> { - fn extract(ob: &'a PyAny) -> PyResult { - check_dtype(ob, "object")?; - let array = ob.downcast::>()?; - let data = unsafe { array.as_array_mut() }; - Ok(StringBlock { - data, - buf_size_mb: PYSTRING_BUFFER_SIZE, // in MB - }) - } -} - -impl<'a> StringBlock<'a> { - #[throws(ConnectorXPythonError)] - pub fn split(self) -> Vec { - let mut ret = vec![]; - let mut view = self.data; - - let nrows = view.ncols(); - while view.nrows() > 0 { - let (col, rest) = view.split_at(Axis(0), 1); - view = rest; - ret.push(StringColumn { - data: col - .into_shape(nrows)? - .into_slice() - .ok_or_else(|| anyhow!("get None for splitted String data"))? - .as_mut_ptr(), - string_lengths: vec![], - row_idx: vec![], - string_buf: Vec::with_capacity(self.buf_size_mb * (1 << 20) * 11 / 10), // allocate a little bit more memory to avoid Vec growth - buf_size: self.buf_size_mb * (1 << 20), - }) - } - ret - } -} - -pub struct StringColumn { - data: *mut PyString, - string_buf: Vec, - string_lengths: Vec, // usize::MAX for empty string - row_idx: Vec, - buf_size: usize, -} - -unsafe impl Send for StringColumn {} -unsafe impl Sync for StringColumn {} - -impl PandasColumnObject for StringColumn { - fn typecheck(&self, id: TypeId) -> bool { - id == TypeId::of::<&'static [u8]>() || id == TypeId::of::>() - } - - fn typename(&self) -> &'static str { - std::any::type_name::<&'static [u8]>() - } - #[throws(ConnectorXPythonError)] - fn finalize(&mut self) { - self.flush(true)?; - } -} - -impl<'r> PandasColumn<&'r str> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: &'r str, row: usize) { - let bytes = val.as_bytes(); - self.string_lengths.push(bytes.len()); - 
self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Box, row: usize) { - let bytes = val.as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: String, row: usize) { - let bytes = val.as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl PandasColumn for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: char, row: usize) { - let mut buffer = [0; 4]; // a char is max to 4 bytes - let bytes = val.encode_utf8(&mut buffer).as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } -} - -impl<'r> PandasColumn> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option<&'r str>, row: usize) { - match val { - Some(b) => { - let bytes = b.as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.string_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl PandasColumn>> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option>, row: usize) { - match val { - Some(b) => { - let bytes = b.as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.string_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} -impl PandasColumn> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option, 
row: usize) { - match val { - Some(b) => { - let bytes = b.as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.string_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl PandasColumn> for StringColumn { - #[throws(ConnectorXPythonError)] - fn write(&mut self, val: Option, row: usize) { - match val { - Some(b) => { - let mut buffer = [0; 4]; // a char is max to 4 bytes - let bytes = b.encode_utf8(&mut buffer).as_bytes(); - self.string_lengths.push(bytes.len()); - self.string_buf.extend_from_slice(bytes); - self.row_idx.push(row); - self.try_flush()?; - } - None => { - self.string_lengths.push(usize::MAX); - self.row_idx.push(row); - } - } - } -} - -impl<'r> HasPandasColumn for &'r str { - type PandasColumn<'a> = StringColumn; -} - -impl<'r> HasPandasColumn for Option<&'r str> { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for String { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for Option { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for char { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for Option { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for Box { - type PandasColumn<'a> = StringColumn; -} - -impl HasPandasColumn for Option> { - type PandasColumn<'a> = StringColumn; -} - -impl StringColumn { - pub fn partition(self, counts: usize) -> Vec { - let mut partitions = vec![]; - - for _ in 0..counts { - partitions.push(StringColumn { - data: self.data, - string_lengths: vec![], - row_idx: vec![], - string_buf: Vec::with_capacity(self.buf_size), - buf_size: self.buf_size, - }); - } - - partitions - } - - #[throws(ConnectorXPythonError)] - pub fn flush(&mut self, force: bool) { - let nstrings = self.string_lengths.len(); - if nstrings == 0 { - return; - } - - let guard = if force { - GIL_MUTEX - .lock() - .map_err(|e| 
anyhow!("mutex poisoned {}", e))? - } else { - match GIL_MUTEX.try_lock() { - Ok(guard) => guard, - Err(_) => return, - } - }; - let py = unsafe { Python::assume_gil_acquired() }; - - let mut string_infos = Vec::with_capacity(self.string_lengths.len()); - let mut start = 0; - for (i, &len) in self.string_lengths.iter().enumerate() { - if len != usize::MAX { - let end = start + len; - - unsafe { - let string_info = StringInfo::detect(&self.string_buf[start..end]); - *self.data.add(self.row_idx[i]) = string_info.pystring(py); - string_infos.push(Some(string_info)); - }; - - start = end; - } else { - string_infos.push(None); - - unsafe { *self.data.add(self.row_idx[i]) = PyString::none(py) }; - } - } - - // unlock GIL - std::mem::drop(guard); - - if !string_infos.is_empty() { - let mut start = 0; - for (i, (len, info)) in self - .string_lengths - .drain(..) - .zip_eq(string_infos) - .enumerate() - { - if len != usize::MAX { - let end = start + len; - unsafe { - (*self.data.add(self.row_idx[i])) - .write(&self.string_buf[start..end], info.unwrap()) - }; - - start = end; - } - } - - self.string_buf.truncate(0); - self.row_idx.truncate(0); - } - } - - #[throws(ConnectorXPythonError)] - pub fn try_flush(&mut self) { - if self.string_buf.len() >= self.buf_size { - self.flush(true)?; - return; - } - #[cfg(feature = "nbstr")] - if self.string_buf.len() >= self.buf_size / 2 { - self.flush(false)?; - } - } -} diff --git a/connectorx-python/src/pandas/pystring.rs b/connectorx-python/src/pandas/pystring.rs deleted file mode 100644 index 84f39d5..0000000 --- a/connectorx-python/src/pandas/pystring.rs +++ /dev/null @@ -1,165 +0,0 @@ -use bitfield::bitfield; -use numpy::{npyffi::NPY_TYPES, Element, PyArrayDescr}; -use pyo3::{ffi, Py, Python}; -use std::str::from_utf8_unchecked; - -#[derive(Clone, Debug)] -#[repr(transparent)] -pub struct PyString(Py); - -// In order to put it into a numpy array -unsafe impl Element for PyString { - const DATA_TYPE: numpy::DataType = 
numpy::DataType::Object; - fn is_same_type(dtype: &PyArrayDescr) -> bool { - unsafe { *dtype.as_dtype_ptr() }.type_num == NPY_TYPES::NPY_OBJECT as i32 - } -} - -#[derive(Clone, Copy)] -pub enum StringInfo { - ASCII(usize), // len of the string, not byte length - UCS1(usize), - UCS2(usize), - UCS4(usize), -} - -impl StringInfo { - pub unsafe fn detect(s: &[u8]) -> StringInfo { - let s = from_utf8_unchecked(s); - let mut maxchar = 0; - let mut len = 0; - - for ch in s.chars() { - if ch as u32 > maxchar { - maxchar = ch as u32; - } - len += 1; - } - - if maxchar <= 0x7F { - StringInfo::ASCII(len) - } else if maxchar <= 0xFF { - StringInfo::UCS1(len) - } else if maxchar <= 0xFFFF { - StringInfo::UCS2(len) - } else { - StringInfo::UCS4(len) - } - } - - pub fn pystring(&self, py: Python) -> PyString { - let objptr = unsafe { - match self { - StringInfo::ASCII(len) => ffi::PyUnicode_New(*len as ffi::Py_ssize_t, 0x7F), - StringInfo::UCS1(len) => ffi::PyUnicode_New(*len as ffi::Py_ssize_t, 0xFF), - StringInfo::UCS2(len) => ffi::PyUnicode_New(*len as ffi::Py_ssize_t, 0xFFFF), - StringInfo::UCS4(len) => ffi::PyUnicode_New(*len as ffi::Py_ssize_t, 0x10FFFF), - } - }; - - let s: Py = unsafe { Py::from_owned_ptr(py, objptr) }; - - PyString(s) - } -} - -impl PyString { - // get none string converted from none object, otherwise default strings are zeros - pub fn none(py: Python) -> PyString { - // this is very unsafe because Py_None is not a PyString from Rust's perspective. 
But it is fine because - // later these stuff will all be converted to a python object - let s = unsafe { Py::from_borrowed_ptr(py, ffi::Py_None()) }; - PyString(s) - } - - // the val should be same as the val used for new - pub unsafe fn write(&mut self, data: &[u8], info: StringInfo) { - match info { - StringInfo::ASCII(len) => { - let pyobj = PyASCIIObject::from_mut_ref(&mut self.0); - let buf = std::slice::from_raw_parts_mut( - (pyobj as *mut PyASCIIObject).offset(1) as *mut u8, - len as usize, - ); - - buf.copy_from_slice(data); - } - StringInfo::UCS1(len) => { - let pyobj = PyCompactUnicodeObject::from_mut_ref(&mut self.0); - let buf = std::slice::from_raw_parts_mut( - (pyobj as *mut PyCompactUnicodeObject).offset(1) as *mut u8, - len as usize, - ); - let data: Vec = from_utf8_unchecked(data).chars().map(|c| c as u8).collect(); - buf.copy_from_slice(&data); - } - StringInfo::UCS2(len) => { - let pyobj = PyCompactUnicodeObject::from_mut_ref(&mut self.0); - let buf = std::slice::from_raw_parts_mut( - (pyobj as *mut PyCompactUnicodeObject).offset(1) as *mut u16, - len as usize, - ); - let data: Vec = from_utf8_unchecked(data) - .chars() - .map(|c| c as u16) - .collect(); - buf.copy_from_slice(&data); - } - StringInfo::UCS4(len) => { - let pyobj = PyCompactUnicodeObject::from_mut_ref(&mut self.0); - let buf = std::slice::from_raw_parts_mut( - (pyobj as *mut PyCompactUnicodeObject).offset(1) as *mut u32, - len as usize, - ); - let data: Vec = from_utf8_unchecked(data) - .chars() - .map(|c| c as u32) - .collect(); - buf.copy_from_slice(&data); - } - } - } -} - -bitfield! 
{ - struct PyUnicodeState(u32); - u32; - interned, _: 1, 0; - kind, _: 4, 2; - compact, _: 5, 5; - ascii, _: 6, 6; - ready, _: 7, 7; -} - -#[repr(C)] -pub struct PyASCIIObject { - obj: ffi::PyObject, - length: ffi::Py_ssize_t, - hash: ffi::Py_hash_t, - state: PyUnicodeState, - wstr: *mut u8, - // python string stores data right after all the fields -} - -impl PyASCIIObject { - pub unsafe fn from_mut_ref<'a>(obj: &'a mut Py) -> &'a mut Self { - let ascii: &mut &mut PyASCIIObject = std::mem::transmute(obj); - *ascii - } -} - -#[repr(C)] -pub struct PyCompactUnicodeObject { - base: PyASCIIObject, - utf8_length: ffi::Py_ssize_t, - utf8: *mut u8, - wstr_length: ffi::Py_ssize_t, - // python string stores data right after all the fields -} - -impl PyCompactUnicodeObject { - pub unsafe fn from_mut_ref<'a>(obj: &'a mut Py) -> &'a mut Self { - let unicode: &mut &mut PyCompactUnicodeObject = std::mem::transmute(obj); - *unicode - } -} diff --git a/connectorx-python/src/pandas/transports/bigquery.rs b/connectorx-python/src/pandas/transports/bigquery.rs deleted file mode 100644 index 2648662..0000000 --- a/connectorx-python/src/pandas/transports/bigquery.rs +++ /dev/null @@ -1,56 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use crate::pandas::destination::PandasDestination; -use crate::pandas::typesystem::PandasTypeSystem; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use connectorx::{ - impl_transport, - sources::bigquery::{BigQuerySource, BigQueryTypeSystem}, - typesystem::TypeConversion, -}; - -pub struct BigQueryPandasTransport<'py>(&'py ()); - -impl_transport!( - name = BigQueryPandasTransport<'tp>, - error = ConnectorXPythonError, - systems = BigQueryTypeSystem => PandasTypeSystem, - route = BigQuerySource => PandasDestination<'tp>, - mappings = { - { Bool[bool] => Bool[bool] | conversion auto } - { Boolean[bool] => Bool[bool] | conversion none } - { Int64[i64] => I64[i64] | conversion auto } - { Integer[i64] => I64[i64] | conversion none } - 
{ Float64[f64] => F64[f64] | conversion auto } - { Float[f64] => F64[f64] | conversion none } - { Numeric[f64] => F64[f64] | conversion none } - { Bignumeric[f64] => F64[f64] | conversion none } - { String[String] => String[String] | conversion auto } - { Bytes[String] => String[String] | conversion none } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { Datetime[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Time[NaiveTime] => String[String] | conversion option } - { Timestamp[DateTime] => DateTime[DateTime] | conversion auto } - } -); - -impl<'py> TypeConversion> for BigQueryPandasTransport<'py> { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("and_hms_opt got None from {:?}", val)), - Utc, - ) - } -} - -impl<'py> TypeConversion> for BigQueryPandasTransport<'py> { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl<'py> TypeConversion for BigQueryPandasTransport<'py> { - fn convert(val: NaiveTime) -> String { - val.to_string() - } -} diff --git a/connectorx-python/src/pandas/transports/mod.rs b/connectorx-python/src/pandas/transports/mod.rs deleted file mode 100644 index 9f03abf..0000000 --- a/connectorx-python/src/pandas/transports/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -mod bigquery; -mod mssql; -mod mysql; -mod oracle; -mod postgres; -mod sqlite; - -pub use self::postgres::PostgresPandasTransport; -pub use bigquery::BigQueryPandasTransport; -pub use mssql::MsSQLPandasTransport; -pub use mysql::MysqlPandasTransport; -pub use oracle::OraclePandasTransport; -pub use sqlite::SqlitePandasTransport; diff --git a/connectorx-python/src/pandas/transports/mssql.rs b/connectorx-python/src/pandas/transports/mssql.rs deleted file mode 100644 index d8a1745..0000000 --- a/connectorx-python/src/pandas/transports/mssql.rs +++ /dev/null @@ -1,97 +0,0 @@ -use crate::errors::ConnectorXPythonError; 
-use crate::pandas::{destination::PandasDestination, typesystem::PandasTypeSystem}; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use connectorx::{ - impl_transport, - sources::mssql::{FloatN, IntN, MsSQLSource, MsSQLTypeSystem}, - typesystem::TypeConversion, -}; -use rust_decimal::prelude::*; -use uuid::Uuid; - -pub struct MsSQLPandasTransport<'py>(&'py ()); - -impl_transport!( - name = MsSQLPandasTransport<'tp>, - error = ConnectorXPythonError, - systems = MsSQLTypeSystem => PandasTypeSystem, - route = MsSQLSource => PandasDestination<'tp>, - mappings = { - { Tinyint[u8] => I64[i64] | conversion auto } - { Smallint[i16] => I64[i64] | conversion auto } - { Int[i32] => I64[i64] | conversion auto } - { Bigint[i64] => I64[i64] | conversion auto } - { Intn[IntN] => I64[i64] | conversion option } - { Float24[f32] => F64[f64] | conversion auto } - { Float53[f64] => F64[f64] | conversion auto } - { Floatn[FloatN] => F64[f64] | conversion option } - { Bit[bool] => Bool[bool] | conversion auto } - { Nvarchar[&'r str] => Str[&'r str] | conversion auto } - { Varchar[&'r str] => Str[&'r str] | conversion none } - { Nchar[&'r str] => Str[&'r str] | conversion none } - { Char[&'r str] => Str[&'r str] | conversion none } - { Text[&'r str] => Str[&'r str] | conversion none } - { Ntext[&'r str] => Str[&'r str] | conversion none } - { Binary[&'r [u8]] => ByteSlice[&'r [u8]] | conversion auto } - { Varbinary[&'r [u8]] => ByteSlice[&'r [u8]] | conversion none } - { Image[&'r [u8]] => ByteSlice[&'r [u8]] | conversion none } - { Numeric[Decimal] => F64[f64] | conversion option } - { Decimal[Decimal] => F64[f64] | conversion none } - { Datetime[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Datetime2[NaiveDateTime] => DateTime[DateTime] | conversion none } - { Smalldatetime[NaiveDateTime] => DateTime[DateTime] | conversion none } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { Datetimeoffset[DateTime] => DateTime[DateTime] | 
conversion auto } - { Uniqueidentifier[Uuid] => String[String] | conversion option } - { Time[NaiveTime] => String[String] | conversion option } - { SmallMoney[f32] => F64[f64] | conversion none } - { Money[f64] => F64[f64] | conversion none } - } -); - -impl<'py> TypeConversion for MsSQLPandasTransport<'py> { - fn convert(val: IntN) -> i64 { - val.0 - } -} - -impl<'py> TypeConversion for MsSQLPandasTransport<'py> { - fn convert(val: FloatN) -> f64 { - val.0 - } -} - -impl<'py> TypeConversion> for MsSQLPandasTransport<'py> { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl<'py> TypeConversion> for MsSQLPandasTransport<'py> { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("and_hms_opt got None from {:?}", val)), - Utc, - ) - } -} - -impl<'py> TypeConversion for MsSQLPandasTransport<'py> { - fn convert(val: Uuid) -> String { - val.to_string() - } -} - -impl<'py> TypeConversion for MsSQLPandasTransport<'py> { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl<'py> TypeConversion for MsSQLPandasTransport<'py> { - fn convert(val: NaiveTime) -> String { - val.to_string() - } -} diff --git a/connectorx-python/src/pandas/transports/mysql.rs b/connectorx-python/src/pandas/transports/mysql.rs deleted file mode 100644 index 9cd7213..0000000 --- a/connectorx-python/src/pandas/transports/mysql.rs +++ /dev/null @@ -1,119 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use crate::pandas::destination::PandasDestination; -use crate::pandas::typesystem::PandasTypeSystem; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use connectorx::{ - impl_transport, - sources::mysql::{BinaryProtocol, MySQLSource, MySQLTypeSystem, TextProtocol}, - typesystem::TypeConversion, -}; -use rust_decimal::prelude::*; -use 
serde_json::{to_string, Value}; -use std::marker::PhantomData; - -pub struct MysqlPandasTransport<'py, P>(&'py (), PhantomData

); - -impl_transport!( - name = MysqlPandasTransport<'tp, BinaryProtocol>, - error = ConnectorXPythonError, - systems = MySQLTypeSystem => PandasTypeSystem, - route = MySQLSource => PandasDestination<'tp>, - mappings = { - { Float[f32] => F64[f64] | conversion auto } - { Double[f64] => F64[f64] | conversion auto } - { Tiny[i8] => I64[i64] | conversion auto } - { Short[i16] => I64[i64] | conversion auto } - { Long[i32] => I64[i64] | conversion auto } - { Int24[i32] => I64[i64] | conversion none } - { LongLong[i64] => I64[i64] | conversion auto } - { UTiny[u8] => I64[i64] | conversion auto } - { UShort[u16] => I64[i64] | conversion auto } - { ULong[u32] => I64[i64] | conversion auto } - { UInt24[u32] => I64[i64] | conversion none } - { ULongLong[u64] => F64[f64] | conversion auto } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { Time[NaiveTime] => String[String] | conversion option } - { Year[i16] => I64[i64] | conversion none} - { Datetime[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Timestamp[NaiveDateTime] => DateTime[DateTime] | conversion none } - { Decimal[Decimal] => F64[f64] | conversion option } - { VarChar[String] => String[String] | conversion auto } - { Char[String] => String[String] | conversion none } - { Enum[String] => Str[String] | conversion none } - { TinyBlob[Vec] => Bytes[Vec] | conversion auto } - { Blob[Vec] => Bytes[Vec] | conversion none } - { MediumBlob[Vec] => Bytes[Vec] | conversion none } - { LongBlob[Vec] => Bytes[Vec] | conversion none } - { Json[Value] => String[String] | conversion option } - } -); - -impl_transport!( - name = MysqlPandasTransport<'tp, TextProtocol>, - error = ConnectorXPythonError, - systems = MySQLTypeSystem => PandasTypeSystem, - route = MySQLSource => PandasDestination<'tp>, - mappings = { - { Float[f32] => F64[f64] | conversion auto } - { Double[f64] => F64[f64] | conversion auto } - { Tiny[i8] => I64[i64] | conversion auto } - { Short[i16] => I64[i64] | conversion auto } - { 
Long[i32] => I64[i64] | conversion auto } - { Int24[i32] => I64[i64] | conversion none } - { LongLong[i64] => I64[i64] | conversion auto } - { UTiny[u8] => I64[i64] | conversion auto } - { UShort[u16] => I64[i64] | conversion auto } - { ULong[u32] => I64[i64] | conversion auto } - { UInt24[u32] => I64[i64] | conversion none } - { ULongLong[u64] => F64[f64] | conversion auto } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { Time[NaiveTime] => String[String] | conversion option } - { Datetime[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Timestamp[NaiveDateTime] => DateTime[DateTime] | conversion none } - { Year[i16] => I64[i64] | conversion none} - { Decimal[Decimal] => F64[f64] | conversion option } - { VarChar[String] => String[String] | conversion auto } - { Char[String] => String[String] | conversion none } - { Enum[String] => Str[String] | conversion none } - { TinyBlob[Vec] => Bytes[Vec] | conversion auto } - { Blob[Vec] => Bytes[Vec] | conversion none } - { MediumBlob[Vec] => Bytes[Vec] | conversion none } - { LongBlob[Vec] => Bytes[Vec] | conversion none } - { Json[Value] => String[String] | conversion option } - } -); - -impl<'py, P> TypeConversion> for MysqlPandasTransport<'py, P> { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("and_hms_opt got None from {:?}", val)), - Utc, - ) - } -} - -impl<'py, P> TypeConversion for MysqlPandasTransport<'py, P> { - fn convert(val: NaiveTime) -> String { - val.to_string() - } -} - -impl<'py, P> TypeConversion> for MysqlPandasTransport<'py, P> { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl<'py, P> TypeConversion for MysqlPandasTransport<'py, P> { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl<'py, P> TypeConversion for 
MysqlPandasTransport<'py, P> { - fn convert(val: Value) -> String { - to_string(&val).unwrap() - } -} diff --git a/connectorx-python/src/pandas/transports/oracle.rs b/connectorx-python/src/pandas/transports/oracle.rs deleted file mode 100644 index 5ff54fa..0000000 --- a/connectorx-python/src/pandas/transports/oracle.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use crate::pandas::destination::PandasDestination; -use crate::pandas::typesystem::PandasTypeSystem; -use chrono::{DateTime, NaiveDateTime, Utc}; -use connectorx::{ - impl_transport, - sources::oracle::{OracleSource, OracleTypeSystem}, - typesystem::TypeConversion, -}; - -pub struct OraclePandasTransport<'py>(&'py ()); - -impl_transport!( - name = OraclePandasTransport<'tp>, - error = ConnectorXPythonError, - systems = OracleTypeSystem => PandasTypeSystem, - route = OracleSource => PandasDestination<'tp>, - mappings = { - { NumFloat[f64] => F64[f64] | conversion auto } - { Float[f64] => F64[f64] | conversion none } - { BinaryFloat[f64] => F64[f64] | conversion none } - { BinaryDouble[f64] => F64[f64] | conversion none } - { NumInt[i64] => I64[i64] | conversion auto } - { Blob[Vec] => Bytes[Vec] | conversion auto } - { Clob[String] => String[String] | conversion none } - { VarChar[String] => String[String] | conversion auto } - { Char[String] => String[String] | conversion none } - { NVarChar[String] => String[String] | conversion none } - { NChar[String] => String[String] | conversion none } - { Date[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Timestamp[NaiveDateTime] => DateTime[DateTime] | conversion none } - { TimestampTz[DateTime] => DateTime[DateTime] | conversion auto } - } -); - -impl<'py> TypeConversion> for OraclePandasTransport<'py> { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} diff --git a/connectorx-python/src/pandas/transports/postgres.rs 
b/connectorx-python/src/pandas/transports/postgres.rs deleted file mode 100644 index f4c6c21..0000000 --- a/connectorx-python/src/pandas/transports/postgres.rs +++ /dev/null @@ -1,139 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use crate::pandas::{destination::PandasDestination, typesystem::PandasTypeSystem}; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use connectorx::{ - impl_transport, - sources::postgres::{ - BinaryProtocol, CSVProtocol, CursorProtocol, PostgresSource, PostgresTypeSystem, - SimpleProtocol, - }, - typesystem::TypeConversion, -}; -use postgres::NoTls; -use postgres_openssl::MakeTlsConnector; -use rust_decimal::prelude::*; -use serde_json::{to_string, Value}; -use std::collections::HashMap; -use std::marker::PhantomData; -use uuid::Uuid; - -pub struct PostgresPandasTransport<'py, P, C>(&'py (), PhantomData

, PhantomData); - -macro_rules! impl_postgres_transport { - ($proto:ty, $tls:ty) => { - impl_transport!( - name = PostgresPandasTransport<'tp, $proto, $tls>, - error = ConnectorXPythonError, - systems = PostgresTypeSystem => PandasTypeSystem, - route = PostgresSource<$proto, $tls> => PandasDestination<'tp>, - mappings = { - { Float4[f32] => F64[f64] | conversion auto } - { Float8[f64] => F64[f64] | conversion auto } - { Numeric[Decimal] => F64[f64] | conversion option } - { Int2[i16] => I64[i64] | conversion auto } - { Int4[i32] => I64[i64] | conversion auto } - { Int8[i64] => I64[i64] | conversion auto } - { BoolArray[Vec] => BoolArray[Vec] | conversion auto_vec } - { Int2Array[Vec] => I64Array[Vec] | conversion auto_vec } - { Int4Array[Vec] => I64Array[Vec] | conversion auto_vec } - { Int8Array[Vec] => I64Array[Vec] | conversion auto } - { Float4Array[Vec] => F64Array[Vec] | conversion auto_vec } - { Float8Array[Vec] => F64Array[Vec] | conversion auto } - { NumericArray[Vec] => F64Array[Vec] | conversion option } - { Bool[bool] => Bool[bool] | conversion auto } - { Char[i8] => Char[char] | conversion option } - { Text[&'r str] => Str[&'r str] | conversion auto } - { BpChar[&'r str] => Str[&'r str] | conversion none } - { VarChar[&'r str] => Str[&'r str] | conversion none } - { Name[&'r str] => Str[&'r str] | conversion none } - { Timestamp[NaiveDateTime] => DateTime[DateTime] | conversion option } - { TimestampTz[DateTime] => DateTime[DateTime] | conversion auto } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { UUID[Uuid] => String[String] | conversion option } - { JSON[Value] => String[String] | conversion option } - { JSONB[Value] => String[String] | conversion none } - { Time[NaiveTime] => String[String] | conversion option } - { ByteA[Vec] => Bytes[Vec] | conversion auto } - { Enum[&'r str] => Str[&'r str] | conversion none } - { HSTORE[HashMap>] => String[String] | conversion option } - } - ); - } -} - 
-impl_postgres_transport!(BinaryProtocol, NoTls); -impl_postgres_transport!(BinaryProtocol, MakeTlsConnector); -impl_postgres_transport!(CSVProtocol, NoTls); -impl_postgres_transport!(CSVProtocol, MakeTlsConnector); -impl_postgres_transport!(CursorProtocol, NoTls); -impl_postgres_transport!(CursorProtocol, MakeTlsConnector); -impl_postgres_transport!(SimpleProtocol, NoTls); -impl_postgres_transport!(SimpleProtocol, MakeTlsConnector); - -impl<'py, P, C> TypeConversion>, String> - for PostgresPandasTransport<'py, P, C> -{ - fn convert(val: HashMap>) -> String { - to_string(&val).unwrap() - } -} - -impl<'py, P, C> TypeConversion, Vec> for PostgresPandasTransport<'py, P, C> { - fn convert(val: Vec) -> Vec { - val.into_iter() - .map(|v| { - v.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", v)) - }) - .collect() - } -} - -impl<'py, P, C> TypeConversion for PostgresPandasTransport<'py, P, C> { - fn convert(val: Decimal) -> f64 { - val.to_f64() - .unwrap_or_else(|| panic!("cannot convert decimal {:?} to float64", val)) - } -} - -impl<'py, P, C> TypeConversion for PostgresPandasTransport<'py, P, C> { - fn convert(val: NaiveTime) -> String { - val.to_string() - } -} - -impl<'py, P, C> TypeConversion for PostgresPandasTransport<'py, P, C> { - fn convert(val: i8) -> char { - val as u8 as char - } -} - -impl<'py, P, C> TypeConversion> - for PostgresPandasTransport<'py, P, C> -{ - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl<'py, P, C> TypeConversion> for PostgresPandasTransport<'py, P, C> { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("and_hms_opt got None from {:?}", val)), - Utc, - ) - } -} - -impl<'py, P, C> TypeConversion for PostgresPandasTransport<'py, P, C> { - fn convert(val: Uuid) -> String { - val.to_string() - } -} - -impl<'py, P, C> TypeConversion for PostgresPandasTransport<'py, P, 
C> { - fn convert(val: Value) -> String { - to_string(&val).unwrap() - } -} diff --git a/connectorx-python/src/pandas/transports/sqlite.rs b/connectorx-python/src/pandas/transports/sqlite.rs deleted file mode 100644 index 122ba29..0000000 --- a/connectorx-python/src/pandas/transports/sqlite.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::errors::ConnectorXPythonError; -use crate::pandas::destination::PandasDestination; -use crate::pandas::typesystem::PandasTypeSystem; -use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; -use connectorx::{ - impl_transport, - sources::sqlite::{SQLiteSource, SQLiteTypeSystem}, - typesystem::TypeConversion, -}; - -pub struct SqlitePandasTransport<'py>(&'py ()); - -impl_transport!( - name = SqlitePandasTransport<'tp>, - error = ConnectorXPythonError, - systems = SQLiteTypeSystem => PandasTypeSystem, - route = SQLiteSource => PandasDestination<'tp>, - mappings = { - { Bool[bool] => Bool[bool] | conversion auto } - { Int8[i64] => I64[i64] | conversion auto } - { Int4[i32] => I64[i64] | conversion auto } - { Int2[i16] => I64[i64] | conversion auto } - { Real[f64] => F64[f64] | conversion auto } - { Text[Box] => BoxStr[Box] | conversion auto } - { Date[NaiveDate] => DateTime[DateTime] | conversion option } - { Time[NaiveTime] => String[String] | conversion option } - { Timestamp[NaiveDateTime] => DateTime[DateTime] | conversion option } - { Blob[Vec] => Bytes[Vec] | conversion auto } - } -); - -impl<'py> TypeConversion> for SqlitePandasTransport<'py> { - fn convert(val: NaiveDateTime) -> DateTime { - DateTime::from_naive_utc_and_offset(val, Utc) - } -} - -impl<'py> TypeConversion> for SqlitePandasTransport<'py> { - fn convert(val: NaiveDate) -> DateTime { - DateTime::from_naive_utc_and_offset( - val.and_hms_opt(0, 0, 0) - .unwrap_or_else(|| panic!("and_hms_opt got None from {:?}", val)), - Utc, - ) - } -} - -impl<'py> TypeConversion for SqlitePandasTransport<'py> { - fn convert(val: NaiveTime) -> String { - val.to_string() - } 
-} diff --git a/connectorx-python/src/pandas/typesystem.rs b/connectorx-python/src/pandas/typesystem.rs deleted file mode 100644 index 35a108b..0000000 --- a/connectorx-python/src/pandas/typesystem.rs +++ /dev/null @@ -1,113 +0,0 @@ -// Unfortunately, due to the orphan rule, typesystem implementation should be in this crate. -use chrono::{DateTime, Utc}; -use connectorx::impl_typesystem; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum PandasTypeSystem { - F64(bool), - I64(bool), - F64Array(bool), - I64Array(bool), - Bool(bool), - BoolArray(bool), - Char(bool), - Str(bool), - BoxStr(bool), - String(bool), - Bytes(bool), - ByteSlice(bool), - DateTime(bool), -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum PandasBlockType { - Boolean(bool), // bool indicates nullablity - Int64(bool), - Float64, - BooleanArray, - Int64Array, - Float64Array, - String, - DateTime, - Bytes, -} - -pub enum PandasArrayType { - NumpyArray, - IntegerArray, - BooleanArray, - DatetimeArray, -} - -impl From for PandasArrayType { - fn from(ty: PandasBlockType) -> PandasArrayType { - match ty { - PandasBlockType::Boolean(true) => PandasArrayType::BooleanArray, - PandasBlockType::Int64(true) => PandasArrayType::IntegerArray, - PandasBlockType::DateTime => PandasArrayType::DatetimeArray, - _ => PandasArrayType::NumpyArray, - } - } -} - -impl From for PandasBlockType { - fn from(ty: PandasTypeSystem) -> PandasBlockType { - match ty { - PandasTypeSystem::Bool(nullable) => PandasBlockType::Boolean(nullable), - PandasTypeSystem::I64(nullable) => PandasBlockType::Int64(nullable), - PandasTypeSystem::F64(_) => PandasBlockType::Float64, - PandasTypeSystem::BoolArray(_) => PandasBlockType::BooleanArray, - PandasTypeSystem::F64Array(_) => PandasBlockType::Float64Array, - PandasTypeSystem::I64Array(_) => PandasBlockType::Int64Array, - PandasTypeSystem::String(_) - | PandasTypeSystem::BoxStr(_) - | PandasTypeSystem::Str(_) - | 
PandasTypeSystem::Char(_) => PandasBlockType::String, - PandasTypeSystem::Bytes(_) | PandasTypeSystem::ByteSlice(_) => PandasBlockType::Bytes, - PandasTypeSystem::DateTime(_) => PandasBlockType::DateTime, - } - } -} - -impl_typesystem! { - system = PandasTypeSystem, - mappings = { - { F64 => f64 } - { I64 => i64 } - { F64Array => Vec } - { I64Array => Vec } - { Bool => bool } - { BoolArray => Vec } - { Char => char } - { Str => &'r str } - { BoxStr => Box } - { String => String } - { Bytes => Vec } - { ByteSlice => &'r [u8] } - { DateTime => DateTime } - } -} - -pub trait PandasDType: Sized { - // For initialize a pandas array when creating the pandas dataframe - fn is_masked(&self) -> bool; - fn array_name(&self) -> &'static str; -} - -impl PandasDType for PandasBlockType { - fn is_masked(&self) -> bool { - matches!( - *self, - PandasBlockType::Boolean(true) | PandasBlockType::Int64(true) - ) - } - - fn array_name(&self) -> &'static str { - match *self { - PandasBlockType::Boolean(true) => "BooleanArray", - PandasBlockType::Int64(true) => "IntegerArray", - PandasBlockType::DateTime => "DatetimeArray", - _ => "", - } - } -} diff --git a/connectorx-python/src/read_sql.rs b/connectorx-python/src/read_sql.rs deleted file mode 100644 index e91c5bc..0000000 --- a/connectorx-python/src/read_sql.rs +++ /dev/null @@ -1,83 +0,0 @@ -use connectorx::{ - partition::{partition, PartitionQuery}, - source_router::parse_source, - sql::CXQuery, -}; -use dict_derive::FromPyObject; -use fehler::throw; -use pyo3::prelude::*; -use pyo3::{exceptions::PyValueError, PyResult}; - -use crate::errors::ConnectorXPythonError; - -#[derive(FromPyObject)] -pub struct PyPartitionQuery { - query: String, - column: String, - min: Option, - max: Option, - num: usize, -} - -impl Into for PyPartitionQuery { - fn into(self) -> PartitionQuery { - PartitionQuery::new( - self.query.as_str(), - self.column.as_str(), - self.min, - self.max, - self.num, - ) - } -} - -pub fn read_sql<'a>( - py: Python<'a>, - 
conn: &str, - return_type: &str, - protocol: Option<&str>, - queries: Option>, - partition_query: Option, -) -> PyResult<&'a PyAny> { - let source_conn = parse_source(conn, protocol).map_err(|e| ConnectorXPythonError::from(e))?; - let (queries, origin_query) = match (queries, partition_query) { - (Some(queries), None) => (queries.into_iter().map(CXQuery::Naked).collect(), None), - (None, Some(part)) => { - let origin_query = Some(part.query.clone()); - let queries = partition(&part.into(), &source_conn) - .map_err(|e| ConnectorXPythonError::from(e))?; - (queries, origin_query) - } - (Some(_), Some(_)) => throw!(PyValueError::new_err( - "partition_query and queries cannot be both specified", - )), - (None, None) => throw!(PyValueError::new_err( - "partition_query and queries cannot be both None", - )), - }; - - match return_type { - "pandas" => Ok(crate::pandas::write_pandas( - py, - &source_conn, - origin_query, - &queries, - )?), - "arrow" => Ok(crate::arrow::write_arrow( - py, - &source_conn, - origin_query, - &queries, - )?), - "arrow2" => Ok(crate::arrow2::write_arrow( - py, - &source_conn, - origin_query, - &queries, - )?), - _ => Err(PyValueError::new_err(format!( - "return type should be 'pandas' or 'arrow', got '{}'", - return_type - ))), - } -} diff --git a/connectorx/examples/jvm_test.rs b/connectorx/examples/jvm_test.rs deleted file mode 100644 index 7e294ab..0000000 --- a/connectorx/examples/jvm_test.rs +++ /dev/null @@ -1,57 +0,0 @@ -use connectorx::{ - prelude::*, - sources::postgres::{rewrite_tls_args, BinaryProtocol, PostgresSource}, - sql::CXQuery, - transports::PostgresArrowTransport, -}; -use j4rs::{ClasspathEntry, InvocationArg, Jvm, JvmBuilder}; -use postgres::NoTls; -use std::convert::TryFrom; -use std::env; -use std::fs; -use std::iter::Iterator; -use url::Url; - -fn main() { - let path = fs::canonicalize("./federated-rewriter.jar").unwrap(); - println!("path: {:?}", path); - let entry = ClasspathEntry::new(path.to_str().unwrap()); - let jvm: 
Jvm = JvmBuilder::new().classpath_entry(entry).build().unwrap(); - - let args: Vec = env::args().collect(); - let file = &args[1]; - let sql = fs::read_to_string(file).unwrap(); - println!("input sql: {}", sql); - let sql = InvocationArg::try_from(sql).unwrap(); - let rewrite_sql = jvm - .invoke_static("ai.dataprep.federated.QueryRewriter", "rewrite", &[sql]) - .unwrap(); - - let rewrite_sql: String = jvm.to_rust(rewrite_sql).unwrap(); - - println!("rewrite sql: {}", rewrite_sql); - - let conn = env::var("POSTGRES_URL").unwrap(); - let url = Url::parse(&conn).unwrap(); - let (config, _) = rewrite_tls_args(&url).unwrap(); - - let sb = PostgresSource::::new(config, NoTls, 1).unwrap(); - let mut destination = ArrowDestination::new(); - let queries = [CXQuery::naked(rewrite_sql)]; - let dispatcher = Dispatcher::<_, _, PostgresArrowTransport>::new( - sb, - &mut destination, - &queries, - None, - ); - println!("run dispatcher"); - dispatcher.run().unwrap(); - let result = destination.arrow().unwrap(); - let counts = result - .iter() - .map(|rb| rb.num_rows()) - .collect::>(); - - println!("result rows: {}", counts.iter().sum::()); - println!("result columns: {}", result[0].schema()) -} diff --git a/connectorx/src/constants.rs b/connectorx/src/constants.rs deleted file mode 100644 index b65cca4..0000000 --- a/connectorx/src/constants.rs +++ /dev/null @@ -1,40 +0,0 @@ -#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] -pub(crate) const SECONDS_IN_DAY: i64 = 86_400; - -#[allow(dead_code)] -const KILO: usize = 1 << 10; - -#[cfg(any(feature = "dst_arrow", feature = "dst_arrow2"))] -pub const RECORD_BATCH_SIZE: usize = 64 * KILO; - -#[cfg(any( - feature = "src_postgres", - feature = "src_mysql", - feature = "src_oracle", - feature = "src_mssql" -))] -pub const DB_BUFFER_SIZE: usize = 32; - -#[cfg(any(feature = "src_oracle"))] -pub const ORACLE_ARRAY_SIZE: u32 = KILO as u32; - -#[cfg(all(not(debug_assertions), feature = "federation"))] -pub const J4RS_BASE_PATH: &str = 
"../target/release"; - -#[cfg(all(debug_assertions, feature = "federation"))] -pub const J4RS_BASE_PATH: &str = "../target/debug"; - -#[cfg(feature = "federation")] -pub const CX_REWRITER_PATH: &str = - "../connectorx-python/connectorx/dependencies/federated-rewriter.jar"; - -#[cfg(feature = "federation")] -pub const POSTGRES_JDBC_DRIVER: &str = "org.postgresql.Driver"; - -#[cfg(feature = "federation")] -pub const MYSQL_JDBC_DRIVER: &str = "com.mysql.cj.jdbc.Driver"; - -#[cfg(feature = "federation")] -pub const DUCKDB_JDBC_DRIVER: &str = "org.duckdb.DuckDBDriver"; - -pub const CONNECTORX_PROTOCOL: &str = "cxprotocol"; diff --git a/connectorx/src/fed_dispatcher.rs b/connectorx/src/fed_dispatcher.rs deleted file mode 100644 index 875a9fa..0000000 --- a/connectorx/src/fed_dispatcher.rs +++ /dev/null @@ -1,92 +0,0 @@ -use crate::{prelude::*, sql::CXQuery}; -use arrow::record_batch::RecordBatch; -use datafusion::datasource::MemTable; -use datafusion::prelude::*; -use fehler::throws; -use log::debug; -use rayon::prelude::*; -use std::collections::HashMap; -use std::convert::TryFrom; -use std::sync::{mpsc::channel, Arc}; - -#[throws(ConnectorXOutError)] -pub fn run( - sql: String, - db_map: HashMap, - j4rs_base: Option<&str>, -) -> Vec { - debug!("federated input sql: {}", sql); - let mut db_conn_map: HashMap = HashMap::new(); - for (k, v) in db_map.into_iter() { - db_conn_map.insert( - k, - FederatedDataSourceInfo::new_from_conn_str( - SourceConn::try_from(v.as_str())?, - false, - "", - "", - ), - ); - } - let fed_plan = rewrite_sql(sql.as_str(), &db_conn_map, j4rs_base)?; - - debug!("fetch queries from remote"); - let (sender, receiver) = channel(); - fed_plan.into_par_iter().enumerate().try_for_each_with( - sender, - |s, (i, p)| -> Result<(), ConnectorXOutError> { - match p.db_name.as_str() { - "LOCAL" => { - s.send((p.sql, None)).expect("send error local"); - } - _ => { - debug!("start query {}: {}", i, p.sql); - let mut queries = vec![]; - 
p.sql.split(';').for_each(|ss| { - queries.push(CXQuery::naked(ss)); - }); - let source_conn = &db_conn_map[p.db_name.as_str()] - .conn_str_info - .as_ref() - .unwrap(); - - let destination = get_arrow(source_conn, None, queries.as_slice())?; - let rbs = destination.arrow()?; - - let provider = MemTable::try_new(rbs[0].schema(), vec![rbs])?; - s.send((p.db_alias, Some(Arc::new(provider)))) - .expect(&format!("send error {}", i)); - debug!("query {} finished", i); - } - } - Ok(()) - }, - )?; - - let ctx = SessionContext::new(); - let mut alias_names: Vec = vec![]; - let mut local_sql = String::new(); - receiver - .iter() - .try_for_each(|(alias, provider)| -> Result<(), ConnectorXOutError> { - match provider { - Some(p) => { - ctx.register_table(alias.as_str(), p)?; - alias_names.push(alias); - } - None => local_sql = alias, - } - - Ok(()) - })?; - - debug!("\nexecute query final...\n{}\n", local_sql); - let rt = Arc::new(tokio::runtime::Runtime::new().expect("Failed to create runtime")); - // until datafusion fix the bug: https://github.com/apache/arrow-datafusion/issues/2147 - for alias in alias_names { - local_sql = local_sql.replace(format!("\"{}\"", alias).as_str(), alias.as_str()); - } - - let df = rt.block_on(ctx.sql(local_sql.as_str()))?; - rt.block_on(df.collect())? 
-} diff --git a/connectorx/src/fed_rewriter.rs b/connectorx/src/fed_rewriter.rs deleted file mode 100644 index 9d717c3..0000000 --- a/connectorx/src/fed_rewriter.rs +++ /dev/null @@ -1,185 +0,0 @@ -use crate::{ - constants::{CX_REWRITER_PATH, J4RS_BASE_PATH}, - prelude::*, -}; -use fehler::throws; -use j4rs::{ClasspathEntry, Instance, InvocationArg, Jvm, JvmBuilder}; -use log::debug; -use std::collections::HashMap; -use std::convert::TryFrom; -use std::{env, fs}; - -pub struct Plan { - pub db_name: String, - pub db_alias: String, - pub sql: String, - pub cardinality: usize, -} - -pub struct FederatedDataSourceInfo<'a> { - pub conn_str_info: Option, - pub manual_info: Option>>, - pub is_local: bool, - pub jdbc_url: &'a str, - pub jdbc_driver: &'a str, -} - -impl<'a> FederatedDataSourceInfo<'a> { - pub fn new_from_conn_str( - source_conn: SourceConn, - is_local: bool, - jdbc_url: &'a str, - jdbc_driver: &'a str, - ) -> Self { - Self { - conn_str_info: Some(source_conn), - manual_info: None, - is_local, - jdbc_url, - jdbc_driver, - } - } - pub fn new_from_manual_schema( - manual_schema: HashMap>, - is_local: bool, - ) -> Self { - Self { - conn_str_info: None, - manual_info: Some(manual_schema), - is_local, - jdbc_url: "", - jdbc_driver: "", - } - } -} - -#[throws(ConnectorXOutError)] -fn init_jvm(j4rs_base: Option<&str>) -> Jvm { - let base = match j4rs_base { - Some(path) => fs::canonicalize(path) - .map_err(|_| ConnectorXOutError::FileNotFoundError(path.to_string()))?, - None => fs::canonicalize(J4RS_BASE_PATH) - .map_err(|_| ConnectorXOutError::FileNotFoundError(J4RS_BASE_PATH.to_string()))?, - }; - debug!("j4rs base path: {:?}", base); - - let rewriter_path = env::var("CX_REWRITER_PATH").unwrap_or(CX_REWRITER_PATH.to_string()); - let path = fs::canonicalize(rewriter_path.as_str()) - .map_err(|_| ConnectorXOutError::FileNotFoundError(rewriter_path))?; - - debug!("rewriter path: {:?}", path); - - let entry = ClasspathEntry::new(path.to_str().unwrap()); - 
JvmBuilder::new() - .skip_setting_native_lib() - .classpath_entry(entry) - .with_base_path(base.to_str().unwrap()) - .build()? -} - -#[allow(dead_code)] -#[throws(ConnectorXOutError)] -fn create_sources( - jvm: &Jvm, - db_map: &HashMap, -) -> (Instance, Instance) { - let mut db_config = vec![]; - let db_manual = jvm.create_instance("java.util.HashMap", &[])?; - - for (db_name, db_info) in db_map.iter() { - if db_info.manual_info.is_some() { - let manual_info = db_info.manual_info.as_ref().unwrap(); - let schema_info = jvm.create_instance("java.util.HashMap", &[])?; - for (name, columns) in manual_info { - let arr_instance = jvm.java_list("java.lang.String", columns.to_vec())?; - jvm.invoke( - &schema_info, - "put", - &[ - InvocationArg::try_from(name).unwrap(), - InvocationArg::try_from(arr_instance).unwrap(), - ], - )?; - } - let fed_ds = jvm.create_instance( - "ai.dataprep.federated.FederatedDataSource", - &[ - InvocationArg::try_from(db_info.is_local).unwrap(), - InvocationArg::try_from(schema_info).unwrap(), - ], - )?; - jvm.invoke( - &db_manual, - "put", - &[ - InvocationArg::try_from(db_name).unwrap(), - InvocationArg::try_from(fed_ds).unwrap(), - ], - )?; - } else { - db_config.push(String::from(db_name)); - } - } - let db_config = jvm.java_list("java.lang.String", db_config)?; - (db_config, db_manual) -} - -#[allow(dead_code)] -#[throws(ConnectorXOutError)] -fn create_sources2(jvm: &Jvm, db_map: &HashMap) -> Instance { - let mut dbs = vec![]; - for db in db_map.keys() { - dbs.push(String::from(db)); - } - jvm.java_list("java.lang.String", dbs)? 
-} - -#[throws(ConnectorXOutError)] -pub fn rewrite_sql( - sql: &str, - db_map: &HashMap, - j4rs_base: Option<&str>, -) -> Vec { - let jvm = init_jvm(j4rs_base)?; - debug!("init jvm successfully!"); - - let sql = InvocationArg::try_from(sql).unwrap(); - let (db_config, db_manual) = create_sources(&jvm, db_map)?; - let rewriter = jvm.create_instance("ai.dataprep.federated.FederatedQueryRewriter", &[])?; - let db_config = InvocationArg::try_from(db_config).unwrap(); - let db_manual = InvocationArg::try_from(db_manual).unwrap(); - let plan = jvm.invoke(&rewriter, "rewrite3", &[sql, db_config, db_manual])?; - - let count = jvm.invoke(&plan, "getCount", &[])?; - let count: i32 = jvm.to_rust(count)?; - debug!("rewrite finished, got {} queries", count); - - let mut fed_plan = vec![]; - for i in 0..count { - let idx = [InvocationArg::try_from(i).unwrap().into_primitive()?]; - - let db = jvm.invoke(&plan, "getDBName", &idx)?; - let db: String = jvm.to_rust(db)?; - - let alias_db = jvm.invoke(&plan, "getAliasDBName", &idx)?; - let alias_db: String = jvm.to_rust(alias_db)?; - - let rewrite_sql = jvm.invoke(&plan, "getSql", &idx)?; - let rewrite_sql: String = jvm.to_rust(rewrite_sql)?; - - let cardinality = jvm.invoke(&plan, "getCardinality", &idx)?; - let cardinality: usize = jvm.to_rust(cardinality)?; - - debug!( - "{} - db: {}, alias: {}, cardinality: {}, rewrite sql: {}", - i, db, alias_db, cardinality, rewrite_sql - ); - fed_plan.push(Plan { - db_name: db, - db_alias: alias_db, - sql: rewrite_sql, - cardinality, - }); - } - fed_plan -} diff --git a/connectorx/tests/test_fed.rs b/connectorx/tests/test_fed.rs deleted file mode 100644 index cfa200a..0000000 --- a/connectorx/tests/test_fed.rs +++ /dev/null @@ -1,21 +0,0 @@ -use connectorx::fed_dispatcher::run; -use std::collections::HashMap; -use std::env; - -#[test] -#[ignore] -fn test_fed() { - let _ = env_logger::builder().is_test(true).try_init(); - - let sql = "select test_bool, AVG(test_float) as avg_float, 
SUM(test_int) as sum_int from db1.test_table as a, db2.test_str as b where a.test_int = b.id AND test_nullint is not NULL GROUP BY test_bool ORDER BY sum_int"; - let db_map = HashMap::from([ - (String::from("db1"), env::var("DB1").unwrap()), - (String::from("db2"), env::var("DB2").unwrap()), - ]); - - println!("db_map: {:?}", db_map); - - // make sure no error here - let rbs = run(sql.to_string(), db_map, None).unwrap(); - arrow::util::pretty::print_batches(&rbs).unwrap(); -} diff --git a/dbs/Justfile b/dbs/Justfile new file mode 100644 index 0000000..1575654 --- /dev/null +++ b/dbs/Justfile @@ -0,0 +1,24 @@ +# start the database containers in the background +start: + docker-compose up -d + +# stop and remove the database containers +stop: + docker-compose down + + +# load postgres.sql and sqlite.sql into the databases named by $POSTGRES_URL and $SQLITE_URL +seed: + psql $POSTGRES_URL -f postgres.sql + sqlite3 ${SQLITE_URL#sqlite://} < sqlite.sql + +# NOTE: every recipe line runs in its own shell, so the ORACLE_URL_SCRIPT assignment and the sqlplus call that reads it are joined into one continued line +# dbs not included in ci +seed-more: + mysql --protocol tcp -h$MYSQL_HOST -P$MYSQL_PORT -u$MYSQL_USER -p$MYSQL_PASSWORD $MYSQL_DB < mysql.sql + mssql-cli -S$MSSQL_HOST -U$MSSQL_USER -P$MSSQL_PASSWORD -d$MSSQL_DB -i mssql.sql + mysql --protocol tcp -h$CLICKHOUSE_HOST -P$CLICKHOUSE_PORT -u$CLICKHOUSE_USER -p$CLICKHOUSE_PASSWORD $CLICKHOUSE_DB < clickhouse.sql + psql $REDSHIFT_URL -f redshift.sql + ORACLE_URL_SCRIPT=`echo ${ORACLE_URL#oracle://} | sed "s/:/\//"` && \ + cat oracle.sql | sqlplus $ORACLE_URL_SCRIPT + mysql --protocol tcp -h$MARIADB_HOST -P$MARIADB_PORT -u$MARIADB_USER -p$MARIADB_PASSWORD $MARIADB_DB < mysql.sql diff --git a/scripts/bigquery.sql b/dbs/bigquery.sql similarity index 100% rename from scripts/bigquery.sql rename to dbs/bigquery.sql diff --git a/scripts/clickhouse.sql b/dbs/clickhouse.sql similarity index 100% rename from scripts/clickhouse.sql rename to dbs/clickhouse.sql diff --git a/dbs/docker-compose.yml b/dbs/docker-compose.yml new file mode 100644 index 0000000..5b1e6ee --- /dev/null +++ b/dbs/docker-compose.yml @@ -0,0 +1,17 @@ +services: + postgres: + image: "postgres:16-alpine" + ports: + - "5432:5432" + environment: + POSTGRES_DB: dummy + POSTGRES_USER: root + 
POSTGRES_PASSWORD: root + mysql: + image: "mariadb:10" + ports: + - "3306:3306" + environment: + MYSQL_DATABASE: dummy + MYSQL_ROOT_PASSWORD: root + command: --secure-file-priv="" diff --git a/scripts/duckdb.sql b/dbs/duckdb.sql similarity index 100% rename from scripts/duckdb.sql rename to dbs/duckdb.sql diff --git a/scripts/mem_monitor.sh b/dbs/mem_monitor.sh similarity index 100% rename from scripts/mem_monitor.sh rename to dbs/mem_monitor.sh diff --git a/scripts/mssql.sql b/dbs/mssql.sql similarity index 100% rename from scripts/mssql.sql rename to dbs/mssql.sql diff --git a/scripts/mysql.sql b/dbs/mysql.sql similarity index 100% rename from scripts/mysql.sql rename to dbs/mysql.sql diff --git a/scripts/oracle.sql b/dbs/oracle.sql similarity index 100% rename from scripts/oracle.sql rename to dbs/oracle.sql diff --git a/scripts/postgres.sql b/dbs/postgres.sql similarity index 100% rename from scripts/postgres.sql rename to dbs/postgres.sql diff --git a/scripts/redshift.sql b/dbs/redshift.sql similarity index 100% rename from scripts/redshift.sql rename to dbs/redshift.sql diff --git a/scripts/sqlite.sql b/dbs/sqlite.sql similarity index 100% rename from scripts/sqlite.sql rename to dbs/sqlite.sql diff --git a/docs/_config.yml b/docs/_config.yml deleted file mode 100644 index afd3af8..0000000 --- a/docs/_config.yml +++ /dev/null @@ -1,24 +0,0 @@ -# Book settings -# Learn more at https://jupyterbook.org/customize/config.html - -title: ConnectorX -author: SFU-DB -logo: logo.png - -# Force re-execution of notebooks on each build. 
-# See https://jupyterbook.org/content/execute.html -execute: - execute_notebooks: force - - -# Information about where the book exists on the web -repository: - url: https://github.com/sfu-db/connector-x # Online location of your book - path_to_book: docs # Optional path to your book, relative to the repository root - branch: main # Which branch of the repository should be used when creating links (optional) - -# Add GitHub buttons to your book -# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository -html: - use_issues_button: true - use_repository_button: true diff --git a/docs/_toc.yml b/docs/_toc.yml deleted file mode 100644 index 4f7274d..0000000 --- a/docs/_toc.yml +++ /dev/null @@ -1,18 +0,0 @@ -# Table of contents -# Learn more at https://jupyterbook.org/customize/toc.html - -format: jb-book -root: intro - -chapters: - - file: install - - file: api - - file: databases - sections: - - file: databases/bigquery - - file: databases/mssql - - file: databases/mysql - - file: databases/oracle - - file: databases/postgres - - file: databases/sqlite - - file: freq_questions diff --git a/docs/api.md b/docs/api.md deleted file mode 100644 index 50fa4da..0000000 --- a/docs/api.md +++ /dev/null @@ -1,96 +0,0 @@ -# Basic usage -ConnectorX enables you to run the SQL query, load data from databases into a Pandas Dataframe in the fastest and most memory efficient way. - -## API -```python -connectorx.read_sql(conn: Union[str, Dict[str, str]], query: Union[List[str], str], *, return_type: str = "pandas", protocol: str = "binary", partition_on: Optional[str] = None, partition_range: Optional[Tuple[int, int]] = None, partition_num: Optional[int] = None) -``` - -## Parameters -- `conn: Union[str, Dict[str, str]]`: Connection string URI for querying single database or dict of database names (key) and connection string URIs (value) for querying multiple databases. 
- - Please check out [here](https://sfu-db.github.io/connector-x/databases.html) for connection string examples of each database -- `query: Union[str, List[str]]`: SQL query or list of partitioned SQL queries for fetching data. -- `return_type: str = "pandas"`: The return type of this function. It can be `arrow` (`arrow2`), `pandas`, `modin`, `dask` or `polars`. -- `protocol: str = "binary"`: The protocol used to fetch data from source, default is `binary`. Check out [here](./databases.md) to see more details. -- `partition_on: Optional[str]`: The column to partition the result. -- `partition_range: Optional[Tuple[int, int]]`: The value range of the partition column. -- `partition_num: Optional[int]`: The number of partitions to generate. -- `index_col: Optional[str]`: The index column to set for the result dataframe. Only applicable when `return_type` is `pandas`, `modin` or `dask`. - - -## Examples -- Read a DataFrame from a SQL using a single thread - - ```python - import connectorx as cx - - postgres_url = "postgresql://username:password@server:port/database" - query = "SELECT * FROM lineitem" - - cx.read_sql(postgres_url, query) - ``` - -- Read a DataFrame parallelly using 10 threads by automatically partitioning the provided SQL on the partition column (`partition_range` will be automatically queried if not given) - - ```python - import connectorx as cx - - postgres_url = "postgresql://username:password@server:port/database" - query = "SELECT * FROM lineitem" - - cx.read_sql(postgres_url, query, partition_on="l_orderkey", partition_num=10) - ``` - -- Read a DataFrame parallelly using 2 threads by manually providing two partition SQLs (the schemas of all the query results should be same) - - ```python - import connectorx as cx - - postgres_url = "postgresql://username:password@server:port/database" - queries = ["SELECT * FROM lineitem WHERE l_orderkey <= 30000000", "SELECT * FROM lineitem WHERE l_orderkey > 30000000"] - - cx.read_sql(postgres_url, queries) - - 
``` - -- Read a DataFrame parallelly using 4 threads from a more complex query - - ```python - import connectorx as cx - - postgres_url = "postgresql://username:password@server:port/database" - query = f""" - SELECT l_orderkey, - SUM(l_extendedprice * ( 1 - l_discount )) AS revenue, - o_orderdate, - o_shippriority - FROM customer, - orders, - lineitem - WHERE c_mktsegment = 'BUILDING' - AND c_custkey = o_custkey - AND l_orderkey = o_orderkey - AND o_orderdate < DATE '1995-03-15' - AND l_shipdate > DATE '1995-03-15' - GROUP BY l_orderkey, - o_orderdate, - o_shippriority - """ - - cx.read_sql(postgres_url, query, partition_on="l_orderkey", partition_num=4) - - ``` - -- Read a DataFrame from a SQL joined from multiple databases (experimental, only support PostgreSQL for now) - - ```python - import connectorx as cx - - db1 = "postgresql://username1:password1@server1:port1/database1" - db2 = "postgresql://username2:password2@server2:port2/database2" - query = "SELECT * FROM db1.nation n, db2.region r where n.n_regionkey = r.r_regionkey" - - cx.read_sql({"db1": db1, "db2": db2}, query) - - ``` - diff --git a/docs/databases.md b/docs/databases.md deleted file mode 100644 index 6eb371f..0000000 --- a/docs/databases.md +++ /dev/null @@ -1,10 +0,0 @@ -# Databases configuration and performance - -ConnectorX supports retrieving data from Postgres, MsSQL, MySQL, Oracle, SQLite, and BigQuery. This chapter introduces how to use ConnectorX to connect each database and the conversion between database types and Pandas types. 
- -* [BigQuery](./databases/bigquery.md) -* [MsSQL](./databases/mssql.md) -* [MySQL](./databases/mysql.md) -* [Oracle](./databases/oracle.md) -* [Postgres](./databases/postgres.md) -* [SQLite](./databases/sqlite.md) \ No newline at end of file diff --git a/docs/databases/bigquery.md b/docs/databases/bigquery.md deleted file mode 100644 index f1b0b79..0000000 --- a/docs/databases/bigquery.md +++ /dev/null @@ -1,37 +0,0 @@ -# BigQuery - -```{note} -BigQuery does not need to specify protocol. -``` - -```{warning} -Currently, BigQuery does not support to apply paritition on Query with limit clause. -For example, `cx.read_sql(conn, 'select * from table limit 10', parition_num=3, partition_on='int')` will fail. -If you want to fetch result from query with limit clause, please do not use partitioning. -``` - -### BigQuery Connection - -**Authentication File:** BigQuery connection need an authentication json file from Google Cloud Platform. If you do not have an authentication json file, you can create your BigQuery authentication [here](https://cloud.google.com/docs/authentication/getting-started). 
- -```py -import connectorx as cx -authentication_file_path = '/home/user/path/auth.json' # path to your authentication json file -conn = 'bigquery://' + authentication_file_path # connection token -query = 'SELECT * FROM `database.dataset.table`' # query string -cx.read_sql(conn, query) # read data from BigQuery -``` - -### BigQuery-Pandas Type Mapping -| BigQuery Type | Pandas Type | Comment | -|:-------------------------:|:---------------------------:|:----------------------------------:| -| Bool, Boolean | bool, boolean(nullable) | | -| Int64, Integer | int64, Int64(nullable) | | -| Float64, Float | float64 | | -| Numeric | float64 | | -| String | object | | -| BYTES | object | | -| Time | object | | -| DATE | datetime64[ns] | | -| Datetime | datetime64[ns] | | -| TIMESTAMP | datetime64[ns] | UTC | \ No newline at end of file diff --git a/docs/databases/mssql.md b/docs/databases/mssql.md deleted file mode 100644 index f637f4e..0000000 --- a/docs/databases/mssql.md +++ /dev/null @@ -1,63 +0,0 @@ -# MsSQL - -```{note} -SQLServer does not need to specify protocol. -``` - -### MsSQL Connection -```{hint} -By adding `trusted_connection=true` to connection uri parameter, windows authentication will be enabled. Example: `mssql://host:port/db?trusted_connection=true` -By adding `encrypt=true` to connection uri parameter, SQLServer will use SSL encryption. Example: `mssql://host:port/db?encrypt=true&trusted_connection=true` -``` -```{hint} -if the user password has special characters, they need to be sanitized. 
example: `from urllib import parse; password = parse.quote_plus(password)` -``` - -```py -import connectorx as cx -conn = 'mssql://username:password@server:port/database?encrypt=true&trusted_connection=true' # connection token -query = 'SELECT * FROM table' # query string -cx.read_sql(conn, query) # read data from MsSQL -``` - -### SQLServer-Pandas Type Mapping -| SQLServer Type | Pandas Type | Comment | -|:---------------:|:---------------------------:|:----------------------------------:| -| TINYINT | int64, Int64(nullable) | | -| SMALLINT | int64, Int64(nullable) | | -| INT | int64, Int64(nullable) | | -| BIGINT | int64, Int64(nullable) | | -| FLOAT | float64 | | -| NUMERIC | float64 | | -| DECIMAL | float64 | cannot support precision larger than 28 | -| BIT | bool, boolean(nullable) | | -| VARCHAR | object | | -| CHAR | object | | -| TEXT | object | | -| NVARCHAR | object | | -| NCHAR | object | | -| NTEXT | object | | -| VARBINARY | object | | -| BINARY | object | | -| IMAGE | object | | -| DATETIME | datetime64[ns] | | -| DATETIME2 | datetime64[ns] | | -| SMALLDATETIME | datetime64[ns] | | -| DATE | datetime64[ns] | | -| DATETIMEOFFSET | datetime64[ns] | | -| TIME | object | | -| UNIQUEIDENTIFIER| object | | - -### Performance (r5.4xlarge docker in another EC2 instance) - -**Modin does not support read_sql on Mssql** - -- Time chart, lower is better. - -

time chart

- -- Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **14x** less time compared with Pandas. diff --git a/docs/databases/mysql.md b/docs/databases/mysql.md deleted file mode 100644 index d486b3a..0000000 --- a/docs/databases/mysql.md +++ /dev/null @@ -1,51 +0,0 @@ -# MySQL - -## Protocols -* `binary`: [MySQL Binary protocol](https://github.com/blackbeam/rust-mysql-simple), recommend to use in general. -* `text`: [MySQL Text protocol](https://github.com/blackbeam/rust-mysql-simple), slower than `binary`, recommend to use only when `binary` protocol is not supported by the source (e.g. Clickhouse). - -## MySQL Connection -```py -import connectorx as cx -conn = 'mysql://username:password@server:port/database' # connection token -query = 'SELECT * FROM table' # query string -cx.read_sql(conn, query) # read data from MySQL -``` - -## MySQL-Pandas Type Mapping -| MySQL Type | Pandas Type | Comment | -|:---------------:|:---------------------------:|:----------------------------------:| -| TINYINT | int64, Int64(nullable) | | -| SMALLINT | int64, Int64(nullable) | | -| MEDIUMINT | int64, Int64(nullable) | | -| INT | int64, Int64(nullable) | | -| BIGINT | int64, Int64(nullable) | | -| FLOAT | float64 | | -| DOUBLE | float64 | | -| DECIMAL | float64, object(Clickhouse) | Clickhouse return DECIMAL in string, cannot support precision larger than 28 | -| VARCHAR | object | | -| CHAR | object | | -| DATE | datetime64[ns] | only support date after year 1970 | -| TIME | object | | -| DATETIME | datetime64[ns] | only support date after year 1970 | -| TIMESTAMP | datetime64[ns] | | -| YEAR | int64, Int64(nullable) | | -| TINYBLOB | object | | -| BLOB | object | | -| MEDIUMBLOB | object | | -| LONGBLOB | object | | -| JSON | object | | -| ENUM | object | | - - -### Performance (db.m6g.4xlarge RDS) - -- Time chart, lower is better. - -

time chart

- -- Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **8x** less time compared with Pandas. diff --git a/docs/databases/oracle.md b/docs/databases/oracle.md deleted file mode 100644 index f25e05e..0000000 --- a/docs/databases/oracle.md +++ /dev/null @@ -1,40 +0,0 @@ -# Oracle - - -### Oracle Connection -```py -import connectorx as cx -conn = 'oracle://username:password@server:port/database' # connection token -query = 'SELECT * FROM table' # query string -cx.read_sql(conn, query) # read data from Oracle -``` - -### Oracle-Pandas Type Mapping -| Oracle Type | Pandas Type | Comment | -|:-------------------------:|:---------------------------:|:----------------------------------:| -| Number(\*,0) | int64, Int64(nullable) | | -| Number(\*,>0) | float64 | | -| Float | float64 | | -| BINARY_FLOAT | float64 | | -| BINARY_DOUBLE | float64 | | -| VARCHAR2 | object | | -| CHAR | object | | -| NCHAR | object | | -| NVarchar2 | object | | -| DATE | datetime64[ns] | | -| TIMESTAMP | datetime64[ns] | | -| TIMESTAMP WITH TIME ZONE | datetime64[ns] | | - -### Performance (db.r5.4xlarge RDS) - -**Modin and Turbodbc does not support read_sql on Oracle** - -- Time chart, lower is better. - -

time chart

- -- Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **3x** less time compared with Pandas. diff --git a/docs/databases/postgres.md b/docs/databases/postgres.md deleted file mode 100644 index caa39cf..0000000 --- a/docs/databases/postgres.md +++ /dev/null @@ -1,66 +0,0 @@ -# Postgres - -### Protocols -* `binary`: [Postgres Binary COPY protocol](https://www.postgresql.org/docs/current/sql-copy.html), recommend to use in general since fast data parsing speed. -* `csv`: [Postgres CSV COPY protocol](https://www.postgresql.org/docs/current/sql-copy.html), recommend to use when network is slow (`csv` usually results in smaller size than `binary`). -* `cursor`: Conventional wire protocol (slowest one), recommend to use only when `binary` and `csv` is not supported by the source (e.g. Redshift). - -## Postgres Connection -```{hint} -Adding `sslmode=require` to connection uri parameter force SSL connection. Example: `postgresql://username:password@host:port/db?sslmode=require`. `sslmode=disable` to disable SSL connection. - -To connect to redshift, replace `postgresql://` with `redshift://`. 
-``` - -```py -import connectorx as cx -conn = 'postgres://username:password@server:port/database' # connection token -query = "SELECT * FROM table" # query string -cx.read_sql(conn, query) # read data from Postgres -``` - -## Postgres-Pandas Type Mapping - -| Postgres Type | Pandas Type | Comment | -|:---------------:|:-------------------------:|:----------------------------------:| -| BOOL | bool, boolean(nullable) | | -| INT2 | int64, Int64(nullable) | | -| INT4 | int64, Int64(nullable) | | -| INT8 | int64, Int64(nullable) | | -| FLOAT4 | float64 | | -| FLOAT8 | float64 | | -| NUMERIC | float64 | cannot support precision larger than 28 | -| TEXT | object | | -| BPCHAR | object | | -| VARCHAR | object | | -| CHAR | object | | -| BYTEA | object | | -| DATE | datetime64[ns] | | -| TIME | object | | -| TIMESTAMP | datetime64[ns] | | -| TIMESTAMPZ | datetime64[ns] | | -| UUID | object | | -| JSON | object | | -| JSONB | object | | -| ENUM | object | need to convert enum column to text manually (`::text`) when using `csv` and `cursor` protocol | -| ltree | object | binary protocol supported only after Postgres version 13 | -| lquery | object | binary protocol supported only after Postgres version 13 | -| ltxtquery | object | binary protocol supported only after Postgres version 13 | -| INT2[] | object | list of i64 | -| INT4[] | object | list of i64 | -| INT8[] | object | list of i64 | -| FLOAT4[] | object | list of f64 | -| FLOAT8[] | object | list of f64 | -| NUMERIC[] | object | list of f64 | - -## Performance (db.m6g.4xlarge RDS) - -- Time chart, lower is better. - -

time chart

- -- Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **3x** less memory and **13x** less time compared with Pandas. diff --git a/docs/databases/sqlite.md b/docs/databases/sqlite.md deleted file mode 100644 index 2d8d5ab..0000000 --- a/docs/databases/sqlite.md +++ /dev/null @@ -1,50 +0,0 @@ -# SQLite -Since SQLite adopts a [dynamic type system](https://www.sqlite.org/datatype3.html), we infer type as follow: -* If there is a declared type of the column, we derive the type using [column affinity rules](https://www.sqlite.org/datatype3.html#affname), code can be found [here](https://github.com/sfu-db/connector-x/blob/main/connectorx/src/sources/sqlite/typesystem.rs#L47). -* Otherwise we directly adopt the value's type in the first row of the result (in each partition), which results in INTEGER, REAL, TEXT and BLOB. -* If the first row of the result is NULL in the partition, try next partition. Throw an error if first rows of all partitions are NULL for a column. - -### SQLite Connection -```py -import connectorx as cx -db_path = '/home/user/path/test.db' # path to your SQLite database -conn = 'sqlite://' + db_path # connection token -query = 'SELECT * FROM `database.dataset.table`' # query string -cx.read_sql(conn, query) # read data from SQLite -``` - -Example on windows: -```py -import connectorx as cx -import urllib -db_path = urllib.parse.quote("C:\\user\\path\\test.db") # url encode the path to your SQLite database -conn = 'sqlite://' + db_path # connection token -query = 'SELECT * FROM `database.dataset.table`' # query string -cx.read_sql(conn, query) # read data from SQLite -``` - -### SQLite Type Mapping -| SQLite Type | Pandas Type | Comment | -|:----------------:|:---------------------------:|:----------------------------------:| -| INTEGER | int64, Int64(nullable) | declared type that contains substring "int" | -| BOOL | bool, boolean(nullable) | declared type is "boolean" or "bool" | -| REAL | float64 | declared type that contains substring "real", "floa", "doub" | -| TEXT | 
object | declared type that contains substring "char", "clob", "text" | -| BLOB | object | declared type that contains substring "blob" | -| DATE | datetime64[ns] | declared type is "date" | -| TIME | object | declared type is "time" | -| TIMESTAMP | datetime64[ns] | declared type is "datetime" or "timestamp", the format must follow `YYYY-MM-DD HH:MM:SS"/"YYYY-MM-DD HH:MM:SS.SSS`| - -## Performance (r5.4xlarge EC2 same instance) - -**Turbodbc does not support read_sql on SQLite** - -- Time chart, lower is better. - -

time chart

- -- Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses **2x** less memory and **5x** less time compared with Pandas. diff --git a/docs/freq_questions.md b/docs/freq_questions.md deleted file mode 100644 index 71e9f0a..0000000 --- a/docs/freq_questions.md +++ /dev/null @@ -1,37 +0,0 @@ -# Frequently asked questions - -## How to specify the partition number? - -`partition_num` will determine how many queries we are going to split from the original one and issue to the database. Underlying, we use [rayon](https://github.com/rayon-rs/rayon) as our parallel executor, which adopts a pool of threads to handle each partitioned query. The number of threads in the pool equals to the number of logical cores on the machine. It is recommended to set the `partition_num` to the number of available logical cores. - -## How to choose the partition column? - -`partition_on` specifies on which column we will partition the query. In order to achieve the best performance, it is ideal that each partitioned query will return the same number of rows. And since we partition the column evenly, it is recommended that the numerical `partition_on` column is evenly distributed. Whether a column has index or not might also affect the performance depends on the source database. You can give it a try if you have multiple candidates. Also, you can manually partition the query if our partition method cannot match your need. ConnectorX will still return a whole dataframe with all the results of the list of queries you input. - -## How to print log in Python? - -Set the environment variable `RUST_LOG` to have a detailed look at Rust log. -```python -import os -os.environ["RUST_LOG"]="connectorx=debug,connectorx_python=debug" -import connectorx as cx - -df = cx.read_sql(conn, query) // It will be more clear to test when no partitioning first -``` - -## Why is my query slow on ConnectorX? - -ConnectorX is mainly targeting on the large query result fetching scenario. 
It speeds up the process by optimizing the client-side execution and saturating both network and machine resource through parallelism. When query execution on the database server is the bottleneck (for example when the result size is small, and/or the query is very complex), there will be overhead coming from metadata fetching. In ConnectorX, there are up to three info that will be fetched before issue the query to database: - -* MIN, MAX query for partition range (if partition is enabled and `partition_range` is not given) -* COUNT query (if `return_type="pandas"`) -* schema fetching query, which gets type and name for each column in the result - -For users who want to have pandas.DataFrame as final result. In order to avoid the costly COUNT query, one workaround is to use Arrow as an intermediate destination from ConnectorX and convert it into Pandas using Arrow’s [to_pandas API](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html?pyarrow.Table.to_pandas). For example: - -```Python -import connectorx as cx - -table = cx.read_sql(db_uri, query, return_type="arrow") # or arrow2 https://github.com/jorgecarleitao/arrow2 -df = table.to_pandas(split_blocks=False, date_as_object=False) -``` \ No newline at end of file diff --git a/docs/install.md b/docs/install.md deleted file mode 100644 index 0a77c3e..0000000 --- a/docs/install.md +++ /dev/null @@ -1,46 +0,0 @@ -# Getting Started - -## Installation - -### Pip - -The easiest way to install ConnectorX is using pip, with the following command: - -```bash -pip install connectorx -``` - -### Build from source code - -* Step 0: Install tools. - * Install Rust: `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` - * Install [just](https://github.com/casey/just): `cargo install just` - * Install [Poetry](https://python-poetry.org/docs/): `pip3 install poetry` - -* Step 1: Fresh clone of source. 
-```bash -git clone https://github.com/sfu-db/connector-x.git -``` - -* Step 2: Install and switch to the correct rust version (please refer [this file](https://github.com/sfu-db/connector-x/blob/main/.github/workflows/release.yml) and search for `rust` for the latest using version). -```bash -rustup install {version} -rustup override set {version} -``` - -* Step 3: Install system dependencies. Please refer to [release.yml](https://github.com/sfu-db/connector-x/blob/main/.github/workflows/release.yml) for dependencies needed for different os. - -* Step 4: Install python dependencies. -```bash -just bootstrap-python -``` - -* Step 5: Build wheel. -```bash -just build-python-wheel -``` - -NOTES: -* `OPENSSL_NO_VENDOR=1` might required to compile for windows users. -* Dynamic library is required for the python installation. (e.g. If you are using `pyenv`, use command `PYTHON_CONFIGURE_OPTS=“--enable-shared” pyenv install {version}` to install python since dylib is not enabled by default.) - diff --git a/docs/intro.md b/docs/intro.md deleted file mode 100644 index 53fb8ea..0000000 --- a/docs/intro.md +++ /dev/null @@ -1,115 +0,0 @@ -# Introduction - -Load data from to , the fastest way. - -ConnectorX enables you to load data from databases into Python in the fastest and most memory efficient way. It is a Python package that provides a high-level interface to the popular database connectors. Here is our rust documentation: [rust-docs](https://sfu-db.github.io/connector-x/rust-docs/connectorx/). - -What you need is one line of code: - -```python -import connectorx as cx - -cx.read_sql("postgresql://username:password@server:port/database", "SELECT * FROM lineitem") -``` - -Optionally, you can accelerate the data loading using parallelism by specifying a partition column. 
- -```python -import connectorx as cx - -cx.read_sql("postgresql://username:password@server:port/database", "SELECT * FROM lineitem", partition_on="l_orderkey", partition_num=10) -``` - -The function will partition the query by **evenly** splitting the specified column to the amount of partitions. -ConnectorX will assign one thread for each partition to load and write data in parallel. -Currently, we support partitioning on **numerical** columns (**cannot contain NULL**) for **SPJA** queries. - -**Experimental: We are now providing federated query support (PostgreSQL only and do not support partition for now), you can write a single query to join tables from two or more databases! (JRE >= 1.8 is required)** -```python -import connectorx as cx - -db1 = "postgresql://username1:password1@server1:port1/database1" -db2 = "postgresql://username2:password2@server2:port2/database2" - -cx.read_sql({"db1": db1, "db2": db2}, "SELECT * FROM db1.nation n, db2.region r where n.n_regionkey = r.r_regionkey") -``` - -Check out more detailed usage and examples [here](https://sfu-db.github.io/connector-x/api.html). A general introduction of the project can be found in this [blog post](https://towardsdatascience.com/connectorx-the-fastest-way-to-load-data-from-databases-a65d4d4062d5). - -# Performance - -We compared different solutions in Python that provides the `read_sql` function, by loading a 10x TPC-H lineitem table (8.6GB) from Postgres into a DataFrame, with 4 cores parallelism. - -## Time chart, lower is better. - -

time chart

- -## Memory consumption chart, lower is better. - -

memory chart

- -In conclusion, ConnectorX uses up to **3x** less memory and **21x** less time (**3x** less memory and **13x** less time compared with Pandas.). More benchmark result can be found under each database pages [here](https://sfu-db.github.io/connector-x/databases.html). - -## How does ConnectorX achieve a lightening speed while keeping the memory footprint low? - -We observe that existing solutions more or less do data copy multiple times when downloading the data. -Additionally, implementing a data intensive application in Python brings additional cost. - -ConnectorX is written in Rust and follows "zero-copy" principle. -This allows it to make full use of the CPU by becoming cache and branch predictor friendly. Moreover, the architecture of ConnectorX ensures the data will be copied exactly once, directly from the source to the destination. - -## How does ConnectorX download the data? - -Upon receiving the query, e.g. `SELECT * FROM lineitem`, ConnectorX will first issue a `LIMIT 1` query `SELECT * FROM lineitem LIMIT 1` to get the schema of the result set. - -Then, if `partition_on` is specified, ConnectorX will issue `SELECT MIN($partition_on), MAX($partition_on) FROM (SELECT * FROM lineitem)` to know the range of the partition column. -After that, the original query is split into partitions based on the min/max information, e.g. `SELECT * FROM (SELECT * FROM lineitem) WHERE $partition_on > 0 AND $partition_on < 10000`. -ConnectorX will then run a count query to get the partition size (e.g. `SELECT COUNT(*) FROM (SELECT * FROM lineitem) WHERE $partition_on > 0 AND $partition_on < 10000`). If the partition -is not specified, the count query will be `SELECT COUNT(*) FROM (SELECT * FROM lineitem)`. - -Finally, ConnectorX will use the schema info as well as the count info to allocate memory and download data by executing the queries normally. 
- -Once the downloading begins, there will be one thread for each partition so that the data are downloaded in parallel at the partition level. The thread will issue the query of the corresponding -partition to the database and then write the returned data to the destination row-wise or column-wise (depends on the database) in a streaming fashion. - -# Supported Sources & Destinations - -Example connection string, supported protocols and data types for each data source can be found [here](https://sfu-db.github.io/connector-x/databases.html). - -For more planned data sources, please check out our [discussion](https://github.com/sfu-db/connector-x/discussions/61). - -## Sources -- [x] Postgres -- [x] Mysql -- [x] Mariadb (through mysql protocol) -- [x] Sqlite -- [x] Redshift (through postgres protocol) -- [x] Clickhouse (through mysql protocol) -- [x] SQL Server -- [x] Azure SQL Database (through mssql protocol) -- [x] Oracle -- [x] Big Query -- [ ] ODBC (WIP) -- [ ] ... - -## Destinations -- [x] Pandas -- [x] PyArrow -- [x] Modin (through Pandas) -- [x] Dask (through Pandas) -- [x] Polars (through PyArrow) - -# Supports - -You are always welcomed to: -1. Ask questions in stackoverflow. Make sure to have #connectorx attached. -2. Ask questions & propose new ideas in our [forum][discussion_page]. -3. Help us developing this project (adding databases and dataframes), please check out this [guide](https://github.com/sfu-db/connector-x/blob/main/CONTRIBUTING.md). 
- -# Organizations and Projects using ConnectorX - -[](https://github.com/pola-rs/polars) -[](https://dataprep.ai/) -[](https://modin.readthedocs.io) - -To add your project/organization here, reply our post [here](https://github.com/sfu-db/connector-x/discussions/146) diff --git a/docs/logo.png b/docs/logo.png deleted file mode 100644 index f788fda..0000000 Binary files a/docs/logo.png and /dev/null differ diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 7e821e4..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -jupyter-book -matplotlib -numpy diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..9ff0b76 --- /dev/null +++ b/flake.lock @@ -0,0 +1,60 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1689068808, + "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1690486282, + "narHash": "sha256-TuHKuIl/6HnJbXydm0S19I5dZsNXYK+FN7KvIbJskb8=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "78faafa6e6684acd1ec9770161a85d3b83caf7c5", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 
index 0000000..f0e00ac --- /dev/null +++ b/flake.nix @@ -0,0 +1,51 @@ +{ + description = "PRQL development environment"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + + essentials = with pkgs; [ + # compiler requirements + rustup + clang + + # tools + cargo-nextest + bacon + cargo-audit + cargo-insta + cargo-release + pkg-config + openssl + #cargo-llvm-cov + + # actions + just + #sd + #ripgrep + #nodePackages.prettier + #nodePackages.prettier-plugin-go-template + #nixpkgs-fmt + #rsync + ]; + + dbs = with pkgs; [ + postgresql_15 + sqlite + mysql + ]; + + in + { + devShells.default = pkgs.mkShell { + buildInputs = essentials ++ dbs; + }; + }); +} diff --git a/scripts/benchmarks/tpch-clickhouse.sql b/scripts/benchmarks/tpch-clickhouse.sql deleted file mode 100644 index a63ed02..0000000 --- a/scripts/benchmarks/tpch-clickhouse.sql +++ /dev/null @@ -1,21 +0,0 @@ --- mysql --local-infile --protocol tcp -h$CLICKHOUSE_HOST -P$CLICKHOUSE_PORT -u$CLICKHOUSE_USER -p$CLICKHOUSE_PASSWORD $CLICKHOUSE_DB < tpch-clickhouse.sql --- clickhouse-client --user $CLICKHOUSE_USER --password $CLICKHOUSE_PASSWORD --database $CLICKHOUSE_DB --format_csv_delimiter="|" --query="INSERT INTO tpch.lineitem FORMAT CSV" < $TPCH_DIR/lineitem.tbl - -DROP TABLE IF EXISTS lineitem; -CREATE TABLE lineitem ( L_ORDERKEY INTEGER NOT NULL, - L_PARTKEY INTEGER NOT NULL, - L_SUPPKEY INTEGER NOT NULL, - L_LINENUMBER INTEGER NOT NULL, - L_QUANTITY DOUBLE NOT NULL, - L_EXTENDEDPRICE DOUBLE NOT NULL, - L_DISCOUNT DOUBLE NOT NULL, - L_TAX DOUBLE NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - L_LINESTATUS CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL - 
)Engine=MergeTree() ORDER BY L_ORDERKEY; diff --git a/scripts/benchmarks/tpch-mssql.sql b/scripts/benchmarks/tpch-mssql.sql deleted file mode 100644 index 5fa9e49..0000000 --- a/scripts/benchmarks/tpch-mssql.sql +++ /dev/null @@ -1,35 +0,0 @@ --- mssql-cli -S$MSSQL_HOST -U$MSSQL_USER -P$MSSQL_PSWD -d$MSSQL_DB -i tpch-mssql.sql - -DROP TABLE IF EXISTS LINEITEM; -CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL, - L_PARTKEY INTEGER NOT NULL, - L_SUPPKEY INTEGER NOT NULL, - L_LINENUMBER INTEGER NOT NULL, - L_QUANTITY DECIMAL(15,2) NOT NULL, - L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, - L_DISCOUNT DECIMAL(15,2) NOT NULL, - L_TAX DECIMAL(15,2) NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - L_LINESTATUS CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL); - -CREATE INDEX lineitem_l_orderkey_idx ON LINEITEM (l_orderkey); - -BULK INSERT LINEITEM -FROM '/tmp/lineitem.tbl' -WITH -( - FORMAT = 'CSV', - FIELDQUOTE = '"', - FIRSTROW = 1, - FIELDTERMINATOR = '|', --CSV field delimiter - ROWTERMINATOR = '\n', --Use to shift the control to next row - TABLOCK -) - --- bcp tpch.dbo.lineitem in '$TPCH_DIR/lineitem.tbl' -f format.fmt diff --git a/scripts/benchmarks/tpch-mysql.sql b/scripts/benchmarks/tpch-mysql.sql deleted file mode 100644 index 289cd7e..0000000 --- a/scripts/benchmarks/tpch-mysql.sql +++ /dev/null @@ -1,25 +0,0 @@ --- mysql --local-infile --protocol tcp -h$MYSQL_HOST -P$MYSQL_PORT -u$MYSQL_USER -p$MYSQL_PASSWORD $MYSQL_DB < tpch-mysql.sql - -DROP TABLE IF EXISTS lineitem; -CREATE TABLE lineitem ( L_ORDERKEY INTEGER NOT NULL, - L_PARTKEY INTEGER NOT NULL, - L_SUPPKEY INTEGER NOT NULL, - L_LINENUMBER INTEGER NOT NULL, - L_QUANTITY DECIMAL(15,2) NOT NULL, - L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, - L_DISCOUNT DECIMAL(15,2) NOT NULL, - L_TAX DECIMAL(15,2) NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - L_LINESTATUS 
CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL); - -ALTER TABLE `lineitem` ADD INDEX `lineitem_orderkey_index` (`l_orderkey`); - -SET GLOBAL local_infile = 'ON'; -SHOW GLOBAL VARIABLES LIKE 'local_infile'; -LOAD DATA LOCAL INFILE '$TPCH_DIR/lineitem.tbl' INTO TABLE `lineitem` FIELDS TERMINATED BY '|' ENCLOSED BY '\"' LINES TERMINATED BY '\n'; diff --git a/scripts/benchmarks/tpch-postgres.sql b/scripts/benchmarks/tpch-postgres.sql deleted file mode 100644 index 5e3640b..0000000 --- a/scripts/benchmarks/tpch-postgres.sql +++ /dev/null @@ -1,23 +0,0 @@ --- psql $POSTGRES_URL -f tpch-postgres.sql - -DROP TABLE IF EXISTS LINEITEM; -CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL, - L_PARTKEY INTEGER NOT NULL, - L_SUPPKEY INTEGER NOT NULL, - L_LINENUMBER INTEGER NOT NULL, - L_QUANTITY DECIMAL(15,2) NOT NULL, - L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, - L_DISCOUNT DECIMAL(15,2) NOT NULL, - L_TAX DECIMAL(15,2) NOT NULL, - L_RETURNFLAG CHAR(1) NOT NULL, - L_LINESTATUS CHAR(1) NOT NULL, - L_SHIPDATE DATE NOT NULL, - L_COMMITDATE DATE NOT NULL, - L_RECEIPTDATE DATE NOT NULL, - L_SHIPINSTRUCT CHAR(25) NOT NULL, - L_SHIPMODE CHAR(10) NOT NULL, - L_COMMENT VARCHAR(44) NOT NULL); - -CREATE INDEX lineitem_l_orderkey_idx ON LINEITEM USING btree (l_orderkey); - -\copy LINEITEM FROM '$TPCH_DIR/lineitem.tbl' DELIMITER '|' ENCODING 'LATIN1'; diff --git a/scripts/benchmarks/tpch_redshift.sql b/scripts/benchmarks/tpch_redshift.sql deleted file mode 100644 index 10954ef..0000000 --- a/scripts/benchmarks/tpch_redshift.sql +++ /dev/null @@ -1,127 +0,0 @@ -DROP table if exists customer; -DROP table if exists lineitem; -DROP table if exists nation; -DROP table if exists orders; -DROP table if exists part; -DROP table if exists partsupp; -DROP table if exists region; -DROP table if exists supplier; - -create table customer ( - 
c_custkey int8 not null , - c_name varchar(25) not null, - c_address varchar(40) not null, - c_nationkey int4 not null, - c_phone char(15) not null, - c_acctbal numeric(12,2) not null, - c_mktsegment char(10) not null, - c_comment varchar(117) not null, - Primary Key(C_CUSTKEY) -) distkey(c_custkey) sortkey(c_custkey); - -create table lineitem ( - l_orderkey int8 not null , - l_partkey int8 not null, - l_suppkey int4 not null, - l_linenumber int4 not null, - l_quantity numeric(12,2) not null, - l_extendedprice numeric(12,2) not null, - l_discount numeric(12,2) not null, - l_tax numeric(12,2) not null, - l_returnflag char(1) not null, - l_linestatus char(1) not null, - l_shipdate date not null , - l_commitdate date not null, - l_receiptdate date not null, - l_shipinstruct char(25) not null, - l_shipmode char(10) not null, - l_comment varchar(44) not null, - Primary Key(L_ORDERKEY, L_LINENUMBER) -) distkey(l_orderkey) sortkey(l_orderkey) ; -/* distkey(l_orderkey) sortkey(l_shipdate,l_orderkey) ;*/ - -create table nation ( - n_nationkey int4 not null, - n_name char(25) not null , - n_regionkey int4 not null, - n_comment varchar(152) not null, - Primary Key(N_NATIONKEY) -) distkey(n_nationkey) sortkey(n_nationkey) ; - -create table orders ( - o_orderkey int8 not null, - o_custkey int8 not null, - o_orderstatus char(1) not null, - o_totalprice numeric(12,2) not null, - o_orderdate date not null, - o_orderpriority char(15) not null, - o_clerk char(15) not null, - o_shippriority int4 not null, - o_comment varchar(79) not null, - Primary Key(O_ORDERKEY) -) distkey(o_orderkey) sortkey(o_orderdate, o_orderkey) ; - -create table part ( - p_partkey int8 not null , - p_name varchar(55) not null, - p_mfgr char(25) not null, - p_brand char(10) not null, - p_type varchar(25) not null, - p_size int4 not null, - p_container char(10) not null, - p_retailprice numeric(12,2) not null, - p_comment varchar(23) not null, - PRIMARY KEY (P_PARTKEY) -) distkey(p_partkey) sortkey(p_partkey); 
- -create table partsupp ( - ps_partkey int8 not null, - ps_suppkey int4 not null, - ps_availqty int4 not null, - ps_supplycost numeric(12,2) not null, - ps_comment varchar(199) not null, - Primary Key(PS_PARTKEY, PS_SUPPKEY) -) distkey(ps_partkey) sortkey(ps_partkey); - -create table region ( - r_regionkey int4 not null, - r_name char(25) not null , - r_comment varchar(152) not null, - Primary Key(R_REGIONKEY) -) distkey(r_regionkey) sortkey(r_regionkey); - -create table supplier ( - s_suppkey int4 not null, - s_name char(25) not null, - s_address varchar(40) not null, - s_nationkey int4 not null, - s_phone char(15) not null, - s_acctbal numeric(12,2) not null, - s_comment varchar(101) not null, - Primary Key(S_SUPPKEY) -) distkey(s_suppkey) sortkey(s_suppkey) -; - -/* - To load the sample data, you must provide authentication for your cluster to access Amazon S3 on your behalf. - You can provide either role-based authentication or key-based authentication. - - Text files needed to load test data under s3://redshift-downloads/TPC-H/10GB are publicly available. - Any valid credentials should have read access. - - The COPY commands include a placeholder for the aws_access_key_id and aws_secret_access_key. - User must update the credentials clause below with valid credentials or the command will fail. - e.g. 
(1) aws_iam_role=arn:aws:iam::xxxxxxxxxxx:role/xxxxxxx - (2) aws_access_key_id= ;aws_secret_access_key= - - For more information check samples in https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-create-sample-db.html -*/ - -copy region from 's3://redshift-downloads/TPC-H/10GB/region/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy nation from 's3://redshift-downloads/TPC-H/10GB/nation/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy lineitem from 's3://redshift-downloads/TPC-H/10GB/lineitem/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy orders from 's3://redshift-downloads/TPC-H/10GB/orders/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy part from 's3://redshift-downloads/TPC-H/10GB/part/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy supplier from 's3://redshift-downloads/TPC-H/10GB/supplier/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy partsupp from 's3://redshift-downloads/TPC-H/10GB/partsupp/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; -copy customer from 's3://redshift-downloads/TPC-H/10GB/customer/' credentials 'aws_access_key_id= ;aws_secret_access_key=' gzip delimiter '|' region 'us-east-1'; diff --git a/scripts/python-helper.py b/scripts/python-helper.py deleted file mode 100644 index 945530f..0000000 --- a/scripts/python-helper.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Usage: - python-helper.py (copy-extension|rename-wheel) - -Options: - -h --help Show this screen. - --version Show version. 
-""" -import platform -import sys -import sysconfig -from shutil import copyfile -from pathlib import Path -import os -from docopt import docopt - -# copied from the maturin project -METADATA = { - "major": sys.version_info.major, - "minor": sys.version_info.minor, - "abiflags": sysconfig.get_config_var("ABIFLAGS"), - "interpreter": platform.python_implementation().lower(), - "ext_suffix": sysconfig.get_config_var("EXT_SUFFIX"), - "abi_tag": (sysconfig.get_config_var("SOABI") or "-").split("-")[1] or None, - "m": sysconfig.get_config_var("WITH_PYMALLOC") == 1, - "u": sysconfig.get_config_var("Py_UNICODE_SIZE") == 4, - "d": sysconfig.get_config_var("Py_DEBUG") == 1, - # This one isn't technically necessary, but still very useful for sanity checks - "platform": platform.system().lower(), - # We need this one for windows abi3 builds - "base_prefix": sys.base_prefix, -} - - - -def main() -> None: - args = docopt(__doc__) - if args["copy-extension"]: - if METADATA["platform"] == "windows": - suffix = ".dll" - src = Path("./target/release/connectorx") - elif METADATA["platform"] == "linux": - suffix = ".so" - src = Path("./target/release/libconnectorx") - elif METADATA["platform"] == "darwin": - suffix = ".dylib" - src = Path("./target/release/libconnectorx") - else: - raise NotImplementedError(f"platform '{METADATA['platform']}' not supported") - - dst = Path("./connectorx/connectorx") - copyfile(src.with_suffix(suffix), dst.with_suffix(METADATA["ext_suffix"])) - elif args["rename-wheel"]: - pyver = f"{METADATA['major']}{METADATA['minor']}" - - if METADATA["platform"] == "windows": - arch = "win_amd64" - # abitag = METADATA["abi_tag"] # this does not work on windows - if pyver == "37": - abitag = "37m" - else: - abitag = pyver - elif METADATA["platform"] == "linux": - arch = "manylinux_2_28_x86_64" - abitag = METADATA["abi_tag"] - elif METADATA["platform"] == "darwin": - arch = "macosx_10_15_intel" - abitag = METADATA["abi_tag"] - else: - raise 
NotImplementedError(f"platform '{platform}' not supported") - - for p in Path("./dist").iterdir(): - if p.suffix == ".whl": - pkgname, version, *rest = p.stem.split("-") - break - - - os.rename( - p, - f"./dist/{pkgname}-{version}-cp{pyver}-cp{abitag}-{arch}.whl", - ) - else: - raise ValueError(f"args not understand {args}") - -if __name__ == "__main__": - main() -