Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(libp2p): shared TCP listeners and AutoTLS.AutoWSS #10565

Merged
merged 30 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7833e64
feat(libp2p): enable shared TCP listeners
aschmahmann Oct 30, 2024
590bed0
chore(tmp): update go-libp2p to branch
aschmahmann Nov 12, 2024
2a33eb2
Merge branch 'master' into feat/libp2p-sharedtcp
gammazero Nov 19, 2024
bc215c2
docs: switch mentions of /ws to /tcp/4001
lidel Nov 29, 2024
6de3faa
chore: latest boxo and go-libp2p mains
lidel Dec 3, 2024
1831a3d
Merge remote-tracking branch 'origin/master' into feat/libp2p-sharedtcp
lidel Dec 3, 2024
9b887ea
feat: AutoTLS.AutoWSS
lidel Dec 4, 2024
80e8895
fix: conditional tcp sharing
lidel Dec 9, 2024
0d7ad48
test: go-libp2p#3080
lidel Dec 10, 2024
2adb2f1
chore: go-libp2p@master with go-libp2p#3080
lidel Dec 10, 2024
65d51e9
test: remove replace
lidel Dec 17, 2024
66317b4
chore: go-libp2p v0.38.1
lidel Dec 19, 2024
d178b87
Merge 'origin/master' into feat/libp2p-sharedtcp
lidel Dec 19, 2024
2d84386
docs: AutoTLS.AutoWSS and go-libp2p v0.38.x
lidel Dec 19, 2024
c841ca6
chore: p2p-forge/client v0.2.0-rc1
lidel Dec 19, 2024
0a59574
test: boxo@main (v0.26.0 pre release)
lidel Dec 19, 2024
075ed6b
chore: p2p-forge/client v0.2.0
lidel Dec 19, 2024
a436f4e
test: boxo@main (v0.26.0 pre release)
lidel Dec 19, 2024
b5cfd6d
test: sharness without tcp mux
lidel Dec 19, 2024
6caf78c
test: sharness with tcp mux
lidel Dec 19, 2024
2919d6d
Merge remote-tracking branch 'origin/master' into feat/libp2p-sharedtcp
lidel Dec 19, 2024
e150509
Merge branch 'master' into feat/libp2p-sharedtcp
gammazero Dec 20, 2024
7a4ec80
test: LIBP2P_TCP_MUX: false
lidel Dec 20, 2024
858e10a
test: LIBP2P_TCP_MUX: true
lidel Dec 20, 2024
50cd7cd
test: run sharness sequentially
lidel Dec 20, 2024
6367f64
testi: set sharness timeout to 2h
lidel Dec 20, 2024
bb87df3
fix: disable libp2p.ShareTCPListener() in PNET
lidel Dec 20, 2024
6b5b2ed
chore: restore regular sharness
lidel Dec 20, 2024
d532e58
chore: fix lint
lidel Dec 20, 2024
a1f1cb2
chore(ci): timeout sharness after 15m
lidel Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion config/autotls.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ import p2pforge "github.com/ipshipyard/p2p-forge/client"
// for obtaining a domain and TLS certificate to improve connectivity for web
// browser clients. More: https://github.com/ipshipyard/p2p-forge#readme
type AutoTLS struct {
// Enables the p2p-forge feature
// Enables the p2p-forge feature and all related features.
Enabled Flag `json:",omitempty"`

// Optional, controls if Kubo should add /tls/sni/.../ws listener to every /tcp port if no explicit /ws is defined in Addresses.Swarm
AutoWSS Flag `json:",omitempty"`

// Optional override of the parent domain that will be used
DomainSuffix *OptionalString `json:",omitempty"`

Expand All @@ -27,4 +30,5 @@ const (
DefaultDomainSuffix = p2pforge.DefaultForgeDomain
DefaultRegistrationEndpoint = p2pforge.DefaultForgeEndpoint
DefaultCAEndpoint = p2pforge.DefaultCAEndpoint
DefaultAutoWSS = true // requires AutoTLS.Enabled
)
47 changes: 44 additions & 3 deletions core/node/groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"

Expand Down Expand Up @@ -115,6 +116,8 @@ func LibP2P(bcfg *BuildCfg, cfg *config.Config, userResourceOverrides rcmgr.Part
enableRelayService := cfg.Swarm.RelayService.Enabled.WithDefault(enableRelayTransport)
enableRelayClient := cfg.Swarm.RelayClient.Enabled.WithDefault(enableRelayTransport)
enableAutoTLS := cfg.AutoTLS.Enabled.WithDefault(config.DefaultAutoTLSEnabled)
enableAutoWSS := cfg.AutoTLS.AutoWSS.WithDefault(config.DefaultAutoWSS)
atlsLog := log.Logger("autotls")

// Log error when relay subsystem could not be initialized due to missing dependency
if !enableRelayTransport {
Expand All @@ -125,21 +128,59 @@ func LibP2P(bcfg *BuildCfg, cfg *config.Config, userResourceOverrides rcmgr.Part
logger.Fatal("Failed to enable `Swarm.RelayClient`, it requires `Swarm.Transports.Network.Relay` to be true.")
}
}

if enableAutoTLS {
if !cfg.Swarm.Transports.Network.TCP.WithDefault(true) {
logger.Fatal("Invalid configuration: AutoTLS.Enabled=true requires Swarm.Transports.Network.TCP to be true as well.")
}
if !cfg.Swarm.Transports.Network.Websocket.WithDefault(true) {
logger.Fatal("Invalid configuration: AutoTLS.Enabled=true requires Swarm.Transports.Network.Websocket to be true as well.")
}

// AutoTLS for Secure WebSockets: ensure WSS listeners are in place (manual or automatic)
wssWildcard := fmt.Sprintf("/tls/sni/*.%s/ws", cfg.AutoTLS.DomainSuffix.WithDefault(config.DefaultDomainSuffix))
wssWildcardPresent := false
customWsPresent := false
customWsRegex := regexp.MustCompile(`/wss?$`)
tcpRegex := regexp.MustCompile(`/tcp/\d+$`)

// inspect listeners defined in config at Addresses.Swarm
var tcpListeners []string
for _, listener := range cfg.Addresses.Swarm {
// detect if user manually added /tls/sni/.../ws listener matching AutoTLS.DomainSuffix
if strings.Contains(listener, wssWildcard) {
atlsLog.Infof("found compatible wildcard listener in Addresses.Swarm. AutoTLS will be used on %s", listener)
wssWildcardPresent = true
break
}
// detect if user manually added own /ws or /wss listener that is
// not related to AutoTLS feature
if customWsRegex.MatchString(listener) {
atlsLog.Infof("found custom /ws listener set by user in Addresses.Swarm. AutoTLS will not be used on %s.", listener)
customWsPresent = true
break
}
// else, remember /tcp listeners that can be reused for /tls/sni/../ws
if tcpRegex.MatchString(listener) {
tcpListeners = append(tcpListeners, listener)
}
}
if !wssWildcardPresent {
logger.Fatal(fmt.Sprintf("Invalid configuration: AutoTLS.Enabled=true requires a catch-all Addresses.Swarm listener ending with %q to be present, see https://github.com/ipfs/kubo/blob/master/docs/config.md#autotls", wssWildcard))

// Append AutoTLS's wildcard listener
// if no manual /ws listener was set by the user
if enableAutoWSS && !wssWildcardPresent && !customWsPresent {
if len(tcpListeners) == 0 {
logger.Fatal("Invalid configuration: AutoTLS.AutoWSS=true requires at least one /tcp listener present in Addresses.Swarm, see https://github.com/ipfs/kubo/blob/master/docs/config.md#autotls")
}
for _, tcpListener := range tcpListeners {
wssListener := tcpListener + wssWildcard
cfg.Addresses.Swarm = append(cfg.Addresses.Swarm, wssListener)
atlsLog.Infof("appended AutoWSS listener: %s", wssListener)
}
}

if !wssWildcardPresent && !enableAutoWSS {
logger.Fatal(fmt.Sprintf("Invalid configuration: AutoTLS.Enabled=true requires a /tcp listener ending with %q to be present in Addresses.Swarm or AutoTLS.AutoWSS=true, see https://github.com/ipfs/kubo/blob/master/docs/config.md#autotls", wssWildcard))
}
}

Expand All @@ -152,7 +193,7 @@ func LibP2P(bcfg *BuildCfg, cfg *config.Config, userResourceOverrides rcmgr.Part

// Services (resource management)
fx.Provide(libp2p.ResourceManager(bcfg.Repo.Path(), cfg.Swarm, userResourceOverrides)),
maybeProvide(libp2p.P2PForgeCertMgr(bcfg.Repo.Path(), cfg.AutoTLS), enableAutoTLS),
maybeProvide(libp2p.P2PForgeCertMgr(bcfg.Repo.Path(), cfg.AutoTLS, atlsLog), enableAutoTLS),
maybeInvoke(libp2p.StartP2PAutoTLS, enableAutoTLS),
fx.Provide(libp2p.AddrFilters(cfg.Swarm.AddrFilters)),
fx.Provide(libp2p.AddrsFactory(cfg.Addresses.Announce, cfg.Addresses.AppendAnnounce, cfg.Addresses.NoAnnounce)),
Expand Down
11 changes: 5 additions & 6 deletions core/node/libp2p/addrs.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,20 @@ func ListenOn(addresses []string) interface{} {
}
}

func P2PForgeCertMgr(repoPath string, cfg config.AutoTLS) interface{} {
func P2PForgeCertMgr(repoPath string, cfg config.AutoTLS, atlsLog *logging.ZapEventLogger) interface{} {
return func() (*p2pforge.P2PForgeCertMgr, error) {
storagePath := filepath.Join(repoPath, "p2p-forge-certs")

forgeLogger := logging.Logger("autotls").Desugar()

// TODO: this should not be necessary, but we do it to help tracking
// down any race conditions causing
// https://github.com/ipshipyard/p2p-forge/issues/8
certmagic.Default.Logger = forgeLogger.Named("default_fixme")
certmagic.DefaultACME.Logger = forgeLogger.Named("default_acme_client_fixme")
rawLogger := atlsLog.Desugar()
certmagic.Default.Logger = rawLogger.Named("default_fixme")
certmagic.DefaultACME.Logger = rawLogger.Named("default_acme_client_fixme")

certStorage := &certmagic.FileStorage{Path: storagePath}
certMgr, err := p2pforge.NewP2PForgeCertMgr(
p2pforge.WithLogger(forgeLogger.Sugar()),
p2pforge.WithLogger(rawLogger.Sugar()),
p2pforge.WithForgeDomain(cfg.DomainSuffix.WithDefault(config.DefaultDomainSuffix)),
p2pforge.WithForgeRegistrationEndpoint(cfg.RegistrationEndpoint.WithDefault(config.DefaultRegistrationEndpoint)),
p2pforge.WithCAEndpoint(cfg.CAEndpoint.WithDefault(config.DefaultCAEndpoint)),
Expand Down
10 changes: 8 additions & 2 deletions core/node/libp2p/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ func Transports(tptConfig config.Transports) interface{} {
) (opts Libp2pOpts, err error) {
privateNetworkEnabled := params.Fprint != nil

if tptConfig.Network.TCP.WithDefault(true) {
tcpEnabled := tptConfig.Network.TCP.WithDefault(true)
wsEnabled := tptConfig.Network.Websocket.WithDefault(true)
if tcpEnabled {
// TODO(9290): Make WithMetrics configurable
opts.Opts = append(opts.Opts, libp2p.Transport(tcp.NewTCPTransport, tcp.WithMetrics()))
}

if tptConfig.Network.Websocket.WithDefault(true) {
if wsEnabled {
if params.ForgeMgr == nil {
opts.Opts = append(opts.Opts, libp2p.Transport(websocket.New))
} else {
Expand Down Expand Up @@ -64,6 +66,10 @@ func Transports(tptConfig config.Transports) interface{} {
opts.Opts = append(opts.Opts, libp2p.Transport(webrtc.New))
}

if tcpEnabled || wsEnabled {
lidel marked this conversation as resolved.
Show resolved Hide resolved
opts.Opts = append(opts.Opts, libp2p.ShareTCPListener())
lidel marked this conversation as resolved.
Show resolved Hide resolved
}

return opts, nil
}
}
Expand Down
5 changes: 5 additions & 0 deletions docs/changelogs/v0.33.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- [Overview](#overview)
- [🔦 Highlights](#-highlights)
- [Shared TCP listeners](#shared-tcp-listeners)
- [Bitswap improvements from Boxo](#bitswap-improvements-from-boxo)
- [Using default `libp2p_rcmgr` metrics](#using-default-libp2p_rcmgr--metrics)
- [`ipfs add --to-files` no longer works with `--wrap`](#ipfs-add---to-files-no-longer-works-with---wrap)
Expand All @@ -17,6 +18,10 @@

### 🔦 Highlights

#### Shared TCP listeners

Kubo now supports sharing the same port (`4001` by default) by multiple libp2p transports, such as [raw TCP](https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmtransportsnetworktcp) and [WebSockets](https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmtransportsnetworkwebsocket).

#### Bitswap improvements from Boxo

This release includes some refactorings and improvements affecting Bitswap which should improve reliability. One of the changes affects blocks providing. Previously, the bitswap layer took care itself of announcing new blocks -added or received- with the configured provider (i.e. DHT). This bypassed the "Reprovider", that is, the system that manages precisely "providing" the blocks stored by Kubo. The Reprovider knows how to take advantage of the [AcceleratedDHTClient](https://github.com/ipfs/kubo/blob/master/docs/config.md#routingaccelerateddhtclient), is able to handle priorities, logs statistics and is able to resume on daemon reboot where it left off. From now on, Bitswap will not be doing any providing on-the-side and all announcements are managed by the reprovider. In some cases, when the reproviding queue is full with other elements, this may cause additional delays, but more likely this will result in improved block-providing behaviour overall.
Expand Down
35 changes: 21 additions & 14 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ config file at runtime.
- [`AutoNAT.Throttle.Interval`](#autonatthrottleinterval)
- [`AutoTLS`](#autotls)
- [`AutoTLS.Enabled`](#autotlsenabled)
- [`AutoTLS.AutoWSS`](#autotlsautowss)
- [`AutoTLS.DomainSuffix`](#autotlsdomainsuffix)
- [`AutoTLS.RegistrationEndpoint`](#autotlsregistrationendpoint)
- [`AutoTLS.RegistrationToken`](#autotlsregistrationtoken)
Expand Down Expand Up @@ -492,33 +493,39 @@ Type: `object`
> Feel free to enable it and [report issues](https://github.com/ipfs/kubo/issues/new/choose) if you want to help with testing.
> Track progress in [kubo#10560](https://github.com/ipfs/kubo/issues/10560).

Enables AutoTLS feature to get DNS+TLS for [libp2p Secure WebSocket](https://github.com/libp2p/specs/blob/master/websockets/README.md) listeners defined in [`Addresses.Swarm`](#addressesswarm), such as `/ip4/0.0.0.0/tcp/4002/tls/sni/*.libp2p.direct/ws` and `/ip6/::/tcp/4002/tls/sni/*.libp2p.direct/ws`.
Enables AutoTLS feature to get DNS+TLS for [libp2p Secure WebSocket](https://github.com/libp2p/specs/blob/master/websockets/README.md) on `/tcp` port.

If `.../tls/sni/*.libp2p.direct/ws` [multiaddr] is present in [`Addresses.Swarm`](#addressesswarm)
If `AutoTLS.AutoWSS` is `true`, or `/tcp/../tls/sni/*.libp2p.direct/ws` [multiaddr] is present in [`Addresses.Swarm`](#addressesswarm)
with SNI segment ending with [`AutoTLS.DomainSuffix`](#autotlsdomainsuffix),
Kubo will obtain and set up a trusted PKI TLS certificate for it, making it dialable from web browser's [Secure Contexts](https://w3c.github.io/webappsec-secure-contexts/).
Kubo will obtain and set up a trusted PKI TLS certificate for `*.peerid.libp2p.direct`, making it dialable from web browser's [Secure Contexts](https://w3c.github.io/webappsec-secure-contexts/).

> [!TIP]
> - Most users don't need custom `/ws` config in `Addresses.Swarm`. Try running this with `AutoTLS.AutoWSS=true`: it will reuse preexisting catch-all `/tcp` ports that were already forwarded/safelisted on your firewall.
> - Debugging can be enabled by setting environment variable `GOLOG_LOG_LEVEL="error,autotls=debug,p2p-forge/client=debug"`. Less noisy `GOLOG_LOG_LEVEL="error,autotls=info` may be informative enough.
> - Certificates are stored in `$IPFS_PATH/p2p-forge-certs`. Removing directory and restarting daemon will trigger certificate rotation.

> [!IMPORTANT]
> Caveats:
> - Requires your Kubo node to be publicly dialable.
> - If you want to test this with a node that is behind a NAT and uses manual port forwarding or UPnP (`Swarm.DisableNatPortMap=false`),
> add catch-all `/ip4/0.0.0.0/tcp/4002/tls/sni/*.libp2p.direct/ws` and `/ip6/::/tcp/4002/tls/sni/*.libp2p.direct/ws` to [`Addresses.Swarm`](#addressesswarm)
> - If you want to test this with a node that is behind a NAT and uses manual TCP port forwarding or UPnP (`Swarm.DisableNatPortMap=false`), use `AutoTLS.AutoWSS=true`, or manually
> add catch-all `/ip4/0.0.0.0/tcp/4001/tls/sni/*.libp2p.direct/ws` and `/ip6/::/tcp/4001/tls/sni/*.libp2p.direct/ws` to [`Addresses.Swarm`](#addressesswarm)
> and **wait 5-15 minutes** for libp2p node to set up and learn about own public addresses via [AutoNAT](#autonat).
> - If your node is fresh and just started, the [p2p-forge] client may produce and log ERRORs during this time, but once a publicly dialable addresses are set up, a subsequent retry should be successful.
> - Listeners defined in [`Addresses.Swarm`](#addressesswarm) with `/tls/sni` must use a separate port from other TCP listeners, e.g. `4002` instead of the default `4001`.
> - A separate port (`/tcp/4002`) has to be used instead of `/tcp/4001` because we wait for TCP port sharing ([go-libp2p#2984](https://github.com/libp2p/go-libp2p/issues/2684)) to be implemented.
> - If you use manual port forwarding, make sure incoming connections to this additional port are allowed the same way `4001` ones already are.
> - The TLS certificate is used only for [libp2p WebSocket](https://github.com/libp2p/specs/blob/master/websockets/README.md) connections.
> - Right now, this is NOT used for hosting a [Gateway](#gateway) over HTTPS (that use case still requires manual TLS setup on reverse proxy, and your own domain).

> [!TIP]
> - Debugging can be enabled by setting environment variable `GOLOG_LOG_LEVEL="error,autotls=debug,p2p-forge/client=debug"`
> - Certificates are stored in `$IPFS_PATH/p2p-forge-certs`. Removing directory and restarting daemon will trigger certificate rotation.

Default: `false`

Type: `flag`

### `AutoTLS.AutoWSS`

Optional. Controls if Kubo should add `/tls/sni/*.libp2p.direct/ws` listener to every pre-existing `/tcp` port IFF no explicit `/ws` is defined in [`Addresses.Swarm`](#addressesswarm) already.

Default: `true` (active only if `AutoTLS.Enabled` is `true` as well)

Type: `flag`

### `AutoTLS.DomainSuffix`

Optional override of the parent domain suffix that will be used in DNS+TLS+WebSockets multiaddrs generated by [p2p-forge] client.
Expand Down Expand Up @@ -2150,8 +2157,8 @@ Default: Enabled
Type: `flag`

Listen Addresses:
* /ip4/0.0.0.0/tcp/4002/ws
* /ip6/::/tcp/4002/ws
* /ip4/0.0.0.0/tcp/4001/ws
* /ip6/::/tcp/4001/ws

#### `Swarm.Transports.Network.QUIC`

Expand Down
2 changes: 2 additions & 0 deletions docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,5 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/blake3 v1.3.0 // indirect
)

replace github.com/libp2p/go-libp2p => github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e
4 changes: 2 additions & 2 deletions docs/examples/kubo-as-a-library/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,8 @@ github.com/libp2p/go-flow-metrics v0.0.1/go.mod h1:Iv1GH0sG8DtYN3SVJ2eG221wMiNpZ
github.com/libp2p/go-flow-metrics v0.0.3/go.mod h1:HeoSNUrOJVK1jEpDqVEiUOIXqhbnS27omG0uWU5slZs=
github.com/libp2p/go-flow-metrics v0.2.0 h1:EIZzjmeOE6c8Dav0sNv35vhZxATIXWZg6j/C08XmmDw=
github.com/libp2p/go-flow-metrics v0.2.0/go.mod h1:st3qqfu8+pMfh+9Mzqb2GTiwrAGjIPszEjZmtksN8Jc=
github.com/libp2p/go-libp2p v0.37.2 h1:Irh+n9aDPTLt9wJYwtlHu6AhMUipbC1cGoJtOiBqI9c=
github.com/libp2p/go-libp2p v0.37.2/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e h1:XxOIfolkGAmT+Wt9Hh1zLeJcEUq/JUQGn1GcbGvqjzU=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
github.com/libp2p/go-libp2p-core v0.2.4/go.mod h1:STh4fdfa5vDYr0/SzYYeqnt+E6KfEV5VxfIrm0bcI0g=
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,5 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/blake3 v1.3.0 // indirect
)

replace github.com/libp2p/go-libp2p => github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e
Copy link
Member

@lidel lidel Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got this to the state that is as close to go-libp2p release as possible, filling this here so I won't forget:

  • remove from all go.mod files and switch to go-libp2p release

Copy link
Member

@lidel lidel Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There seem to be a regression between 9b887ea (this PR) and 433444b (latest master) caused by TCP reuse logic in go-libp2p bumped here.

I did curl localhost:5001/debug/pprof/profile > ipfs.cpuprof and CPU profile seems to confirm the source is TCP reuse from libp2p/go-libp2p#2984? (cc @aschmahmann @MarcoPolo):

cpu

How to reproduce

  • Start ipfs daemon
  • Open http://127.0.0.1:5001/webui#/peers and wait a minute:
    • this will trigger connections to peers to fetch any missing block get for geoip data (if not cached locally) – acting like accelerator for any underlying bugs in TCP port reuse
    • 💢 staging-2024-12-04-9b887e (go-libp2p master v0.37.1-0.20241202220543-9024f8e8c86e) will grow CPU use once ipfs-webui Peers screen is opened. CPU peaks within 1minute.
    • 💚 master-2024-12-03-433444b (go-libp2p v0.37.2) does not trigger high CPU load

Here are docker images for repro convenience:

  • docker run --rm -it --net=host ipfs/kubo:staging-2024-12-04-9b887ea
  • docker run --rm -it --net=host ipfs/kubo:master-2024-12-03-433444b

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

libp2p/go-libp2p#3080 seems to fix the CPU spin.

Tested in 2adb2f1 – need to investigate why CI fails.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am debugging sharness with make -O -j 1 because CI runs with -j 10 and logs are mangled.

Copy link
Member

@lidel lidel Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting bizzare. Tests with the same go-libp2p and boxo version pass in #10631.

The only go.mod diff between a436f4e (this PR, ci failing due to timeout after 20m) and 070e6ae (#10631, ci green, sharness run takes 5minutes), is:

-       github.com/ipshipyard/p2p-forge v0.1.0
+       github.com/ipshipyard/p2p-forge v0.2.0

other than that, we don't change anything in default config other than enabling shared tcp.
Will disable (b5cfd6d) and re-run, to see if that causes issue somewhere.

Copy link
Member

@lidel lidel Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 👉 Sharness passes green if we disable libp2p.ShareTCPListener() (b5cfd6d passed in job here).

Something related to libp2p.ShareTCPListener() makes sharness hang and timeout if run in 10 parallel threads (cd test/sharness and time make -O -j 10).

Copy link
Member

@lidel lidel Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still looking for the cause, but I was able to reproduce locally by using the same settings as CI:

$ export TEST_DOCKER=1
$ export TEST_PLUGIN=0
$ export TEST_FUSE=0
$ export TEST_VERBOSE=1
$ export TEST_JUNIT=1
$ export TEST_EXPENSIVE=1
$ export IPFS_CHECK_RCMGR_DEFAULTS=1
$ export CONTINUE_ON_S_FAILURE=1
$ time make -O -j 10 test_sharness coverage/sharness_tests.coverprofile test/sharness/test-results/sharness.xml

# [...]
ok 1717 - add a few entries to big_dir/ to retrigger sharding

expecting success:
    kill -0 $IPFS_PID

ok 1718 - 'ipfs daemon' is still running

expecting success:
    test_kill_repeat_10_sec $IPFS_PID

ok 1719 - 'ipfs daemon' can be killed

# passed all 1719 test(s)
1..1719

# hangs..

It hanged for a few minutes and finished eventually, but took 3x longer than regular run (15m vs 5m).
Re-run and it hanged for 20m and i had to kill it, so there is some factor of randomness to the hang.

Looking at process tree spawned by make, the t0250-files-api.sh and t0260-sharding.sh t0181-private-network.sh t0182-circuit-relay.sh and t0320-pubsub.sh seem to hang the longest in semi-random order.

Will resume tomorrow unless someone else has any idea.

In the meantime, i've re-run https://github.com/ipfs/kubo/actions/runs/12420930073 to double-confirm disabling libp2p.ShareTCPListener() fixes sharness.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run two tests to narrow down the search:

  • 🟢 7a4ec80 disabled libp2p.ShareTCPListener() and sharness finished in 8m successfully (log)
  • 🔴 858e10a enabled libp2p.ShareTCPListener() and sharness timeouted after 20m (log)

Maybe increasing timeout would help, but taking 2x as long feels like a bug.

Copy link
Member

@lidel lidel Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, running sequentially (#10565 (comment)) found the problem:

  • Enabling libp2p.ShareTCPListener() break PNET and ./t0181-private-network.sh hangs – kinda makes sense, PNET is TCP-only feature of libp2p and we mess up with TCP.

I'll now confirm this is the only one failing,
and if other tests pass, will disable TCP sharing when PNET is enabled for now.

I'll also see if we can set timeout per test suite / unit, to avoid this debugging horror in the future.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good news, PNET was the only reason sharness failed. Disabled port sharing when PNET is enabled (bb87df3) and CI is green again:

I'm going forward with Kubo 0.33.0-rc1.

4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,8 @@ github.com/libp2p/go-flow-metrics v0.0.1/go.mod h1:Iv1GH0sG8DtYN3SVJ2eG221wMiNpZ
github.com/libp2p/go-flow-metrics v0.0.3/go.mod h1:HeoSNUrOJVK1jEpDqVEiUOIXqhbnS27omG0uWU5slZs=
github.com/libp2p/go-flow-metrics v0.2.0 h1:EIZzjmeOE6c8Dav0sNv35vhZxATIXWZg6j/C08XmmDw=
github.com/libp2p/go-flow-metrics v0.2.0/go.mod h1:st3qqfu8+pMfh+9Mzqb2GTiwrAGjIPszEjZmtksN8Jc=
github.com/libp2p/go-libp2p v0.37.2 h1:Irh+n9aDPTLt9wJYwtlHu6AhMUipbC1cGoJtOiBqI9c=
github.com/libp2p/go-libp2p v0.37.2/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e h1:XxOIfolkGAmT+Wt9Hh1zLeJcEUq/JUQGn1GcbGvqjzU=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
github.com/libp2p/go-libp2p-core v0.2.4/go.mod h1:STh4fdfa5vDYr0/SzYYeqnt+E6KfEV5VxfIrm0bcI0g=
Expand Down
3 changes: 3 additions & 0 deletions test/dependencies/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ require (
github.com/libp2p/go-msgio v0.3.0 // indirect
github.com/libp2p/go-nat v0.2.0 // indirect
github.com/libp2p/go-netroute v0.2.1 // indirect
github.com/libp2p/go-reuseport v0.4.0 // indirect
github.com/lufeee/execinquery v1.2.1 // indirect
github.com/macabu/inamedparam v0.1.3 // indirect
github.com/magiconair/properties v1.8.7 // indirect
Expand Down Expand Up @@ -312,3 +313,5 @@ require (
mvdan.cc/gofumpt v0.7.0 // indirect
mvdan.cc/unparam v0.0.0-20240528143540-8a5130ca722f // indirect
)

replace github.com/libp2p/go-libp2p => github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e
4 changes: 2 additions & 2 deletions test/dependencies/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,8 @@ github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38y
github.com/libp2p/go-cidranger v1.1.0/go.mod h1:KWZTfSr+r9qEo9OkI9/SIEeAtw+NNoU0dXIXt15Okic=
github.com/libp2p/go-flow-metrics v0.2.0 h1:EIZzjmeOE6c8Dav0sNv35vhZxATIXWZg6j/C08XmmDw=
github.com/libp2p/go-flow-metrics v0.2.0/go.mod h1:st3qqfu8+pMfh+9Mzqb2GTiwrAGjIPszEjZmtksN8Jc=
github.com/libp2p/go-libp2p v0.37.2 h1:Irh+n9aDPTLt9wJYwtlHu6AhMUipbC1cGoJtOiBqI9c=
github.com/libp2p/go-libp2p v0.37.2/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e h1:XxOIfolkGAmT+Wt9Hh1zLeJcEUq/JUQGn1GcbGvqjzU=
github.com/libp2p/go-libp2p v0.37.1-0.20241202220543-9024f8e8c86e/go.mod h1:M8CRRywYkqC6xKHdZ45hmqVckBj5z4mRLIMLWReypz8=
github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
github.com/libp2p/go-libp2p-kad-dht v0.28.1 h1:DVTfzG8Ybn88g9RycIq47evWCRss5f0Wm8iWtpwyHso=
Expand Down
Loading