diff --git a/.github/workflows/pr-commitlint.yml b/.github/workflows/pr-commitlint.yml index 87a018d50..b691ce993 100644 --- a/.github/workflows/pr-commitlint.yml +++ b/.github/workflows/pr-commitlint.yml @@ -18,4 +18,7 @@ jobs: last_commit=HEAD^2 # don't lint the merge commit npx commitlint --from $first_commit~1 --to $last_commit -V - name: Lint Pull Request - run: echo "${{ github.event.pull_request.title }}"$'\n\n'"${{ github.event.pull_request.body }}" | npx commitlint -V + env: + TITLE: ${{ github.event.pull_request.title }} + BODY: ${{ github.event.pull_request.body }} + run: export NL=; printenv TITLE NL BODY | npx commitlint -V diff --git a/.gitignore b/.gitignore index be3036fc8..9b453f9a6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ mayastor/local-write_verify-0-verify.state test-yamls/* /package-lock.json /node_modules +artifacts/ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..4dd6f4572 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,59 @@ +## CNCF Community Code of Conduct v1.0 + +Other languages available: +- [Chinese/中文](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/zh.md) +- [Czech/Česky](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/cs.md) +- [German/Deutsch](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/de.md) +- [Spanish/Español](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/es.md) +- [French/Français](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/fr.md) +- [Italian/Italiano](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/it.md) +- [Japanese/日本語](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/jp.md) +- [Korean/한국어](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/ko.md) +- [Ukrainian/Українська](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/uk.md) +- [Russian/Русский](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/ru.md) +- [Portuguese/Português](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/pt.md) +- [Arabic/العربية](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/ar.md) +- [Polish/Polski](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/pl.md) +- [Indonesian/Bahasa Indonesia](https://raw.githubusercontent.com/cncf/foundation/master/code-of-conduct-languages/id.md) + +### Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of fostering +an open and welcoming community, we pledge to respect all people who contribute +through reporting issues, posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for +everyone, regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, body size, race, ethnicity, age, +religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing others' private information, such as physical or electronic addresses, + without explicit permission +* Other unethical or unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are not +aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers +commit themselves to fairly and consistently applying these principles to every aspect +of managing this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This code of conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +Instances of abusive, harassing, or otherwise unacceptable behavior in Kubernetes may be reported by contacting the [Kubernetes Code of Conduct Committee](https://git.k8s.io/community/committee-code-of-conduct) via . For other projects, please contact a CNCF project maintainer or our mediator, Mishi Choudhary . + +This Code of Conduct is adapted from the Contributor Covenant +(http://contributor-covenant.org), version 1.2.0, available at +http://contributor-covenant.org/version/1/2/0/ + +### CNCF Events Code of Conduct + +CNCF events are governed by the Linux Foundation [Code of Conduct](https://events.linuxfoundation.org/code-of-conduct/) available on the event page. This is designed to be compatible with the above policy and also includes more details on responding to incidents. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..d4276e956 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,140 @@ +# Contributing to MayaStor + +We're excited to have you interested in contributing to MayaStor! + +> *Disclaimer:* MayaStor is a **beta** project, and contributors at this stage of the project +> lifecycle may experience minor hurdles to contribution. +> +> **We want to overcome these. Please report them.** + +If you have any questions, our ecosystem can be connected with over [Discord][mayastor-discord] +(for development) and [Slack][mayastor-slack] ([invite][mayastor-slack-inviter], for support). + +Our interactions here are governed by the [CNCF Code of Conduct](CODE-OF_CONDUCT.md). + +## Development Environment + +Consult the [`doc/build.md`](doc/build.md) for a complete guide to getting started contributing +to MayaStor. + +## Issues & Pull Requests + +### Reporting Bugs + +You would be **the best** if you reported complete, well described, reproducable bugs to us. If +you can't, that's ok. Do your best. + +Our [Bug Report][issue-bug-report] template includes instructions to get the the information we +need from you. + +### Requesting new features + +You are invited to open *complete, well described* issues proposing new features. While MayaStor +has no formal RFC process at this time, the [Rust RFC template][rust-rfc-template] is an +excellent place to derive your issue description from. + +**You should indicate if you are able to complete and support features you propose.** + +### Committing + +Start work off the `develop` branch. **Not `master`.** + +[bors][bors] will merge your commits. We do not do [*squash merges*][squash-merges]. + +Each commit message must adhere to [Conventional Commits][conventional-commits]. You can use +[`convco`][tools-convco] if you would prefer a tool to help. + +It is absolutely fine to force push your branch if you need. Feel free to rewrite commit history +of your pull requests. + +### Reviews + +The review process is governed by [bors][bors]. + +Pull requests require at least 1 approval from maintainer or SIG member. + +Once review is given, Maintainers and SIG members may indicate merge readiness with the comment +`bors merge`. + +**Please do not hit the 'Update Branch' button.** The commit message is not conventional, +[bors][bors] will yell at you. Let [bors][bors] handle it, or rebase it yourself. + +## Organization + +Our maintainers are: + +* [@gila][members-gila] - [@mayadata-io][maya-data] +* [@jkryl][members-jkryl] - [@mayadata-io][maya-data] +* [@GlennBullingham][members-GlennBullingham] - [@mayadata-io][maya-data] + +Our Special Interest Groups (SIGs) are: + +* Dataplane + + [@hoverbear][members-hoverbear] - [@mayadata-io][maya-data] & + [@Hoverbear-Consulting](https://github.com/Hoverbear-Consulting) + + [@mtzaurus][members-mtzaurus] - [@mayadata-io][maya-data] + + [@jonathan-teh][members-jonathan-teh] - [@mayadata-io][maya-data] +* e2e-testing + + [@chriswldenyer][members-chriswldenyer] - [@mayadata-io][maya-data] + + [@blaisedias][members-blaisedias] - [@mayadata-io][maya-data] +* Control Plane + + [@tiagolobocastro][members-tiagolobocastro] - [@mayadata-io][maya-data] + + [@paulyoong][members-paulyoong] - [@mayadata-io][maya-data] + +Former SIGs (and their members) are: + +* None, yet! + +### Organization FAQs + +* **What is a *Maintainer*?** + + Maintainers are the project architects. They have the final say on what features get accepted, + what code gets merged, when releases are cut, and how the project evolves. + + Maintainers **must** make decisions unanimously, no majorities, no votes. + +* **What is a *Special Interest Group (SIG)*?** + + SIGs are small ephemeral teams (max 7) working on a general topic. + + They may change at any time, and have no strict definition. + + SIGs may be created, empowered, and destroyed by the maintainers at any time. + +* **Are there other levels/roles/organization structure?** + + No. We want to focus on building MayaStor. + + It's preferable that we flow like water as opposed to become a rue goldberg machine of rules. + +* **May I join a SIG? Become a maintainer?** + + Of course, we'd love that! + + Once you have a bit of contribution history with the project you will probably already find + yourself working with a SIG, so ask, and they'll include you. + + Once you have acted as part of multiple SIGs, contributed at least one major feature, and + resolved multiple bug reports, our maintainers may choose to include you in their midst. + +[maya-data]: https://github.com/mayadata-io/ +[mayastor-discord]: https://discord.gg/nhpyMeJCHE +[mayastor-slack]: https://kubernetes.slack.com/messages/openebs +[mayastor-slack-inviter]: https://slack.k8s.io/ +[members-gila]: https://github.com/gila +[members-jkryl]: https://github.com/jkryl +[members-GlennBullingham]: https://github.com/GlennBullingham +[members-hoverbear]: https://github.com/hoverbear +[members-tiagolobocastro]: https://github.com/tiagolobocastro +[members-mtzaurus]: https://github.com/mtzaurus +[members-jonathan-teh]: https://github.com/jonathan-teh +[members-paulyoong]: https://github.com/paulyoong +[members-chriswldenyer]: https://github.com/chriswldenyer +[members-blaisedias]: https://github.com/blaisedias +[rust-rfc-template]: https://github.com/rust-lang/rfcs/blob/master/0000-template.md +[issue-bug-report]: https://github.com/openebs/Mayastor/issues/new?labels=new&template=bug_report.md +[bors]: https://bors.tech/ +[squash-merges]: https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-request-merges#squash-and-merge-your-pull-request-commits +[conventional-commits]: https://www.conventionalcommits.org/en/v1.0.0/ +[tools-convco]: https://convco.github.io/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 60c2ed6c4..3c9391f47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,7 +39,7 @@ dependencies = [ "derive_more", "either", "futures-util", - "http 0.2.2", + "http 0.2.3", "log", "rustls", "tokio-rustls", @@ -76,7 +76,7 @@ dependencies = [ "futures-util", "fxhash", "h2", - "http 0.2.2", + "http 0.2.3", "httparse", "indexmap", "itoa", @@ -102,7 +102,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a60f9ba7c4e6df97f3aacb14bb5c0cd7d98a49dcbaed0d7f292912ad9a6a3ed2" dependencies = [ - "quote 1.0.7", + "quote 1.0.8", + "syn 1.0.51", +] + +[[package]] +name = "actix-openapi-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.8", "syn 1.0.51", ] @@ -113,7 +122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd1f7dbda1645bf7da33554db60891755f6c01c1b2169e2f4c492098d30c235" dependencies = [ "bytestring", - "http 0.2.2", + "http 0.2.3", "log", "regex", "serde", @@ -276,7 +285,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad26f77093333e0e7c6ffe54ebe3582d908a104e448723eec6d43d08b07143fb" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -309,6 +318,34 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" +[[package]] +name = "agents" +version = "0.1.0" +dependencies = [ + "async-trait", + "composer", + "dyn-clonable", + "futures", + "http 0.2.3", + "humantime 2.0.1", + "lazy_static", + "mbus_api", + "nats", + "rpc", + "serde", + "serde_json", + "smol", + "snafu", + "state", + "structopt", + "tokio", + "tonic", + "tracing", + "tracing-futures", + "tracing-subscriber", + "url", +] + [[package]] name = "ahash" version = "0.3.8" @@ -509,7 +546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25f9db3b38af870bf7e5cc649167533b493928e50744e2c30ae350230b414670" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -526,7 +563,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -661,7 +698,7 @@ dependencies = [ "log", "peeking_take_while", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "regex", "rustc-hash", "shlex", @@ -741,7 +778,7 @@ dependencies = [ "futures-core", "futures-util", "hex", - "http 0.2.2", + "http 0.2.3", "hyper", "hyper-rustls", "hyper-unix-connector", @@ -833,6 +870,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" +[[package]] +name = "bytes" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" + [[package]] name = "bytesize" version = "1.0.1" @@ -1301,7 +1344,7 @@ dependencies = [ "fnv", "ident_case", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "strsim 0.9.3", "syn 1.0.51", ] @@ -1324,7 +1367,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" dependencies = [ "darling_core 0.10.2", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1355,6 +1398,22 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993a608597367c6377b258c25d7120740f00ed23a2252b729b1932dd7866f908" +[[package]] +name = "deployer" +version = "0.1.0" +dependencies = [ + "async-trait", + "composer", + "mbus_api", + "nats", + "paste", + "rpc", + "structopt", + "strum", + "strum_macros", + "tokio", +] + [[package]] name = "derivative" version = "2.1.1" @@ -1362,7 +1421,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb582b60359da160a9477ee80f15c8d784c477e69c217ef2cdd4169c24ea380f" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1398,7 +1457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1488,7 +1547,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "558e40ea573c374cf53507fd240b7ee2f5477df7cfebdb97323ec61c719399c5" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1544,7 +1603,7 @@ checksum = "7c5f0096a91d210159eceb2ff5e1c4da18388a170e1e3ce948aac9c8fdbbf595" dependencies = [ "heck", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1580,7 +1639,7 @@ checksum = "22deed3a8124cff5fa835713fa105621e43bbdc46690c3a6b68328a012d350d4" dependencies = [ "proc-macro-error", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "rustversion", "syn 1.0.51", "synstructure", @@ -1630,7 +1689,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "synstructure", ] @@ -1796,7 +1855,7 @@ checksum = "77408a692f1f97bcc61dc001d752e00643408fbc922e4d634c655df50d595556" dependencies = [ "proc-macro-hack", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1925,7 +1984,7 @@ checksum = "34a97a52fdee1870a34fa6e4b77570cba531b27d1838874fef4429a791a3d657" dependencies = [ "proc-macro-hack", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -1945,7 +2004,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http 0.2.2", + "http 0.2.3", "indexmap", "slab", "tokio", @@ -2008,11 +2067,11 @@ dependencies = [ [[package]] name = "http" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84129d298a6d57d246960ff8eb831ca4af3f96d29e2e28848dae275408658e26" +checksum = "7245cd7449cc792608c3c8a9eaf69bd4eabbabf802713748fd739c98b82f0747" dependencies = [ - "bytes 0.5.6", + "bytes 1.0.1", "fnv", "itoa", ] @@ -2024,7 +2083,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4908999be8b408e507d4148f3374a6f9e34e941f2d8c3928b1d565f1453291d" dependencies = [ "bytes 0.5.6", - "http 0.2.2", + "http 0.2.3", ] [[package]] @@ -2034,7 +2093,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b" dependencies = [ "bytes 0.5.6", - "http 0.2.2", + "http 0.2.3", ] [[package]] @@ -2075,7 +2134,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http 0.2.2", + "http 0.2.3", "http-body 0.3.1", "httparse", "httpdate", @@ -2303,7 +2362,7 @@ dependencies = [ "base64 0.12.3", "bytes 0.5.6", "chrono", - "http 0.2.2", + "http 0.2.3", "percent-encoding 2.1.0", "serde", "serde-value", @@ -2335,7 +2394,7 @@ dependencies = [ "either", "futures", "futures-util", - "http 0.2.2", + "http 0.2.3", "jsonpath_lib", "k8s-openapi", "log", @@ -2360,7 +2419,7 @@ checksum = "cd71bf282e5551ac0852afcf25352b7fb8dd9a66eed7b6e66a6ebbf6b5b2f475" dependencies = [ "Inflector", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "serde_json", "syn 1.0.51", ] @@ -2440,6 +2499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" dependencies = [ "scopeguard", + "serde", ] [[package]] @@ -2483,6 +2543,13 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "macros" +version = "0.1.0" +dependencies = [ + "actix-openapi-macros", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -2587,6 +2654,8 @@ dependencies = [ "log", "nats", "once_cell", + "paperclip", + "percent-encoding 2.1.0", "rpc", "serde", "serde_json", @@ -3032,6 +3101,81 @@ dependencies = [ "num-traits 0.2.14", ] +[[package]] +name = "paperclip" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cc445ec12c9ce0ba673cfda392c4aaea27bc5e26fa3e7bd2689386208f00f7b" +dependencies = [ + "anyhow", + "itertools 0.9.0", + "once_cell", + "paperclip-actix", + "paperclip-core", + "paperclip-macros", + "parking_lot", + "semver", + "serde", + "serde_derive", + "serde_json", + "serde_yaml", + "thiserror", + "url", +] + +[[package]] +name = "paperclip-actix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f3d2788500bb13c5b0d453e2225e38ed7369f630a14adade8840fee12ee41e5" +dependencies = [ + "actix-service", + "actix-web", + "futures", + "once_cell", + "paperclip-core", + "paperclip-macros", + "parking_lot", + "serde_json", +] + +[[package]] +name = "paperclip-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b82c73e73209604585f3c8e3eb3c1f386ddc521d5311047d2de25a88a91f3613" +dependencies = [ + "actix-web", + "mime", + "once_cell", + "paperclip-macros", + "parking_lot", + "pin-project 1.0.2", + "regex", + "serde", + "serde_json", + "serde_yaml", + "thiserror", +] + +[[package]] +name = "paperclip-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c526435a3b0cbc5145d3aa6d66fd32adf987b9f588ace873c01ed2bc6e23f451" +dependencies = [ + "heck", + "http 0.2.3", + "lazy_static", + "mime", + "proc-macro-error", + "proc-macro2 1.0.24", + "quote 1.0.8", + "strum", + "strum_macros", + "syn 1.0.51", +] + [[package]] name = "parking" version = "2.0.0" @@ -3072,6 +3216,12 @@ dependencies = [ "err-derive", ] +[[package]] +name = "paste" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d65c4d95931acda4498f675e332fcbdc9a06705cd07086c510e9b6009cd1c1" + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -3136,7 +3286,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -3147,7 +3297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8e8d2bf0b23038a4424865103a4df472855692821aab4e4f5c3312d461d9e5f" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -3202,7 +3352,7 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "version_check", ] @@ -3214,7 +3364,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "version_check", ] @@ -3295,7 +3445,7 @@ dependencies = [ "anyhow", "itertools 0.8.2", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -3332,9 +3482,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ "proc-macro2 1.0.24", ] @@ -3571,7 +3721,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "http 0.2.2", + "http 0.2.3", "http-body 0.3.1", "hyper", "hyper-tls", @@ -3612,15 +3762,19 @@ name = "rest" version = "0.1.0" dependencies = [ "actix-rt", + "actix-service", "actix-web", "actix-web-opentelemetry", "anyhow", "async-trait", "composer", "futures", + "http 0.2.3", + "macros", "mbus_api", "opentelemetry", "opentelemetry-jaeger", + "paperclip", "rpc", "rustls", "serde", @@ -3629,6 +3783,7 @@ dependencies = [ "structopt", "strum", "strum_macros", + "tinytemplate", "tokio", "tracing", "tracing-futures", @@ -3865,7 +4020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -3923,7 +4078,7 @@ checksum = "1197ff7de45494f290c1e3e1a6f80e108974681984c87a3e480991ef3d0f1950" dependencies = [ "darling 0.10.2", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -3939,34 +4094,6 @@ dependencies = [ "yaml-rust", ] -[[package]] -name = "services" -version = "0.1.0" -dependencies = [ - "async-trait", - "composer", - "dyn-clonable", - "futures", - "http 0.2.2", - "humantime 2.0.1", - "lazy_static", - "mbus_api", - "nats", - "rpc", - "serde", - "serde_json", - "smol", - "snafu", - "state", - "structopt", - "tokio", - "tonic", - "tracing", - "tracing-futures", - "tracing-subscriber", - "url", -] - [[package]] name = "sha-1" version = "0.9.2" @@ -4101,7 +4228,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7073448732a89f2f3e6581989106067f403d378faeafb4a50812eb814170d3e5" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4173,7 +4300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "serde", "serde_derive", "syn 1.0.51", @@ -4187,7 +4314,7 @@ checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11" dependencies = [ "base-x", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "serde", "serde_derive", "serde_json", @@ -4239,7 +4366,7 @@ dependencies = [ "heck", "proc-macro-error", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4257,7 +4384,7 @@ checksum = "e61bb0be289045cb80bfce000512e32d09f8337e54c186725da381377ad1f8d5" dependencies = [ "heck", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4305,7 +4432,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b4f34193997d92804d359ed09953e25d5138df6bcc055a71bf68ee89fdf9223" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "unicode-xid 0.2.1", ] @@ -4325,7 +4452,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "unicode-xid 0.2.1", ] @@ -4393,7 +4520,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4472,7 +4599,7 @@ checksum = "e5c3be1edfad6027c69f5491cf4cb310d1a71ecd6af742788c6ff8bced86b8fa" dependencies = [ "proc-macro-hack", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "standback", "syn 1.0.51", ] @@ -4486,6 +4613,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinytemplate" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ada8616fad06a2d0c455adc530de4ef57605a8120cc65da9653e0e9623ca74" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.1.0" @@ -4532,7 +4669,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e44da00bfc73a25f814cd8d7e57a68a5c31b74b3152a0a1d1f590c97ed06265a" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4598,7 +4735,7 @@ dependencies = [ "bytes 0.5.6", "futures-core", "futures-util", - "http 0.2.2", + "http 0.2.3", "http-body 0.3.1", "hyper", "percent-encoding 1.0.1", @@ -4624,7 +4761,7 @@ checksum = "0436413ba71545bcc6c2b9a0f9d78d72deb0123c6a75ccdfe7c056f9930f5e52" dependencies = [ "proc-macro2 1.0.24", "prost-build", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -4826,7 +4963,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e0ccfc3378da0cce270c946b676a376943f5cd16aeba64568e7939806f4ada" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", ] @@ -5141,7 +5278,7 @@ dependencies = [ "lazy_static", "log", "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "wasm-bindgen-shared", ] @@ -5164,7 +5301,7 @@ version = "0.2.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b13312a745c08c469f0b292dd2fcd6411dba5f7160f593da6ef69b64e407038" dependencies = [ - "quote 1.0.7", + "quote 1.0.8", "wasm-bindgen-macro-support", ] @@ -5175,7 +5312,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f249f06ef7ee334cc3b8ff031bfc11ec99d00f34d86da7498396dc1e3b1498fe" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "wasm-bindgen-backend", "wasm-bindgen-shared", @@ -5208,7 +5345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8fb9c67be7439ee8ab1b7db502a49c05e51e2835b66796c705134d9b8e1a585" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", ] [[package]] @@ -5361,7 +5498,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3f369ddb18862aba61aa49bf31e74d29f0f162dec753063200e1dc084345d16" dependencies = [ "proc-macro2 1.0.24", - "quote 1.0.7", + "quote 1.0.8", "syn 1.0.51", "synstructure", ] diff --git a/Cargo.toml b/Cargo.toml index df42cb618..5a901b448 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,9 +14,11 @@ members = [ "nvmeadm", "rpc", "sysfs", - "services", - "mbus-api", + "control-plane/agents", + "control-plane/mbus-api", "composer", - "rest", - "operators", + "control-plane/rest", + "control-plane/operators", + "control-plane/macros", + "control-plane/deployer" ] diff --git a/Jenkinsfile b/Jenkinsfile index c327d41a6..1230ca274 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,15 @@ #!/usr/bin/env groovy +// On-demand E2E infra configuration +// https://mayadata.atlassian.net/wiki/spaces/MS/pages/247332965/Test+infrastructure#On-Demand-E2E-K8S-Clusters + +def e2e_build_cluster_job='k8s-build-cluster' // Jenkins job to build cluster +def e2e_destroy_cluster_job='k8s-destroy-cluster' // Jenkins job to destroy cluster +// Environment to run e2e test in (job param of $e2e_build_cluster_job) +def e2e_environment="hcloud-kubeadm" +// Global variable to pass current k8s job between stages +def k8s_job="" + // Searches previous builds to find first non aborted one def getLastNonAbortedBuild(build) { if (build == null) { @@ -133,37 +143,129 @@ pipeline { } } } - } - } - stage('e2e tests') { - agent { label 'nixos-mayastor' } - environment { - GIT_COMMIT_SHORT = sh( - // using printf to get rid of trailing newline - script: "printf \$(git rev-parse --short ${GIT_COMMIT})", - returnStdout: true - ) - } - steps { - // e2e tests are the most demanding step for space on the disk so we - // test the free space here rather than repeating the same code in all - // stages. - sh "./scripts/reclaim-space.sh 10" - // Build images (REGISTRY is set in jenkin's global configuration). - // Note: We might want to build and test dev images that have more - // assertions instead but that complicates e2e tests a bit. - sh "./scripts/release.sh --alias-tag ci --registry ${env.REGISTRY}" - withCredentials([file(credentialsId: 'kubeconfig', variable: 'KUBECONFIG')]) { - sh 'kubectl get nodes -o wide' - sh "nix-shell --run './scripts/e2e-test.sh --device /dev/nvme1n1 --tag \"${env.GIT_COMMIT_SHORT}\" --registry \"${env.REGISTRY}\"'" - } - } - // Always remove all docker images because they are usually used just once - // and underlaying pkgs are already cached by nix so they can be easily - // recreated. - post { - always { - sh 'docker image prune --all --force' + stage('e2e tests') { + stages { + stage('e2e docker images') { + agent { label 'nixos-mayastor' } + steps { + // e2e tests are the most demanding step for space on the disk so we + // test the free space here rather than repeating the same code in all + // stages. + sh "./scripts/reclaim-space.sh 10" + // Build images (REGISTRY is set in jenkin's global configuration). + // Note: We might want to build and test dev images that have more + // assertions instead but that complicates e2e tests a bit. + sh "./scripts/release.sh --alias-tag ci --registry \"${env.REGISTRY}\"" + // Always remove all docker images because they are usually used just once + // and underlaying pkgs are already cached by nix so they can be easily + // recreated. + } + post { + always { + sh 'docker image prune --all --force' + } + } + } + stage('build e2e cluster') { + agent { label 'nixos' } + steps { + script { + k8s_job=build( + job: "${e2e_build_cluster_job}", + propagate: true, + wait: true, + parameters: [[ + $class: 'StringParameterValue', + name: "ENVIRONMENT", + value: "${e2e_environment}" + ]] + ) + } + } + } + stage('run e2e') { + agent { label 'nixos-mayastor' } + environment { + GIT_COMMIT_SHORT = sh( + // using printf to get rid of trailing newline + script: "printf \$(git rev-parse --short ${GIT_COMMIT})", + returnStdout: true + ) + KUBECONFIG = "${env.WORKSPACE}/${e2e_environment}/modules/k8s/secrets/admin.conf" + } + steps { + // FIXME(arne-rusek): move hcloud's config to top-level dir in TF scripts + sh """ + mkdir -p "${e2e_environment}/modules/k8s/secrets" + """ + copyArtifacts( + projectName: "${k8s_job.getProjectName()}", + selector: specific("${k8s_job.getNumber()}"), + filter: "${e2e_environment}/modules/k8s/secrets/admin.conf", + target: "", + fingerprintArtifacts: true + ) + sh 'kubectl get nodes -o wide' + sh "nix-shell --run './scripts/e2e-test.sh --device /dev/sdb --tag \"${env.GIT_COMMIT_SHORT}\" --registry \"${env.REGISTRY}\"'" + } + post { + failure { + script { + withCredentials([string(credentialsId: 'HCLOUD_TOKEN', variable: 'HCLOUD_TOKEN')]) { + e2e_nodes=sh( + script: """ + nix-shell -p hcloud --run 'hcloud server list' | grep -e '-${k8s_job.getNumber()} ' | awk '{ print \$2" "\$4 }' + """, + returnStdout: true + ).trim() + } + // Job name for multi-branch is Mayastor/ however + // in URL jenkins requires /job/ in between for url to work + urlized_job_name=JOB_NAME.replaceAll("/", "/job/") + self_url="${JENKINS_URL}job/${urlized_job_name}/${BUILD_NUMBER}" + self_name="${JOB_NAME}#${BUILD_NUMBER}" + build_cluster_run_url="${JENKINS_URL}job/${k8s_job.getProjectName()}/${k8s_job.getNumber()}" + build_cluster_destroy_url="${JENKINS_URL}job/${e2e_destroy_cluster_job}/buildWithParameters?BUILD=${k8s_job.getProjectName()}%23${k8s_job.getNumber()}" + kubeconfig_url="${JENKINS_URL}job/${k8s_job.getProjectName()}/${k8s_job.getNumber()}/artifact/hcloud-kubeadm/modules/k8s/secrets/admin.conf" + slackSend( + channel: '#mayastor-backend', + color: 'danger', + message: "E2E k8s cluster <$build_cluster_run_url|#${k8s_job.getNumber()}> left running due to failure of " + + "<$self_url|$self_name>. Investigate using <$kubeconfig_url|kubeconfig>, or ssh as root to:\n" + + "```$e2e_nodes```\n" + + "And then <$build_cluster_destroy_url|destroy> the cluster.\n" + + "Note: you need to click `proceed` and will get an empty page when using destroy link. " + + "()" + ) + } + } + } + } + stage('destroy e2e cluster') { + agent { label 'nixos' } + steps { + script { + build( + job: "${e2e_destroy_cluster_job}", + propagate: true, + wait: true, + parameters: [ + [ + $class: 'StringParameterValue', + name: "ENVIRONMENT", + value: "${e2e_environment}" + ], + [ + $class: 'RunParameterValue', + name: "BUILD", + runId:"${k8s_job.getProjectName()}#${k8s_job.getNumber()}" + ] + ] + ) + } + } + } + } } } } diff --git a/chart/README.md b/chart/README.md index f9c8a4730..3bc39eb22 100644 --- a/chart/README.md +++ b/chart/README.md @@ -23,7 +23,7 @@ kubectl delete namespace mayastor ## templating -[ ] templatize namespace properly - mayastor namespace is hardcoded in yaml templates +[x] templatize namespace properly - mayastor namespace is hardcoded in yaml templates - use Release.Namespace - use Release.Name [ ] allow pulling image from authenticated repository diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl index 296354258..7b4753649 100644 --- a/chart/templates/_helpers.tpl +++ b/chart/templates/_helpers.tpl @@ -6,3 +6,13 @@ {{- "" }} {{- end }} {{- end }} + +{{/* Generate CPU list specification based on CPU count (-l param of mayastor) */}} +{{- define "mayastorCpuSpec" -}} +{{- range $i, $e := until (int .Values.mayastorCpuCount) }} +{{- if gt $i 0 }} +{{- printf "," }} +{{- end }} +{{- printf "%d" (add $i 1) }} +{{- end }} +{{- end }} diff --git a/chart/templates/csi-daemonset.yaml b/chart/templates/csi-daemonset.yaml index 93a2b0de6..9c2072d15 100644 --- a/chart/templates/csi-daemonset.yaml +++ b/chart/templates/csi-daemonset.yaml @@ -1,7 +1,7 @@ apiVersion: apps/v1 kind: DaemonSet metadata: - namespace: mayastor + namespace: {{ .Release.Namespace }} name: mayastor-csi labels: openebs/engine: mayastor @@ -29,7 +29,7 @@ spec: containers: - name: mayastor-csi image: {{ include "mayastorImagesPrefix" . }}mayadata/mayastor-csi:{{ .Values.mayastorImagesTag }} - imagePullPolicy: Always + imagePullPolicy: {{ .Values.mayastorImagePullPolicy }} # we need privileged because we mount filesystems and use mknod securityContext: privileged: true @@ -71,15 +71,10 @@ spec: cpu: "100m" memory: "50Mi" - name: csi-driver-registrar - image: quay.io/k8scsi/csi-node-driver-registrar:v1.3.0 + image: quay.io/k8scsi/csi-node-driver-registrar:v2.1.0 args: - "--csi-address=/csi/csi.sock" - "--kubelet-registration-path=/var/lib/kubelet/plugins/mayastor.openebs.io/csi.sock" - lifecycle: - preStop: - exec: - # this is needed in order for CSI to detect that the plugin is gone - command: ["/bin/sh", "-c", "rm -f /registration/io.openebs.csi-mayastor-reg.sock /csi/csi.sock"] volumeMounts: - name: plugin-dir mountPath: /csi diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml index 9ee09fbe3..c84957d64 100644 --- a/chart/templates/mayastor-daemonset.yaml +++ b/chart/templates/mayastor-daemonset.yaml @@ -1,7 +1,7 @@ apiVersion: apps/v1 kind: DaemonSet metadata: - namespace: mayastor + namespace: {{ .Release.Namespace }} name: mayastor labels: openebs/engine: mayastor @@ -20,7 +20,7 @@ spec: app: mayastor spec: hostNetwork: true - # To resolve services from mayastor namespace + # To resolve services from mayastor's namespace dnsPolicy: ClusterFirstWithHostNet nodeSelector: openebs.io/engine: mayastor @@ -32,7 +32,7 @@ spec: containers: - name: mayastor image: {{ include "mayastorImagesPrefix" . }}mayadata/mayastor:{{ .Values.mayastorImagesTag }} - imagePullPolicy: Always + imagePullPolicy: {{ .Values.mayastorImagePullPolicy }} env: - name: MY_NODE_NAME valueFrom: @@ -59,7 +59,7 @@ spec: - "-g$(MY_POD_IP)" - "-nnats" - "-y/var/local/mayastor/config.yaml" - - "-m0x3" + - "-l{{ include "mayastorCpuSpec" . }}" securityContext: privileged: true volumeMounts: @@ -78,13 +78,13 @@ spec: # belong to Guaranteed QoS class, hence can never get evicted in case of # pressure unless they exceed those limits. limits and requests must be the same. limits: - cpu: "2" - memory: "500Mi" - hugepages-2Mi: "1Gi" + cpu: "{{ .Values.mayastorCpuCount }}" + memory: "512Mi" + hugepages-2Mi: "{{ .Values.mayastorHugePagesGiB }}Gi" requests: - cpu: "2" - memory: "500Mi" - hugepages-2Mi: "1Gi" + cpu: "{{ .Values.mayastorCpuCount }}" + memory: "512Mi" + hugepages-2Mi: "{{ .Values.mayastorHugePagesGiB }}Gi" ports: - containerPort: 10124 protocol: TCP diff --git a/chart/templates/moac-deployment.yaml b/chart/templates/moac-deployment.yaml index d1f2af816..40d0d6cbb 100644 --- a/chart/templates/moac-deployment.yaml +++ b/chart/templates/moac-deployment.yaml @@ -1,9 +1,8 @@ ---- kind: Deployment apiVersion: apps/v1 metadata: name: moac - namespace: mayastor + namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: @@ -45,7 +44,7 @@ spec: - name: moac image: {{ include "mayastorImagesPrefix" . }}mayadata/moac:{{ .Values.mayastorImagesTag }} - imagePullPolicy: Always + imagePullPolicy: {{ .Values.mayastorImagePullPolicy }} args: - "--csi-address=$(CSI_ENDPOINT)" - "--namespace=$(MY_POD_NAMESPACE)" @@ -62,23 +61,6 @@ spec: volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ - ports: - - containerPort: 4000 - protocol: TCP - name: "rest-api" volumes: - name: socket-dir - emptyDir: ---- -kind: Service -apiVersion: v1 -metadata: - name: moac - namespace: mayastor -spec: - selector: - app: moac - ports: - - protocol: TCP - port: 4000 - targetPort: 4000 + emptyDir: \ No newline at end of file diff --git a/chart/templates/moac-rbac.yaml b/chart/templates/moac-rbac.yaml index 464799af5..ae271ca2d 100644 --- a/chart/templates/moac-rbac.yaml +++ b/chart/templates/moac-rbac.yaml @@ -3,7 +3,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: moac - namespace: mayastor + namespace: {{ .Release.Namespace }} --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 @@ -86,7 +86,7 @@ metadata: subjects: - kind: ServiceAccount name: moac - namespace: mayastor + namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole name: moac diff --git a/chart/templates/nats-deployment.yaml b/chart/templates/nats-deployment.yaml index a64702a5e..b44e44785 100644 --- a/chart/templates/nats-deployment.yaml +++ b/chart/templates/nats-deployment.yaml @@ -3,7 +3,7 @@ kind: Deployment apiVersion: apps/v1 metadata: name: nats - namespace: mayastor + namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: @@ -27,7 +27,7 @@ kind: Service apiVersion: v1 metadata: name: nats - namespace: mayastor + namespace: {{ .Release.Namespace }} spec: selector: app: nats diff --git a/chart/templates/pool.yaml b/chart/templates/pool.yaml new file mode 100644 index 000000000..91ebae3e9 --- /dev/null +++ b/chart/templates/pool.yaml @@ -0,0 +1,15 @@ +{{- range .Values.mayastorPools }} +--- +apiVersion: "openebs.io/v1alpha1" +kind: MayastorPool +metadata: + # Name can be anything as long as it is unique + name: pool-on-{{ .node }} + # or let k8s to generate a unique pool name + #generateName: pool- + namespace: {{ $.Release.Namespace }} +spec: + node: {{ .node }} + # ATM only one disk device is supported (i.e. /dev/nvme1n1) + disks: ["{{ .device }}"] +{{- end }} \ No newline at end of file diff --git a/chart/values.yaml b/chart/values.yaml index 427b5b163..dcd79b1a9 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -1,2 +1,8 @@ -mayastorImagesTag: latest +mayastorImagesTag: develop +mayastorImagePullPolicy: Always +mayastorCpuCount: "2" +mayastorHugePagesGiB: "1" mayastorImagesRepo: "" +mayastorPools: + - node: "NODE_NAME" + device: "DEVICE" \ No newline at end of file diff --git a/commitlint.config.js b/commitlint.config.js index c12291346..ceafd84d8 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -1,8 +1,7 @@ module.exports = { extends: ['@commitlint/config-conventional'], rules: { - "header-max-length": async () => [2, "always", 50], - "body-max-line-length": async () => [2, "always", 72], + 'type-enum': [2, 'always', ['build', 'chore', 'ci', 'docs', 'feat', 'fix', 'perf', 'refactor', 'revert', 'style', 'test', 'example']], }, - defaultIgnores: false, + defaultIgnores: false, } diff --git a/composer/Cargo.toml b/composer/Cargo.toml index 9783d7d74..28e1f2c04 100644 --- a/composer/Cargo.toml +++ b/composer/Cargo.toml @@ -16,7 +16,7 @@ ipnetwork = "0.17.0" bollard = "0.8.0" tracing = "0.1.22" tracing-subscriber = "0.2.15" -mbus_api = { path = "../mbus-api" } +mbus_api = { path = "../control-plane/mbus-api" } [dev-dependencies] tokio = { version = "0.2", features = ["full"] } diff --git a/composer/src/lib.rs b/composer/src/lib.rs index 66e4aefab..9f1e65e03 100644 --- a/composer/src/lib.rs +++ b/composer/src/lib.rs @@ -206,6 +206,8 @@ pub struct ContainerSpec { /// Key-Map of environment variables /// Starts with RUST_LOG=debug,h2=info env: HashMap, + /// Volume bind dst/source + binds: HashMap, } impl ContainerSpec { @@ -259,6 +261,20 @@ impl ContainerSpec { } self } + /// use a volume binds between host path and container container + pub fn with_bind(mut self, host: &str, container: &str) -> Self { + self.binds.insert(container.to_string(), host.to_string()); + self + } + + /// List of volume binds with each element as host:container + fn binds(&self) -> Vec { + let mut vec = vec![]; + self.binds.iter().for_each(|(container, host)| { + vec.push(format!("{}:{}", host, container)); + }); + vec + } /// Environment variables as a vector with each element as: /// "{key}={value}" @@ -270,10 +286,10 @@ impl ContainerSpec { vec } /// Command/entrypoint followed by/and arguments - fn commands(&self) -> Vec { + fn commands(&self, network: &str) -> Vec { let mut commands = vec![]; if let Some(mut binary) = self.binary.clone() { - binary.setup_nats(&self.name); + binary.setup_nats(network); commands.extend(binary.commands()); } else if let Some(command) = self.command.clone() { commands.push(command); @@ -312,6 +328,14 @@ impl Default for Builder { } } +/// trait to allow extensibility using the Builder pattern +pub trait BuilderConfigure { + fn configure( + &self, + cfg: Builder, + ) -> Result>; +} + impl Builder { /// construct a new builder for `[ComposeTest'] pub fn new() -> Self { @@ -327,6 +351,41 @@ impl Builder { } } + /// get the name of the experiment + pub fn get_name(&self) -> String { + self.name.clone() + } + + /// configure the `Builder` using the `BuilderConfigure` trait + pub fn configure( + self, + cfg: impl BuilderConfigure, + ) -> Result> { + cfg.configure(self) + } + + /// next ordinal container ip + pub fn next_container_ip(&self) -> Result { + let net: Ipv4Network = self.network.parse().map_err(|error| { + bollard::errors::Error::IOError { + err: std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid network format: {}", error), + ), + } + })?; + let ip = net.nth((self.containers.len() + 2) as u32); + match ip { + None => Err(bollard::errors::Error::IOError { + err: std::io::Error::new( + std::io::ErrorKind::AddrNotAvailable, + "No available ip", + ), + }), + Some(ip) => Ok(ip.to_string()), + } + } + /// run all containers on build pub fn autorun(mut self, run: bool) -> Builder { self.autorun = run; @@ -512,7 +571,8 @@ pub struct ComposeTest { label_prefix: String, /// automatically clean up the things we have created for this test clean: bool, - pub prune: bool, + /// remove existing containers upon creation + prune: bool, /// base image for image-less containers image: Option, /// output container logs on panic @@ -557,12 +617,15 @@ impl ComposeTest { /// networking IP and/or subnets async fn network_create(&mut self) -> Result { let mut net = self.network_list_labeled().await?; - if !net.is_empty() { let first = net.pop().unwrap(); if Some(self.name.clone()) == first.name { // reuse the same network self.network_id = first.id.unwrap(); + if self.prune { + // but clean up the existing containers + self.remove_network_containers(&self.name).await?; + } return Ok(self.network_id.clone()); } else { self.network_remove_labeled().await?; @@ -605,7 +668,10 @@ impl ComposeTest { } /// remove all containers from the network - async fn remove_network_containers(&self, name: &str) -> Result<(), Error> { + pub async fn remove_network_containers( + &self, + name: &str, + ) -> Result<(), Error> { let containers = self.list_network_containers(name).await?; for k in &containers { let name = k.id.clone().unwrap(); @@ -739,13 +805,14 @@ impl ComposeTest { ) .await; } - + let mut binds = vec![ + format!("{}:{}", self.srcdir, self.srcdir), + "/nix:/nix:ro".into(), + "/dev/hugepages:/dev/hugepages:rw".into(), + ]; + binds.extend(spec.binds()); let host_config = HostConfig { - binds: Some(vec![ - format!("{}:{}", self.srcdir, self.srcdir), - "/nix:/nix:ro".into(), - "/dev/hugepages:/dev/hugepages:rw".into(), - ]), + binds: Some(binds), mounts: Some(vec![ // DPDK needs to have a /tmp Mount { @@ -796,7 +863,7 @@ impl ComposeTest { } let name = spec.name.as_str(); - let cmd = spec.commands(); + let cmd = spec.commands(&self.name); let cmd = cmd.iter().map(|s| s.as_str()).collect(); let image = spec .image @@ -853,8 +920,13 @@ impl ComposeTest { /// Pulls the docker image, if one is specified and is not present locally async fn pull_missing_image(&self, image: &Option) { if let Some(image) = image { - if !self.image_exists(image).await { - self.pull_image(image).await; + let image = if !image.contains(':') { + format!("{}:latest", image) + } else { + image.clone() + }; + if !self.image_exists(&image).await { + self.pull_image(&image).await; } } } @@ -891,7 +963,17 @@ impl ComposeTest { /// start the container pub async fn start(&self, name: &str) -> Result<(), Error> { - let id = self.containers.get(name).unwrap(); + let id = self.containers.get(name).ok_or( + bollard::errors::Error::IOError { + err: std::io::Error::new( + std::io::ErrorKind::NotFound, + format!( + "Can't start container {} as it was not configured", + name + ), + ), + }, + )?; self.docker .start_container::<&str>(id.0.as_str(), None) .await?; @@ -902,10 +984,15 @@ impl ComposeTest { /// stop the container pub async fn stop(&self, name: &str) -> Result<(), Error> { let id = self.containers.get(name).unwrap(); + self.stop_id(id.0.as_str()).await + } + + /// stop the container by its id + pub async fn stop_id(&self, id: &str) -> Result<(), Error> { if let Err(e) = self .docker .stop_container( - id.0.as_str(), + id, Some(StopContainerOptions { t: 3, }), @@ -1012,6 +1099,22 @@ impl ComposeTest { Ok(()) } + /// stop all the containers part of the network + /// returns the last error, if any or Ok + pub async fn stop_network_containers(&self) -> Result<(), Error> { + let mut result = Ok(()); + let containers = self.list_network_containers(&self.name).await?; + for container in containers { + if let Some(id) = container.id { + if let Err(e) = self.stop_id(&id).await { + println!("Failed to stop container id {:?}", id); + result = Err(e); + } + } + } + result + } + /// inspect the given container pub async fn inspect( &self, diff --git a/services/Cargo.toml b/control-plane/agents/Cargo.toml similarity index 85% rename from services/Cargo.toml rename to control-plane/agents/Cargo.toml index 9d919a50d..17d616d73 100644 --- a/services/Cargo.toml +++ b/control-plane/agents/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "services" +name = "agents" version = "0.1.0" authors = ["Tiago Castro "] edition = "2018" @@ -20,6 +20,11 @@ path = "pool/src/server.rs" name = "volume" path = "volume/src/server.rs" +[[bin]] +name = "jsongrpc" +path = "jsongrpc/src/server.rs" + + [lib] name = "common" path = "common/src/lib.rs" @@ -42,12 +47,12 @@ state = "0.4.2" tracing = "0.1" tracing-subscriber = "0.2" tracing-futures = "0.2.4" -rpc = { path = "../rpc" } +rpc = { path = "../../rpc" } url = "2.2.0" http = "0.2.1" [dev-dependencies] -composer = { path = "../composer" } +composer = { path = "../../composer" } [dependencies.serde] features = ["derive"] diff --git a/services/common/src/lib.rs b/control-plane/agents/common/src/lib.rs similarity index 100% rename from services/common/src/lib.rs rename to control-plane/agents/common/src/lib.rs diff --git a/services/common/src/wrapper/mod.rs b/control-plane/agents/common/src/wrapper/mod.rs similarity index 100% rename from services/common/src/wrapper/mod.rs rename to control-plane/agents/common/src/wrapper/mod.rs diff --git a/services/common/src/wrapper/v0/mod.rs b/control-plane/agents/common/src/wrapper/v0/mod.rs similarity index 94% rename from services/common/src/wrapper/v0/mod.rs rename to control-plane/agents/common/src/wrapper/v0/mod.rs index e904f9ec0..9aa6a6cda 100644 --- a/services/common/src/wrapper/v0/mod.rs +++ b/control-plane/agents/common/src/wrapper/v0/mod.rs @@ -29,6 +29,7 @@ use tonic::transport::Channel; /// Common error type for send/receive #[derive(Debug, Snafu)] +#[snafu(visibility = "pub")] #[allow(missing_docs)] pub enum SvcError { #[snafu(display("Failed to get nodes from the node service"))] @@ -85,6 +86,17 @@ pub enum SvcError { InvalidArguments {}, #[snafu(display("Not implemented"))] NotImplemented {}, + #[snafu(display( + "Json RPC call failed for method '{}' with parameters '{}'. Error {}", + method, + params, + error, + ))] + JsonRpc { + method: String, + params: String, + error: String, + }, } impl From for SvcError { diff --git a/services/common/src/wrapper/v0/node_traits.rs b/control-plane/agents/common/src/wrapper/v0/node_traits.rs similarity index 100% rename from services/common/src/wrapper/v0/node_traits.rs rename to control-plane/agents/common/src/wrapper/v0/node_traits.rs diff --git a/services/common/src/wrapper/v0/pool.rs b/control-plane/agents/common/src/wrapper/v0/pool.rs similarity index 100% rename from services/common/src/wrapper/v0/pool.rs rename to control-plane/agents/common/src/wrapper/v0/pool.rs diff --git a/services/common/src/wrapper/v0/registry.rs b/control-plane/agents/common/src/wrapper/v0/registry.rs similarity index 100% rename from services/common/src/wrapper/v0/registry.rs rename to control-plane/agents/common/src/wrapper/v0/registry.rs diff --git a/services/common/src/wrapper/v0/volume.rs b/control-plane/agents/common/src/wrapper/v0/volume.rs similarity index 100% rename from services/common/src/wrapper/v0/volume.rs rename to control-plane/agents/common/src/wrapper/v0/volume.rs diff --git a/services/examples/kiiss-client/main.rs b/control-plane/agents/examples/kiiss-client/main.rs similarity index 100% rename from services/examples/kiiss-client/main.rs rename to control-plane/agents/examples/kiiss-client/main.rs diff --git a/services/examples/node-client/main.rs b/control-plane/agents/examples/node-client/main.rs similarity index 100% rename from services/examples/node-client/main.rs rename to control-plane/agents/examples/node-client/main.rs diff --git a/control-plane/agents/examples/pool-client/README.md b/control-plane/agents/examples/pool-client/README.md new file mode 100644 index 000000000..7d8975f60 --- /dev/null +++ b/control-plane/agents/examples/pool-client/README.md @@ -0,0 +1,40 @@ +# Overview + +The pool-client is an example of how to interact with the Pool service over a +message bus. + +It performs the following operations: + +1. Creates a pool +2. Lists the pools +3. Destroys the previously created pool +4. Lists the pools again + +## Running the example + +The pool-client example requires the following to be started inside the nix shell: + +1. nats server +```bash +nats-server +``` + +2. node service +```bash +cargo run --bin node +``` + +3. mayastor - specifying the message bus endpoint and node name as "mayastor-node" +```bash +sudo ./target/debug/mayastor -n 127.0.0.1:4222 -N mayastor-node +``` + +4. pool service +```bash +cargo run --bin pool +``` + +5. pool-client +```bash +cargo run --example pool-client +``` \ No newline at end of file diff --git a/control-plane/agents/examples/pool-client/main.rs b/control-plane/agents/examples/pool-client/main.rs new file mode 100644 index 000000000..9a81a9a64 --- /dev/null +++ b/control-plane/agents/examples/pool-client/main.rs @@ -0,0 +1,68 @@ +use mbus_api::{v0::*, *}; +use structopt::StructOpt; +use tracing::info; + +#[derive(Debug, StructOpt)] +struct CliArgs { + /// The Nats Server URL to connect to + /// (supports the nats schema) + /// Default: nats://127.0.0.1:4222 + #[structopt(long, short, default_value = "nats://127.0.0.1:4222")] + url: String, +} + +fn init_tracing() { + if let Ok(filter) = tracing_subscriber::EnvFilter::try_from_default_env() { + tracing_subscriber::fmt().with_env_filter(filter).init(); + } else { + tracing_subscriber::fmt().with_env_filter("info").init(); + } +} + +const NODE_NAME: &str = "mayastor-node"; +const POOL_NAME: &str = "test-pool"; + +#[tokio::main] +async fn main() { + init_tracing(); + client().await; +} + +/// Client interactions with the Pool service. +async fn client() { + let cli_args = CliArgs::from_args(); + mbus_api::message_bus_init(cli_args.url).await; + create_pool(NODE_NAME, POOL_NAME).await; + list_pools().await; + destroy_pool(NODE_NAME, POOL_NAME).await; + list_pools().await; +} + +/// Create a pool on a given storage node with the given name. +async fn create_pool(node: &str, pool: &str) { + CreatePool { + node: node.into(), + id: pool.into(), + disks: vec!["malloc:///disk0?size_mb=100".into()], + } + .request() + .await + .unwrap(); +} + +// Destroy a pool on the given node with the given name. +async fn destroy_pool(node: &str, pool: &str) { + DestroyPool { + node: node.into(), + id: pool.into(), + } + .request() + .await + .unwrap(); +} + +/// List all pools. +async fn list_pools() { + let pools = GetPools::default().request().await.unwrap(); + info!("Received Pools: {:?}", pools); +} diff --git a/services/examples/service/main.rs b/control-plane/agents/examples/service/main.rs similarity index 100% rename from services/examples/service/main.rs rename to control-plane/agents/examples/service/main.rs diff --git a/control-plane/agents/jsongrpc/src/server.rs b/control-plane/agents/jsongrpc/src/server.rs new file mode 100644 index 000000000..cddd36039 --- /dev/null +++ b/control-plane/agents/jsongrpc/src/server.rs @@ -0,0 +1,80 @@ +pub mod service; + +use async_trait::async_trait; +use common::*; +use mbus_api::{v0::*, *}; +use service::*; +use std::{convert::TryInto, marker::PhantomData}; +use structopt::StructOpt; +use tracing::info; + +#[derive(Debug, StructOpt)] +struct CliArgs { + /// The Nats Server URL to connect to + /// (supports the nats schema) + /// Default: nats://127.0.0.1:4222 + #[structopt(long, short, default_value = "nats://127.0.0.1:4222")] + nats: String, +} + +/// Needed so we can implement the ServiceSubscriber trait for +/// the message types external to the crate +#[derive(Clone, Default)] +struct ServiceHandler { + data: PhantomData, +} + +macro_rules! impl_service_handler { + // RequestType is the message bus request type + // ServiceFnName is the name of the service function to route the request + // into + ($RequestType:ident, $ServiceFnName:ident) => { + #[async_trait] + impl ServiceSubscriber for ServiceHandler<$RequestType> { + async fn handler(&self, args: Arguments<'_>) -> Result<(), Error> { + let request: ReceivedMessage<$RequestType> = + args.request.try_into()?; + + let reply = JsonGrpcSvc::$ServiceFnName(&request.inner()) + .await + .map_err(|error| Error::ServiceError { + message: error.full_string(), + })?; + request.reply(reply).await + } + fn filter(&self) -> Vec { + vec![$RequestType::default().id()] + } + } + }; +} + +impl_service_handler!(JsonGrpcRequest, json_grpc_call); + +fn init_tracing() { + if let Ok(filter) = tracing_subscriber::EnvFilter::try_from_default_env() { + tracing_subscriber::fmt().with_env_filter(filter).init(); + } else { + tracing_subscriber::fmt().with_env_filter("info").init(); + } +} + +#[tokio::main] +async fn main() { + init_tracing(); + + let cli_args = CliArgs::from_args(); + info!("Using options: {:?}", &cli_args); + + server(cli_args).await; +} + +async fn server(cli_args: CliArgs) { + Service::builder(cli_args.nats, ChannelVs::JsonGrpc) + .connect() + .await + .with_subscription(ServiceHandler::::default()) + .with_default_liveness() + .run() + .await; +} diff --git a/control-plane/agents/jsongrpc/src/service.rs b/control-plane/agents/jsongrpc/src/service.rs new file mode 100644 index 000000000..7b9348c8d --- /dev/null +++ b/control-plane/agents/jsongrpc/src/service.rs @@ -0,0 +1,44 @@ +// clippy warning caused by the instrument macro +#![allow(clippy::unit_arg)] + +use ::rpc::mayastor::{JsonRpcReply, JsonRpcRequest}; +use common::wrapper::v0::{BusGetNode, SvcError}; +use mbus_api::message_bus::v0::{MessageBus, *}; +use rpc::mayastor::json_rpc_client::JsonRpcClient; +use snafu::ResultExt; + +#[derive(Clone, Default)] +pub(super) struct JsonGrpcSvc {} + +/// JSON gRPC service implementation +impl JsonGrpcSvc { + /// Generic JSON gRPC call issued to Mayastor using the JsonRpcClient. + pub(super) async fn json_grpc_call( + request: &JsonGrpcRequest, + ) -> Result { + let node = + MessageBus::get_node(&request.node) + .await + .context(BusGetNode { + node: request.node.clone(), + })?; + let mut client = + JsonRpcClient::connect(format!("http://{}", node.grpc_endpoint)) + .await + .unwrap(); + let response: JsonRpcReply = client + .json_rpc_call(JsonRpcRequest { + method: request.method.to_string(), + params: request.params.to_string(), + }) + .await + .map_err(|error| SvcError::JsonRpc { + method: request.method.to_string(), + params: request.params.to_string(), + error: error.to_string(), + })? + .into_inner(); + + Ok(response.result) + } +} diff --git a/services/kiiss/src/server.rs b/control-plane/agents/kiiss/src/server.rs similarity index 100% rename from services/kiiss/src/server.rs rename to control-plane/agents/kiiss/src/server.rs diff --git a/services/node/src/server.rs b/control-plane/agents/node/src/server.rs similarity index 100% rename from services/node/src/server.rs rename to control-plane/agents/node/src/server.rs diff --git a/services/pool/src/server.rs b/control-plane/agents/pool/src/server.rs similarity index 100% rename from services/pool/src/server.rs rename to control-plane/agents/pool/src/server.rs diff --git a/services/pool/src/service.rs b/control-plane/agents/pool/src/service.rs similarity index 100% rename from services/pool/src/service.rs rename to control-plane/agents/pool/src/service.rs diff --git a/services/volume/src/server.rs b/control-plane/agents/volume/src/server.rs similarity index 100% rename from services/volume/src/server.rs rename to control-plane/agents/volume/src/server.rs diff --git a/services/volume/src/service.rs b/control-plane/agents/volume/src/service.rs similarity index 100% rename from services/volume/src/service.rs rename to control-plane/agents/volume/src/service.rs diff --git a/control-plane/deployer/Cargo.toml b/control-plane/deployer/Cargo.toml new file mode 100644 index 000000000..0d38119f1 --- /dev/null +++ b/control-plane/deployer/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "deployer" +version = "0.1.0" +authors = ["Tiago Castro "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[[bin]] +name = "deployer" +path = "src/bin.rs" + +[dependencies] +mbus_api = { path = "../mbus-api" } +composer = { path = "../../composer" } +nats = "0.8" +structopt = "0.3.15" +tokio = { version = "0.2", features = ["full"] } +async-trait = "0.1.36" +rpc = { path = "../../rpc" } +strum = "0.19" +strum_macros = "0.19" +paste = "1.0.4" \ No newline at end of file diff --git a/control-plane/deployer/README.md b/control-plane/deployer/README.md new file mode 100644 index 000000000..a26f9baab --- /dev/null +++ b/control-plane/deployer/README.md @@ -0,0 +1,126 @@ +# Control Plane Deployer + +Deploying all the `control plane agents` with all the trimmings is not an entirely straightforward exercise as there +are many parts to it, including the additional configuration steps to be able to run multiple `mayastor` instances +alongside each other. + +The `deployer` tool facilitates this by creating a composable docker `"cluster"` which allows us to run any number of +`mayastor` instances, the `control plane agents` and any other pluggable components. + +## Examples + +**Using the help** +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin deployer -- --help + deployer --help + agents 0.1.0 + Deployment actions + + USAGE: + deployer + + FLAGS: + -h, --help Prints help information + -V, --version Prints version information + + SUBCOMMANDS: + help Prints this message or the help of the given subcommand(s) + list List all running components + start Create and start all components + stop Stop and delete all components +``` +The help can also be used on each subcommand. + +**Deploying the cluster with default components** + +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin deployer -- start -m 2 + Finished dev [unoptimized + debuginfo] target(s) in 0.13s + Running `sh /home/tiago/git/myconfigs/maya/test_as_sudo.sh target/debug/deployer start` +Using options: CliArgs { action: Start(StartOptions { agents: [Node(Node), Pool(Pool), Volume(Volume)], base_image: None, jaeger: false, no_rest: false, mayastors: 2, jaeger_image: None, build: false, dns: false, show_info: false, cluster_name: "cluster" }) } +``` + +Notice the options which are printed out. They can be overridden - more on this later. + +We could also use the `deploy` tool to inspect the components: +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin deployer -- list + Compiling agents v0.1.0 (/home/tiago/git/Mayastor/agents) + Finished dev [unoptimized + debuginfo] target(s) in 5.50s + Running `sh /home/tiago/git/myconfigs/maya/test_as_sudo.sh target/debug/deployer list` +Using options: CliArgs { action: List(ListOptions { no_docker: false, format: None }) } +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +e775b6272009 "/home/tiago/git/May…" 56 minutes ago Up 56 minutes 0.0.0.0:8080-8081->8080-8081/tcp rest +888da76b62ed "/home/tiago/git/May…" 56 minutes ago Up 56 minutes volume +95e8e0c45755 "/home/tiago/git/May…" 56 minutes ago Up 56 minutes pool +bd1504c962fe "/home/tiago/git/May…" 56 minutes ago Up 56 minutes node +e8927a7a1cec "/home/tiago/git/May…" 56 minutes ago Up 56 minutes mayastor-2 +9788961605c1 "/home/tiago/git/May…" 56 minutes ago Up 56 minutes mayastor-1 +ff94b234f2b9 "/nix/store/pbd1hbhx…" 56 minutes ago Up 56 minutes 0.0.0.0:4222->4222/tcp nats +``` + +As the previous logs shows, the `rest` server ports are mapped to your host on 8080/8081. +So, we for example list existing `nodes` (aka `mayastor` instances) as such: +```textmate +[nix-shell:~/git/Mayastor]$ curl -k https://localhost:8080/v0/nodes | jq +[ + { + "id": "mayastor-1", + "grpcEndpoint": "10.1.0.3:10124", + "state": "Online" + }, + { + "id": "mayastor-2", + "grpcEndpoint": "10.1.0.4:10124", + "state": "Online" + } +] +``` + +To tear-down the cluster, just run the stop command: +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin deployer -- stop + Finished dev [unoptimized + debuginfo] target(s) in 0.13s + Running `sh /home/tiago/git/myconfigs/maya/test_as_sudo.sh target/debug/deployer stop` +Using options: CliArgs { action: Stop(StopOptions { cluster_name: "cluster" }) } +``` + +For more information, please refer to the help argument on every command/subcommand. + +### Debugging a Service + +For example, to debug the rest server, we'd create a `cluster` without the rest server: +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin deployer -- start --no-rest --show-info + Compiling agents v0.1.0 (/home/tiago/git/Mayastor/agents) + Finished dev [unoptimized + debuginfo] target(s) in 5.86s + Running `sh /home/tiago/git/myconfigs/maya/test_as_sudo.sh target/debug/deployer start --no-rest --show-info` +Using options: CliArgs { action: Start(StartOptions { agents: [Node(Node), Pool(Pool), Volume(Volume)], base_image: None, jaeger: false, no_rest: true, mayastors: 1, jaeger_image: None, build: false, dns: false, show_info: true, cluster_name: "cluster" }) } +[20994b0098d6] [/volume] /home/tiago/git/Mayastor/target/debug/volume -n nats.cluster:4222 +[f4884e343756] [/pool] /home/tiago/git/Mayastor/target/debug/pool -n nats.cluster:4222 +[fb6e78a0b6ef] [/node] /home/tiago/git/Mayastor/target/debug/node -n nats.cluster:4222 +[992df686ec8a] [/mayastor] /home/tiago/git/Mayastor/target/debug/mayastor -N mayastor -g 10.1.0.3:10124 -n nats.cluster:4222 +[0a5016d6c81f] [/nats] /nix/store/pbd1hbhxm17xy29mg1gibdbvbmr7gnz2-nats-server-2.1.9/bin/nats-server -DV +``` + +As you can see, there is no `rest` server started - go ahead and start your own! +This way you can make changes to this specific server and test them without destroying the state of the cluster. + +```textmate +[nix-shell:~/git/Mayastor]$ cargo run --bin rest + Finished dev [unoptimized + debuginfo] target(s) in 0.13s + Running `sh /home/tiago/git/myconfigs/maya/test_as_sudo.sh target/debug/rest` +Jan 27 12:23:44.993 INFO mbus_api::mbus_nats: Connecting to the nats server nats://0.0.0.0:4222... +Jan 27 12:23:45.007 INFO mbus_api::mbus_nats: Successfully connected to the nats server nats://0.0.0.0:4222 +Jan 27 12:23:45.008 INFO actix_server::builder: Starting 16 workers +Jan 27 12:23:45.008 INFO actix_server::builder: Starting "actix-web-service-0.0.0.0:8080" service on 0.0.0.0:8080 +.... +[nix-shell:~/git/Mayastor]$ curl -k https://localhost:8080/v0/nodes | jq +[ + { + "id": "bob's your uncle", + "grpcEndpoint": "10.1.0.3:10124", + "state": "Unknown" + } +] +``` diff --git a/control-plane/deployer/src/bin.rs b/control-plane/deployer/src/bin.rs new file mode 100644 index 000000000..be8bc3107 --- /dev/null +++ b/control-plane/deployer/src/bin.rs @@ -0,0 +1,224 @@ +pub mod infra; + +use composer::Builder; +use infra::*; +use structopt::StructOpt; + +#[derive(Debug, StructOpt)] +struct CliArgs { + #[structopt(subcommand)] + action: Action, +} + +#[derive(Debug, StructOpt)] +#[structopt(about = "Deployment actions")] +pub(crate) enum Action { + Start(StartOptions), + Stop(StopOptions), + List(ListOptions), +} + +const DEFAULT_CLUSTER_NAME: &str = "cluster"; + +#[derive(Debug, StructOpt)] +#[structopt(about = "Stop and delete all components")] +pub(crate) struct StopOptions { + /// Name of the cluster + #[structopt(short, long, default_value = DEFAULT_CLUSTER_NAME)] + cluster_name: String, +} + +#[derive(Debug, Default, StructOpt)] +#[structopt(about = "List all running components")] +pub(crate) struct ListOptions { + /// Simple list without using the docker executable + #[structopt(short, long)] + no_docker: bool, + + /// Format the docker output + #[structopt(short, long, conflicts_with = "no_docker")] + format: Option, + + /// Name of the cluster + #[structopt(short, long, default_value = DEFAULT_CLUSTER_NAME)] + cluster_name: String, +} + +#[derive(Debug, Clone, StructOpt)] +#[structopt(about = "Create and start all components")] +pub(crate) struct StartOptions { + /// Use the following Control Plane Agents + /// Specify one agent at a time or as a list. + /// ( "" for no agents ) + /// todo: specify start arguments, eg: Node="-v" + #[structopt( + short, + long, + default_value = "Node, Pool, Volume", + value_delimiter = "," + )] + agents: Vec, + + /// Use the following Control Plane Operators + /// Specify one operator at a time or as a list + #[structopt(short, long, value_delimiter = ",")] + operators: Option>, + + /// Kubernetes Config file if using operators + /// [default: "~/.kube/config"] + #[structopt(short, long)] + kube_config: Option, + + /// Use a base image for the binary components (eg: alpine:latest) + #[structopt(long)] + base_image: Option, + + /// Use a jaeger tracing service + #[structopt(short, long)] + jaeger: bool, + + /// Disable the REST Server + #[structopt(long)] + no_rest: bool, + + /// Use `N` mayastor instances + #[structopt(short, long, default_value = "1")] + mayastors: u32, + + /// Use this custom image for the jaeger tracing service + #[structopt(long, requires = "jaeger")] + jaeger_image: Option, + + /// Cargo Build each component before deploying + #[structopt(short, long)] + build: bool, + + /// Use a dns resolver for the cluster: defreitas/dns-proxy-server + /// Note this messes with your /etc/resolv.conf so use at your own risk + #[structopt(short, long)] + dns: bool, + + /// Show information from the cluster after creation + #[structopt(short, long)] + show_info: bool, + + /// Name of the cluster - currently only one allowed at a time + #[structopt(short, long, default_value = DEFAULT_CLUSTER_NAME)] + cluster_name: String, +} + +impl Action { + async fn act(&self) -> Result<(), Error> { + match self { + Action::Start(options) => options.start(self).await, + Action::Stop(options) => options.stop(self).await, + Action::List(options) => options.list(self).await, + } + } +} + +impl StartOptions { + async fn start(&self, _action: &Action) -> Result<(), Error> { + let components = Components::new(self.clone()); + let composer = Builder::new() + .name(&self.cluster_name) + .configure(components.clone())? + .with_clean(false) + .with_base_image(self.base_image.clone()) + .autorun(false) + .build() + .await?; + + components.start(&composer).await?; + if self.show_info { + let lister = ListOptions { + cluster_name: self.cluster_name.clone(), + ..Default::default() + }; + lister.list_simple().await?; + } + Ok(()) + } +} +impl StopOptions { + async fn stop(&self, _action: &Action) -> Result<(), Error> { + let composer = Builder::new() + .name(&self.cluster_name) + .with_prune(false) + .with_clean(true) + .build() + .await?; + let _ = composer.stop_network_containers().await; + let _ = composer + .remove_network_containers(&self.cluster_name) + .await?; + Ok(()) + } +} +impl ListOptions { + fn list_docker(&self) -> Result<(), Error> { + let label_filter = + format!("label=io.mayastor.test.name={}", self.cluster_name); + let mut args = vec!["ps", "-a", "--filter", &label_filter]; + if let Some(format) = &self.format { + args.push("--format"); + args.push(format) + } + let status = + std::process::Command::new("docker").args(args).status()?; + build_error("docker", status.code()) + } + async fn list_simple(&self) -> Result<(), Error> { + let cfg = Builder::new() + .name(&self.cluster_name) + .with_prune(false) + .with_clean(false) + .build() + .await?; + + for component in cfg.list_containers().await? { + let ip = match component.network_settings.clone() { + None => None, + Some(networks) => match networks.networks { + None => None, + Some(network) => match network.get(&self.cluster_name) { + None => None, + Some(endpoint) => endpoint.ip_address.clone(), + }, + }, + }; + println!( + "[{}] [{}] {}", + component + .names + .unwrap_or_default() + .first() + .unwrap_or(&"?".to_string()), + ip.unwrap_or_default(), + option_str(component.command), + ); + } + Ok(()) + } + async fn list(&self, _action: &Action) -> Result<(), Error> { + match self.no_docker { + true => self.list_simple().await, + false => self.list_docker(), + } + } +} + +fn option_str(input: Option) -> String { + match input { + Some(input) => input.to_string(), + None => "?".into(), + } +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + let cli_args = CliArgs::from_args(); + println!("Using options: {:?}", &cli_args); + + cli_args.action.act().await +} diff --git a/control-plane/deployer/src/infra/dns.rs b/control-plane/deployer/src/infra/dns.rs new file mode 100644 index 000000000..49b53a3a4 --- /dev/null +++ b/control-plane/deployer/src/infra/dns.rs @@ -0,0 +1,30 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Dns { + fn configure( + &self, + options: &StartOptions, + cfg: Builder, + ) -> Result { + Ok(if options.dns { + cfg.add_container_spec( + ContainerSpec::from_image("dns", "defreitas/dns-proxy-server") + .with_bind("/var/run/docker.sock", "/var/run/docker.sock") + .with_bind("/etc/resolv.conf", "/etc/resolv.conf"), + ) + } else { + cfg + }) + } + async fn start( + &self, + options: &StartOptions, + cfg: &ComposeTest, + ) -> Result<(), Error> { + if options.dns { + cfg.start("dns").await?; + } + Ok(()) + } +} diff --git a/control-plane/deployer/src/infra/empty.rs b/control-plane/deployer/src/infra/empty.rs new file mode 100644 index 000000000..4b46dd1bc --- /dev/null +++ b/control-plane/deployer/src/infra/empty.rs @@ -0,0 +1,19 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Empty { + fn configure( + &self, + _options: &StartOptions, + cfg: Builder, + ) -> Result { + Ok(cfg) + } + async fn start( + &self, + _options: &StartOptions, + _cfg: &ComposeTest, + ) -> Result<(), Error> { + Ok(()) + } +} diff --git a/control-plane/deployer/src/infra/jaeger.rs b/control-plane/deployer/src/infra/jaeger.rs new file mode 100644 index 000000000..ef78960ee --- /dev/null +++ b/control-plane/deployer/src/infra/jaeger.rs @@ -0,0 +1,34 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Jaeger { + fn configure( + &self, + options: &StartOptions, + cfg: Builder, + ) -> Result { + Ok(if !options.jaeger { + cfg + } else { + cfg.add_container_spec( + ContainerSpec::from_image( + "jaeger", + "jaegertracing/all-in-one:latest", + ) + .with_portmap("16686", "16686") + .with_portmap("6831/udp", "6831/udp") + .with_portmap("6832/udp", "6832/udp"), + ) + }) + } + async fn start( + &self, + options: &StartOptions, + cfg: &ComposeTest, + ) -> Result<(), Error> { + if options.jaeger { + cfg.start("jaeger").await?; + } + Ok(()) + } +} diff --git a/control-plane/deployer/src/infra/mayastor.rs b/control-plane/deployer/src/infra/mayastor.rs new file mode 100644 index 000000000..d156e4a22 --- /dev/null +++ b/control-plane/deployer/src/infra/mayastor.rs @@ -0,0 +1,51 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Mayastor { + fn configure( + &self, + options: &StartOptions, + cfg: Builder, + ) -> Result { + if options.build { + let status = std::process::Command::new("cargo") + .args(&["build", "-p", "mayastor", "--bin", "mayastor"]) + .status()?; + build_error("mayastor", status.code())?; + } + + let mut cfg = cfg; + for i in 0 .. options.mayastors { + let mayastor_socket = format!("{}:10124", cfg.next_container_ip()?); + + cfg = cfg.add_container_bin( + &Self::name(i, options), + Binary::from_dbg("mayastor") + .with_nats("-n") + .with_args(vec!["-N", &Self::name(i, options)]) + .with_args(vec!["-g", &mayastor_socket]), + ) + } + Ok(cfg) + } + async fn start( + &self, + options: &StartOptions, + cfg: &ComposeTest, + ) -> Result<(), Error> { + for i in 0 .. options.mayastors { + cfg.start(&Self::name(i, options)).await?; + } + Ok(()) + } +} + +impl Mayastor { + fn name(i: u32, options: &StartOptions) -> String { + if options.mayastors == 1 { + "mayastor".into() + } else { + format!("mayastor-{}", i + 1) + } + } +} diff --git a/control-plane/deployer/src/infra/mod.rs b/control-plane/deployer/src/infra/mod.rs new file mode 100644 index 000000000..341eb9ab3 --- /dev/null +++ b/control-plane/deployer/src/infra/mod.rs @@ -0,0 +1,347 @@ +pub mod dns; +mod empty; +pub mod jaeger; +pub mod mayastor; +pub mod nats; +pub mod rest; + +pub use ::nats::*; +pub use dns::*; +pub use empty::*; +pub use jaeger::*; +pub use mayastor::*; +pub use rest::*; + +use super::StartOptions; +use async_trait::async_trait; +use composer::{Binary, Builder, BuilderConfigure, ComposeTest, ContainerSpec}; +use mbus_api::{ + v0::{ChannelVs, Liveness}, + Message, +}; +use paste::paste; +use std::{cmp::Ordering, str::FromStr}; +use structopt::StructOpt; +use strum::VariantNames; +use strum_macros::{EnumVariantNames, ToString}; +pub(crate) type Error = Box; + +#[macro_export] +macro_rules! impl_ctrlp_agents { + ($($name:ident,)+) => { + #[derive(Debug, Clone)] + pub(crate) struct ControlPlaneAgents(Vec); + + #[derive(Debug, Clone, StructOpt, ToString, EnumVariantNames)] + #[structopt(about = "Control Plane Agents")] + pub(crate) enum ControlPlaneAgent { + Empty(Empty), + $( + $name($name), + )+ + } + + impl From<&ControlPlaneAgent> for Component { + fn from(ctrlp_svc: &ControlPlaneAgent) -> Self { + match ctrlp_svc { + ControlPlaneAgent::Empty(obj) => Component::Empty(obj.clone()), + $(ControlPlaneAgent::$name(obj) => Component::$name(obj.clone()),)+ + } + } + } + + paste! { + impl FromStr for ControlPlaneAgent { + type Err = String; + + fn from_str(source: &str) -> Result { + Ok(match source.trim().to_ascii_lowercase().as_str() { + "" => Self::Empty(Empty::default()), + $(stringify!([<$name:lower>]) => Self::$name($name::default()),)+ + _ => return Err(format!( + "\"{}\" is an invalid type of agent! Available types: {:?}", + source, + Self::VARIANTS + )), + }) + } + } + } + + $(#[async_trait] + impl ComponentAction for $name { + fn configure(&self, options: &StartOptions, cfg: Builder) -> Result { + let name = stringify!($name).to_ascii_lowercase(); + if options.build { + let status = std::process::Command::new("cargo") + .args(&["build", "-p", "agents", "--bin", &name]) + .status()?; + build_error(&format!("the {} agent", name), status.code())?; + } + Ok(cfg.add_container_bin( + &name, + Binary::from_dbg(&name).with_nats("-n"), + )) + } + async fn start(&self, _options: &StartOptions, cfg: &ComposeTest) -> Result<(), Error> { + let name = stringify!($name).to_ascii_lowercase(); + cfg.start(&name).await?; + Liveness {}.request_on(ChannelVs::$name).await?; + Ok(()) + } + })+ + }; + ($($name:ident), +) => { + impl_ctrlp_agents!($($name,)+); + }; +} + +#[macro_export] +macro_rules! impl_ctrlp_operators { + ($($name:ident,)+) => { + #[derive(Debug, Clone)] + pub(crate) struct ControlPlaneOperators(Vec); + + #[derive(Debug, Clone, StructOpt, ToString, EnumVariantNames)] + #[structopt(about = "Control Plane Operators")] + pub(crate) enum ControlPlaneOperator { + Empty(Empty), + $( + $name(paste!{[<$name Op>]}), + )+ + } + + paste! { + impl From<&ControlPlaneOperator> for Component { + fn from(ctrlp_svc: &ControlPlaneOperator) -> Self { + match ctrlp_svc { + ControlPlaneOperator::Empty(obj) => Component::Empty(obj.clone()), + $(ControlPlaneOperator::$name(obj) => Component::[<$name Op>](obj.clone()),)+ + } + } + } + } + + paste! { + impl FromStr for ControlPlaneOperator { + type Err = String; + + fn from_str(source: &str) -> Result { + Ok(match source.trim().to_ascii_lowercase().as_str() { + "" => Self::Empty(Default::default()), + $(stringify!([<$name:lower>]) => Self::$name(]}>::default()),)+ + _ => return Err(format!( + "\"{}\" is an invalid type of operator! Available types: {:?}", + source, + Self::VARIANTS + )), + }) + } + } + } + + $(#[async_trait] + impl ComponentAction for paste!{[<$name Op>]} { + fn configure(&self, options: &StartOptions, cfg: Builder) -> Result { + let name = format!("{}-op", stringify!($name).to_ascii_lowercase()); + if options.build { + let status = std::process::Command::new("cargo") + .args(&["build", "-p", "operators", "--bin", &name]) + .status()?; + build_error(&format!("the {} operator", name), status.code())?; + } + let rest = format!("http://rest.{}:8081", cfg.get_name()); + let host_kube_config = match &options.kube_config { + Some(config) => config.clone(), + None => { + match std::env::var("USER") { + Ok(user) => format!("/home/{}/.kube/config", user), + Err(_) => "/root/.kube/config".to_string(), + } + } + }; + let kube_config = match options.base_image { + Some(_) => "/root/.kube/config", + None => "/.kube/config", + }; + Ok(if options.jaeger { + let jaeger_config = format!("jaeger.{}:6831", cfg.get_name()); + cfg.add_container_spec( + ContainerSpec::from_binary( + &name, + Binary::from_dbg(&name) + .with_args(vec!["-r", &rest]) + .with_args(vec!["-j", &jaeger_config]), + ) + .with_bind(&host_kube_config, kube_config), + ) + } else { + cfg.add_container_spec( + ContainerSpec::from_binary( + &name, + Binary::from_dbg(&name).with_args(vec!["-r", &rest]) + ) + .with_bind(&host_kube_config, kube_config), + ) + }) + } + async fn start(&self, _options: &StartOptions, cfg: &ComposeTest) -> Result<(), Error> { + // todo: wait for the rest server to be up + let name = format!("{}-op", stringify!($name).to_ascii_lowercase()); + cfg.start(&name).await?; + Ok(()) + } + })+ + }; + ($($name:ident), +) => { + impl_ctrlp_operators!($($name,)+); + }; +} + +pub(crate) fn build_error( + name: &str, + status: Option, +) -> Result<(), Error> { + let make_error = |extra: &str| { + let error = format!("Failed to build {}: {}", name, extra); + std::io::Error::new(std::io::ErrorKind::Other, error) + }; + match status { + Some(0) => Ok(()), + Some(code) => { + let error = format!("exited with code {}", code); + Err(make_error(&error).into()) + } + None => Err(make_error("interrupted by signal").into()), + } +} + +#[macro_export] +macro_rules! impl_component { + ($($name:ident,$order:literal,)+) => { + #[derive(Debug, Clone, StructOpt, ToString, EnumVariantNames, Eq, PartialEq)] + #[structopt(about = "Control Plane Components")] + pub(crate) enum Component { + $( + $name($name), + )+ + } + + #[derive(Debug, Clone)] + pub(crate) struct Components(Vec, StartOptions); + impl BuilderConfigure for Components { + fn configure(&self, cfg: Builder) -> Result { + let mut cfg = cfg; + for component in &self.0 { + cfg = component.configure(&self.1, cfg)?; + } + Ok(cfg) + } + } + + impl Components { + pub(crate) fn push_generic_components(&mut self, name: &str, component: Component) { + if !ControlPlaneAgent::VARIANTS.iter().any(|&s| s == name) && + !ControlPlaneOperator::VARIANTS.iter().any(|&s| &format!("{}Op", s) == name) { + self.0.push(component); + } + } + pub(crate) fn new(options: StartOptions) -> Components { + let agents = options.agents.clone(); + let operators = options.operators.clone().unwrap_or_default(); + let mut components = agents + .iter() + .map(Component::from) + .collect::>(); + components.extend(operators + .iter() + .map(Component::from) + .collect::>()); + + let mut components = Components(components, options.clone()); + $(components.push_generic_components(stringify!($name), $name::default().into());)+ + components.0.sort(); + components + } + pub(crate) async fn start(&self, cfg: &ComposeTest) -> Result<(), Error> { + for component in &self.0 { + component.start(&self.1, cfg).await?; + } + Ok(()) + } + } + + #[async_trait] + pub(crate) trait ComponentAction { + fn configure(&self, options: &StartOptions, cfg: Builder) -> Result; + async fn start(&self, options: &StartOptions, cfg: &ComposeTest) -> Result<(), Error>; + } + + #[async_trait] + impl ComponentAction for Component { + fn configure(&self, options: &StartOptions, cfg: Builder) -> Result { + match self { + $(Self::$name(obj) => obj.configure(options, cfg),)+ + } + } + async fn start(&self, options: &StartOptions, cfg: &ComposeTest) -> Result<(), Error> { + match self { + $(Self::$name(obj) => obj.start(options, cfg).await,)+ + } + } + } + + $(impl From<$name> for Component { + fn from(from: $name) -> Component { + Component::$name(from) + } + })+ + + $(#[derive(Default, Debug, Clone, StructOpt, Eq, PartialEq)] + pub(crate) struct $name {})+ + + impl Component { + fn boot_order(&self) -> u32 { + match self { + $(Self::$name(_) => $order,)+ + } + } + } + + impl PartialOrd for Component { + fn partial_cmp(&self, other: &Self) -> Option { + self.boot_order().partial_cmp(&other.boot_order()) + } + } + impl Ord for Component { + fn cmp(&self, other: &Self) -> Ordering { + self.boot_order().cmp(&other.boot_order()) + } + } + }; + ($($name:ident, $order:ident), +) => { + impl_component!($($name,$order)+); + }; +} + +// Component Name and bootstrap ordering +// from lower to high +impl_component! { + Empty, 0, + Dns, 0, + Jaeger, 0, + Nats, 0, + Rest, 1, + Mayastor, 1, + Node, 2, + Pool, 3, + Volume, 3, + JsonGrpc, 3, + NodeOp, 4, +} + +// Message Bus Control Plane Agents +impl_ctrlp_agents!(Node, Pool, Volume, JsonGrpc); + +// Kubernetes Mayastor Low-level Operators +impl_ctrlp_operators!(Node); diff --git a/control-plane/deployer/src/infra/nats.rs b/control-plane/deployer/src/infra/nats.rs new file mode 100644 index 000000000..241749a4a --- /dev/null +++ b/control-plane/deployer/src/infra/nats.rs @@ -0,0 +1,27 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Nats { + fn configure( + &self, + _options: &StartOptions, + cfg: Builder, + ) -> Result { + Ok(cfg.add_container_spec( + ContainerSpec::from_binary( + "nats", + Binary::from_nix("nats-server").with_arg("-DV"), + ) + .with_portmap("4222", "4222"), + )) + } + async fn start( + &self, + _options: &StartOptions, + cfg: &ComposeTest, + ) -> Result<(), Error> { + cfg.start("nats").await?; + cfg.connect_to_bus("nats").await; + Ok(()) + } +} diff --git a/control-plane/deployer/src/infra/rest.rs b/control-plane/deployer/src/infra/rest.rs new file mode 100644 index 000000000..07bf78ada --- /dev/null +++ b/control-plane/deployer/src/infra/rest.rs @@ -0,0 +1,59 @@ +use super::*; + +#[async_trait] +impl ComponentAction for Rest { + fn configure( + &self, + options: &StartOptions, + cfg: Builder, + ) -> Result { + Ok(if options.no_rest { + cfg + } else { + if options.build { + std::process::Command::new("cargo") + .args(&["build", "-p", "rest", "--bin", "rest"]) + .status()?; + } + if !options.jaeger { + cfg.add_container_spec( + ContainerSpec::from_binary( + "rest", + Binary::from_dbg("rest") + .with_nats("-n") + .with_arg("--dummy-certificates") + .with_args(vec!["--https", "rest:8080"]) + .with_args(vec!["--http", "rest:8081"]), + ) + .with_portmap("8080", "8080") + .with_portmap("8081", "8081"), + ) + } else { + let jaeger_config = format!("jaeger.{}:6831", cfg.get_name()); + cfg.add_container_spec( + ContainerSpec::from_binary( + "rest", + Binary::from_dbg("rest") + .with_nats("-n") + .with_arg("--dummy-certificates") + .with_args(vec!["-j", &jaeger_config]) + .with_args(vec!["--https", "rest:8080"]) + .with_args(vec!["--http", "rest:8081"]), + ) + .with_portmap("8080", "8080") + .with_portmap("8081", "8081"), + ) + } + }) + } + async fn start( + &self, + options: &StartOptions, + cfg: &ComposeTest, + ) -> Result<(), Error> { + if !options.no_rest { + cfg.start("rest").await?; + } + Ok(()) + } +} diff --git a/control-plane/macros/Cargo.toml b/control-plane/macros/Cargo.toml new file mode 100644 index 000000000..7b42531c8 --- /dev/null +++ b/control-plane/macros/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "macros" +version = "0.1.0" +authors = ["Tiago Castro "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +actix-openapi-macros = { path = "./actix" } \ No newline at end of file diff --git a/control-plane/macros/actix/Cargo.toml b/control-plane/macros/actix/Cargo.toml new file mode 100644 index 000000000..2bebf5d02 --- /dev/null +++ b/control-plane/macros/actix/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "actix-openapi-macros" +version = "0.1.0" +authors = ["Tiago Castro "] +edition = "2018" +description = "Collection of method/route macros to provide compatibility between actix v3 proc-macros and paperclip." + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +syn = { version = "1.0.0", features = ["full"] } +proc-macro2 = "1.0.24" +quote = "1.0.8" + +[lib] +proc-macro = true diff --git a/control-plane/macros/actix/src/lib.rs b/control-plane/macros/actix/src/lib.rs new file mode 100644 index 000000000..ddcdbdccc --- /dev/null +++ b/control-plane/macros/actix/src/lib.rs @@ -0,0 +1,199 @@ +use proc_macro::TokenStream; +use proc_macro2::TokenStream as TokenStream2; +use quote::{quote, ToTokens}; +use syn::{parse_macro_input, ItemFn}; + +macro_rules! doc_comment { + ($x:expr; $($tt:tt)*) => { + #[doc = $x] + $($tt)* + }; +} + +impl Method { + // removes the URI from the attributes and collects the rest + // so they can be used with the paperclip::actix::api_v2_operation + fn paperclip_attributes(attr: TokenStream) -> TokenStream { + let mut attr = parse_macro_input!(attr as syn::AttributeArgs); + if attr.len() < 3 { + TokenStream::new() + } else { + // remove the base URI path + attr.remove(0); + // remove the relative URI path + attr.remove(0); + let mut paperclip_attr = "".to_string(); + for i in attr { + paperclip_attr.push_str(&format!( + "{},", + i.into_token_stream().to_string() + )); + } + paperclip_attr.parse().unwrap() + } + } + /// URI with the full path used to register the handler + fn handler_uri(attr: TokenStream) -> TokenStream { + let mut attr = parse_macro_input!(attr as syn::AttributeArgs); + let base = attr.first().to_token_stream().to_string(); + attr.remove(0); + let uri = attr.first().to_token_stream().to_string(); + let base_unquoted = base.trim_matches('"'); + let uri_unquoted = uri.trim_matches('"'); + let handler_uri = format!("{}{}", base_unquoted, uri_unquoted); + let handler_uri_token = quote! { + #handler_uri + }; + handler_uri_token.into() + } + /// relative URI (full URI minus the openapi base path) + fn openapi_uri(attr: TokenStream) -> TokenStream { + let mut attr = parse_macro_input!(attr as syn::AttributeArgs); + // remove the Base Path + attr.remove(0); + attr.first().into_token_stream().into() + } + fn handler_name(item: TokenStream) -> syn::Result { + let handler: ItemFn = syn::parse(item)?; + Ok(handler.sig.ident) + } + fn generate( + &self, + attr: TokenStream, + item: TokenStream, + ) -> syn::Result { + let full_uri: TokenStream2 = Self::handler_uri(attr.clone()).into(); + let relative_uri: TokenStream2 = Self::openapi_uri(attr.clone()).into(); + let handler_name = Self::handler_name(item.clone())?; + let handler_fn: TokenStream2 = item.into(); + let method: TokenStream2 = self.method().parse()?; + let variant: TokenStream2 = self.variant().parse()?; + let handler_name_str = handler_name.to_string(); + let attr: TokenStream2 = Self::paperclip_attributes(attr).into(); + + Ok(quote! { + #[allow(non_camel_case_types, missing_docs)] + pub struct #handler_name; + + impl #handler_name { + fn resource() -> paperclip::actix::web::Resource { + #[paperclip::actix::api_v2_operation(#attr)] + #handler_fn + paperclip::actix::web::Resource::new(#full_uri) + .name(#handler_name_str) + .guard(actix_web::guard::#variant()) + .route(paperclip::actix::web::#method().to(#handler_name)) + } + } + + impl actix_web::dev::HttpServiceFactory for #handler_name { + fn register(self, config: &mut actix_web::dev::AppService) { + Self::resource().register(config); + } + } + + + impl paperclip::actix::Mountable for #handler_name { + fn path(&self) -> &str { + #relative_uri + } + + fn operations( + &mut self, + ) -> std::collections::BTreeMap< + paperclip::v2::models::HttpMethod, + paperclip::v2::models::DefaultOperationRaw, + > { + Self::resource().operations() + } + + fn definitions( + &mut self, + ) -> std::collections::BTreeMap< + String, + paperclip::v2::models::DefaultSchemaRaw, + > { + Self::resource().definitions() + } + + fn security_definitions( + &mut self, + ) -> std::collections::BTreeMap + { + Self::resource().security_definitions() + } + } + }) + } +} + +macro_rules! rest_methods { + ( + $($variant:ident, $method:ident, )+ + ) => { + /// All available Rest methods + #[derive(Debug, PartialEq, Eq, Hash)] + enum Method { + $( + $variant, + )+ + } + + impl Method { + fn method(&self) -> &'static str { + match self { + $(Self::$variant => stringify!($method),)+ + } + } + fn variant(&self) -> &'static str { + match self { + $(Self::$variant => stringify!($variant),)+ + } + } + } + + $(doc_comment! { + concat!(" +Creates route handler with `paperclip::actix::web::Resource", "`. +In order to control the output type and status codes the return value/response must implement the +trait actix_web::Responder. + +# Syntax +```text +#[", stringify!($method), r#"("path"[, attributes])] +``` + +# Attributes +- `"base"` - Raw literal string with the handler base path used by the openapi `paths`. +- `"path"` - Raw literal string representing the uri path for which to register the handler + when combined with the base path. +- any paperclip api_v2_operation attributes. + +# Example + +```rust +# use actix_web::Json; +# use macros::"#, stringify!($method), "; +#[", stringify!($method), r#"("", "/")] +async fn example() -> Json<()> { + Json(()) +} +``` +"#); + #[proc_macro_attribute] + pub fn $method(attr: TokenStream, item: TokenStream) -> TokenStream { + match Method::$variant.generate(attr, item) { + Ok(v) => v.into(), + Err(e) => e.to_compile_error().into(), + } + } + })+ + }; +} + +rest_methods! { + Get, get, + Post, post, + Put, put, + Delete, delete, +} diff --git a/control-plane/macros/src/lib.rs b/control-plane/macros/src/lib.rs new file mode 100644 index 000000000..61cca978c --- /dev/null +++ b/control-plane/macros/src/lib.rs @@ -0,0 +1,6 @@ +/// Compatibility layer between actix v2 and paperclip +pub mod actix { + /// Expose macros to create resource handlers, allowing multiple HTTP + /// method guards. + pub use actix_openapi_macros::*; +} diff --git a/mbus-api/Cargo.toml b/control-plane/mbus-api/Cargo.toml similarity index 78% rename from mbus-api/Cargo.toml rename to control-plane/mbus-api/Cargo.toml index 979c635d0..47b551b32 100644 --- a/mbus-api/Cargo.toml +++ b/control-plane/mbus-api/Cargo.toml @@ -21,10 +21,12 @@ strum_macros = "0.19" tracing = "0.1" tracing-futures = "0.2.4" tracing-subscriber = "0.2.0" +paperclip = { version = "0.5.0", features = ["actix3"] } +percent-encoding = "2.1.0" [dev-dependencies] -composer = { path = "../composer" } -rpc = { path = "../rpc" } +composer = { path = "../../composer" } +rpc = { path = "../../rpc" } [dependencies.serde] features = ["derive"] diff --git a/mbus-api/examples/client/main.rs b/control-plane/mbus-api/examples/client/main.rs similarity index 100% rename from mbus-api/examples/client/main.rs rename to control-plane/mbus-api/examples/client/main.rs diff --git a/mbus-api/examples/server/main.rs b/control-plane/mbus-api/examples/server/main.rs similarity index 100% rename from mbus-api/examples/server/main.rs rename to control-plane/mbus-api/examples/server/main.rs diff --git a/mbus-api/src/lib.rs b/control-plane/mbus-api/src/lib.rs similarity index 96% rename from mbus-api/src/lib.rs rename to control-plane/mbus-api/src/lib.rs index 7c7ed6302..8e2f4208a 100644 --- a/mbus-api/src/lib.rs +++ b/control-plane/mbus-api/src/lib.rs @@ -146,9 +146,10 @@ impl FromStr for Channel { type Err = strum::ParseError; fn from_str(source: &str) -> Result { - match &source[0 ..= 2] { - "v0/" => { - let c: v0::ChannelVs = source[3 ..].parse()?; + match source.split('/').next() { + Some(v0::VERSION) => { + let c: v0::ChannelVs = + source[v0::VERSION.len() + 1 ..].parse()?; Ok(Self::v0(c)) } _ => Err(strum::ParseError::VariantNotFound), @@ -208,9 +209,10 @@ impl FromStr for MessageId { type Err = strum::ParseError; fn from_str(source: &str) -> Result { - match &source[0 ..= 2] { - "v0/" => { - let id: v0::MessageIdVs = source[3 ..].parse()?; + match source.split('/').next() { + Some(v0::VERSION) => { + let id: v0::MessageIdVs = + source[v0::VERSION.len() + 1 ..].parse()?; Ok(Self::v0(id)) } _ => Err(strum::ParseError::VariantNotFound), @@ -220,7 +222,7 @@ impl FromStr for MessageId { impl ToString for MessageId { fn to_string(&self) -> String { match self { - Self::v0(id) => format!("v0/{}", id.to_string()), + Self::v0(id) => format!("{}/{}", v0::VERSION, id.to_string()), } } } diff --git a/mbus-api/src/mbus_nats.rs b/control-plane/mbus-api/src/mbus_nats.rs similarity index 100% rename from mbus-api/src/mbus_nats.rs rename to control-plane/mbus-api/src/mbus_nats.rs diff --git a/mbus-api/src/message_bus/mod.rs b/control-plane/mbus-api/src/message_bus/mod.rs similarity index 100% rename from mbus-api/src/message_bus/mod.rs rename to control-plane/mbus-api/src/message_bus/mod.rs diff --git a/mbus-api/src/message_bus/v0.rs b/control-plane/mbus-api/src/message_bus/v0.rs similarity index 80% rename from mbus-api/src/message_bus/v0.rs rename to control-plane/mbus-api/src/message_bus/v0.rs index e37d0c4fa..ec42f29ab 100644 --- a/mbus-api/src/message_bus/v0.rs +++ b/control-plane/mbus-api/src/message_bus/v0.rs @@ -1,7 +1,7 @@ // clippy warning caused by the instrument macro #![allow(clippy::unit_arg)] -use crate::{v0::*, *}; +pub use crate::{v0::*, *}; use async_trait::async_trait; /// Error sending/receiving @@ -28,83 +28,6 @@ impl From for BusError { /// Result for sending/receiving pub type BusResult = Result; -/// Node -pub type Node = crate::v0::Node; -/// Node list -pub type Nodes = crate::v0::Nodes; -/// Pool -pub type Pool = crate::v0::Pool; -/// Pool list -pub type Pools = crate::v0::Pools; -/// Replica -pub type Replica = crate::v0::Replica; -/// Replica list -pub type Replicas = crate::v0::Replicas; -/// Protocol -pub type Protocol = crate::v0::Protocol; -/// Replica Create -pub type CreateReplica = crate::v0::CreateReplica; -/// Pool Create -pub type CreatePool = crate::v0::CreatePool; -/// Replica Destroy -pub type DestroyReplica = crate::v0::DestroyReplica; -/// Pool Destroy -pub type DestroyPool = crate::v0::DestroyPool; -/// Replica Share -pub type ShareReplica = crate::v0::ShareReplica; -/// Replica Unshare -pub type UnshareReplica = crate::v0::UnshareReplica; -/// Query Filter -pub type Filter = crate::v0::Filter; -/// Nexus from the volume service -pub type Nexus = crate::v0::Nexus; -/// Vector of Nexuses from the volume service -pub type Nexuses = crate::v0::Nexuses; -/// State of the nexus -pub type NexusState = crate::v0::NexusState; -/// State of the volume -pub type VolumeState = crate::v0::VolumeState; -/// Child of the nexus -pub type Child = crate::v0::Child; -/// State of the child -pub type ChildState = crate::v0::ChildState; -/// Nexus Create -pub type CreateNexus = crate::v0::CreateNexus; -/// Nexus Destroy -pub type DestroyNexus = crate::v0::DestroyNexus; -/// Nexus Share -pub type ShareNexus = crate::v0::ShareNexus; -/// Nexus Unshare -pub type UnshareNexus = crate::v0::UnshareNexus; -/// Remove Nexus Child -pub type RemoveNexusChild = crate::v0::RemoveNexusChild; -/// Add Nexus Child -pub type AddNexusChild = crate::v0::AddNexusChild; -/// Volume -pub type Volume = crate::v0::Volume; -/// Volumes -pub type Volumes = crate::v0::Volumes; -/// Create Volume -pub type CreateVolume = crate::v0::CreateVolume; -/// Delete Volume -pub type DestroyVolume = crate::v0::DestroyVolume; -/// Add Volume Nexus -pub type AddVolumeNexus = crate::v0::AddVolumeNexus; -/// Remove Volume Nexus -pub type RemoveVolumeNexus = crate::v0::RemoveVolumeNexus; -/// Id of a mayastor node -pub type NodeId = crate::v0::NodeId; -/// Id of a mayastor pool -pub type PoolId = crate::v0::PoolId; -/// UUID of a mayastor pool replica -pub type ReplicaId = crate::v0::ReplicaId; -/// UUID of a mayastor nexus -pub type NexusId = crate::v0::NexusId; -/// URI of a mayastor nexus child -pub type ChildUri = crate::v0::ChildUri; -/// UUID of a mayastor volume -pub type VolumeId = crate::v0::VolumeId; - macro_rules! only_one { ($list:ident) => { if let Some(obj) = $list.first() { @@ -317,6 +240,12 @@ pub trait MessageBusTrait: Sized { request.request().await?; Ok(()) } + + /// Generic JSON gRPC call + #[tracing::instrument(level = "debug", err)] + async fn json_grpc_call(request: JsonGrpcRequest) -> BusResult { + Ok(request.request().await?) + } } /// Implementation of the bus interface trait diff --git a/mbus-api/src/receive.rs b/control-plane/mbus-api/src/receive.rs similarity index 100% rename from mbus-api/src/receive.rs rename to control-plane/mbus-api/src/receive.rs diff --git a/mbus-api/src/send.rs b/control-plane/mbus-api/src/send.rs similarity index 99% rename from mbus-api/src/send.rs rename to control-plane/mbus-api/src/send.rs index 90038db26..beee0ebf4 100644 --- a/mbus-api/src/send.rs +++ b/control-plane/mbus-api/src/send.rs @@ -2,7 +2,7 @@ use super::*; // todo: replace with proc-macros -/// Main Message trait, which should tipically be used to send +/// Main Message trait, which should typically be used to send /// MessageBus messages. /// Implements Message trait for the type `S` with the reply type /// `R`, the message id `I`, the default channel `C`. diff --git a/mbus-api/src/v0.rs b/control-plane/mbus-api/src/v0.rs similarity index 88% rename from mbus-api/src/v0.rs rename to control-plane/mbus-api/src/v0.rs index 7b5946931..97084241a 100644 --- a/mbus-api/src/v0.rs +++ b/control-plane/mbus-api/src/v0.rs @@ -1,8 +1,13 @@ +#![allow(clippy::field_reassign_with_default)] use super::*; +use paperclip::actix::Apiv2Schema; +use percent_encoding::percent_decode_str; use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, fmt::Debug}; use strum_macros::{EnumString, ToString}; +pub(super) const VERSION: &str = "v0"; + /// Versioned Channels #[derive(Clone, Debug, EnumString, ToString)] #[strum(serialize_all = "camelCase")] @@ -21,6 +26,8 @@ pub enum ChannelVs { Nexus, /// Keep it In Sync Service Kiiss, + /// Json gRPC Service + JsonGrpc, } impl Default for ChannelVs { fn default() -> Self { @@ -97,6 +104,8 @@ pub enum MessageIdVs { AddVolumeNexus, /// Remove nexus from volume RemoveVolumeNexus, + /// Generic JSON gRPC message + JsonGrpc, } // Only V0 should export this macro @@ -197,7 +206,15 @@ pub struct GetNodes {} /// State of the Node #[derive( - Serialize, Deserialize, Debug, Clone, EnumString, ToString, Eq, PartialEq, + Serialize, + Deserialize, + Debug, + Clone, + EnumString, + ToString, + Eq, + PartialEq, + Apiv2Schema, )] pub enum NodeState { /// Node has unexpectedly disappeared @@ -217,7 +234,9 @@ impl Default for NodeState { } /// Node information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Node { /// id of the mayastor instance @@ -273,11 +292,11 @@ impl Default for Filter { } } -macro_rules! bus_impl_string_id { +macro_rules! bus_impl_string_id_inner { ($Name:ident, $Doc:literal) => { #[doc = $Doc] - #[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Hash)] - pub struct $Name(pub String); + #[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Hash, Apiv2Schema)] + pub struct $Name(String); impl std::fmt::Display for $Name { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -286,10 +305,6 @@ macro_rules! bus_impl_string_id { } impl $Name { - /// Build Self from a string trait id - pub fn from>(id: T) -> Self { - $Name(id.into()) - } /// Build Self from a string trait id pub fn as_str<'a>(&'a self) -> &'a str { self.0.as_str() @@ -301,7 +316,6 @@ macro_rules! bus_impl_string_id { $Name::from(id) } } - impl From for $Name { fn from(id: String) -> Self { $Name::from(id.as_str()) @@ -322,12 +336,46 @@ macro_rules! bus_impl_string_id { }; } -bus_impl_string_id!(NodeId, "UUID of a mayastor node"); -bus_impl_string_id!(PoolId, "UUID of a mayastor pool"); +macro_rules! bus_impl_string_id { + ($Name:ident, $Doc:literal) => { + bus_impl_string_id_inner!($Name, $Doc); + impl $Name { + /// Build Self from a string trait id + pub fn from>(id: T) -> Self { + $Name(id.into()) + } + } + }; +} + +macro_rules! bus_impl_string_id_percent_decoding { + ($Name:ident, $Doc:literal) => { + bus_impl_string_id_inner!($Name, $Doc); + impl $Name { + /// Build Self from a string trait id + pub fn from>(id: T) -> Self { + let src: String = id.into(); + let decoded_src = percent_decode_str(src.clone().as_str()) + .decode_utf8() + .unwrap_or(src.into()) + .to_string(); + $Name(decoded_src) + } + } + }; +} + +bus_impl_string_id!(NodeId, "ID of a mayastor node"); +bus_impl_string_id!(PoolId, "ID of a mayastor pool"); bus_impl_string_id!(ReplicaId, "UUID of a mayastor pool replica"); bus_impl_string_id!(NexusId, "UUID of a mayastor nexus"); -bus_impl_string_id!(ChildUri, "URI of a mayastor nexus child"); +bus_impl_string_id_percent_decoding!(ChildUri, "URI of a mayastor nexus child"); bus_impl_string_id!(VolumeId, "UUID of a mayastor volume"); +bus_impl_string_id!(JsonGrpcMethod, "JSON gRPC method"); +bus_impl_string_id!( + JsonGrpcParams, + "Parameters to be passed to a JSON gRPC method" +); /// Pool Service /// Get all the pools from specific node or None for all nodes @@ -339,7 +387,15 @@ pub struct GetPools { /// State of the Pool #[derive( - Serialize, Deserialize, Debug, Clone, EnumString, ToString, Eq, PartialEq, + Serialize, + Deserialize, + Debug, + Clone, + EnumString, + ToString, + Eq, + PartialEq, + Apiv2Schema, )] pub enum PoolState { /// unknown state @@ -369,7 +425,9 @@ impl From for PoolState { } /// Pool information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Pool { /// id of the mayastor instance @@ -454,7 +512,9 @@ pub struct GetReplicas { } /// Replica information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Replica { /// id of the mayastor instance @@ -538,7 +598,15 @@ bus_impl_message_all!(UnshareReplica, UnshareReplica, (), Pool); /// Indicates what protocol the bdev is shared as #[derive( - Serialize, Deserialize, Debug, Clone, EnumString, ToString, Eq, PartialEq, + Serialize, + Deserialize, + Debug, + Clone, + EnumString, + ToString, + Eq, + PartialEq, + Apiv2Schema, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] @@ -571,7 +639,15 @@ impl From for Protocol { /// State of the Replica #[derive( - Serialize, Deserialize, Debug, Clone, EnumString, ToString, Eq, PartialEq, + Serialize, + Deserialize, + Debug, + Clone, + EnumString, + ToString, + Eq, + PartialEq, + Apiv2Schema, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] @@ -612,7 +688,9 @@ pub struct GetNexuses { } /// Nexus information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Nexus { /// id of the mayastor instance @@ -633,7 +711,9 @@ pub struct Nexus { } /// Child information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Child { /// uri of the child device @@ -645,7 +725,7 @@ pub struct Child { } /// Child State information -#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Apiv2Schema)] pub enum ChildState { /// Default Unknown state Unknown = 0, @@ -674,7 +754,15 @@ impl From for ChildState { /// Nexus State information #[derive( - Serialize, Deserialize, Debug, Clone, EnumString, ToString, Eq, PartialEq, + Serialize, + Deserialize, + Debug, + Clone, + EnumString, + ToString, + Eq, + PartialEq, + Apiv2Schema, )] pub enum NexusState { /// Default Unknown state @@ -791,7 +879,9 @@ bus_impl_message_all!(AddNexusChild, AddNexusChild, Child, Nexus); /// Volumes /// /// Volume information -#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +#[derive( + Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Apiv2Schema, +)] #[serde(rename_all = "camelCase")] pub struct Volume { /// name of the volume @@ -872,3 +962,16 @@ pub struct RemoveVolumeNexus { pub node: Option, } bus_impl_message_all!(RemoveVolumeNexus, RemoveVolumeNexus, (), Volume); + +/// Generic JSON gRPC request +#[derive(Serialize, Deserialize, Default, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct JsonGrpcRequest { + /// id of the mayastor instance + pub node: NodeId, + /// JSON gRPC method to call + pub method: JsonGrpcMethod, + /// parameters to be passed to the above method + pub params: JsonGrpcParams, +} +bus_impl_message_all!(JsonGrpcRequest, JsonGrpc, String, JsonGrpc); diff --git a/operators/Cargo.toml b/control-plane/operators/Cargo.toml similarity index 100% rename from operators/Cargo.toml rename to control-plane/operators/Cargo.toml diff --git a/operators/node/src/main.rs b/control-plane/operators/node/src/main.rs similarity index 96% rename from operators/node/src/main.rs rename to control-plane/operators/node/src/main.rs index 3798a2df8..535635268 100644 --- a/operators/node/src/main.rs +++ b/control-plane/operators/node/src/main.rs @@ -8,9 +8,8 @@ use tracing::{debug, error, info, instrument}; #[derive(Debug, StructOpt)] struct CliArgs { - /// The Rest Server hostname to connect to - /// Default: localhost:8080 - #[structopt(long, short, default_value = "localhost:8080")] + /// The Rest Server URL to connect to + #[structopt(long, short, default_value = "https://localhost:8080")] rest: String, /// Polling period @@ -85,9 +84,8 @@ async fn main() -> anyhow::Result<()> { let polling_period = CliArgs::from_args().period.into(); - let rest_url = format!("https://{}", CliArgs::from_args().rest); let rest_cli = rest_client::ActixRestClient::new( - &rest_url, + &CliArgs::from_args().rest, CliArgs::from_args().jaeger.is_some(), )?; @@ -122,7 +120,7 @@ async fn polling_work( ) -> anyhow::Result<()> { // Fetch all nodes as seen by the control plane via REST let rest_nodes = rest_cli.get_nodes().await?; - println!("Retrieved rest nodes: {:?}", rest_nodes); + debug!("Retrieved rest nodes: {:?}", rest_nodes); // Fetch all node CRD's from k8s let kube_nodes = nodes_get_all(&nodes_api).await?; diff --git a/rest/Cargo.toml b/control-plane/rest/Cargo.toml similarity index 72% rename from rest/Cargo.toml rename to control-plane/rest/Cargo.toml index fd83ea6aa..fe132c33b 100644 --- a/rest/Cargo.toml +++ b/control-plane/rest/Cargo.toml @@ -17,6 +17,7 @@ path = "./src/lib.rs" [dependencies] rustls = "0.18" actix-web = { version = "3.2.0", features = ["rustls"] } +actix-service = "1.0.6" mbus_api = { path = "../mbus-api" } async-trait = "0.1.41" serde_json = "1.0" @@ -34,13 +35,21 @@ opentelemetry-jaeger = { version = "0.10", features = ["tokio"] } tracing-opentelemetry = "0.10.0" opentelemetry = "0.11.2" actix-web-opentelemetry = "0.9.0" +paperclip = { version = "0.5.0", default-features = false, optional = true } +macros = { path = "../macros" } +http = "0.2.3" +tinytemplate = { version = "1.2" } [dev-dependencies] -composer = { path = "../composer" } -rpc = { path = "../rpc" } +composer = { path = "../../composer" } +rpc = { path = "../../rpc" } tokio = { version = "0.2", features = ["full"] } actix-rt = "1.1.1" [dependencies.serde] features = ["derive"] -version = "1.0" \ No newline at end of file +version = "1.0" + +[features] +default = ["paperclip", "paperclip/actix3"] +nightly = ["paperclip", "paperclip/actix-nightly"] diff --git a/rest/certs/README b/control-plane/rest/certs/README similarity index 100% rename from rest/certs/README rename to control-plane/rest/certs/README diff --git a/rest/certs/build.sh b/control-plane/rest/certs/build.sh similarity index 100% rename from rest/certs/build.sh rename to control-plane/rest/certs/build.sh diff --git a/rest/certs/openssl.cnf b/control-plane/rest/certs/openssl.cnf similarity index 100% rename from rest/certs/openssl.cnf rename to control-plane/rest/certs/openssl.cnf diff --git a/rest/certs/rsa/ca.cert b/control-plane/rest/certs/rsa/ca.cert similarity index 100% rename from rest/certs/rsa/ca.cert rename to control-plane/rest/certs/rsa/ca.cert diff --git a/rest/certs/rsa/ca.key b/control-plane/rest/certs/rsa/ca.key similarity index 100% rename from rest/certs/rsa/ca.key rename to control-plane/rest/certs/rsa/ca.key diff --git a/rest/certs/rsa/user.cert b/control-plane/rest/certs/rsa/user.cert similarity index 100% rename from rest/certs/rsa/user.cert rename to control-plane/rest/certs/rsa/user.cert diff --git a/rest/certs/rsa/user.chain b/control-plane/rest/certs/rsa/user.chain similarity index 100% rename from rest/certs/rsa/user.chain rename to control-plane/rest/certs/rsa/user.chain diff --git a/rest/certs/rsa/user.key b/control-plane/rest/certs/rsa/user.key similarity index 100% rename from rest/certs/rsa/user.key rename to control-plane/rest/certs/rsa/user.key diff --git a/rest/certs/rsa/user.req b/control-plane/rest/certs/rsa/user.req similarity index 100% rename from rest/certs/rsa/user.req rename to control-plane/rest/certs/rsa/user.req diff --git a/rest/certs/rsa/user.rsa b/control-plane/rest/certs/rsa/user.rsa similarity index 100% rename from rest/certs/rsa/user.rsa rename to control-plane/rest/certs/rsa/user.rsa diff --git a/control-plane/rest/service/src/main.rs b/control-plane/rest/service/src/main.rs new file mode 100644 index 000000000..10995a947 --- /dev/null +++ b/control-plane/rest/service/src/main.rs @@ -0,0 +1,175 @@ +mod v0; + +use actix_service::ServiceFactory; +use actix_web::{ + dev::{MessageBody, ServiceRequest, ServiceResponse}, + middleware, + App, + HttpServer, +}; +use rustls::{ + internal::pemfile::{certs, rsa_private_keys}, + NoClientAuth, + ServerConfig, +}; +use std::{fs::File, io::BufReader}; +use structopt::StructOpt; + +#[derive(Debug, StructOpt)] +pub(crate) struct CliArgs { + /// The bind address for the REST interface (with HTTPS) + /// Default: 0.0.0.0:8080 + #[structopt(long, default_value = "0.0.0.0:8080")] + https: String, + /// The bind address for the REST interface (with HTTP) + #[structopt(long)] + http: Option, + /// The Nats Server URL or address to connect to + /// Default: nats://0.0.0.0:4222 + #[structopt(long, short, default_value = "nats://0.0.0.0:4222")] + nats: String, + + /// Path to the certificate file + #[structopt(long, short, required_unless = "dummy-certificates")] + cert_file: Option, + /// Path to the key file + #[structopt(long, short, required_unless = "dummy-certificates")] + key_file: Option, + + /// Use dummy HTTPS certificates (for testing) + #[structopt(long, short, required_unless = "cert-file")] + dummy_certificates: bool, + + /// Trace rest requests to the Jaeger endpoint agent + #[structopt(long, short)] + jaeger: Option, +} + +use actix_web_opentelemetry::RequestTracing; +use opentelemetry::{ + global, + sdk::{propagation::TraceContextPropagator, trace::Tracer}, +}; +use opentelemetry_jaeger::Uninstall; + +fn init_tracing() -> Option<(Tracer, Uninstall)> { + if let Ok(filter) = tracing_subscriber::EnvFilter::try_from_default_env() { + tracing_subscriber::fmt().with_env_filter(filter).init(); + } else { + tracing_subscriber::fmt().with_env_filter("info").init(); + } + if let Some(agent) = CliArgs::from_args().jaeger { + tracing::info!("Starting jaeger trace pipeline at {}...", agent); + // Start a new jaeger trace pipeline + global::set_text_map_propagator(TraceContextPropagator::new()); + let (_tracer, _uninstall) = opentelemetry_jaeger::new_pipeline() + .with_agent_endpoint(agent) + .with_service_name("rest-server") + .install() + .expect("Jaeger pipeline install error"); + Some((_tracer, _uninstall)) + } else { + None + } +} + +/// Extension trait for actix-web applications. +pub trait OpenApiExt { + /// configures the App with this version's handlers and openapi generation + fn configure_api( + self, + config: &dyn Fn(actix_web::App) -> actix_web::App, + ) -> actix_web::App; +} + +impl OpenApiExt for actix_web::App +where + B: MessageBody, + T: ServiceFactory< + Config = (), + Request = ServiceRequest, + Response = ServiceResponse, + Error = actix_web::Error, + InitError = (), + >, +{ + fn configure_api( + self, + config: &dyn Fn(actix_web::App) -> actix_web::App, + ) -> actix_web::App { + config(self) + } +} + +fn get_certificates() -> anyhow::Result { + if CliArgs::from_args().dummy_certificates { + get_dummy_certificates() + } else { + // guaranteed to be `Some` by the require_unless attribute + let cert_file = CliArgs::from_args() + .cert_file + .expect("cert_file is required"); + let key_file = + CliArgs::from_args().key_file.expect("key_file is required"); + let cert_file = &mut BufReader::new(File::open(cert_file)?); + let key_file = &mut BufReader::new(File::open(key_file)?); + load_certificates(cert_file, key_file) + } +} + +fn get_dummy_certificates() -> anyhow::Result { + let cert_file = &mut BufReader::new( + &std::include_bytes!("../../certs/rsa/user.chain")[..], + ); + let key_file = &mut BufReader::new( + &std::include_bytes!("../../certs/rsa/user.rsa")[..], + ); + + load_certificates(cert_file, key_file) +} + +fn load_certificates( + cert_file: &mut BufReader, + key_file: &mut BufReader, +) -> anyhow::Result { + let mut config = ServerConfig::new(NoClientAuth::new()); + let cert_chain = certs(cert_file).map_err(|_| { + anyhow::anyhow!( + "Failed to retrieve certificates from the certificate file", + ) + })?; + let mut keys = rsa_private_keys(key_file).map_err(|_| { + anyhow::anyhow!( + "Failed to retrieve the rsa private keys from the key file", + ) + })?; + if keys.is_empty() { + anyhow::bail!("No keys found in the keys file"); + } + config.set_single_cert(cert_chain, keys.remove(0))?; + Ok(config) +} + +#[actix_web::main] +async fn main() -> anyhow::Result<()> { + // need to keep the jaeger pipeline tracer alive, if enabled + let _tracer = init_tracing(); + + mbus_api::message_bus_init(CliArgs::from_args().nats).await; + + let server = HttpServer::new(move || { + App::new() + .wrap(RequestTracing::new()) + .wrap(middleware::Logger::default()) + .configure_api(&v0::configure_api) + }) + .bind_rustls(CliArgs::from_args().https, get_certificates()?)?; + if let Some(http) = CliArgs::from_args().http { + server.bind(http).map_err(anyhow::Error::from)? + } else { + server + } + .run() + .await + .map_err(|e| e.into()) +} diff --git a/rest/service/src/v0/children.rs b/control-plane/rest/service/src/v0/children.rs similarity index 68% rename from rest/service/src/v0/children.rs rename to control-plane/rest/service/src/v0/children.rs index 41000e6f7..66527f424 100644 --- a/rest/service/src/v0/children.rs +++ b/control-plane/rest/service/src/v0/children.rs @@ -1,43 +1,41 @@ use super::*; -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_nexus_children.register(config); - get_nexus_child.register(config); - get_node_nexus_children.register(config); - get_node_nexus_child.register(config); - add_nexus_child.register(config); - add_node_nexus_child.register(config); - delete_nexus_child.register(config); - delete_node_nexus_child.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_nexus_children) + .service(get_nexus_child) + .service(get_node_nexus_children) + .service(get_node_nexus_child) + .service(add_nexus_child) + .service(add_node_nexus_child) + .service(delete_nexus_child) + .service(delete_node_nexus_child); } -#[get("/v0/nexuses/{nexus_id}/children")] +#[get("/v0", "/nexuses/{nexus_id}/children", tags(Children))] async fn get_nexus_children( web::Path(nexus_id): web::Path, -) -> impl Responder { +) -> Result>, RestError> { get_children_response(Filter::Nexus(nexus_id)).await } -#[get("/v0/nodes/{node_id}/nexuses/{nexus_id}/children")] +#[get("/v0", "/nodes/{node_id}/nexuses/{nexus_id}/children", tags(Children))] async fn get_node_nexus_children( web::Path((node_id, nexus_id)): web::Path<(NodeId, NexusId)>, -) -> impl Responder { +) -> Result>, RestError> { get_children_response(Filter::NodeNexus(node_id, nexus_id)).await } -#[get("/v0/nexuses/{nexus_id}/children/{child_id:.*}")] +#[get("/v0", "/nexuses/{nexus_id}/children/{child_id:.*}", tags(Children))] async fn get_nexus_child( web::Path((nexus_id, child_id)): web::Path<(NexusId, ChildUri)>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { get_child_response(child_id, req, Filter::Nexus(nexus_id)).await } -#[get("/v0/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}")] +#[get( + "/v0", + "/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}", + tags(Children) +)] async fn get_node_nexus_child( web::Path((node_id, nexus_id, child_id)): web::Path<( NodeId, @@ -45,19 +43,23 @@ async fn get_node_nexus_child( ChildUri, )>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { get_child_response(child_id, req, Filter::NodeNexus(node_id, nexus_id)) .await } -#[put("/v0/nexuses/{nexus_id}/children/{child_id:.*}")] +#[put("/v0", "/nexuses/{nexus_id}/children/{child_id:.*}", tags(Children))] async fn add_nexus_child( web::Path((nexus_id, child_id)): web::Path<(NexusId, ChildUri)>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { add_child_filtered(child_id, req, Filter::Nexus(nexus_id)).await } -#[put("/v0/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}")] +#[put( + "/v0", + "/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}", + tags(Children) +)] async fn add_node_nexus_child( web::Path((node_id, nexus_id, child_id)): web::Path<( NodeId, @@ -65,19 +67,23 @@ async fn add_node_nexus_child( ChildUri, )>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { add_child_filtered(child_id, req, Filter::NodeNexus(node_id, nexus_id)) .await } -#[delete("/v0/nexuses/{nexus_id}/children/{child_id:.*}")] +#[delete("/v0", "/nexuses/{nexus_id}/children/{child_id:.*}", tags(Children))] async fn delete_nexus_child( web::Path((nexus_id, child_id)): web::Path<(NexusId, ChildUri)>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { delete_child_filtered(child_id, req, Filter::Nexus(nexus_id)).await } -#[delete("/v0/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}")] +#[delete( + "/v0", + "/nodes/{node_id}/nexuses/{nexus_id}/children/{child_id:.*}", + tags(Children) +)] async fn delete_node_nexus_child( web::Path((node_id, nexus_id, child_id)): web::Path<( NodeId, @@ -85,14 +91,14 @@ async fn delete_node_nexus_child( ChildUri, )>, req: HttpRequest, -) -> impl Responder { +) -> Result, RestError> { delete_child_filtered(child_id, req, Filter::NodeNexus(node_id, nexus_id)) .await } async fn get_children_response( filter: Filter, -) -> Result { +) -> Result>, RestError> { let nexus = MessageBus::get_nexus(filter).await?; RestRespond::ok(nexus.children) } @@ -101,7 +107,7 @@ async fn get_child_response( child_id: ChildUri, req: HttpRequest, filter: Filter, -) -> Result { +) -> Result, RestError> { let child_id = build_child_uri(child_id, req); let nexus = MessageBus::get_nexus(filter).await?; let child = find_nexus_child(&nexus, &child_id)?; @@ -123,12 +129,12 @@ async fn add_child_filtered( child_id: ChildUri, req: HttpRequest, filter: Filter, -) -> impl Responder { +) -> Result, RestError> { let child_uri = build_child_uri(child_id, req); let nexus = match MessageBus::get_nexus(filter).await { Ok(nexus) => nexus, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; let create = AddNexusChild { @@ -144,12 +150,12 @@ async fn delete_child_filtered( child_id: ChildUri, req: HttpRequest, filter: Filter, -) -> impl Responder { +) -> Result, RestError> { let child_uri = build_child_uri(child_id, req); let nexus = match MessageBus::get_nexus(filter).await { Ok(nexus) => nexus, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; let destroy = RemoveNexusChild { diff --git a/control-plane/rest/service/src/v0/jsongrpc.rs b/control-plane/rest/service/src/v0/jsongrpc.rs new file mode 100644 index 000000000..46ad574fc --- /dev/null +++ b/control-plane/rest/service/src/v0/jsongrpc.rs @@ -0,0 +1,35 @@ +//! Provides a REST interface to interact with JSON gRPC methods. +//! These methods are typically used to control SPDK directly. + +use super::*; +use mbus_api::v0::JsonGrpcRequest; + +/// Configure the functions that this service supports. +pub(crate) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(json_grpc_call); +} + +// A PUT request is required so that method parameters can be passed in the +// body. +// +// # Example +// To create a malloc bdev: +// ``` +// curl -X PUT "https://localhost:8080/v0/nodes/mayastor/jsongrpc/bdev_malloc_create" \ +// -H "accept: application/json" -H "Content-Type: application/json" \ +// -d '{"block_size": 512, "num_blocks": 64, "name": "Malloc0"}' +// ``` +#[put("/v0", "/nodes/{node}/jsongrpc/{method}", tags(JsonGrpc))] +async fn json_grpc_call( + web::Path((node, method)): web::Path<(NodeId, JsonGrpcMethod)>, + body: web::Json, +) -> Result, RestError> { + RestRespond::result( + MessageBus::json_grpc_call(JsonGrpcRequest { + node, + method, + params: body.into_inner().to_string().into(), + }) + .await, + ) +} diff --git a/control-plane/rest/service/src/v0/mod.rs b/control-plane/rest/service/src/v0/mod.rs new file mode 100644 index 000000000..f5f36f265 --- /dev/null +++ b/control-plane/rest/service/src/v0/mod.rs @@ -0,0 +1,70 @@ +#![allow(clippy::field_reassign_with_default)] +//! Version 0 of the URI's +//! Ex: /v0/nodes + +pub mod children; +pub mod jsongrpc; +pub mod nexuses; +pub mod nodes; +pub mod pools; +pub mod replicas; +pub mod swagger_ui; +pub mod volumes; + +use rest_client::versions::v0::*; + +use actix_service::ServiceFactory; +use actix_web::{ + dev::{MessageBody, ServiceRequest, ServiceResponse}, + web::{self, Json}, + HttpRequest, +}; +use macros::actix::{delete, get, put}; +use paperclip::actix::OpenApiExt; + +fn version() -> String { + "v0".into() +} +fn base_path() -> String { + format!("/{}", version()) +} +fn spec_uri() -> String { + format!("/{}/api/spec", version()) +} +fn get_api() -> paperclip::v2::models::DefaultApiRaw { + let mut api = paperclip::v2::models::DefaultApiRaw::default(); + api.info.version = version(); + api.info.title = "Mayastor RESTful API".into(); + api.base_path = Some(base_path()); + api +} + +fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + nodes::configure(cfg); + pools::configure(cfg); + replicas::configure(cfg); + nexuses::configure(cfg); + children::configure(cfg); + volumes::configure(cfg); + jsongrpc::configure(cfg); +} + +pub(super) fn configure_api( + api: actix_web::App, +) -> actix_web::App +where + B: MessageBody, + T: ServiceFactory< + Config = (), + Request = ServiceRequest, + Response = ServiceResponse, + Error = actix_web::Error, + InitError = (), + >, +{ + api.wrap_api_with_spec(get_api()) + .configure(configure) + .with_json_spec_at(&spec_uri()) + .build() + .configure(swagger_ui::configure) +} diff --git a/rest/service/src/v0/nexuses.rs b/control-plane/rest/service/src/v0/nexuses.rs similarity index 54% rename from rest/service/src/v0/nexuses.rs rename to control-plane/rest/service/src/v0/nexuses.rs index e79d21741..b2b3f4959 100644 --- a/rest/service/src/v0/nexuses.rs +++ b/control-plane/rest/service/src/v0/nexuses.rs @@ -1,75 +1,77 @@ use super::*; -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_nexuses.register(config); - get_nexus.register(config); - get_node_nexuses.register(config); - get_node_nexus.register(config); - put_node_nexus.register(config); - del_node_nexus.register(config); - del_nexus.register(config); - put_node_nexus_share.register(config); - del_node_nexus_share.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_nexuses) + .service(get_nexus) + .service(get_node_nexuses) + .service(get_node_nexus) + .service(put_node_nexus) + .service(del_node_nexus) + .service(del_nexus) + .service(put_node_nexus_share) + .service(del_node_nexus_share); } -#[get("/v0/nexuses")] -async fn get_nexuses() -> impl Responder { +#[get("/v0", "/nexuses", tags(Nexuses))] +async fn get_nexuses() -> Result>, RestError> { RestRespond::result(MessageBus::get_nexuses(Filter::None).await) } -#[get("/v0/nexuses/{nexus_id}")] -async fn get_nexus(web::Path(nexus_id): web::Path) -> impl Responder { - RestRespond::result(MessageBus::get_nexuses(Filter::Nexus(nexus_id)).await) +#[get("/v0", "/nexuses/{nexus_id}", tags(Nexuses))] +async fn get_nexus( + web::Path(nexus_id): web::Path, +) -> Result, RestError> { + RestRespond::result(MessageBus::get_nexus(Filter::Nexus(nexus_id)).await) } -#[get("/v0/nodes/{id}/nexuses")] +#[get("/v0", "/nodes/{id}/nexuses", tags(Nexuses))] async fn get_node_nexuses( web::Path(node_id): web::Path, -) -> impl Responder { +) -> Result>, RestError> { RestRespond::result(MessageBus::get_nexuses(Filter::Node(node_id)).await) } -#[get("/v0/nodes/{node_id}/nexuses/{nexus_id}")] +#[get("/v0", "/nodes/{node_id}/nexuses/{nexus_id}", tags(Nexuses))] async fn get_node_nexus( web::Path((node_id, nexus_id)): web::Path<(NodeId, NexusId)>, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result( MessageBus::get_nexus(Filter::NodeNexus(node_id, nexus_id)).await, ) } -#[put("/v0/nodes/{node_id}/nexuses/{nexus_id}")] +#[put("/v0", "/nodes/{node_id}/nexuses/{nexus_id}", tags(Nexuses))] async fn put_node_nexus( web::Path((node_id, nexus_id)): web::Path<(NodeId, NexusId)>, create: web::Json, -) -> impl Responder { +) -> Result, RestError> { let create = create.into_inner().bus_request(node_id, nexus_id); RestRespond::result(MessageBus::create_nexus(create).await) } -#[delete("/v0/nodes/{node_id}/nexuses/{nexus_id}")] +#[delete("/v0", "/nodes/{node_id}/nexuses/{nexus_id}", tags(Nexuses))] async fn del_node_nexus( web::Path((node_id, nexus_id)): web::Path<(NodeId, NexusId)>, -) -> impl Responder { +) -> Result, RestError> { destroy_nexus(Filter::NodeNexus(node_id, nexus_id)).await } -#[delete("/v0/nexuses/{nexus_id}")] -async fn del_nexus(web::Path(nexus_id): web::Path) -> impl Responder { +#[delete("/v0", "/nexuses/{nexus_id}", tags(Nexuses))] +async fn del_nexus( + web::Path(nexus_id): web::Path, +) -> Result, RestError> { destroy_nexus(Filter::Nexus(nexus_id)).await } -#[put("/v0/nodes/{node_id}/nexuses/{nexus_id}/share/{protocol}")] +#[put( + "/v0", + "/nodes/{node_id}/nexuses/{nexus_id}/share/{protocol}", + tags(Nexuses) +)] async fn put_node_nexus_share( web::Path((node_id, nexus_id, protocol)): web::Path<( NodeId, NexusId, Protocol, )>, -) -> impl Responder { +) -> Result, RestError> { let share = ShareNexus { node: node_id, uuid: nexus_id, @@ -79,10 +81,10 @@ async fn put_node_nexus_share( RestRespond::result(MessageBus::share_nexus(share).await) } -#[delete("/v0/nodes/{node_id}/nexuses/{nexus_id}/share")] +#[delete("/v0", "/nodes/{node_id}/nexuses/{nexus_id}/share", tags(Nexuses))] async fn del_node_nexus_share( web::Path((node_id, nexus_id)): web::Path<(NodeId, NexusId)>, -) -> impl Responder { +) -> Result, RestError> { let unshare = UnshareNexus { node: node_id, uuid: nexus_id, @@ -90,7 +92,7 @@ async fn del_node_nexus_share( RestRespond::result(MessageBus::unshare_nexus(unshare).await) } -async fn destroy_nexus(filter: Filter) -> impl Responder { +async fn destroy_nexus(filter: Filter) -> Result, RestError> { let destroy = match filter.clone() { Filter::NodeNexus(node_id, nexus_id) => DestroyNexus { node: node_id, @@ -99,14 +101,14 @@ async fn destroy_nexus(filter: Filter) -> impl Responder { Filter::Nexus(nexus_id) => { let node_id = match MessageBus::get_nexus(filter).await { Ok(nexus) => nexus.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; DestroyNexus { node: node_id, uuid: nexus_id, } } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::destroy_nexus(destroy).await) diff --git a/control-plane/rest/service/src/v0/nodes.rs b/control-plane/rest/service/src/v0/nodes.rs new file mode 100644 index 000000000..40a705eab --- /dev/null +++ b/control-plane/rest/service/src/v0/nodes.rs @@ -0,0 +1,16 @@ +use super::*; + +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_nodes).service(get_node); +} + +#[get("/v0", "/nodes", tags(Nodes))] +async fn get_nodes() -> Result>, RestError> { + RestRespond::result(MessageBus::get_nodes().await) +} +#[get("/v0", "/nodes/{id}", tags(Nodes))] +async fn get_node( + web::Path(node_id): web::Path, +) -> Result, RestError> { + RestRespond::result(MessageBus::get_node(&node_id).await) +} diff --git a/rest/service/src/v0/pools.rs b/control-plane/rest/service/src/v0/pools.rs similarity index 53% rename from rest/service/src/v0/pools.rs rename to control-plane/rest/service/src/v0/pools.rs index f0361eb8f..6d75a49da 100644 --- a/rest/service/src/v0/pools.rs +++ b/control-plane/rest/service/src/v0/pools.rs @@ -1,68 +1,65 @@ use super::*; -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_pools.register(config); - get_pool.register(config); - get_node_pools.register(config); - get_node_pool.register(config); - put_node_pool.register(config); - del_node_pool.register(config); - del_pool.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_pools) + .service(get_pool) + .service(get_node_pools) + .service(get_node_pool) + .service(put_node_pool) + .service(del_node_pool) + .service(del_pool); } -#[get("/v0/pools")] -async fn get_pools() -> impl Responder { +#[get("/v0", "/pools", tags(Pools))] +async fn get_pools() -> Result>, RestError> { RestRespond::result(MessageBus::get_pools(Filter::None).await) } - -#[get("/v0/pools/{id}")] -async fn get_pool(web::Path(pool_id): web::Path) -> impl Responder { +#[get("/v0", "/pools/{id}", tags(Pools))] +async fn get_pool( + web::Path(pool_id): web::Path, +) -> Result, RestError> { RestRespond::result(MessageBus::get_pool(Filter::Pool(pool_id)).await) } -#[get("/v0/nodes/{id}/pools")] +#[get("/v0", "/nodes/{id}/pools", tags(Pools))] async fn get_node_pools( web::Path(node_id): web::Path, -) -> impl Responder { +) -> Result>, RestError> { RestRespond::result(MessageBus::get_pools(Filter::Node(node_id)).await) } -#[get("/v0/nodes/{node_id}/pools/{pool_id}")] +#[get("/v0", "/nodes/{node_id}/pools/{pool_id}", tags(Pools))] async fn get_node_pool( web::Path((node_id, pool_id)): web::Path<(NodeId, PoolId)>, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result( MessageBus::get_pool(Filter::NodePool(node_id, pool_id)).await, ) } -#[put("/v0/nodes/{node_id}/pools/{pool_id}")] +#[put("/v0", "/nodes/{node_id}/pools/{pool_id}", tags(Pools))] async fn put_node_pool( web::Path((node_id, pool_id)): web::Path<(NodeId, PoolId)>, create: web::Json, -) -> impl Responder { +) -> Result, RestError> { let create = create.into_inner().bus_request(node_id, pool_id); RestRespond::result(MessageBus::create_pool(create).await) } -#[delete("/v0/nodes/{node_id}/pools/{pool_id}")] +#[delete("/v0", "/nodes/{node_id}/pools/{pool_id}", tags(Pools))] async fn del_node_pool( web::Path((node_id, pool_id)): web::Path<(NodeId, PoolId)>, -) -> impl Responder { +) -> Result, RestError> { destroy_pool(Filter::NodePool(node_id, pool_id)).await } -#[delete("/v0/pools/{pool_id}")] -async fn del_pool(web::Path(pool_id): web::Path) -> impl Responder { +#[delete("/v0", "/pools/{pool_id}", tags(Pools))] +async fn del_pool( + web::Path(pool_id): web::Path, +) -> Result, RestError> { destroy_pool(Filter::Pool(pool_id)).await } -async fn destroy_pool(filter: Filter) -> impl Responder { +async fn destroy_pool(filter: Filter) -> Result, RestError> { let destroy = match filter.clone() { Filter::NodePool(node_id, pool_id) => DestroyPool { node: node_id, @@ -71,14 +68,14 @@ async fn destroy_pool(filter: Filter) -> impl Responder { Filter::Pool(pool_id) => { let node_id = match MessageBus::get_pool(filter).await { Ok(pool) => pool.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; DestroyPool { node: node_id, id: pool_id, } } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::destroy_pool(destroy).await) diff --git a/rest/service/src/v0/replicas.rs b/control-plane/rest/service/src/v0/replicas.rs similarity index 64% rename from rest/service/src/v0/replicas.rs rename to control-plane/rest/service/src/v0/replicas.rs index 7e47c902f..8d88003fc 100644 --- a/rest/service/src/v0/replicas.rs +++ b/control-plane/rest/service/src/v0/replicas.rs @@ -1,64 +1,61 @@ use super::*; -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_replicas.register(config); - get_replica.register(config); - get_replica.register(config); - get_node_replicas.register(config); - get_node_pool_replicas.register(config); - get_node_pool_replica.register(config); - put_node_pool_replica.register(config); - put_pool_replica.register(config); - del_node_pool_replica.register(config); - del_pool_replica.register(config); - put_node_pool_replica_share.register(config); - put_pool_replica_share.register(config); - del_node_pool_replica_share.register(config); - del_pool_replica_share.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_replicas) + .service(get_replica) + .service(get_node_replicas) + .service(get_node_pool_replicas) + .service(get_node_pool_replica) + .service(put_node_pool_replica) + .service(put_pool_replica) + .service(del_node_pool_replica) + .service(del_pool_replica) + .service(put_node_pool_replica_share) + .service(put_pool_replica_share) + .service(del_node_pool_replica_share) + .service(del_pool_replica_share); } -#[get("/v0/replicas")] -async fn get_replicas() -> impl Responder { +#[get("/v0", "/replicas", tags(Replicas))] +async fn get_replicas() -> Result>, RestError> { RestRespond::result(MessageBus::get_replicas(Filter::None).await) } -#[get("/v0/replicas/{id}")] +#[get("/v0", "/replicas/{id}", tags(Replicas))] async fn get_replica( web::Path(replica_id): web::Path, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result( MessageBus::get_replica(Filter::Replica(replica_id)).await, ) } -#[get("/v0/nodes/{id}/replicas")] +#[get("/v0", "/nodes/{id}/replicas", tags(Replicas))] async fn get_node_replicas( web::Path(node_id): web::Path, -) -> impl Responder { +) -> Result>, RestError> { RestRespond::result(MessageBus::get_replicas(Filter::Node(node_id)).await) } -#[get("/v0/nodes/{node_id}/pools/{pool_id}/replicas")] +#[get("/v0", "/nodes/{node_id}/pools/{pool_id}/replicas", tags(Replicas))] async fn get_node_pool_replicas( web::Path((node_id, pool_id)): web::Path<(NodeId, PoolId)>, -) -> impl Responder { +) -> Result>, RestError> { RestRespond::result( MessageBus::get_replicas(Filter::NodePool(node_id, pool_id)).await, ) } -#[get("/v0/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}")] +#[get( + "/v0", + "/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}", + tags(Replicas) +)] async fn get_node_pool_replica( web::Path((node_id, pool_id, replica_id)): web::Path<( NodeId, PoolId, ReplicaId, )>, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result( MessageBus::get_replica(Filter::NodePoolReplica( node_id, pool_id, replica_id, @@ -67,7 +64,11 @@ async fn get_node_pool_replica( ) } -#[put("/v0/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}")] +#[put( + "/v0", + "/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}", + tags(Replicas) +)] async fn put_node_pool_replica( web::Path((node_id, pool_id, replica_id)): web::Path<( NodeId, @@ -75,18 +76,18 @@ async fn put_node_pool_replica( ReplicaId, )>, create: web::Json, -) -> impl Responder { +) -> Result, RestError> { put_replica( Filter::NodePoolReplica(node_id, pool_id, replica_id), create.into_inner(), ) .await } -#[put("/v0/pools/{pool_id}/replicas/{replica_id}")] +#[put("/v0", "/pools/{pool_id}/replicas/{replica_id}", tags(Replicas))] async fn put_pool_replica( web::Path((pool_id, replica_id)): web::Path<(PoolId, ReplicaId)>, create: web::Json, -) -> impl Responder { +) -> Result, RestError> { put_replica( Filter::PoolReplica(pool_id, replica_id), create.into_inner(), @@ -94,24 +95,32 @@ async fn put_pool_replica( .await } -#[delete("/v0/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}")] +#[delete( + "/v0", + "/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}", + tags(Replicas) +)] async fn del_node_pool_replica( web::Path((node_id, pool_id, replica_id)): web::Path<( NodeId, PoolId, ReplicaId, )>, -) -> impl Responder { +) -> Result, RestError> { destroy_replica(Filter::NodePoolReplica(node_id, pool_id, replica_id)).await } -#[delete("/v0/pools/{pool_id}/replicas/{replica_id}")] +#[delete("/v0", "/pools/{pool_id}/replicas/{replica_id}", tags(Replicas))] async fn del_pool_replica( web::Path((pool_id, replica_id)): web::Path<(PoolId, ReplicaId)>, -) -> impl Responder { +) -> Result, RestError> { destroy_replica(Filter::PoolReplica(pool_id, replica_id)).await } -#[put("/v0/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}/share/{protocol}")] +#[put( + "/v0", + "/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}/share/{protocol}", + tags(Replicas) +)] async fn put_node_pool_replica_share( web::Path((node_id, pool_id, replica_id, protocol)): web::Path<( NodeId, @@ -119,45 +128,53 @@ async fn put_node_pool_replica_share( ReplicaId, Protocol, )>, -) -> impl Responder { +) -> Result, RestError> { share_replica( Filter::NodePoolReplica(node_id, pool_id, replica_id), protocol, ) .await } -#[put("/v0/pools/{pool_id}/replicas/{replica_id}/share/{protocol}")] +#[put( + "/v0", + "/pools/{pool_id}/replicas/{replica_id}/share/{protocol}", + tags(Replicas) +)] async fn put_pool_replica_share( web::Path((pool_id, replica_id, protocol)): web::Path<( PoolId, ReplicaId, Protocol, )>, -) -> impl Responder { +) -> Result, RestError> { share_replica(Filter::PoolReplica(pool_id, replica_id), protocol).await } -#[delete("/v0/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}/share")] +#[delete( + "/v0", + "/nodes/{node_id}/pools/{pool_id}/replicas/{replica_id}/share", + tags(Replicas) +)] async fn del_node_pool_replica_share( web::Path((node_id, pool_id, replica_id)): web::Path<( NodeId, PoolId, ReplicaId, )>, -) -> impl Responder { +) -> Result, RestError> { unshare_replica(Filter::NodePoolReplica(node_id, pool_id, replica_id)).await } -#[delete("/v0/pools/{pool_id}/replicas/{replica_id}/share")] +#[delete("/v0", "/pools/{pool_id}/replicas/{replica_id}/share", tags(Replicas))] async fn del_pool_replica_share( web::Path((pool_id, replica_id)): web::Path<(PoolId, ReplicaId)>, -) -> impl Responder { +) -> Result, RestError> { unshare_replica(Filter::PoolReplica(pool_id, replica_id)).await } async fn put_replica( filter: Filter, body: CreateReplicaBody, -) -> impl Responder { +) -> Result, RestError> { let create = match filter.clone() { Filter::NodePoolReplica(node_id, pool_id, replica_id) => { body.bus_request(node_id, pool_id, replica_id) @@ -165,17 +182,17 @@ async fn put_replica( Filter::PoolReplica(pool_id, replica_id) => { let node_id = match MessageBus::get_replica(filter).await { Ok(replica) => replica.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; body.bus_request(node_id, pool_id, replica_id) } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::create_replica(create).await) } -async fn destroy_replica(filter: Filter) -> impl Responder { +async fn destroy_replica(filter: Filter) -> Result, RestError> { let destroy = match filter.clone() { Filter::NodePoolReplica(node_id, pool_id, replica_id) => { DestroyReplica { @@ -187,7 +204,7 @@ async fn destroy_replica(filter: Filter) -> impl Responder { Filter::PoolReplica(pool_id, replica_id) => { let node_id = match MessageBus::get_replica(filter).await { Ok(replica) => replica.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; DestroyReplica { @@ -196,13 +213,16 @@ async fn destroy_replica(filter: Filter) -> impl Responder { uuid: replica_id, } } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::destroy_replica(destroy).await) } -async fn share_replica(filter: Filter, protocol: Protocol) -> impl Responder { +async fn share_replica( + filter: Filter, + protocol: Protocol, +) -> Result, RestError> { let share = match filter.clone() { Filter::NodePoolReplica(node_id, pool_id, replica_id) => ShareReplica { node: node_id, @@ -213,7 +233,7 @@ async fn share_replica(filter: Filter, protocol: Protocol) -> impl Responder { Filter::PoolReplica(pool_id, replica_id) => { let node_id = match MessageBus::get_replica(filter).await { Ok(replica) => replica.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; ShareReplica { @@ -223,13 +243,13 @@ async fn share_replica(filter: Filter, protocol: Protocol) -> impl Responder { protocol, } } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::share_replica(share).await) } -async fn unshare_replica(filter: Filter) -> impl Responder { +async fn unshare_replica(filter: Filter) -> Result, RestError> { let unshare = match filter.clone() { Filter::NodePoolReplica(node_id, pool_id, replica_id) => { UnshareReplica { @@ -241,7 +261,7 @@ async fn unshare_replica(filter: Filter) -> impl Responder { Filter::PoolReplica(pool_id, replica_id) => { let node_id = match MessageBus::get_replica(filter).await { Ok(replica) => replica.node, - Err(error) => return (RestError::from(error)).into(), + Err(error) => return Err(RestError::from(error)), }; UnshareReplica { @@ -250,7 +270,7 @@ async fn unshare_replica(filter: Filter) -> impl Responder { uuid: replica_id, } } - _ => return (RestError::from(BusError::NotFound)).into(), + _ => return Err(RestError::from(BusError::NotFound)), }; RestRespond::result(MessageBus::unshare_replica(unshare).await) diff --git a/control-plane/rest/service/src/v0/resources/swagger-ui.html b/control-plane/rest/service/src/v0/resources/swagger-ui.html new file mode 100644 index 000000000..1e39a8676 --- /dev/null +++ b/control-plane/rest/service/src/v0/resources/swagger-ui.html @@ -0,0 +1,72 @@ + + + + + Swagger UI + + + + + + + +
+ + + + + + + \ No newline at end of file diff --git a/control-plane/rest/service/src/v0/swagger_ui.rs b/control-plane/rest/service/src/v0/swagger_ui.rs new file mode 100644 index 000000000..de711acf4 --- /dev/null +++ b/control-plane/rest/service/src/v0/swagger_ui.rs @@ -0,0 +1,44 @@ +use actix_web::{dev::Factory, web, Error, HttpResponse}; +use futures::future::{ok as fut_ok, Ready}; +use tinytemplate::TinyTemplate; + +pub(super) fn configure(cfg: &mut web::ServiceConfig) { + cfg.service( + web::resource(&format!("{}/swagger-ui", super::version())).route( + web::get().to(GetSwaggerUi(get_swagger_html(&super::spec_uri()))), + ), + ); +} + +static TEMPLATE: &str = include_str!("./resources/swagger-ui.html"); +fn get_swagger_html(spec_uri: &str) -> Result { + let context = serde_json::json!({ "api_spec_uri": spec_uri }); + let mut template = TinyTemplate::new(); + template + .add_template("swagger-ui", TEMPLATE) + .map_err(|e| e.to_string())?; + Ok(template + .render("swagger-ui", &context) + .map_err(|e| e.to_string())?) +} + +#[derive(Clone)] +struct GetSwaggerUi(Result); + +impl + Factory<(), Ready>, Result> + for GetSwaggerUi +{ + fn call(&self, _: ()) -> Ready> { + match &self.0 { + Ok(html) => { + fut_ok(HttpResponse::Ok().content_type("text/html").body(html)) + } + Err(error) => fut_ok( + HttpResponse::NotFound() + .content_type("application/json") + .body(serde_json::json!({ "error_message": error })), + ), + } + } +} diff --git a/rest/service/src/v0/volumes.rs b/control-plane/rest/service/src/v0/volumes.rs similarity index 54% rename from rest/service/src/v0/volumes.rs rename to control-plane/rest/service/src/v0/volumes.rs index b15b75833..ed7acadce 100644 --- a/rest/service/src/v0/volumes.rs +++ b/control-plane/rest/service/src/v0/volumes.rs @@ -1,60 +1,54 @@ use super::*; -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_volumes.register(config); - get_volume.register(config); - get_node_volumes.register(config); - get_node_volume.register(config); - put_volume.register(config); - del_volume.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} +pub(super) fn configure(cfg: &mut paperclip::actix::web::ServiceConfig) { + cfg.service(get_volumes) + .service(get_volume) + .service(get_node_volumes) + .service(get_node_volume) + .service(put_volume) + .service(del_volume); } -#[get("/v0/volumes")] -async fn get_volumes() -> impl Responder { +#[get("/v0", "/volumes", tags(Volumes))] +async fn get_volumes() -> Result>, RestError> { RestRespond::result(MessageBus::get_volumes(Filter::None).await) } -#[get("/v0/volumes/{volume_id}")] +#[get("/v0", "/volumes/{volume_id}", tags(Volumes))] async fn get_volume( web::Path(volume_id): web::Path, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result(MessageBus::get_volume(Filter::Volume(volume_id)).await) } -#[get("/v0/nodes/{node_id}/volumes")] +#[get("/v0", "/nodes/{node_id}/volumes", tags(Volumes))] async fn get_node_volumes( web::Path(node_id): web::Path, -) -> impl Responder { +) -> Result>, RestError> { RestRespond::result(MessageBus::get_volumes(Filter::Node(node_id)).await) } -#[get("/v0/nodes/{node_id}/volumes/{volume_id}")] +#[get("/v0", "/nodes/{node_id}/volumes/{volume_id}", tags(Volumes))] async fn get_node_volume( web::Path((node_id, volume_id)): web::Path<(NodeId, VolumeId)>, -) -> impl Responder { +) -> Result, RestError> { RestRespond::result( MessageBus::get_volume(Filter::NodeVolume(node_id, volume_id)).await, ) } -#[put("/v0/volumes/{volume_id}")] +#[put("/v0", "/volumes/{volume_id}", tags(Volumes))] async fn put_volume( web::Path(volume_id): web::Path, create: web::Json, -) -> impl Responder { +) -> Result, RestError> { let create = create.into_inner().bus_request(volume_id); RestRespond::result(MessageBus::create_volume(create).await) } -#[delete("/v0/volumes/{volume_id}")] +#[delete("/v0", "/volumes/{volume_id}", tags(Volumes))] async fn del_volume( web::Path(volume_id): web::Path, -) -> impl Responder { +) -> Result, RestError> { let request = DestroyVolume { uuid: volume_id, }; diff --git a/rest/src/lib.rs b/control-plane/rest/src/lib.rs similarity index 83% rename from rest/src/lib.rs rename to control-plane/rest/src/lib.rs index ff7447f62..ff2c9a07a 100644 --- a/rest/src/lib.rs +++ b/control-plane/rest/src/lib.rs @@ -29,7 +29,21 @@ pub struct ActixRestClient { impl ActixRestClient { /// creates a new client which uses the specified `url` + /// uses the rustls connector if the url has the https scheme pub fn new(url: &str, trace: bool) -> anyhow::Result { + let url: url::Url = url.parse()?; + + match url.scheme() { + "https" => Self::new_https(&url, trace), + "http" => Ok(Self::new_http(&url, trace)), + invalid => { + let msg = format!("Invalid url scheme: {}", invalid); + Err(anyhow::Error::msg(msg)) + } + } + } + /// creates a new secure client + fn new_https(url: &url::Url, trace: bool) -> anyhow::Result { let cert_file = &mut BufReader::new( &std::include_bytes!("../certs/rsa/ca.cert")[..], ); @@ -46,10 +60,18 @@ impl ActixRestClient { Ok(Self { client: rest_client, - url: url.to_string(), + url: url.to_string().trim_end_matches('/').into(), trace, }) } + /// creates a new client + fn new_http(url: &url::Url, trace: bool) -> Self { + Self { + client: Client::new(), + url: url.to_string().trim_end_matches('/').into(), + trace, + } + } async fn get_vec(&self, urn: String) -> anyhow::Result> where for<'de> R: Deserialize<'de>, diff --git a/rest/src/versions/mod.rs b/control-plane/rest/src/versions/mod.rs similarity index 100% rename from rest/src/versions/mod.rs rename to control-plane/rest/src/versions/mod.rs diff --git a/rest/src/versions/v0.rs b/control-plane/rest/src/versions/v0.rs similarity index 84% rename from rest/src/versions/v0.rs rename to control-plane/rest/src/versions/v0.rs index a65a80b04..45c192131 100644 --- a/rest/src/versions/v0.rs +++ b/control-plane/rest/src/versions/v0.rs @@ -1,10 +1,15 @@ +#![allow(clippy::field_reassign_with_default)] use super::super::ActixRestClient; -use actix_web::{body::Body, http::StatusCode, HttpResponse, ResponseError}; -use async_trait::async_trait; -use mbus_api::{ - message_bus::{v0, v0::BusError}, - ErrorChain, +use actix_web::{ + body::Body, + http::StatusCode, + web::Json, + HttpResponse, + ResponseError, }; +use async_trait::async_trait; +pub use mbus_api::message_bus::v0::*; +use paperclip::actix::Apiv2Schema; use serde::{Deserialize, Serialize}; use std::{ fmt::{Display, Formatter}, @@ -12,34 +17,8 @@ use std::{ }; use strum_macros::{self, Display}; -/// Node from the node service -pub type Node = v0::Node; -/// Vector of Nodes from the node service -pub type Nodes = v0::Nodes; -/// Pool from the node service -pub type Pool = v0::Pool; -/// Vector of Pools from the pool service -pub type Pools = v0::Pools; -/// Replica -pub type Replica = v0::Replica; -/// Vector of Replicas from the pool service -pub type Replicas = v0::Replicas; -/// Replica protocol -pub type Protocol = v0::Protocol; -/// Create Pool request -pub type CreatePool = v0::CreatePool; -/// Create Replica request -pub type CreateReplica = v0::CreateReplica; -/// Replica Destroy -pub type DestroyReplica = v0::DestroyReplica; -/// Replica Share -pub type ShareReplica = v0::ShareReplica; -/// Replica Unshare -pub type UnshareReplica = v0::UnshareReplica; -/// Pool Destroy -pub type DestroyPool = v0::DestroyPool; /// Create Replica Body JSON -#[derive(Serialize, Deserialize, Default, Debug, Clone)] +#[derive(Serialize, Deserialize, Default, Debug, Clone, Apiv2Schema)] pub struct CreateReplicaBody { /// size of the replica in bytes pub size: u64, @@ -49,7 +28,7 @@ pub struct CreateReplicaBody { pub share: Protocol, } /// Create Pool Body JSON -#[derive(Serialize, Deserialize, Default, Debug, Clone)] +#[derive(Serialize, Deserialize, Default, Debug, Clone, Apiv2Schema)] pub struct CreatePoolBody { /// disk device paths or URIs to be claimed by the pool pub disks: Vec, @@ -102,31 +81,9 @@ impl CreateReplicaBody { } } } -/// Filter Nodes, Pools, Replicas, Nexuses -pub type Filter = v0::Filter; -/// Nexus from the volume service -pub type Nexus = v0::Nexus; -/// Vector of Nexuses from the volume service -pub type Nexuses = v0::Nexuses; -/// State of the nexus -pub type NexusState = v0::NexusState; -/// State of the nexus -pub type VolumeState = v0::VolumeState; -/// Child of the nexus -pub type Child = v0::Child; -/// State of the child -pub type ChildState = v0::ChildState; -/// Nexus Create -pub type CreateNexus = v0::CreateNexus; -/// Nexus Destroy -pub type DestroyNexus = v0::DestroyNexus; -/// Nexus Share -pub type ShareNexus = v0::ShareNexus; -/// Nexus Unshare -pub type UnshareNexus = v0::UnshareNexus; /// Create Nexus Body JSON -#[derive(Serialize, Deserialize, Default, Debug, Clone)] +#[derive(Serialize, Deserialize, Default, Debug, Clone, Apiv2Schema)] pub struct CreateNexusBody { /// size of the device in bytes pub size: u64, @@ -159,33 +116,9 @@ impl CreateNexusBody { } } } -/// Remove Nexus Child -pub type RemoveNexusChild = v0::RemoveNexusChild; -/// Add Nexus Child -pub type AddNexusChild = v0::AddNexusChild; -/// Volume -pub type Volume = v0::Volume; -/// Volumes -pub type Volumes = v0::Volumes; -/// Create Volume -pub type CreateVolume = v0::CreateVolume; -/// Destroy Volume -pub type DestroyVolume = v0::DestroyVolume; -/// Id of a mayastor node -pub type NodeId = v0::NodeId; -/// Id of a mayastor pool -pub type PoolId = v0::PoolId; -/// UUID of a mayastor pool replica -pub type ReplicaId = v0::ReplicaId; -/// UUID of a mayastor nexus -pub type NexusId = v0::NexusId; -/// URI of a mayastor nexus child -pub type ChildUri = v0::ChildUri; -/// UUID of a mayastor volume -pub type VolumeId = v0::VolumeId; /// Create Volume Body JSON -#[derive(Serialize, Deserialize, Default, Debug, Clone)] +#[derive(Serialize, Deserialize, Default, Debug, Clone, Apiv2Schema)] pub struct CreateVolumeBody { /// size of the volume in bytes pub size: u64, @@ -195,13 +128,13 @@ pub struct CreateVolumeBody { pub replicas: u64, /// only these nodes can be used for the replicas #[serde(default)] - pub allowed_nodes: Vec, + pub allowed_nodes: Option>, /// preferred nodes for the replicas #[serde(default)] - pub preferred_nodes: Vec, + pub preferred_nodes: Option>, /// preferred nodes for the nexuses #[serde(default)] - pub preferred_nexus_nodes: Vec, + pub preferred_nexus_nodes: Option>, } impl From for CreateVolumeBody { fn from(create: CreateVolume) -> Self { @@ -209,9 +142,9 @@ impl From for CreateVolumeBody { size: create.size, nexuses: create.nexuses, replicas: create.replicas, - preferred_nodes: create.preferred_nodes, - allowed_nodes: create.allowed_nodes, - preferred_nexus_nodes: create.preferred_nexus_nodes, + preferred_nodes: create.preferred_nodes.into(), + allowed_nodes: create.allowed_nodes.into(), + preferred_nexus_nodes: create.preferred_nexus_nodes.into(), } } } @@ -223,9 +156,12 @@ impl CreateVolumeBody { size: self.size, nexuses: self.nexuses, replicas: self.replicas, - allowed_nodes: self.allowed_nodes.clone(), - preferred_nodes: self.preferred_nodes.clone(), - preferred_nexus_nodes: self.preferred_nexus_nodes.clone(), + allowed_nodes: self.allowed_nodes.clone().unwrap_or_default(), + preferred_nodes: self.preferred_nodes.clone().unwrap_or_default(), + preferred_nexus_nodes: self + .preferred_nexus_nodes + .clone() + .unwrap_or_default(), } } } @@ -474,14 +410,6 @@ impl RestClient for ActixRestClient { Ok(nexuses) } - async fn get_nexus_children( - &self, - filter: Filter, - ) -> anyhow::Result> { - let children = get_filter!(self, filter, GetChildren).await?; - Ok(children) - } - async fn create_nexus(&self, args: CreateNexus) -> anyhow::Result { let urn = format!("/v0/nodes/{}/nexuses/{}", &args.node, &args.uuid); let replica = self.put(urn, CreateNexusBody::from(args)).await?; @@ -528,6 +456,7 @@ impl RestClient for ActixRestClient { self.del(urn).await?; Ok(()) } + async fn add_nexus_child( &self, args: AddNexusChild, @@ -539,6 +468,13 @@ impl RestClient for ActixRestClient { let replica = self.put(urn, Body::Empty).await?; Ok(replica) } + async fn get_nexus_children( + &self, + filter: Filter, + ) -> anyhow::Result> { + let children = get_filter!(self, filter, GetChildren).await?; + Ok(children) + } async fn get_volumes(&self, filter: Filter) -> anyhow::Result> { let volumes = get_filter!(self, filter, GetVolumes).await?; @@ -596,11 +532,14 @@ pub struct RestError { message: String, } +#[cfg(not(feature = "nightly"))] +impl paperclip::v2::schema::Apiv2Errors for RestError {} + impl RestError { // todo: response type convention fn get_resp_error(&self) -> HttpResponse { match &self.kind { - BusError::NotFound => HttpResponse::NoContent().json(()), + BusError::NotFound => HttpResponse::NotFound().json(()), BusError::NotUnique => { let error = serde_json::json!({"error": self.kind.as_ref(), "message": self.message }); tracing::error!("Got error: {}", error); @@ -661,13 +600,15 @@ impl Display for RestRespond { } impl RestRespond { /// Respond with a Result - pub fn result(from: Result) -> HttpResponse { - let resp: Self = from.into(); - resp.into() + pub fn result(from: Result) -> Result, RestError> { + match from { + Ok(v) => Ok(Json::(v)), + Err(e) => Err(e.into()), + } } /// Respond T with success - pub fn ok(object: T) -> Result { - Ok(HttpResponse::Ok().json(object)) + pub fn ok(object: T) -> Result, RestError> { + Ok(Json(object)) } } impl Into> for Result { diff --git a/rest/tests/v0_test.rs b/control-plane/rest/tests/v0_test.rs similarity index 93% rename from rest/tests/v0_test.rs rename to control-plane/rest/tests/v0_test.rs index 8570d97fb..9af46e5ca 100644 --- a/rest/tests/v0_test.rs +++ b/control-plane/rest/tests/v0_test.rs @@ -42,19 +42,24 @@ async fn client() { let mayastor = "node-test-name"; let test = Builder::new() .name("rest") - .add_container_spec(ContainerSpec::from_binary( - "nats", - Binary::from_nix("nats-server").with_arg("-DV"), - )) + .add_container_spec( + ContainerSpec::from_binary( + "nats", + Binary::from_nix("nats-server").with_arg("-DV"), + ) + .with_portmap("4222", "4222"), + ) .add_container_bin("node", Binary::from_dbg("node").with_nats("-n")) .add_container_bin("pool", Binary::from_dbg("pool").with_nats("-n")) .add_container_bin("volume", Binary::from_dbg("volume").with_nats("-n")) .add_container_spec( ContainerSpec::from_binary( "rest", - Binary::from_dbg("rest") - .with_nats("-n") - .with_args(vec!["-j", "10.1.0.8:6831"]), + Binary::from_dbg("rest").with_nats("-n").with_args(vec![ + "-j", + "10.1.0.8:6831", + "--dummy-certificates", + ]), ) .with_portmap("8080", "8080") .with_portmap("8081", "8081"), @@ -75,13 +80,8 @@ async fn client() { .with_portmap("6831/udp", "6831/udp") .with_portmap("6832/udp", "6832/udp"), ) - // uncomment to run alpine commands within the containers - //.with_base_image("alpine:latest".to_string()) .with_default_tracing() .autorun(false) - // uncomment to leave containers running allowing us access the jaeger - // traces at localhost:16686 - //.with_clean(false) .build() .await .unwrap(); diff --git a/csi/moac/.gitignore b/csi/moac/.gitignore index f9efbe2ac..806bbd3e9 100644 --- a/csi/moac/.gitignore +++ b/csi/moac/.gitignore @@ -1,7 +1,7 @@ /node_modules/ /proto/ /result -/watcher.js +/csi.js /nexus.js /node.js /node_operator.js @@ -11,4 +11,6 @@ /volume.js /volumes.js /volume_operator.js +/watcher.js +/workq.js /*.js.map diff --git a/csi/moac/csi.js b/csi/moac/csi.ts similarity index 71% rename from csi/moac/csi.js rename to csi/moac/csi.ts index 964a7cb03..3c0285baa 100644 --- a/csi/moac/csi.js +++ b/csi/moac/csi.ts @@ -3,9 +3,13 @@ 'use strict'; -const assert = require('assert'); +import assert from 'assert'; +import * as _ from 'lodash'; +import * as path from 'path'; +import { Volume } from './volume'; +import { Volumes } from './volumes'; + const fs = require('fs').promises; -const path = require('path'); const protoLoader = require('@grpc/proto-loader'); const grpc = require('grpc-uds'); const log = require('./logger').Logger('csi'); @@ -30,8 +34,33 @@ const packageDefinition = protoLoader.loadSync(PROTO_PATH, { }); const csi = grpc.loadPackageDefinition(packageDefinition).csi.v1; +// Done callback in CSI methods +type CsiDoneCb = (err: any, resp?: any) => void; +// CSI method signature +type CsiMethod = (args: any, cb: CsiDoneCb) => void; + +// Limited definition of topology key from CSI spec. +type TopologyKeys = { + segments: Record +}; + +// Simplified definition of K8s object as defined in the CSI spec. +type K8sVolume = { + volumeId: string, + capacityBytes: number, + accessibleTopology: TopologyKeys[], +}; + +// When list volumes method does not fit into one reply we store the context +// for the next retrieval. +type ListContext = { + volumes: { + volume: K8sVolume + }[] +}; + // Parse mayastor node ID (i.e. mayastor://node-name) and return the node name. -function parseMayastorNodeId (nodeId) { +function parseMayastorNodeId (nodeId: string) { const parts = nodeId.split('/'); if ( @@ -51,8 +80,8 @@ function parseMayastorNodeId (nodeId) { // Check that the list of volume capabilities does not contain unsupported // capability. Throws grpc error if a capability is not supported. // -// @param {string[]} caps Volume capabilities as described in CSI spec. -function checkCapabilities (caps) { +// @param caps Volume capabilities as described in CSI spec. +function checkCapabilities (caps: any[]) { if (!caps) { throw new GrpcError( grpc.status.INVALID_ARGUMENT, @@ -76,31 +105,64 @@ function checkCapabilities (caps) { // // @param {object} volume Volume object. // @returns {object} K8s CSI volume object. -function createK8sVolumeObject (volume) { - const obj = { +function createK8sVolumeObject (volume: Volume): K8sVolume { + const obj: K8sVolume = { volumeId: volume.uuid, capacityBytes: volume.getSize(), accessibleTopology: [] }; - if (volume.protocol.toLowerCase() === 'nbd') { - obj.accessibleTopology.push({ - segments: { 'kubernetes.io/hostname': volume.getNodeName() } - }); - } return obj; } +// Duplicate request cache entry helps to detect retransmits of the same request +// +// This may seem like a useless thing but k8s is agressive on retransmitting +// requests. The first retransmit happens just a tens of ms after the original +// request. Having many requests that are the same in progress creates havoc +// and forces mayastor to execute repeating code. +// +// NOTE: Assumption is that k8s doesn't submit duplicate request for the same +// volume (the same uuid) with different parameters. +// +class Request { + uuid: string; // ID of the object in the operation + op: string; // name of the operation + callbacks: CsiDoneCb[]; // callbacks to call when done + + constructor (uuid: string, op: string, cb: CsiDoneCb) { + this.uuid = uuid; + this.op = op; + this.callbacks = [cb]; + } + + wait (cb: CsiDoneCb) { + this.callbacks.push(cb); + } + + done (err: any, resp?: any) { + this.callbacks.forEach((cb) => cb(err, resp)); + } +} + // CSI Controller implementation. // // It implements Identity and Controller grpc services from csi proto file. // It relies on volume manager, when serving incoming CSI requests, that holds // information about volumes and provides methods to manipulate them. class CsiServer { + private server: any; + private ready: boolean; + private registry: any; + private volumes: Volumes | null; + private sockPath: string; + private nextListContextId: number; + private listContexts: Record; + private duplicateRequestCache: Request[]; + // Creates new csi server // - // @param {string} sockPath Unix domain socket for csi server to listen on. - constructor (sockPath) { - assert.strictEqual(typeof sockPath, 'string'); + // @param sockPath Unix domain socket for csi server to listen on. + constructor (sockPath: string) { this.server = new grpc.Server(); this.ready = false; this.registry = null; @@ -108,6 +170,7 @@ class CsiServer { this.sockPath = sockPath; this.nextListContextId = 1; this.listContexts = {}; + this.duplicateRequestCache = []; // The data returned by identity service should be kept in sync with // responses for the same methods on storage node. @@ -119,8 +182,7 @@ class CsiServer { // Wrap all controller methods by a check for readiness of the csi server // and request/response logging to avoid repeating code. - const self = this; - const controllerMethods = {}; + const controllerMethods: Record = {}; let methodNames = [ 'createVolume', 'deleteVolume', @@ -131,11 +193,13 @@ class CsiServer { 'getCapacity', 'controllerGetCapabilities' ]; + // Note: what used to be elegant in JS is a type disaster in TS. + // Dynamic wrapper for calling methods defined on an object. methodNames.forEach((name) => { - controllerMethods[name] = function checkReady (args, cb) { + controllerMethods[name] = (args, cb) => { log.trace(`CSI ${name} request: ${JSON.stringify(args)}`); - if (!self.ready) { + if (!this.ready) { return cb( new GrpcError( grpc.status.UNAVAILABLE, @@ -143,7 +207,8 @@ class CsiServer { ) ); } - return self[name](args, (err, resp) => { + let csiMethod = this[name as keyof CsiServer].bind(this); + return csiMethod(args, (err: any, resp: any) => { if (err) { if (!(err instanceof GrpcError)) { err = new GrpcError( @@ -199,19 +264,18 @@ class CsiServer { // Stop the grpc server. async stop () { - const self = this; return new Promise((resolve, reject) => { log.info('Shutting down grpc server'); - self.server.tryShutdown(resolve); + this.server.tryShutdown(resolve); }); } // Switch csi server to ready state (returned by identity.probe() method). // This will enable serving grpc controller service requests. // - // @param {object} registry Object holding node, replica, pool and nexus objects. - // @param {object} volumes Volume manager. - makeReady (registry, volumes) { + // @param registry Object holding node, replica, pool and nexus objects. + // @param volumes Volume manager. + makeReady (registry: any, volumes: Volumes) { this.ready = true; this.registry = registry; this.volumes = volumes; @@ -223,11 +287,37 @@ class CsiServer { this.ready = false; } + // Find outstanding request by uuid and operation type. + _findRequest (uuid: string, op: string): Request | undefined { + return this.duplicateRequestCache.find((e) => e.uuid === uuid && e.op === op); + } + + _beginRequest (uuid: string, op: string, cb: CsiDoneCb): Request | undefined { + let request = this._findRequest(uuid, op); + if (request) { + log.debug(`Duplicate ${op} volume request detected`); + request.wait(cb); + return; + } + request = new Request(uuid, op, cb); + this.duplicateRequestCache.push(request); + return request; + } + + // Remove request entry from the cache and call done callbacks. + _endRequest (request: Request, err: any, resp?: any) { + let idx = this.duplicateRequestCache.indexOf(request); + if (idx >= 0) { + this.duplicateRequestCache.splice(idx, 1); + } + request.done(err, resp); + } + // // Implementation of CSI identity methods // - getPluginInfo (_, cb) { + getPluginInfo (_: any, cb: CsiDoneCb) { log.debug( `getPluginInfo request (name=${PLUGIN_NAME}, version=${VERSION})` ); @@ -238,7 +328,7 @@ class CsiServer { }); } - getPluginCapabilities (_, cb) { + getPluginCapabilities (_: any, cb: CsiDoneCb) { const caps = ['CONTROLLER_SERVICE', 'VOLUME_ACCESSIBILITY_CONSTRAINTS']; log.debug('getPluginCapabilities request: ' + caps.join(', ')); cb(null, { @@ -248,7 +338,7 @@ class CsiServer { }); } - probe (_, cb) { + probe (_: any, cb: CsiDoneCb) { log.debug(`probe request (ready=${this.ready})`); cb(null, { ready: { value: this.ready } }); } @@ -257,7 +347,7 @@ class CsiServer { // Implementation of CSI controller methods // - async controllerGetCapabilities (_, cb) { + async controllerGetCapabilities (_: any, cb: CsiDoneCb) { const caps = [ 'CREATE_DELETE_VOLUME', 'PUBLISH_UNPUBLISH_VOLUME', @@ -272,8 +362,9 @@ class CsiServer { }); } - async createVolume (call, cb) { + async createVolume (call: any, cb: CsiDoneCb) { const args = call.request; + assert(this.volumes); log.debug( `Request to create volume "${args.name}" with size ` + @@ -367,6 +458,12 @@ class CsiServer { count = 1; } + // If this is a duplicate request then assure it is executed just once. + let request = this._beginRequest(uuid, 'create', cb); + if (!request) { + return; + } + // create the volume let volume; try { @@ -379,17 +476,14 @@ class CsiServer { protocol: protocol }); } catch (err) { - return cb(err); + this._endRequest(request, err); + return; } - // Enforce local access to the volume for NBD protocol - const accessibleTopology = []; - if (protocol.toLowerCase() === 'nbd') { - accessibleTopology.push({ - segments: { 'kubernetes.io/hostname': volume.getNodeName() } - }); - } - cb(null, { + // This was used in the old days for NBD protocol + const accessibleTopology: TopologyKeys[] = []; + + this._endRequest(request, null, { volume: { capacityBytes: volume.getSize(), volumeId: uuid, @@ -403,23 +497,31 @@ class CsiServer { }); } - async deleteVolume (call, cb) { + async deleteVolume (call: any, cb: CsiDoneCb) { const args = call.request; + assert(this.volumes); log.debug(`Request to destroy volume "${args.volumeId}"`); + // If this is a duplicate request then assure it is executed just once. + let request = this._beginRequest(args.volumeId, 'delete', cb); + if (!request) { + return; + } + try { await this.volumes.destroyVolume(args.volumeId); } catch (err) { - return cb(err); + return this._endRequest(request, err); } log.info(`Volume "${args.volumeId}" destroyed`); - cb(); + this._endRequest(request, null); } - async listVolumes (call, cb) { + async listVolumes (call: any, cb: CsiDoneCb) { + assert(this.volumes); const args = call.request; - let ctx = {}; + let ctx: ListContext; if (args.startingToken) { ctx = this.listContexts[args.startingToken]; @@ -452,18 +554,19 @@ class CsiServer { // TODO: purge list contexts older than .. (1 min) if (ctx.volumes.length > 0) { - const ctxId = this.nextListContextId++; + const ctxId = (this.nextListContextId++).toString(); this.listContexts[ctxId] = ctx; cb(null, { entries: entries, - nextToken: ctxId.toString() + nextToken: ctxId, }); } else { cb(null, { entries: entries }); } } - async controllerPublishVolume (call, cb) { + async controllerPublishVolume (call: any, cb: CsiDoneCb) { + assert(this.volumes); const args = call.request; log.debug( @@ -492,19 +595,6 @@ class CsiServer { new GrpcError(grpc.status.INVALID_ARGUMENT, 'missing storage protocol') ); } - if (protocol.toLowerCase() === 'nbd') { - const nodeName = volume.getNodeName(); - if (nodeId !== nodeName) { - return cb( - new GrpcError( - grpc.status.INVALID_ARGUMENT, - `Cannot publish the volume "${args.volumeId}" on a different ` + - `node "${nodeId}" than it was created "${nodeName}" when using ` + - `local access protocol ${protocol}` - ) - ); - } - } if (args.readonly) { return cb( new GrpcError( @@ -524,7 +614,13 @@ class CsiServer { return cb(err); } - const publishContext = {}; + // If this is a duplicate request then assure it is executed just once. + let request = this._beginRequest(args.volumeId, 'publish', cb); + if (!request) { + return; + } + + const publishContext: any = {}; try { publishContext.uri = await volume.publish(protocol); log.debug( @@ -533,18 +629,20 @@ class CsiServer { } catch (err) { if (err.code === grpc.status.ALREADY_EXISTS) { log.debug(`Volume "${args.volumeId}" already published on this node`); - cb(null, { publishContext }); + this._endRequest(request, null, { publishContext }); } else { cb(err); + this._endRequest(request, err); } return; } log.info(`Published volume "${args.volumeId}" over ${protocol}`); - cb(null, { publishContext }); + this._endRequest(request, null, { publishContext }); } - async controllerUnpublishVolume (call, cb) { + async controllerUnpublishVolume (call: any, cb: CsiDoneCb) { + assert(this.volumes); const args = call.request; log.debug(`Request to unpublish volume "${args.volumeId}"`); @@ -561,16 +659,24 @@ class CsiServer { } catch (err) { return cb(err); } + + // If this is a duplicate request then assure it is executed just once. + let request = this._beginRequest(args.volumeId, 'unpublish', cb); + if (!request) { + return; + } + try { await volume.unpublish(); } catch (err) { - return cb(err); + return this._endRequest(request, err); } log.info(`Unpublished volume "${args.volumeId}"`); - cb(null, {}); + this._endRequest(request, null, {}); } - async validateVolumeCapabilities (call, cb) { + async validateVolumeCapabilities (call: any, cb: CsiDoneCb) { + assert(this.volumes); const args = call.request; log.debug(`Request to validate volume capabilities for "${args.volumeId}"`); @@ -584,9 +690,9 @@ class CsiServer { ); } const caps = args.volumeCapabilities.filter( - (cap) => cap.accessMode.mode === 'SINGLE_NODE_WRITER' + (cap: any) => cap.accessMode.mode === 'SINGLE_NODE_WRITER' ); - const resp = {}; + const resp: any = {}; if (caps.length > 0) { resp.confirmed = { volumeCapabilities: caps }; } else { @@ -601,7 +707,7 @@ class CsiServer { // // XXX Is the caller interested in total capacity (sum of all pools) or // a capacity usable by a single volume? - async getCapacity (call, cb) { + async getCapacity (call: any, cb: CsiDoneCb) { let nodeName; const args = call.request; diff --git a/csi/moac/nexus.ts b/csi/moac/nexus.ts index f351295d1..cf594e2b7 100644 --- a/csi/moac/nexus.ts +++ b/csi/moac/nexus.ts @@ -1,7 +1,8 @@ // Nexus object implementation. -const _ = require('lodash'); -const assert = require('assert'); +import assert from 'assert'; +import * as _ from 'lodash'; + const { GrpcCode, GrpcError, mayastor } = require('./grpc_client'); const log = require('./logger').Logger('nexus'); @@ -10,15 +11,12 @@ import { Replica } from './replica'; // Protocol used to export nexus (volume) export enum Protocol { Unknown = 'unknown', - Nbd = 'nbd', Iscsi = 'iscsi', Nvmf = 'nvmf', } export function protocolFromString(val: string): Protocol { - if (val == Protocol.Nbd) { - return Protocol.Nbd; - } else if (val == Protocol.Iscsi) { + if (val == Protocol.Iscsi) { return Protocol.Iscsi; } else if (val == Protocol.Nvmf) { return Protocol.Nvmf; diff --git a/csi/moac/node.ts b/csi/moac/node.ts index ecc2b6139..b78a40b2a 100644 --- a/csi/moac/node.ts +++ b/csi/moac/node.ts @@ -6,12 +6,18 @@ import assert from 'assert'; import { Pool } from './pool'; import { Nexus } from './nexus'; import { Replica } from './replica'; +import { Workq } from './workq'; const EventEmitter = require('events'); -const Workq = require('./workq'); const log = require('./logger').Logger('node'); const { GrpcClient, GrpcCode, GrpcError } = require('./grpc_client'); +// Type used in workq for calling grpc +type GrpcCallArgs = { + method: string; + args: any; +} + // Object represents mayastor storage node. // // Node emits following events: @@ -25,7 +31,7 @@ export class Node extends EventEmitter { syncBadLimit: number; endpoint: string | null; client: any; - workq: any; + workq: Workq; syncFailed: number; syncTimer: NodeJS.Timeout | null; nexus: Nexus[]; @@ -49,7 +55,7 @@ export class Node extends EventEmitter { this.endpoint = null; this.client = null; // grpc client handle - this.workq = new Workq(); // work queue for serializing grpc calls + this.workq = new Workq('grpc call'); // work queue for serializing grpc calls // We don't want to switch all objects to offline state when moac starts // just because a node is not reachable from the beginning. That's why we // set syncFailed to syncBadLimit + 1. @@ -129,17 +135,19 @@ export class Node extends EventEmitter { // @returns A promise that evals to return value of gRPC method. // async call(method: string, args: any): Promise { - return this.workq.push({ method, args }, this._call.bind(this)); + return this.workq.push({ method, args }, (args: GrpcCallArgs) => { + return this._call(args.method, args.args); + }); } - async _call(ctx: any) { + async _call(method: string, args: any): Promise { if (!this.client) { throw new GrpcError( GrpcCode.INTERNAL, `Broken connection to mayastor on node "${this.name}"` ); } - return this.client.call(ctx.method, ctx.args); + return this.client.call(method, args); } // Sync triggered by the timer. It ensures that the sync does run in @@ -149,7 +157,9 @@ export class Node extends EventEmitter { this.syncTimer = null; try { - await this.workq.push({}, this._sync.bind(this)); + await this.workq.push(null, () => { + return this._sync(); + }); nextSync = this.syncPeriod; } catch (err) { // We don't want to cover up unexpected errors. But it's hard to @@ -180,20 +190,11 @@ export class Node extends EventEmitter { log.debug(`Syncing the node "${this.name}"`); // TODO: Harden checking of outputs of the methods below - let reply = await this._call({ - method: 'listNexus', - args: {} - }); + let reply = await this._call('listNexus', {}); const nexus = reply.nexusList; - reply = await this._call({ - method: 'listPools', - args: {} - }); + reply = await this._call('listPools', {}); const pools = reply.pools; - reply = await this._call({ - method: 'listReplicas', - args: {} - }); + reply = await this._call('listReplicas', {}); const replicas = reply.replicas; // Move the the node to online state before we attempt to merge objects diff --git a/csi/moac/node_operator.ts b/csi/moac/node_operator.ts index ce6b87c18..99bf8e0e9 100644 --- a/csi/moac/node_operator.ts +++ b/csi/moac/node_operator.ts @@ -17,11 +17,11 @@ import { CustomResourceCache, CustomResourceMeta, } from './watcher'; +import { Workq } from './workq'; const yaml = require('js-yaml'); const EventStream = require('./event_stream'); const log = require('./logger').Logger('node-operator'); -const Workq = require('./workq'); const RESOURCE_NAME: string = 'mayastornode'; const crdNode = yaml.safeLoad( @@ -75,7 +75,7 @@ export class NodeOperator { watcher: CustomResourceCache; // k8s resource watcher for nodes registry: any; namespace: string; - workq: any; // for serializing node operations + workq: Workq; // for serializing node operations eventStream: any; // events from the registry // Create node operator object. @@ -92,7 +92,7 @@ export class NodeOperator { ) { assert(registry); this.namespace = namespace; - this.workq = new Workq(); + this.workq = new Workq('mayastornode'); this.registry = registry; this.watcher = new CustomResourceCache( this.namespace, diff --git a/csi/moac/package.json b/csi/moac/package.json index a4a418aba..9c9b2f14d 100644 --- a/csi/moac/package.json +++ b/csi/moac/package.json @@ -14,8 +14,8 @@ }, "scripts": { "prepare": "./bundle_protos.sh", - "clean": "rm -f replica.js pool.js nexus.js", - "purge": "rm -rf node_modules proto node.js replica.js pool.js nexus.js watcher.js node_operator.js pool_operator.js volume.js volumes.js volume_operator.js *.js.map", + "clean": "rm -f csi.js node.js replica.js pool.js nexus.js watcher.js node_operator.js pool_operator.js volume.js volumes.js volume_operator.js workq.js *.js.map", + "purge": "rm -rf node_modules proto csi.js node.js replica.js pool.js nexus.js watcher.js node_operator.js pool_operator.js volume.js volumes.js volume_operator.js workq.js *.js.map", "compile": "tsc --pretty", "start": "./index.js", "test": "mocha test/index.js", diff --git a/csi/moac/pool.ts b/csi/moac/pool.ts index 959c5d58b..65f4715f2 100644 --- a/csi/moac/pool.ts +++ b/csi/moac/pool.ts @@ -1,7 +1,8 @@ // Pool object implementation. -const _ = require('lodash'); -const assert = require('assert'); +import assert from 'assert'; +import * as _ from 'lodash'; + const { GrpcCode, GrpcError } = require('./grpc_client'); const log = require('./logger').Logger('pool'); diff --git a/csi/moac/pool_operator.ts b/csi/moac/pool_operator.ts index ae67d7714..c2cffcb21 100644 --- a/csi/moac/pool_operator.ts +++ b/csi/moac/pool_operator.ts @@ -13,11 +13,11 @@ import { CustomResourceCache, CustomResourceMeta, } from './watcher'; +import { Workq } from './workq'; const yaml = require('js-yaml'); const log = require('./logger').Logger('pool-operator'); const EventStream = require('./event_stream'); -const Workq = require('./workq'); const RESOURCE_NAME: string = 'mayastorpool'; const POOL_FINALIZER = 'finalizer.mayastor.openebs.io'; @@ -125,7 +125,7 @@ export class PoolOperator { watcher: CustomResourceCache; // k8s resource watcher for pools registry: any; // registry containing info about mayastor nodes eventStream: any; // A stream of node and pool events. - workq: any; // for serializing pool operations + workq: Workq; // for serializing pool operations // Create pool operator. // @@ -142,7 +142,7 @@ export class PoolOperator { this.namespace = namespace; this.registry = registry; // registry containing info about mayastor nodes this.eventStream = null; // A stream of node and pool events. - this.workq = new Workq(); // for serializing pool operations + this.workq = new Workq('mayastorpool'); // for serializing pool operations this.watcher = new CustomResourceCache( this.namespace, RESOURCE_NAME, diff --git a/csi/moac/replica.ts b/csi/moac/replica.ts index 713d28aed..5f4aa79c7 100644 --- a/csi/moac/replica.ts +++ b/csi/moac/replica.ts @@ -1,6 +1,8 @@ // Replica object implementation. -const assert = require('assert'); +import assert from 'assert'; +import * as _ from 'lodash'; + const { GrpcCode, GrpcError } = require('./grpc_client'); const log = require('./logger').Logger('replica'); diff --git a/csi/moac/test/csi_test.js b/csi/moac/test/csi_test.js index 27c42a109..206a4b7d1 100644 --- a/csi/moac/test/csi_test.js +++ b/csi/moac/test/csi_test.js @@ -7,6 +7,7 @@ const fs = require('fs').promises; const grpc = require('grpc-uds'); const grpcPromise = require('grpc-promise'); const sinon = require('sinon'); +const sleep = require('sleep-promise'); const { CsiServer, csi } = require('../csi'); const { GrpcError, GrpcCode } = require('../grpc_client'); const Registry = require('../registry'); @@ -202,7 +203,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 10, limitBytes: 20, - protocol: 'nbd' + protocol: 'nvmf' }); sinon.stub(returnedVolume, 'getSize').returns(20); sinon.stub(returnedVolume, 'getNodeName').returns('some-node'); @@ -246,31 +247,6 @@ module.exports = function () { expect(result.volume.accessibleTopology).to.have.lengthOf(0); }); - it('should create a volume that can be accessed only locally', async () => { - createVolumeStub.resolves(returnedVolume); - const parameters = { protocol: 'nbd', repl: 3, blah: 'again' }; - const result = await client.createVolume().sendMessage({ - name: 'pvc-' + UUID, - capacityRange: { - requiredBytes: 10, - limitBytes: 20 - }, - volumeCapabilities: [ - { - accessMode: { mode: 'SINGLE_NODE_WRITER' }, - block: {} - } - ], - parameters: parameters - }); - expect(result.volume.accessibleTopology).to.have.lengthOf(1); - expect(result.volume.accessibleTopology[0]).to.eql({ - segments: { - 'kubernetes.io/hostname': 'some-node' - } - }); - }); - it('should fail if topology requirement other than hostname', async () => { createVolumeStub.resolves(returnedVolume); await shouldFailWith(GrpcCode.INVALID_ARGUMENT, () => @@ -290,7 +266,7 @@ module.exports = function () { requisite: [{ segments: { rack: 'some-rack-info' } }], preferred: [] }, - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }) ); }); @@ -311,7 +287,7 @@ module.exports = function () { block: {} } ], - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }) ); }); @@ -331,7 +307,7 @@ module.exports = function () { block: {} } ], - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }) ); }); @@ -353,7 +329,7 @@ module.exports = function () { filesystem: {} } ], - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }) ); }); @@ -373,7 +349,7 @@ module.exports = function () { filesystem: {} } ], - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }) ); }); @@ -395,7 +371,7 @@ module.exports = function () { accessibilityRequirements: { requisite: [{ segments: { 'kubernetes.io/hostname': 'node' } }] }, - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }); sinon.assert.calledWith(createVolumeStub, UUID, { replicaCount: 1, @@ -403,7 +379,7 @@ module.exports = function () { requiredNodes: ['node'], requiredBytes: 50, limitBytes: 0, - protocol: 'nbd' + protocol: 'nvmf' }); }); @@ -432,7 +408,7 @@ module.exports = function () { } ] }, - parameters: { protocol: 'nbd' } + parameters: { protocol: 'nvmf' } }); sinon.assert.calledWith(createVolumeStub, UUID, { replicaCount: 1, @@ -440,7 +416,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 50, limitBytes: 50, - protocol: 'nbd' + protocol: 'nvmf' }); }); @@ -458,7 +434,7 @@ module.exports = function () { block: {} } ], - parameters: { repl: '3', protocol: 'nbd' } + parameters: { repl: '3', protocol: 'nvmf' } }); sinon.assert.calledWith(createVolumeStub, UUID, { replicaCount: 3, @@ -466,7 +442,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 50, limitBytes: 70, - protocol: 'nbd' + protocol: 'nvmf' }); }); @@ -485,10 +461,54 @@ module.exports = function () { block: {} } ], - parameters: { repl: 'bla2', protocol: 'nbd' } + parameters: { repl: 'bla2', protocol: 'nvmf' } }) ); }); + + it('should detect duplicate create volume request', (done) => { + // We must sleep in the stub. Otherwise reply is sent before the second + // request comes in. + createVolumeStub.callsFake(async () => { + await sleep(10); + return returnedVolume; + }); + const create1 = client.createVolume().sendMessage({ + name: 'pvc-' + UUID, + capacityRange: { + requiredBytes: 50, + limitBytes: 70 + }, + volumeCapabilities: [ + { + accessMode: { mode: 'SINGLE_NODE_WRITER' }, + block: {} + } + ], + parameters: { repl: '3', protocol: 'nvmf' } + }); + const create2 = client.createVolume().sendMessage({ + name: 'pvc-' + UUID, + capacityRange: { + requiredBytes: 50, + limitBytes: 70 + }, + volumeCapabilities: [ + { + accessMode: { mode: 'SINGLE_NODE_WRITER' }, + block: {} + } + ], + parameters: { repl: '3', protocol: 'nvmf' } + }); + Promise.all([create1, create2]).then((results) => { + expect(results).to.have.lengthOf(2); + expect(results[0].volume.volumeId).to.equal(UUID); + expect(results[1].volume.volumeId).to.equal(UUID); + sinon.assert.calledOnce(createVolumeStub); + done(); + }); + }); }); describe('DeleteVolume', function () { @@ -525,6 +545,21 @@ module.exports = function () { sinon.assert.calledOnce(destroyVolumeStub); }); + + it('should detect duplicate delete volume request', (done) => { + // We must sleep in the stub. Otherwise reply is sent before the second + // request comes in. + destroyVolumeStub.callsFake(async () => { + await sleep(10); + }); + const delete1 = client.deleteVolume().sendMessage({ volumeId: UUID }); + const delete2 = client.deleteVolume().sendMessage({ volumeId: UUID }); + Promise.all([delete1, delete2]).then((results) => { + sinon.assert.calledOnce(destroyVolumeStub); + expect(results).to.have.lengthOf(2); + done(); + }); + }); }); describe('ListVolumes', function () { @@ -540,7 +575,7 @@ module.exports = function () { const vol = new Volume(uuidBase + i + j, registry, () => {}, { replicaCount: 3, requiredBytes: 100, - protocol: 'nbd' + protocol: 'nvmf' }); const getSizeStub = sinon.stub(vol, 'getSize'); getSizeStub.returns(100); @@ -626,9 +661,10 @@ module.exports = function () { }); it('should publish volume', async () => { + const nvmfUri = `nvmf://host/nqn-${UUID}`; const volume = new Volume(UUID, registry, () => {}, {}); const publishStub = sinon.stub(volume, 'publish'); - publishStub.resolves('/dev/sdb'); + publishStub.resolves(nvmfUri); const getNodeNameStub = sinon.stub(volume, 'getNodeName'); getNodeNameStub.returns('node'); getVolumesStub.returns(volume); @@ -644,46 +680,57 @@ module.exports = function () { mount_flags: 'ro' } }, - volumeContext: { protocol: 'iscsi' } + volumeContext: { protocol: 'nvmf' } }); - expect(reply.publishContext.uri).to.equal('/dev/sdb'); + expect(reply.publishContext.uri).to.equal(nvmfUri); sinon.assert.calledOnce(getVolumesStub); sinon.assert.calledWith(getVolumesStub, UUID); sinon.assert.calledOnce(publishStub); - sinon.assert.calledWith(publishStub, 'iscsi'); + sinon.assert.calledWith(publishStub, 'nvmf'); }); - it('should not publish volume if it does not exist', async () => { - getVolumesStub.returns(); - - await shouldFailWith(GrpcCode.NOT_FOUND, () => - client.controllerPublishVolume().sendMessage({ - volumeId: UUID, - nodeId: 'mayastor://node', - readonly: false, - volumeCapability: { - accessMode: { mode: 'SINGLE_NODE_WRITER' }, - mount: { - fsType: 'xfs', - mount_flags: 'ro' - } - }, - volumeContext: { protocol: 'nbd' } - }) - ); - sinon.assert.calledOnce(getVolumesStub); - sinon.assert.calledWith(getVolumesStub, UUID); - }); - - it('should not publish volume over nbd on a different node', async () => { + it('should detect duplicate publish volume request', (done) => { + const iscsiUri = `iscsi://host/iqn-${UUID}`; + const publishArgs = { + volumeId: UUID, + nodeId: 'mayastor://node2', + readonly: false, + volumeCapability: { + accessMode: { mode: 'SINGLE_NODE_WRITER' }, + mount: { + fsType: 'xfs', + mount_flags: 'ro' + } + }, + volumeContext: { protocol: 'iscsi' } + }; const volume = new Volume(UUID, registry, () => {}, {}); const publishStub = sinon.stub(volume, 'publish'); - publishStub.resolves(); + // We must sleep in the stub. Otherwise reply is sent before the second + // request comes in. + publishStub.callsFake(async () => { + await sleep(10); + return iscsiUri; + }); const getNodeNameStub = sinon.stub(volume, 'getNodeName'); - getNodeNameStub.returns('another-node'); + getNodeNameStub.returns('node'); getVolumesStub.returns(volume); - await shouldFailWith(GrpcCode.INVALID_ARGUMENT, () => + const publish1 = client.controllerPublishVolume().sendMessage(publishArgs); + const publish2 = client.controllerPublishVolume().sendMessage(publishArgs); + Promise.all([publish1, publish2]).then((results) => { + sinon.assert.calledOnce(publishStub); + expect(results).to.have.lengthOf(2); + expect(results[0].publishContext.uri).to.equal(iscsiUri); + expect(results[1].publishContext.uri).to.equal(iscsiUri); + done(); + }); + }); + + it('should not publish volume if it does not exist', async () => { + getVolumesStub.returns(); + + await shouldFailWith(GrpcCode.NOT_FOUND, () => client.controllerPublishVolume().sendMessage({ volumeId: UUID, nodeId: 'mayastor://node', @@ -695,12 +742,11 @@ module.exports = function () { mount_flags: 'ro' } }, - volumeContext: { protocol: 'nbd' } + volumeContext: { protocol: 'nvmf' } }) ); sinon.assert.calledOnce(getVolumesStub); sinon.assert.calledWith(getVolumesStub, UUID); - sinon.assert.notCalled(publishStub); }); it('should not publish readonly volume', async () => { @@ -723,7 +769,7 @@ module.exports = function () { mount_flags: 'ro' } }, - volumeContext: { protocol: 'nbd' } + volumeContext: { protocol: 'nvmf' } }) ); }); @@ -748,7 +794,7 @@ module.exports = function () { mount_flags: 'ro' } }, - volumeContext: { protocol: 'nbd' } + volumeContext: { protocol: 'nvmf' } }) ); }); @@ -773,7 +819,7 @@ module.exports = function () { mount_flags: 'ro' } }, - volumeContext: { protocol: 'nbd' } + volumeContext: { protocol: 'nvmf' } }) ); }); @@ -883,6 +929,31 @@ module.exports = function () { sinon.assert.calledWith(getVolumesStub, UUID); sinon.assert.calledOnce(unpublishStub); }); + + it('should detect duplicate unpublish volume request', (done) => { + const unpublishArgs = { + volumeId: UUID, + nodeId: 'mayastor://another-node' + }; + const volume = new Volume(UUID, registry, () => {}, {}); + const unpublishStub = sinon.stub(volume, 'unpublish'); + // We must sleep in the stub. Otherwise reply is sent before the second + // request comes in. + unpublishStub.callsFake(async () => { + await sleep(10); + }); + const getNodeNameStub = sinon.stub(volume, 'getNodeName'); + getNodeNameStub.returns('node'); + getVolumesStub.returns(volume); + + const unpublish1 = client.controllerUnpublishVolume().sendMessage(unpublishArgs); + const unpublish2 = client.controllerUnpublishVolume().sendMessage(unpublishArgs); + Promise.all([unpublish1, unpublish2]).then((results) => { + sinon.assert.calledOnce(unpublishStub); + expect(results).to.have.lengthOf(2); + done(); + }); + }); }); describe('ValidateVolumeCapabilities', function () { diff --git a/csi/moac/test/mayastor_mock.js b/csi/moac/test/mayastor_mock.js index 7c2438851..28484d5d0 100644 --- a/csi/moac/test/mayastor_mock.js +++ b/csi/moac/test/mayastor_mock.js @@ -13,7 +13,7 @@ const STAT_DELTA = 1000; // but also that it has not a default value (empty string, zero, ...). function assertHasKeys (obj, keys, empty) { empty = empty || []; - for (var key in obj) { + for (const key in obj) { if (keys.indexOf(key) < 0) { assert( false, @@ -21,7 +21,7 @@ function assertHasKeys (obj, keys, empty) { ); } } - for (var i = 0; i < keys.length; i++) { + for (let i = 0; i < keys.length; i++) { const key = keys[i]; const val = obj[key]; if ( @@ -43,7 +43,7 @@ function assertHasKeys (obj, keys, empty) { // The actual state (i.e. list of pools) can be retrieved by get*() method. class MayastorServer { constructor (endpoint, pools, replicas, nexus) { - var packageDefinition = protoLoader.loadSync( + const packageDefinition = protoLoader.loadSync( path.join(__dirname, '..', 'proto', 'mayastor.proto'), { keepCase: false, @@ -53,15 +53,15 @@ class MayastorServer { oneofs: true } ); - var mayastor = grpc.loadPackageDefinition(packageDefinition).mayastor; - var srv = new grpc.Server(); + const mayastor = grpc.loadPackageDefinition(packageDefinition).mayastor; + const srv = new grpc.Server(); this.pools = _.cloneDeep(pools || []); this.replicas = _.cloneDeep(replicas || []); this.nexus = _.cloneDeep(nexus || []); this.statCounter = 0; - var self = this; + const self = this; srv.addService(mayastor.Mayastor.service, { // When a pool is created we implicitly set state to POOL_ONLINE, // capacity to 100 and used to 4. @@ -72,7 +72,7 @@ class MayastorServer { ['name', 'disks'], [] ); - var pool = self.pools.find((p) => p.name === args.name); + let pool = self.pools.find((p) => p.name === args.name); if (!pool) { pool = { name: args.name, @@ -88,7 +88,7 @@ class MayastorServer { destroyPool: (call, cb) => { const args = call.request; assertHasKeys(args, ['name']); - var idx = self.pools.findIndex((p) => p.name === args.name); + const idx = self.pools.findIndex((p) => p.name === args.name); if (idx >= 0) { self.pools.splice(idx, 1); } @@ -102,7 +102,7 @@ class MayastorServer { createReplica: (call, cb) => { const args = call.request; assertHasKeys(args, ['uuid', 'pool', 'size', 'thin', 'share']); - var r = self.replicas.find((r) => r.uuid === args.uuid); + let r = self.replicas.find((r) => r.uuid === args.uuid); if (r) { return cb(null, r); } @@ -115,7 +115,7 @@ class MayastorServer { if (!args.thin) { pool.used += args.size; } - var uri; + let uri; if (args.share === 'REPLICA_NONE') { uri = 'bdev:///' + args.uuid; } else if (args.share === 'REPLICA_ISCSI') { @@ -136,13 +136,13 @@ class MayastorServer { cb(null, r); }, destroyReplica: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid']); - var idx = self.replicas.findIndex((r) => r.uuid === args.uuid); + const idx = self.replicas.findIndex((r) => r.uuid === args.uuid); if (idx >= 0) { const r = self.replicas.splice(idx, 1)[0]; if (!r.thin) { - var pool = self.pools.find((p) => p.name === r.pool); + const pool = self.pools.find((p) => p.name === r.pool); pool.used -= r.size; } } @@ -194,7 +194,7 @@ class MayastorServer { createNexus: (call, cb) => { const args = call.request; assertHasKeys(args, ['uuid', 'size', 'children']); - var nexus = self.nexus.find((r) => r.uuid === args.uuid); + let nexus = self.nexus.find((r) => r.uuid === args.uuid); if (!nexus) { nexus = { uuid: args.uuid, @@ -214,9 +214,9 @@ class MayastorServer { cb(null, nexus); }, destroyNexus: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid']); - var idx = self.nexus.findIndex((n) => n.uuid === args.uuid); + const idx = self.nexus.findIndex((n) => n.uuid === args.uuid); if (idx >= 0) { self.nexus.splice(idx, 1); } @@ -226,14 +226,14 @@ class MayastorServer { cb(null, { nexusList: self.nexus }); }, publishNexus: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid', 'share', 'key'], ['key']); - assert.equal(0, args.share); // Must be value of NEXUS_NBD for now - var idx = self.nexus.findIndex((n) => n.uuid === args.uuid); + assert.equal(1, args.share); // Must be value of NEXUS_NVMF for now + const idx = self.nexus.findIndex((n) => n.uuid === args.uuid); if (idx >= 0) { - self.nexus[idx].deviceUri = 'file:///dev/nbd0'; + self.nexus[idx].deviceUri = 'nvmf://host/nqn'; cb(null, { - deviceUri: 'file:///dev/nbd0' + deviceUri: 'nvmf://host/nqn' }); } else { const err = new Error('not found'); @@ -242,9 +242,9 @@ class MayastorServer { } }, unpublishNexus: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid']); - var idx = self.nexus.findIndex((n) => n.uuid === args.uuid); + const idx = self.nexus.findIndex((n) => n.uuid === args.uuid); if (idx >= 0) { delete self.nexus[idx].deviceUri; cb(null, {}); @@ -255,9 +255,9 @@ class MayastorServer { } }, addChildNexus: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid', 'uri', 'norebuild']); - var n = self.nexus.find((n) => n.uuid === args.uuid); + const n = self.nexus.find((n) => n.uuid === args.uuid); if (!n) { const err = new Error('not found'); err.code = grpc.status.NOT_FOUND; @@ -276,9 +276,9 @@ class MayastorServer { }); }, removeChildNexus: (call, cb) => { - var args = call.request; + const args = call.request; assertHasKeys(args, ['uuid', 'uri']); - var n = self.nexus.find((n) => n.uuid === args.uuid); + const n = self.nexus.find((n) => n.uuid === args.uuid); if (!n) { const err = new Error('not found'); err.code = grpc.status.NOT_FOUND; diff --git a/csi/moac/test/nexus_test.js b/csi/moac/test/nexus_test.js index a099c6d8c..6507fd664 100644 --- a/csi/moac/test/nexus_test.js +++ b/csi/moac/test/nexus_test.js @@ -94,7 +94,7 @@ module.exports = function () { }); it('should emit event upon change of deviceUri property', () => { - newProps.deviceUri = 'file:///dev/nbd0'; + newProps.deviceUri = 'nvmf://host/nqn'; nexus.merge(newProps); // First event is new nexus event @@ -103,7 +103,7 @@ module.exports = function () { eventType: 'mod', object: nexus }); - expect(nexus.deviceUri).to.equal('file:///dev/nbd0'); + expect(nexus.deviceUri).to.equal('nvmf://host/nqn'); }); it('should emit event upon change of state property', () => { @@ -231,18 +231,18 @@ module.exports = function () { }); }); - it('should publish the nexus with nbd protocol', async () => { - callStub.resolves({ deviceUri: 'file:///dev/nbd0' }); + it('should publish the nexus with nvmf protocol', async () => { + callStub.resolves({ deviceUri: 'nvmf://host/nqn' }); - await nexus.publish('nbd'); + await nexus.publish('nvmf'); sinon.assert.calledOnce(callStub); sinon.assert.calledWith(callStub, 'publishNexus', { uuid: UUID, key: '', - share: 0 // Nbd for now + share: 1 }); - expect(nexus.deviceUri).to.equal('file:///dev/nbd0'); + expect(nexus.deviceUri).to.equal('nvmf://host/nqn'); sinon.assert.calledOnce(eventSpy); sinon.assert.calledWith(eventSpy, 'nexus', { eventType: 'mod', diff --git a/csi/moac/test/volume_operator_test.js b/csi/moac/test/volume_operator_test.js index 9947f3bba..b9fa01fb3 100644 --- a/csi/moac/test/volume_operator_test.js +++ b/csi/moac/test/volume_operator_test.js @@ -52,7 +52,7 @@ const defaultSpec = { requiredNodes: ['node2'], requiredBytes: 100, limitBytes: 120, - protocol: 'nbd' + protocol: 'nvmf' }; const defaultStatus = { @@ -60,7 +60,7 @@ const defaultStatus = { targetNodes: ['node2'], state: 'healthy', nexus: { - deviceUri: 'file:///dev/nbd0', + deviceUri: 'nvmf://host/nqn', state: 'NEXUS_ONLINE', node: 'node2', children: [ @@ -129,7 +129,7 @@ module.exports = function () { node: 'node2', state: 'healthy', nexus: { - deviceUri: 'file:///dev/nbd0', + deviceUri: 'nvmf://host/nqn', state: 'NEXUS_ONLINE', node: 'node2', children: [ @@ -160,7 +160,7 @@ module.exports = function () { expect(res.spec.limitBytes).to.equal(120); expect(res.status.size).to.equal(110); expect(res.status.state).to.equal('healthy'); - expect(res.status.nexus.deviceUri).to.equal('file:///dev/nbd0'); + expect(res.status.nexus.deviceUri).to.equal('nvmf://host/nqn'); expect(res.status.nexus.state).to.equal('NEXUS_ONLINE'); expect(res.status.nexus.node).to.equal('node2'); expect(res.status.nexus.children).to.have.length(1); @@ -452,7 +452,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 130, - protocol: 'nbd' + protocol: 'nvmf' }, defaultStatus ); @@ -500,7 +500,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 111, limitBytes: 130, - protocol: 'nbd' + protocol: 'nvmf' }, defaultStatus ); @@ -765,7 +765,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 130, - protocol: 'nbd' + protocol: 'nvmf' }; const volume = new Volume(UUID, registry, () => {}, newSpec); volumes.emit('volume', { @@ -795,7 +795,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 130, - protocol: 'nbd' + protocol: 'nvmf' }; const volume = new Volume(UUID, registry, () => {}, newSpec); volumes.emit('volume', { diff --git a/csi/moac/test/volume_test.js b/csi/moac/test/volume_test.js index 6400cac08..1dd2bcd1d 100644 --- a/csi/moac/test/volume_test.js +++ b/csi/moac/test/volume_test.js @@ -91,10 +91,10 @@ module.exports = function () { const node = new Node('node'); const stub = sinon.stub(node, 'call'); stub.onCall(0).resolves({}); - stub.onCall(1).resolves({ deviceUri: 'file:///dev/nbd0' }); + stub.onCall(1).resolves({ deviceUri: 'nvmf://host/nqn' }); shouldFailWith(GrpcCode.INTERNAL, async () => { - await volume.publish('nbd'); + await volume.publish('nvmf'); }); sinon.assert.notCalled(stub); }); @@ -103,10 +103,10 @@ module.exports = function () { const [volume, node] = createFakeVolume('healthy'); const stub = sinon.stub(node, 'call'); stub.onCall(0).resolves({ uuid: UUID, size: 100, state: 'NEXUS_ONLINE', children: [{ uri: `bdev:///${UUID}`, state: 'CHILD_ONLINE' }] }); - stub.onCall(1).resolves({ deviceUri: 'file:///dev/nbd0' }); + stub.onCall(1).resolves({ deviceUri: 'nvmf://host/nqn' }); - const uri = await volume.publish('nbd'); - expect(uri).to.equal('file:///dev/nbd0'); + const uri = await volume.publish('nvmf'); + expect(uri).to.equal('nvmf://host/nqn'); sinon.assert.calledTwice(stub); sinon.assert.calledWithMatch(stub.firstCall, 'createNexus', { uuid: UUID, @@ -126,10 +126,10 @@ module.exports = function () { nexus.bind(node); volume.newNexus(nexus); - stub.resolves({ deviceUri: 'file:///dev/nbd0' }); - const uri = await volume.publish('nbd'); - expect(uri).to.equal('file:///dev/nbd0'); - expect(nexus.deviceUri).to.equal('file:///dev/nbd0'); + stub.resolves({ deviceUri: 'nvmf://host/nqn' }); + const uri = await volume.publish('nvmf'); + expect(uri).to.equal('nvmf://host/nqn'); + expect(nexus.deviceUri).to.equal('nvmf://host/nqn'); sinon.assert.calledOnce(stub); sinon.assert.calledWithMatch(stub, 'publishNexus', { uuid: UUID, @@ -144,10 +144,10 @@ module.exports = function () { const getUriStub = sinon.stub(nexus, 'getUri'); nexus.bind(node); volume.newNexus(nexus); - getUriStub.returns('file:///dev/nbd0'); + getUriStub.returns('nvmf://host/nqn'); - const uri = await volume.publish('nbd'); - expect(uri).to.equal('file:///dev/nbd0'); + const uri = await volume.publish('nvmf'); + expect(uri).to.equal('nvmf://host/nqn'); sinon.assert.notCalled(stub); sinon.assert.calledOnce(getUriStub); }); @@ -160,13 +160,13 @@ module.exports = function () { nexus.bind(node); volume.newNexus(nexus); volume.publishedOn = node.name; - getUriStub.returns('file:///dev/nbd0'); + getUriStub.returns('nvmf://host/nqn'); stub.onCall(0).resolves({}); await volume.unpublish(); expect(volume.getNodeName()).to.be.undefined(); sinon.assert.calledOnce(stub); - sinon.assert.calledWithMatch(stub, 'destroyNexus', { + sinon.assert.calledWithMatch(stub, 'unpublishNexus', { uuid: UUID }); }); @@ -184,10 +184,7 @@ module.exports = function () { await volume.unpublish(); expect(volume.getNodeName()).to.be.undefined(); - sinon.assert.calledOnce(stub); - sinon.assert.calledWithMatch(stub, 'destroyNexus', { - uuid: UUID - }); + sinon.assert.notCalled(stub); }); it('should unpublish volume without nexus', async () => { diff --git a/csi/moac/test/volumes_test.js b/csi/moac/test/volumes_test.js index f126d285f..66d43840a 100644 --- a/csi/moac/test/volumes_test.js +++ b/csi/moac/test/volumes_test.js @@ -33,6 +33,7 @@ module.exports = function () { let nexus, replica1, replica2; let volume; let volEvents; + let isSynced1, isSynced2, isSynced3; // Create pristine test env with 3 pools on 3 nodes function createTestEnv () { @@ -41,6 +42,13 @@ module.exports = function () { node1 = new Node('node1'); node2 = new Node('node2'); node3 = new Node('node3'); + isSynced1 = sinon.stub(node1, 'isSynced'); + isSynced1.returns(true); + isSynced2 = sinon.stub(node2, 'isSynced'); + isSynced2.returns(true); + isSynced3 = sinon.stub(node3, 'isSynced'); + isSynced3.returns(true); + // pools sorted from the most to the least preferred pool1 = new Pool({ name: 'pool1', @@ -113,7 +121,7 @@ module.exports = function () { nexus = new Nexus({ uuid: UUID, size: 95, - deviceUri: 'file:///dev/nbd0', + deviceUri: 'nvmf://host/nqn', state: 'NEXUS_ONLINE', children: [ { @@ -143,7 +151,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }, 'pending', 95, published ? 'node1' : undefined); volumes.volumes[UUID] = volume; @@ -175,7 +183,7 @@ module.exports = function () { requiredNodes: ['node2', 'node3'], requiredBytes: 100, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }) ); expect(volEvents).to.have.lengthOf(3); @@ -196,6 +204,18 @@ module.exports = function () { share: 'REPLICA_NONE', uri: 'bdev:///' + UUID }); + stub1.onCall(1).resolves({ + uuid: UUID, + size: 90, + state: 'NEXUS_ONLINE', + children: [ + { + uri: 'bdev:///' + UUID, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + } + ] + }); volumes.start(); volume = await volumes.createVolume(UUID, { @@ -204,10 +224,10 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 0, - protocol: 'nbd' + protocol: 'nvmf' }); + await waitUntil(() => volume.state === 'healthy', 'healthy volume'); expect(volume.size).to.equal(90); - expect(volume.state).to.equal('healthy'); sinon.assert.calledWithMatch(stub1.firstCall, 'createReplica', { uuid: UUID, pool: 'pool1', @@ -215,8 +235,7 @@ module.exports = function () { thin: false, share: 'REPLICA_NONE' }); - // 1 new + 2 mods - expect(volEvents).to.have.lengthOf(3); + expect(volEvents).to.have.lengthOf(5); }); it('should limit the size of created volume', async () => { @@ -229,6 +248,18 @@ module.exports = function () { share: 'REPLICA_NONE', uri: 'bdev:///' + UUID }); + stub1.onCall(1).resolves({ + uuid: UUID, + size: 50, + state: 'NEXUS_ONLINE', + children: [ + { + uri: 'bdev:///' + UUID, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + } + ] + }); volumes.start(); volume = await volumes.createVolume(UUID, { @@ -237,10 +268,10 @@ module.exports = function () { requiredNodes: [], requiredBytes: 10, limitBytes: 50, - protocol: 'nbd' + protocol: 'nvmf' }); + await waitUntil(() => volume.state === 'healthy', 'healthy volume'); expect(volume.size).to.equal(50); - expect(volume.state).to.equal('healthy'); sinon.assert.calledWithMatch(stub1.firstCall, 'createReplica', { uuid: UUID, pool: 'pool1', @@ -248,8 +279,7 @@ module.exports = function () { thin: false, share: 'REPLICA_NONE' }); - // 1 new + 2 mods - expect(volEvents).to.have.lengthOf(3); + expect(volEvents).to.have.lengthOf(5); }); it('should fail if the size is zero', async () => { @@ -261,7 +291,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 0, limitBytes: 0, - protocol: 'nbd' + protocol: 'nvmf' }) ); sinon.assert.notCalled(stub1); @@ -300,22 +330,28 @@ module.exports = function () { requiredNodes: [], requiredBytes: 10, limitBytes: 50, - protocol: 'nbd' + protocol: 'nvmf' }); + await waitUntil(() => volume.state === 'faulted', 'faulted volume'); sinon.assert.notCalled(stub2); sinon.assert.notCalled(stub3); - sinon.assert.notCalled(stub1); + sinon.assert.calledOnce(stub1); + sinon.assert.calledWithMatch(stub1.firstCall, 'createNexus', { + uuid: UUID, + size: 10, + children: [`bdev:///${UUID}`] + }); expect(Object.keys(volume.replicas)).to.have.lengthOf(1); expect(Object.values(volume.replicas)[0]).to.equal(replica); - expect(volume.state).to.equal('healthy'); - expect(volEvents).to.have.lengthOf(3); + expect(volEvents).to.have.lengthOf(4); expect(volEvents[0].eventType).to.equal('new'); expect(volEvents[1].eventType).to.equal('mod'); expect(volEvents[2].eventType).to.equal('mod'); + expect(volEvents[3].eventType).to.equal('mod'); }); it('should create the volume object and include pre-existing nexus', async () => { - // on node 1 is created replica and nexus + // on node 1 is created the replica and added to the nexus stub1.onCall(0).resolves({ uuid: UUID, pool: 'pool1', @@ -336,7 +372,7 @@ module.exports = function () { state: 'NEXUS_ONLINE', children: [ { - uri: `nvmf:///blabla/${UUID}`, + uri: `nvmf://blabla/${UUID}`, state: 'CHILD_ONLINE', rebuildProgress: 0 } @@ -353,7 +389,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 10, limitBytes: 50, - protocol: 'nbd' + protocol: 'nvmf' }); await waitUntil( () => @@ -378,7 +414,7 @@ module.exports = function () { }); expect(Object.keys(volume.replicas)).to.have.lengthOf(1); expect(volume.nexus).to.equal(nexus); - expect(volEvents).to.have.lengthOf(6); + expect(volEvents).to.have.lengthOf(5); }); it('should distribute nexuses evenly over available nodes', async () => { @@ -516,6 +552,81 @@ module.exports = function () { expect(uri).to.equal(`nvmf://${UUID2}`); expect(volume2.publishedOn).to.equal('node2'); }); + + it('should serialize volume creation requests', (done) => { + // on node 1 is created replica and nexus + stub1.onCall(0).resolves({ + uuid: UUID, + pool: 'pool1', + size: 10, + thin: false, + share: 'REPLICA_NONE', + uri: `bdev:///${UUID}` + }); + stub1.onCall(1).resolves({ + uuid: UUID, + size: 10, + state: 'NEXUS_ONLINE', + children: [ + { + uri: `bdev:///${UUID}`, + state: 'CHILD_FAULTED', + rebuildProgress: 0 + } + ] + }); + // the same repeats for the second volume + stub2.onCall(0).resolves({ + uuid: UUID2, + pool: 'pool2', + size: 10, + thin: false, + share: 'REPLICA_NONE', + uri: `bdev:///${UUID2}` + }); + stub2.onCall(1).resolves({ + uuid: UUID2, + size: 10, + state: 'NEXUS_ONLINE', + children: [ + { + uri: `bdev:///${UUID2}`, + state: 'CHILD_FAULTED', + rebuildProgress: 0 + } + ] + }); + + volumes.start(); + + // Create both volumes at once + const create1 = volumes.createVolume(UUID, { + replicaCount: 1, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 10, + limitBytes: 50, + protocol: 'nvmf' + }); + const create2 = volumes.createVolume(UUID2, { + replicaCount: 1, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 10, + limitBytes: 50, + protocol: 'nvmf' + }); + + Promise.all([create1, create2]).then(() => { + expect(Object.keys(volumes.list())).to.have.lengthOf(2); + // If requests are properly serialized then all grpc calls related to + // the first volume should precede the second volume's requests. + sinon.assert.calledTwice(stub1); + sinon.assert.calledTwice(stub2); + expect(stub1.secondCall.calledBefore(stub2.firstCall)).to.be.true(); + done(); + }); + }); }); describe('import volume', function () { @@ -532,7 +643,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 10, limitBytes: 50, - protocol: 'nbd' + protocol: 'nvmf' }; it('should import a volume and fault it if there are no replicas', async () => { @@ -545,6 +656,15 @@ module.exports = function () { }); it('should import a volume without nexus', async () => { + // we use two replicas in this test because it uncovers some corner cases + const customVolumeSpec = { + replicaCount: 2, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 10, + limitBytes: 50, + protocol: 'nvmf' + }; const replica1 = new Replica({ uuid: UUID, size: 40, @@ -556,14 +676,31 @@ module.exports = function () { uuid: UUID, size: 40, share: 'REPLICA_NVMF', - uri: `nvmf:///${UUID}` + uri: `nvmf://${UUID}` }); replica2.pool = { node: node2 }; const getReplicaSetStub = sinon.stub(registry, 'getReplicaSet'); getReplicaSetStub.returns([replica1, replica2]); + // import creates a nexus + stub1.onCall(0).resolves({ + uuid: UUID, + deviceUri: '', + size: 95, + state: 'NEXUS_ONLINE', + children: [{ + uri: `bdev:///${UUID}`, + state: 'CHILD_ONLINE' + }, { + uri: `nvmf://${UUID}`, + state: 'CHILD_ONLINE' + }] + }); + // and then it is destroyed again + stub1.onCall(1).resolves({}); + volumes.start(); - volume = await volumes.importVolume(UUID, volumeSpec, { size: 40 }); + volume = await volumes.importVolume(UUID, customVolumeSpec, { size: 40 }); expect(volume.state).to.equal('unknown'); expect(Object.keys(volume.replicas)).to.have.lengthOf(2); // give FSA a chance to run @@ -571,7 +708,7 @@ module.exports = function () { expect(volume.nexus).to.be.null(); expect(volume.state).to.equal('healthy'); expect(volume.size).to.equal(40); - expect(volEvents).to.have.lengthOf(4); + expect(volEvents).to.have.lengthOf(3); }); it('should import unpublished volume with nexus', async () => { @@ -604,7 +741,7 @@ module.exports = function () { volume = await volumes.importVolume(UUID, volumeSpec, { size: 40 }); // give FSA a chance to run await sleep(EYE_BLINK_MS); - expect(volume.nexus.getUri()).to.be.undefined(); + expect(volume.nexus).to.be.null(); expect(Object.keys(volume.replicas)).to.have.lengthOf(1); expect(Object.values(volume.replicas)[0]).to.equal(replica); expect(volume.state).to.equal('healthy'); @@ -612,7 +749,7 @@ module.exports = function () { }); it('should import published volume with nexus', async () => { - const deviceUri = 'nbd:///dev/ndb0'; + const deviceUri = 'nvmf://host/nqn'; const replica = new Replica({ uuid: UUID, size: 40, @@ -648,7 +785,7 @@ module.exports = function () { expect(Object.keys(volume.replicas)).to.have.lengthOf(1); expect(Object.values(volume.replicas)[0]).to.equal(replica); expect(volume.state).to.equal('healthy'); - expect(volEvents).to.have.lengthOf(5); + expect(volEvents).to.have.lengthOf(4); }); }); @@ -694,7 +831,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }); volume.newReplica(replica); volumes.volumes[UUID] = volume; @@ -719,7 +856,7 @@ module.exports = function () { requiredNodes: [node1.name], requiredBytes: 89, limitBytes: 111, - protocol: 'nbd' + protocol: 'nvmf' }); sinon.assert.notCalled(stub1); sinon.assert.notCalled(stub2); @@ -742,7 +879,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }); sinon.assert.notCalled(stub1); sinon.assert.notCalled(stub2); @@ -759,7 +896,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 94, - protocol: 'nbd' + protocol: 'nvmf' }) ); }); @@ -772,7 +909,7 @@ module.exports = function () { requiredNodes: [], requiredBytes: 96, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }) ); }); @@ -784,335 +921,421 @@ module.exports = function () { requiredNodes: [node1.name], requiredBytes: 89, limitBytes: 111, - protocol: 'nvmf' + protocol: 'iscsi' })); }); }); describe('scale up/down', function () { - beforeEach(() => setUpReferenceEnv(true)); - afterEach(tearDownReferenceEnv); - - it('should scale up if a child is faulted', async () => { - // on node 3 is created the new replica - stub3.onCall(0).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); - // the faulted replica should be eventually removed - stub2.onCall(0).resolves({}); - // nexus should be updated twice (add and remove a replica) - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); - stub1.onCall(1).resolves({}); - - nexus.children[1].state = 'CHILD_FAULTED'; - registry.emit('nexus', { - eventType: 'mod', - object: nexus - }); + describe('with used nexus', function () { + beforeEach(() => setUpReferenceEnv(true)); + afterEach(tearDownReferenceEnv); + + it('should scale up if a child is faulted', async () => { + // on node 3 is created the new replica + stub3.onCall(0).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); + // the faulted replica should be eventually removed + stub2.onCall(0).resolves({}); + // nexus should be updated twice (add and remove a replica) + stub1.onCall(0).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + stub1.onCall(1).resolves({}); + // and finally the nexus should be destroyed again + stub1.onCall(2).resolves({}); + + nexus.children[1].state = 'CHILD_FAULTED'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); - await waitUntil( - () => - nexus.children.length === 3 && - nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), - 'new replica' - ); - - expect(volume.state).to.equal('degraded'); - const child = nexus.children.find((ch) => ch.uri === 'nvmf://replica3'); - child.state = 'CHILD_ONLINE'; - registry.emit('nexus', { - eventType: 'mod', - object: nexus - }); - - await waitUntil( - () => - nexus.children.length === 2 && - !nexus.children.find((ch) => ch.uri === `nvmf://remote/${UUID}`) && - nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), - 'faulted replica removal' - ); - expect(volume.state).to.equal('healthy'); - }); - - it('should scale up if replicaCount is increased', async () => { - // on node 3 is created the new replica - stub3.onCall(0).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); - // nexus should be updated to add the new child - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); - - // update the spec - volumes.createVolume(UUID, { - replicaCount: 3, - preferredNodes: [], - requiredNodes: [], - requiredBytes: 90, - limitBytes: 110, - protocol: 'nbd' - }); - - await waitUntil( - () => - nexus.children.length === 3 && - nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), - 'new replica' - ); - expect(volume.state).to.equal('degraded'); - }); - - it('should not scale up if the replica is there but just being rebuilt', async () => { - // this would have been normally done but should not be the case now - stub3.onCall(0).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); - - nexus.children[0].state = 'CHILD_DEGRADED'; - registry.emit('nexus', { - eventType: 'mod', - object: nexus - }); - await waitUntil(() => volume.state === 'degraded', 'degraded volume'); - - try { await waitUntil( - () => nexus.children.length === 3, - 100, // 100 ms - 'new replica not to appear' + () => + nexus.children.length === 3 && + nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), + 'new replica' ); - } catch (err) { - // we are fine - expect(volume.nexus.children).to.have.lengthOf(2); - expect(volume.state).to.equal('degraded'); - return; - } - throw new Error('well, the new replica did appear'); - }); - - it('should not scale up if replica is offline but the child is online', async () => { - // this would have been normally done but should not be the case now - stub3.onCall(0).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); - replica1.offline(); + expect(volume.state).to.equal('degraded'); + const child = nexus.children.find((ch) => ch.uri === 'nvmf://replica3'); + child.state = 'CHILD_ONLINE'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); - try { await waitUntil( - () => nexus.children.length === 3, - 100, // 100 ms - 'new replica not to appear' + () => + nexus.children.length === 2 && + !nexus.children.find((ch) => ch.uri === `nvmf://remote/${UUID}`) && + nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), + 'faulted replica removal' ); - } catch (err) { - // we are fine - expect(volume.nexus.children).to.have.lengthOf(2); expect(volume.state).to.equal('healthy'); - return; - } - throw new Error('well, the new replica did appear'); - }); - - it('should scale down if replicaCount is decreased', async () => { - // node 1: updated nexus (remove-child) - stub1.onCall(0).resolves({}); - // node 2: destroyed replica - stub2.onCall(1).resolves({}); - - // update the spec - volumes.createVolume(UUID, { - replicaCount: 1, - preferredNodes: [], - requiredNodes: [], - requiredBytes: 90, - limitBytes: 110, - protocol: 'nbd' - }); - - await waitUntil( - () => - nexus.children.length === 1 && - !nexus.children.find((ch) => ch.uri === `nvmf://remote/${UUID}`), - 'replica to be destroyed' - ); - expect(volume.state).to.equal('healthy'); - }); - - it('should not scale down if a rebuild is in progress', async () => { - // node 1: updated nexus (remove-child) - stub1.onCall(0).resolves({}); - // node 2: destroyed replica - stub2.onCall(1).resolves({}); - - nexus.children[0].state = 'CHILD_DEGRADED'; - registry.emit('nexus', { - eventType: 'mod', - object: nexus }); - await waitUntil(() => volume.state === 'degraded', 'degraded volume'); - // update the spec - volumes.createVolume(UUID, { - replicaCount: 1, - preferredNodes: [], - requiredNodes: [], - requiredBytes: 90, - limitBytes: 110, - protocol: 'nbd' - }); + it('should not scale up if the replica is there but just being rebuilt', async () => { + // this would have been normally done but should not be the case now + stub3.onCall(0).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); + stub1.onCall(0).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + + nexus.children[0].state = 'CHILD_DEGRADED'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); + await waitUntil(() => volume.state === 'degraded', 'degraded volume'); + + try { + await waitUntil( + () => nexus.children.length === 3, + 100, // 100 ms + 'new replica not to appear' + ); + } catch (err) { + // we are fine + expect(volume.nexus.children).to.have.lengthOf(2); + expect(volume.state).to.equal('degraded'); + return; + } + throw new Error('well, the new replica did appear'); + }); + + it('should not scale up if replica is offline but the child is online', async () => { + // this would have been normally done but should not be the case now + stub3.onCall(0).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); + stub1.onCall(0).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + + replica1.offline(); + + try { + await waitUntil( + () => nexus.children.length === 3, + 100, // 100 ms + 'new replica not to appear' + ); + } catch (err) { + // we are fine + expect(volume.nexus.children).to.have.lengthOf(2); + expect(volume.state).to.equal('healthy'); + return; + } + throw new Error('well, the new replica did appear'); + }); + + it('should not scale down if a rebuild is in progress', async () => { + // node 1: updated nexus (remove-child) + stub1.onCall(0).resolves({}); + // node 2: destroyed replica + stub2.onCall(1).resolves({}); + + nexus.children[0].state = 'CHILD_DEGRADED'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); + await waitUntil(() => volume.state === 'degraded', 'degraded volume'); + + // update the spec + await volumes.createVolume(UUID, { + replicaCount: 1, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 90, + limitBytes: 110, + protocol: 'nvmf' + }); + + try { + await waitUntil( + () => nexus.children.length === 1, + 100, + 'replica to be destroyed' + ); + } catch (err) { + expect(volume.state).to.equal('degraded'); + return; + } + throw new Error('The replica was removed even if in rebuild state'); + }); + + it('should scale up and then scale down when a volume is moved', async () => { + // on node 3 is created the new replica + stub3.onCall(0).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); + // nexus should be updated to add the new child + stub1.onCall(0).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + + // update the spec: node2 remains but the first replica should move + // from node1 to node3 + volume = await volumes.createVolume(UUID, { + replicaCount: 2, + preferredNodes: [], + requiredNodes: ['node2', 'node3'], + requiredBytes: 90, + limitBytes: 110, + protocol: 'nvmf' + }); - try { await waitUntil( - () => nexus.children.length === 1, - 100, - 'replica to be destroyed' + () => nexus.children.length === 3 && volume.state === 'degraded', + 'new replica' ); - } catch (err) { - expect(volume.state).to.equal('degraded'); - return; - } - throw new Error('The replica was removed even if in rebuild state'); - }); - it('should scale up and then scale down when a volume is moved', async () => { - // on node 3 is created the new replica - stub3.onCall(0).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); - // nexus should be updated to add the new child - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); + const newChild = volume.nexus.children.find( + (ch) => ch.state === 'CHILD_DEGRADED' + ); + expect(newChild.uri).to.equal('nvmf://replica3'); + newChild.state = 'CHILD_ONLINE'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); + + await waitUntil(() => nexus.children.length === 2, 'replica removal'); + expect(volume.state).to.equal('healthy'); + expect(Object.keys(volume.replicas)).to.deep.equal(['node2', 'node3']); + }); + + it('should scale up if a new pool is created', async () => { + // on node 3 we destroy (and create) the pool and create the new replica + stub3.onCall(0).resolves({}); + stub3.onCall(1).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(2).resolves({ uri: 'nvmf://replica3' }); + // nexus should be updated to add the new child + stub1.onCall(0).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + + // delete the third pool to pretend we ran out of pools + await pool3.destroy(); + + // now we cannot create the new replica (this is the update op in fact) + await volumes.createVolume(UUID, { + replicaCount: 3, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 90, + limitBytes: 110, + protocol: 'nvmf' + }); + await waitUntil(() => volume.state === 'degraded', 'degraded volume'); + + // now create the pool and see if it gets used for the new replica + pool3 = new Pool({ + name: 'pool3', + disks: [], + capacity: 100, + used: 4, + state: 'POOL_DEGRADED' + }); + node3._registerPool(pool3); - // update the spec: node2 remains but the first replica should move - // from node1 to node3 - volumes.createVolume(UUID, { - replicaCount: 2, - preferredNodes: [], - requiredNodes: ['node2', 'node3'], - requiredBytes: 90, - limitBytes: 110, - protocol: 'nbd' - }); + await waitUntil( + () => nexus.children.length === 3 && volume.state === 'degraded', + 'degraded volume with new replica' + ); - await waitUntil(() => nexus.children.length === 3, 'new replica'); - expect(volume.state).to.equal('degraded'); + const newChild = volume.nexus.children.find( + (ch) => ch.state === 'CHILD_DEGRADED' + ); + expect(newChild.uri).to.equal('nvmf://replica3'); + newChild.state = 'CHILD_ONLINE'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); - const newChild = volume.nexus.children.find( - (ch) => ch.state === 'CHILD_DEGRADED' - ); - expect(newChild.uri).to.equal('nvmf://replica3'); - newChild.state = 'CHILD_ONLINE'; - registry.emit('nexus', { - eventType: 'mod', - object: nexus + await waitUntil( + () => nexus.children.length === 3 && volume.state === 'healthy', + 'healthy volume' + ); }); - - await waitUntil(() => nexus.children.length === 2, 'replica removal'); - expect(volume.state).to.equal('healthy'); - expect(Object.keys(volume.replicas)).to.deep.equal(['node2', 'node3']); }); - it('should scale up if a new pool is created', async () => { - // on node 3 we destroy (and create) the pool and create the new replica - stub3.onCall(0).resolves({}); - stub3.onCall(1).resolves({ - uuid: UUID, - pool: 'pool3', - size: 95, - thin: false, - share: 'REPLICA_NONE', - uri: 'bdev:///' + UUID - }); - stub3.onCall(2).resolves({ uri: 'nvmf://replica3' }); - // nexus should be updated to add the new child - stub1.onCall(0).resolves({ - uri: 'nvmf://replica3', - state: 'CHILD_DEGRADED', - rebuildProgress: 10 - }); - - // delete the third pool to pretend we ran out of pools - await pool3.destroy(); + describe('without nexus', function () { + beforeEach(() => setUpReferenceEnv(false)); + afterEach(tearDownReferenceEnv); + + it('should scale up if replicaCount is increased', async () => { + // scale up involves nexus creation + stub1.onCall(0).resolves({ + uuid: UUID, + size: 95, + state: 'NEXUS_ONLINE', + children: [ + { + uri: `bdev:///${UUID}`, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + }, + { + uri: `nvmf://remote/${UUID}`, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + } + ] + }); + // on node 3 is created the new replica + stub3.onCall(0).resolves({ + uuid: UUID, + pool: 'pool3', + size: 95, + thin: false, + share: 'REPLICA_NONE', + uri: 'bdev:///' + UUID + }); + stub3.onCall(1).resolves({ uri: 'nvmf://replica3' }); + // nexus should be updated to add the new child + stub1.onCall(1).resolves({ + uri: 'nvmf://replica3', + state: 'CHILD_DEGRADED', + rebuildProgress: 10 + }); + // nexus will be destroyed at the end + stub1.onCall(2).resolves({}); + + // update the spec + volume = await volumes.createVolume(UUID, { + replicaCount: 3, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 90, + limitBytes: 110, + protocol: 'nvmf' + }); - // now we cannot create the new replica - volumes.createVolume(UUID, { - replicaCount: 3, - preferredNodes: [], - requiredNodes: [], - requiredBytes: 90, - limitBytes: 110, - protocol: 'nbd' - }); - await waitUntil(() => volume.state === 'degraded', 'degraded volume'); + await waitUntil( + () => + volume.state === 'degraded' && + Object.keys(volume.replicas).length === 3 && + volume.nexus.children.length === 3, + 'new replica' + ); + const newReplica = Object.values(volume.replicas).find((r) => r.uri === 'nvmf://replica3'); + const nexus = volume.nexus; + const child = nexus.children[2]; + expect(nexus).not.to.be.null(); + expect(newReplica.pool.name).to.equal('pool3'); + expect(child.state).to.equal('CHILD_DEGRADED'); + + // simulate rebuild finish - the nexus should go away + child.state = 'CHILD_ONLINE'; + registry.emit('nexus', { + eventType: 'mod', + object: nexus + }); + await waitUntil( + () => volume.state === 'healthy' && volume.nexus === null, + 'healthy volume' + ); + expect(Object.keys(volume.replicas)).has.lengthOf(3); + }); + + it('should scale down if replicaCount is decreased', async () => { + // scale down involves nexus creation + stub1.onCall(0).resolves({ + uuid: UUID, + size: 95, + state: 'NEXUS_ONLINE', + children: [ + { + uri: `bdev:///${UUID}`, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + }, + { + uri: `nvmf://remote/${UUID}`, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + } + ] + }); + // node 1: updated nexus (remove-child) + stub1.onCall(1).resolves({}); + // node 2: destroyed replica + stub2.onCall(0).resolves({}); + // nexus will be destroyed at the end + stub1.onCall(2).resolves({}); + + // update the spec + await volumes.createVolume(UUID, { + replicaCount: 1, + preferredNodes: [], + requiredNodes: [], + requiredBytes: 90, + limitBytes: 110, + protocol: 'nvmf' + }); - // now create the pool and see if it gets used for the new replica - pool3 = new Pool({ - name: 'pool3', - disks: [], - capacity: 100, - used: 4, - state: 'POOL_DEGRADED' + // Nexus gets created and destroyed inbetween but it's difficult to + // capture that moment in the tests because we don't know the precise + // timing. + await waitUntil( + () => + Object.keys(volume.replicas).length === 1 && + volume.nexus === null, + 'replica to be destroyed' + ); + expect(volume.state).to.equal('healthy'); + const oldReplica = Object.values(volume.replicas).find((r) => r.uri === `nvmf://remote/${UUID}`); + expect(oldReplica).to.be.undefined(); }); - node3._registerPool(pool3); - - await waitUntil( - () => - nexus.children.length === 3 && - nexus.children.find((ch) => ch.uri === 'nvmf://replica3'), - 'new replica' - ); - expect(volume.state).to.equal('degraded'); }); }); @@ -1168,14 +1391,16 @@ module.exports = function () { await waitUntil(() => volume.state === 'faulted', 'offline volume'); }); - it('should move to "healthy" when volume is unpublished', async () => { - nexus.state = 'NEXUS_OFFLINE'; + it('should remain what it was when volume is unpublished', async () => { + nexus.children[0].state = 'CHILD_DEGRADED'; registry.emit('nexus', { - eventType: 'del', + eventType: 'mod', object: nexus }); + await waitUntil(() => volume.state === 'degraded', 'degraded volume'); await volume.unpublish(); - await waitUntil(() => volume.state === 'healthy', 'healthy volume'); + await sleep(EYE_BLINK_MS); + await waitUntil(() => volume.state === 'degraded', 'degraded volume'); }); it('should not move to any state when in "destroyed" state', async () => { @@ -1221,9 +1446,11 @@ module.exports = function () { ] }); stub1.onCall(1).resolves({ - deviceUri: 'file:///dev/nbd0' + deviceUri: 'nvmf://host/nqn' }); + // pretend that node1 is down + isSynced1.returns(false); // we unbind the nexus - that happens when node goes down nexus.unbind(); await waitUntil(() => volume.state === 'faulted', 'volume faulted'); @@ -1231,14 +1458,13 @@ module.exports = function () { expect(volume.publishedOn).to.equal('node1'); // this simulates node that has been just successfully sync'd - const isSyncedStub = sinon.stub(node1, 'isSynced'); - isSyncedStub.returns(true); + isSynced1.returns(true); node1.emit('node', { eventType: 'mod', object: node1 }); await waitUntil(() => volume.state === 'healthy', 'healthy volume'); - expect(volume.nexus.deviceUri).to.equal('file:///dev/nbd0'); + expect(volume.nexus.deviceUri).to.equal('nvmf://host/nqn'); expect(volume.publishedOn).to.equal('node1'); }); @@ -1332,7 +1558,7 @@ module.exports = function () { // this creates a volume used in subsequent cases it('should create a new volume', async () => { - // on node 1 is created replica + // on node 1 is created replica and nexus stub1.onCall(0).resolves({ uuid: UUID, pool: 'pool1', @@ -1341,6 +1567,28 @@ module.exports = function () { share: 'REPLICA_NONE', uri: 'bdev:///' + UUID }); + stub1.onCall(1).resolves({ + uuid: UUID, + size: 96, + state: 'NEXUS_ONLINE', + children: [ + { + uri: 'bdev:///' + UUID, + state: 'CHILD_ONLINE', + rebuildProgress: 0 + }, + { + uri: 'nvmf://replica2', + state: 'CHILD_ONLINE', + rebuildProgress: 0 + }, + { + uri: 'nvmf://replica3', + state: 'CHILD_ONLINE', + rebuildProgress: 0 + } + ] + }); // on node 2 is created replica and it is shared stub2.onCall(0).resolves({ uuid: UUID, @@ -1369,10 +1617,10 @@ module.exports = function () { requiredNodes: [], requiredBytes: 90, limitBytes: 110, - protocol: 'nbd' + protocol: 'nvmf' }); - sinon.assert.calledOnce(stub1); + sinon.assert.calledTwice(stub1); sinon.assert.calledWithMatch(stub1.firstCall, 'createReplica', { uuid: UUID, pool: 'pool1', @@ -1380,8 +1628,13 @@ module.exports = function () { thin: false, share: 'REPLICA_NONE' }); + sinon.assert.calledWithMatch(stub1.secondCall, 'createNexus', { + uuid: UUID, + size: 96, + children: ['bdev:///' + UUID, 'nvmf://replica2', 'nvmf://replica3'] + }); - sinon.assert.calledOnce(stub2); + sinon.assert.calledTwice(stub2); sinon.assert.calledWithMatch(stub2.firstCall, 'createReplica', { uuid: UUID, pool: 'pool2', @@ -1389,8 +1642,12 @@ module.exports = function () { thin: false, share: 'REPLICA_NONE' }); + sinon.assert.calledWithMatch(stub2.secondCall, 'shareReplica', { + uuid: UUID, + share: 'REPLICA_NVMF' + }); - sinon.assert.calledOnce(stub3); + sinon.assert.calledTwice(stub3); sinon.assert.calledWithMatch(stub3.firstCall, 'createReplica', { uuid: UUID, pool: 'pool3', @@ -1398,6 +1655,17 @@ module.exports = function () { thin: false, share: 'REPLICA_NONE' }); + sinon.assert.calledWithMatch(stub2.secondCall, 'shareReplica', { + uuid: UUID, + share: 'REPLICA_NVMF' + }); + + // wait for the nexus to be destroyed after creation + await sleep(EYE_BLINK_MS); + sinon.assert.calledThrice(stub1); + sinon.assert.calledWithMatch(stub1.thirdCall, 'destroyNexus', { + uuid: UUID + }); expect(volumes.get(UUID)).to.equal(volume); expect(volume.uuid).to.equal(UUID); @@ -1415,12 +1683,11 @@ module.exports = function () { expect(volume.replicas.node3.uuid).to.equal(UUID); expect(volume.state).to.equal('healthy'); - // 1 new + 3 new replicas + state change - expect(volEvents).to.have.lengthOf(5); + expect(volEvents).to.have.lengthOf(9); }); it('should publish the volume', async () => { - const deviceUri = 'file:///dev/nbd0'; + const deviceUri = 'nvmf://host/nqn'; // on node 1 is created nexus stub1.onCall(0).resolves({ uuid: UUID, @@ -1445,12 +1712,8 @@ module.exports = function () { ] }); stub1.onCall(1).resolves({ deviceUri }); - // on node 2 is shared replica - stub2.onCall(0).resolves({ uri: 'nvmf://replica2' }); - // on node 3 is shared replica - stub3.onCall(0).resolves({ uri: 'nvmf://replica3' }); - const uri = await volume.publish('nbd'); + const uri = await volume.publish('nvmf'); expect(uri).to.equal(deviceUri); sinon.assert.calledTwice(stub1); @@ -1462,20 +1725,11 @@ module.exports = function () { sinon.assert.calledWithMatch(stub1.secondCall, 'publishNexus', { uuid: UUID, key: '', - share: enums.NEXUS_NBD + share: enums.NEXUS_NVMF }); - sinon.assert.calledOnce(stub2); - sinon.assert.calledWithMatch(stub2.firstCall, 'shareReplica', { - uuid: UUID, - share: 'REPLICA_NVMF' - }); - - sinon.assert.calledOnce(stub3); - sinon.assert.calledWithMatch(stub3.firstCall, 'shareReplica', { - uuid: UUID, - share: 'REPLICA_NVMF' - }); + sinon.assert.notCalled(stub2); + sinon.assert.notCalled(stub3); expect(volume.getNodeName()).to.equal('node1'); expect(volume.getSize()).to.equal(96); @@ -1484,15 +1738,21 @@ module.exports = function () { expect(Object.keys(volume.replicas)).to.have.lengthOf(3); expect(volume.state).to.equal('healthy'); - // 5 mods (2 set share, 1 new nexus, 1 publish nexus, state change) - expect(volEvents).to.have.lengthOf(5); + // 3 mods (1 new nexus, 1 publish nexus, state change) + expect(volEvents).to.have.lengthOf(3); }); it('should unpublish the volume', async () => { stub1.onCall(0).resolves({}); + stub1.onCall(1).resolves({}); await volume.unpublish(); - sinon.assert.calledOnce(stub1); - sinon.assert.calledWithMatch(stub1, 'destroyNexus', { + // wait for the nexus to be destroyed after unpublish + await sleep(EYE_BLINK_MS); + sinon.assert.calledTwice(stub1); + sinon.assert.calledWithMatch(stub1.firstCall, 'unpublishNexus', { + uuid: UUID + }); + sinon.assert.calledWithMatch(stub1.secondCall, 'destroyNexus', { uuid: UUID }); expect(volume.getNodeName()).to.be.undefined(); @@ -1500,8 +1760,8 @@ module.exports = function () { expect(volume.nexus).is.null(); expect(volume.state).to.equal('healthy'); expect(Object.keys(volume.replicas)).to.have.length(3); - // 2 nexus events - expect(volEvents).to.have.lengthOf(2); + // 3 nexus events + expect(volEvents).to.have.lengthOf(3); }); it('should destroy the volume', async () => { diff --git a/csi/moac/test/workq_test.js b/csi/moac/test/workq_test.js index 51aaa9fae..e7c153796 100644 --- a/csi/moac/test/workq_test.js +++ b/csi/moac/test/workq_test.js @@ -5,7 +5,7 @@ const expect = require('chai').expect; const sinon = require('sinon'); const sleep = require('sleep-promise'); -const Workq = require('../workq'); +const { Workq } = require('../workq'); class Task { constructor (id, delay) { @@ -30,7 +30,7 @@ class Task { } module.exports = function () { - var clock; + let clock; beforeEach(() => { clock = sinon.useFakeTimers(); diff --git a/csi/moac/tsconfig.json b/csi/moac/tsconfig.json index fc025ce12..2e0947879 100644 --- a/csi/moac/tsconfig.json +++ b/csi/moac/tsconfig.json @@ -61,7 +61,7 @@ "resolveJsonModule": true /* allows for importing, extracting types from and generating .json files */ }, "files": [ - "watcher.ts", + "csi.ts", "nexus.ts", "node.ts", "node_operator.ts", @@ -71,5 +71,7 @@ "volume.ts", "volumes.ts", "volume_operator.ts", + "watcher.ts", + "workq.ts", ] } diff --git a/csi/moac/volume.ts b/csi/moac/volume.ts index 02128d964..ca1459296 100644 --- a/csi/moac/volume.ts +++ b/csi/moac/volume.ts @@ -46,13 +46,13 @@ export function volumeStateFromString(val: string): VolumeState { // maintaining desired redundancy. export class Volume { // volume spec properties - private uuid: string; - private replicaCount: number; - private preferredNodes: string[]; - private requiredNodes: string[]; - private requiredBytes: number; - private limitBytes: number; - private protocol: Protocol; + uuid: string; + replicaCount: number; + preferredNodes: string[]; + requiredNodes: string[]; + requiredBytes: number; + limitBytes: number; + protocol: Protocol; // volume status properties private size: number; private nexus: Nexus | null; @@ -129,6 +129,11 @@ export class Volume { // Publish the volume. That means, make it accessible through a block device. // + // NOTE: The function has a couple of async steps that can interfere with + // what happens in fsa(). Alternative implementation could be to just call + // fsa() and let it do all the work. But then we would need a mechanism to + // notify us when the operation is done. + // // @params protocol The nexus share protocol. // @return uri The URI to access the nexus. async publish(protocol: Protocol): Promise { @@ -142,14 +147,6 @@ export class Volume { if (!nexus) { // Ensure replicas can be accessed from nexus. Set share protocols. const [nexusNode, replicaSet] = await this._ensureReplicaShareProtocols(); - - if (!this.size) { - // the size will be the smallest replica - this.size = Object.values(this.replicas) - .map((r) => r.size) - .reduce((acc, cur) => (cur < acc ? cur : acc), Number.MAX_SAFE_INTEGER); - } - // create a new nexus with children (replicas) created in previous steps nexus = await this._createNexus(nexusNode, replicaSet); } else { log.debug(`Publishing volume ${this} that already has a nexus`) @@ -171,24 +168,14 @@ export class Volume { if (this.publishedOn) { this.publishedOn = undefined; if (this.nexus) { - // We can directly destroy the nexus without unsharing it first - // but later we will use this block of code in case we cannot - // destroy the nexus immediately because it is rebuilding - //if (this.nexus.getUri()) { - // try { - // await this.nexus.unpublish(); - // } catch (err) { - // log.error(`Defering nexus unpublish for volume ${this}: ${err}`) - // } - //} - try { - // TODO: defer destruction in case that the volume is rebuilding - await this.nexus.destroy(); - } catch (err) { - // We let unpublish to always succeed and rely on FSA to remove - // the nexus later when it's possible to do. - log.error(`Defering nexus destroy for volume ${this}: ${err}`) + if (this.nexus.getUri()) { + try { + await this.nexus.unpublish(); + } catch (err) { + log.error(`Defering nexus unpublish for ${this}: ${err}`) + } } + // it will be destroyed asynchronously by fsa() } this.emitEvent('mod'); this.fsa(); @@ -247,72 +234,37 @@ export class Volume { this._setState(VolumeState.Faulted); return; } - - if (!this.publishedOn) { - // If the volume hasn't been published we can't do anything more than what - // we have done (that is maintain required # of replicas). When we create - // the nexus, it may find out that some of the replicas are unusable, but - // we don't know that now. - this._setState(VolumeState.Healthy); - return; - } } // check that replicas are shared in the way they should be - let localNode: string = (this.nexus) ? this.nexus.node.name : this.publishedOn; - for (const nodeName in this.replicas) { - const replica = this.replicas[nodeName]; - if (replica.isOffline()) { - continue; - } - let share; - const isLocal = replica.pool!.node.name === localNode; - if (isLocal && replica.share !== 'REPLICA_NONE') { - // make sure that replica that is local to the nexus is accessed locally - share = 'REPLICA_NONE'; - } else if (!isLocal && replica.share === 'REPLICA_NONE') { - // make sure that replica that is remote to nexus can be accessed - share = 'REPLICA_NVMF'; - } - if (share) { - try { - await replica.setShare(share); - delete this.nodeBlackList[nodeName]; - // fsa will get called again because the replica was modified - return; - } catch (err) { - this.nodeBlackList[nodeName] = true; - log.error( - `Failed to set share protocol to ${share} for replica "${replica}": ${err}` - ); - } - } + let nexusNode, replicaSet; + try { + [nexusNode, replicaSet] = await this._ensureReplicaShareProtocols(); + } catch (err) { + log.warn(err.toString()); + return; } - // If we don't have a nexus and the volume is published, then try to create one + // If we don't have a nexus and we should have one then create it if (!this.nexus) { - assert(this.publishedOn); - let nexusNode = this.registry.getNode(this.publishedOn); - if (nexusNode && nexusNode.isSynced()) { - let replicas = []; - for (let nodeName in this.replicas) { - if (!this.replicas[nodeName].isOffline() && !this.nodeBlackList[nodeName]) { - replicas.push(this.replicas[nodeName]); + if ( + this.publishedOn || + replicaSet.length !== this.replicaCount + ) { + if (nexusNode && nexusNode.isSynced()) { + try { + await this._createNexus(nexusNode, replicaSet); + } catch (err) { + log.error(`Failed to create nexus for ${this} on "${this.publishedOn}": ${err}`); + this._setState(VolumeState.Faulted); } - } - if (replicas.length === 0) { - log.warn(`Cannot create nexus for ${this} because all replicas are bad`); - return; - } - try { - await this._createNexus(nexusNode, replicas); - } catch (err) { - log.error(`Failed to create nexus for ${this} on "${this.publishedOn}"`); + } else { + log.warn(`Cannot create nexus for ${this} because "${this.publishedOn}" is down`); this._setState(VolumeState.Faulted); } } else { - log.warn(`Cannot create nexus for ${this} because "${this.publishedOn}" is down`) - this._setState(VolumeState.Faulted); + // we have just right # of replicas and we don't need a nexus - ok + this._setState(VolumeState.Healthy); } // fsa will get called again when event about created nexus arrives return; @@ -396,7 +348,7 @@ export class Volume { assert(onlineCount >= this.replicaCount); this._setState(VolumeState.Healthy); - // If we have more online replicas then we need to, then remove one. + // If we have more online replicas than we need to, then remove one. // Child that is broken or without a replica goes first. let rmPair = childReplicaPairs.find( (pair) => !pair.r && pair.ch.state === 'CHILD_FAULTED' @@ -461,6 +413,18 @@ export class Volume { } catch (err) { logError(err); } + return; + } + + // Finally if everything is ok and volume isn't published, destroy the + // nexus. Leaving it around eats cpu cycles and induces network traffic + // between nexus and replicas. + if (!this.publishedOn) { + try { + await this.nexus.destroy(); + } catch (err) { + log.error(`Defering nexus destroy for ${this}: ${err}`) + } } } @@ -490,14 +454,31 @@ export class Volume { async create() { log.debug(`Creating the volume "${this}"`); + this.attach(); + // Ensure there is sufficient number of replicas for the volume. const newReplicaCount = this.replicaCount - Object.keys(this.replicas).length; if (newReplicaCount > 0) { // create more replicas if higher replication factor is desired await this._createReplicas(newReplicaCount); } - this._setState(VolumeState.Healthy); + const [nexusNode, replicaSet] = await this._ensureReplicaShareProtocols(); + if (!this.nexus) { + await this._createNexus(nexusNode, replicaSet); + } + this.state = VolumeState.Unknown; log.info(`Volume "${this}" with ${this.replicaCount} replica(s) and size ${this.size} was created`); + this.fsa(); + } + + // Attach whatever objects belong to the volume and can be found in the + // registry. + attach() { + this.registry.getReplicaSet(this.uuid).forEach((r: Replica) => this.newReplica(r)); + const nexus: Nexus = this.registry.getNexus(this.uuid); + if (nexus) { + this.newNexus(nexus); + } } // Update child devices of existing nexus or create a new nexus if it does not @@ -508,6 +489,12 @@ export class Volume { // @returns Created nexus object. // async _createNexus(node: Node, replicas: Replica[]): Promise { + if (!this.size) { + // the size will be the smallest replica + this.size = Object.values(replicas) + .map((r) => r.size) + .reduce((acc, cur) => (cur < acc ? cur : acc), Number.MAX_SAFE_INTEGER); + } return node.createNexus( this.uuid, this.size, @@ -626,41 +613,43 @@ export class Volume { return score; } - // Share replicas as appropriate to allow access from the nexus and return - // just replicas that should be used for the nexus (excessive replicas will - // be trimmed). + // Share replicas as appropriate to allow access from the nexus. // // @returns Node where nexus should be and list of replicas that should be - // used for nexus sorted by preference. + // used for the nexus sorted by preference. // async _ensureReplicaShareProtocols(): Promise<[Node, Replica[]]> { // sort replicas and remove replicas that aren't online const replicaSet = this ._prioritizeReplicas(Object.values(this.replicas)) - .filter((r) => !r.isOffline()); + .filter((r) => !r.isOffline()) + .filter((r) => !this.nodeBlackList[r.pool!.node.name]); if (replicaSet.length === 0) { throw new GrpcError( GrpcCode.INTERNAL, - `There are no replicas for volume "${this}"` + `There are no good replicas for volume "${this}"` ); } - replicaSet.splice(this.replicaCount); - // If nexus does not exist it will be created on one of the replica nodes - // with the least # of nexuses. let nexusNode; if (this.nexus) { nexusNode = this.nexus.node; - } else { + } else if (this.publishedOn) { + nexusNode = this.registry.getNode(this.publishedOn); + } + // If nexus does not exist it will be created on one of the replica nodes + // with the least # of nexuses. + if (!nexusNode) { nexusNode = replicaSet .map((r: Replica) => r.pool!.node) .sort((a: Node, b: Node) => a.nexus.length - b.nexus.length)[0]; } for (let i = 0; i < replicaSet.length; i++) { - const replica = replicaSet[i]; + const replica: Replica = replicaSet[i]; + const replicaNode: Node = replica.pool!.node; let share; - const local = replica.pool!.node === nexusNode; + const local = replicaNode === nexusNode; // make sure that replica which is local to the nexus is accessed locally if (local && replica.share !== 'REPLICA_NONE') { share = 'REPLICA_NONE'; @@ -671,9 +660,10 @@ export class Volume { if (share) { try { await replica.setShare(share); + delete this.nodeBlackList[replicaNode.name]; } catch (err) { - throw new GrpcError( - GrpcCode.INTERNAL, + this.nodeBlackList[replicaNode.name] = true; + log.error( `Failed to set share protocol to ${share} for replica "${replica}": ${err}` ); } @@ -871,4 +861,4 @@ export class Volume { // this function to print a stack as well, which is handy. function logError(err: any) { log.error(err.toString()); -} \ No newline at end of file +} diff --git a/csi/moac/volume_operator.ts b/csi/moac/volume_operator.ts index 8a01a346e..2dbeb9118 100644 --- a/csi/moac/volume_operator.ts +++ b/csi/moac/volume_operator.ts @@ -40,7 +40,6 @@ const yaml = require('js-yaml'); const EventStream = require('./event_stream'); const log = require('./logger').Logger('volume-operator'); -const Workq = require('./workq'); import assert from 'assert'; import * as fs from 'fs'; @@ -58,6 +57,7 @@ import { import { Protocol, protocolFromString } from './nexus'; import { Volumes } from './volumes'; import { VolumeState, volumeStateFromString } from './volume'; +import { Workq } from './workq'; const RESOURCE_NAME: string = 'mayastorvolume'; const crdVolume = yaml.safeLoad( @@ -170,8 +170,8 @@ export class VolumeOperator { volumes: Volumes; // Volume manager eventStream: any; // A stream of node, replica and nexus events. watcher: CustomResourceCache; // volume resource watcher. - workq: any; // Events from k8s are serialized so that we don't flood moac by - // concurrent changes to volumes. + workq: Workq; // Events from k8s are serialized so that we don't flood moac by + // concurrent changes to volumes. // Create volume operator object. // @@ -188,7 +188,7 @@ export class VolumeOperator { this.namespace = namespace; this.volumes = volumes; this.eventStream = null; - this.workq = new Workq(); + this.workq = new Workq('mayastorvolume'); this.watcher = new CustomResourceCache( this.namespace, RESOURCE_NAME, @@ -436,8 +436,9 @@ export class VolumeOperator { watcher.on('del', (obj: VolumeResource) => { // most likely it was not user but us (the operator) who deleted // the resource. So check if it really exists first. - if (this.volumes.get(obj.metadata.name!)) { - this.workq.push(obj.metadata.name, this._destroyVolume.bind(this)); + const name = obj.metadata.name!; + if (this.volumes.get(name)) { + this.workq.push(name, this._destroyVolume.bind(this)); } }); } diff --git a/csi/moac/volumes.ts b/csi/moac/volumes.ts index 2657fcfdc..735454da3 100644 --- a/csi/moac/volumes.ts +++ b/csi/moac/volumes.ts @@ -1,26 +1,33 @@ // Volume manager implementation. import assert from 'assert'; -import { Nexus } from './nexus'; -import { Replica } from './replica'; import { Volume, VolumeState } from './volume'; +import { Workq } from './workq'; const EventEmitter = require('events'); const EventStream = require('./event_stream'); const { GrpcCode, GrpcError } = require('./grpc_client'); const log = require('./logger').Logger('volumes'); +// Type used in "create volume" workq +type CreateArgs = { + uuid: string; + spec: any; +} + // Volume manager that emit events for new/modified/deleted volumes. export class Volumes extends EventEmitter { private registry: any; private events: any; // stream of events from registry private volumes: Record; // volumes indexed by uuid + private createWorkq: Workq; constructor (registry: any) { super(); this.registry = registry; this.events = null; this.volumes = {}; + this.createWorkq = new Workq('create volume'); } start() { @@ -88,9 +95,8 @@ export class Volumes extends EventEmitter { return Object.values(this.volumes); } - // Create volume object (just the object) and add it to the internal list - // of volumes. The method is idempotent. If a volume with the same uuid - // already exists, then update its parameters. + // We have to serialize create volume requests because concurrent creates + // can create havoc in space accounting and contribute to overall mess. // // @param {string} uuid ID of the volume. // @param {object} spec Properties of the volume. @@ -101,8 +107,17 @@ export class Volumes extends EventEmitter { // @params {number} spec.limitBytes The volume should not be bigger than this. // @params {string} spec.protocol The share protocol for the nexus. // @returns {object} New volume object. - // async createVolume(uuid: string, spec: any): Promise { + return await this.createWorkq.push({uuid, spec}, (args: CreateArgs) => { + return this._createVolume(args.uuid, args.spec); + }); + } + + // Create volume object (just the object) and add it to the internal list + // of volumes. The method is idempotent. If a volume with the same uuid + // already exists, then update its parameters. + // + async _createVolume(uuid: string, spec: any): Promise { if (!spec.requiredBytes || spec.requiredBytes < 0) { throw new GrpcError( GrpcCode.INVALID_ARGUMENT, @@ -127,12 +142,6 @@ export class Volumes extends EventEmitter { eventType: 'new', object: volume }); - // check for components that already exist and assign them to the volume - this.registry.getReplicaSet(uuid).forEach((r: Replica) => volume.newReplica(r)); - const nexus: Nexus = this.registry.getNexus(uuid); - if (nexus) { - volume.newNexus(nexus); - } try { await volume.create(); @@ -199,16 +208,9 @@ export class Volumes extends EventEmitter { object: volume }); }, spec, status.state, status.size, publishedOn); + volume.attach(); + volume.state = VolumeState.Unknown; this.volumes[uuid] = volume; - - // attach any associated replicas to the volume - this.registry.getReplicaSet(uuid).forEach((r: Replica) => volume.newReplica(r)); - - const nexus = this.registry.getNexus(uuid); - if (nexus) { - volume.newNexus(nexus); - } - volume._setState(VolumeState.Unknown); volume.fsa(); } return volume; diff --git a/csi/moac/workq.js b/csi/moac/workq.js deleted file mode 100644 index dfe29edef..000000000 --- a/csi/moac/workq.js +++ /dev/null @@ -1,62 +0,0 @@ -'use strict'; - -const assert = require('assert'); - -// Implementation of a simple work queue which takes a task, puts it to the -// queue and processes the task when all other tasks that were queued before -// have completed. This is useful if the task consists of async steps and one -// wants to be sure that at any given time only one task is being processed -// not to interfere with the other tasks. -class Workq { - constructor () { - this.queue = []; - this.inprog = false; - } - - // Put a task to the queue for processing. - // - // Since the method is async the caller can decide if she wants to block - // waiting until the task is processed or continue immediately. - // - // @param {*} arg Opaque context parameter passed to the func. - // @param {function} func Async function returning a promise. - // @returns {*} A promise fulfilled when the task is done. - // The value of the promise is the value returned by the func. - async push (arg, func) { - assert.strictEqual(typeof func, 'function'); - - var resolveCb; - var rejectCb; - var promise = new Promise((resolve, reject) => { - resolveCb = resolve; - rejectCb = reject; - }); - var task = { func, arg, resolveCb, rejectCb }; - - this.queue.push(task); - if (!this.inprog) { - this.inprog = true; - this._nextTask(); - } - return promise; - } - - // Pick and dispatch next task from the queue. - _nextTask () { - var self = this; - - var task = this.queue.shift(); - if (!task) { - self.inprog = false; - return; - } - - task - .func(task.arg) - .then((res) => task.resolveCb(res)) - .catch((err) => task.rejectCb(err)) - .finally(() => self._nextTask()); - } -} - -module.exports = Workq; diff --git a/csi/moac/workq.ts b/csi/moac/workq.ts new file mode 100644 index 000000000..733cfcfa3 --- /dev/null +++ b/csi/moac/workq.ts @@ -0,0 +1,73 @@ + +import assert from 'assert'; + +const log = require('./logger').Logger('workq'); + +type Task = { + func: (arg: A) => Promise; + arg: A; + resolveCb: (res: R) => void; + rejectCb: (err: any) => void; +} + +// Implementation of a simple work queue which takes a task, puts it to the +// queue and processes the task when all other tasks that were queued before +// have completed. This is useful if the task consists of async steps and one +// wants to be sure that at any given time only one task is being processed +// not to interfere with the other tasks. +export class Workq { + private name: string; + private queue: Task[]; + private inprog: boolean; + + constructor (name?: string) { + this.name = name || ''; + this.queue = []; + this.inprog = false; + } + + // Put a task to the queue for processing. + // + // Since the method is async the caller can decide if she wants to block + // waiting until the task is processed or continue immediately. + // + // @param arg Opaque context parameter passed to the func. + // @param func Async function returning a promise. + // @returns A promise fulfilled when the task is done. + // The value of the promise is the value returned by the func. + async push (arg: A, func: (arg: A) => Promise): Promise { + assert.strictEqual(typeof func, 'function'); + + return new Promise((resolve, reject) => { + let resolveCb = resolve; + let rejectCb = reject; + let task: Task = { func, arg, resolveCb, rejectCb }; + + this.queue.push(task); + if (!this.inprog) { + this.inprog = true; + this._nextTask(); + } else { + log.trace(`${this.name} task has been queued for later`); + } + }); + } + + // Pick and dispatch next task from the queue. + _nextTask () { + var self = this; + + var task = this.queue.shift(); + if (!task) { + self.inprog = false; + return; + } + + log.trace(`Dispatching a new ${this.name} task`); + task + .func(task.arg) + .then((res: any) => task!.resolveCb(res)) + .catch((err: any) => task!.rejectCb(err)) + .finally(() => self._nextTask()); + } +} diff --git a/deploy/README.md b/deploy/README.md index 8aab60cf3..31819f4ef 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -8,6 +8,7 @@ It is now hosted at [GitBook](https://mayastor.gitbook.io/introduction/). # Deployment Files -The files in this directory are provided as a convenience, a template for the successful test deployment of Mayastor to a cluster. +The files in this directory are provided as a convenience, a template for the +successful test deployment of Mayastor to a cluster. -**NOTE:** ALL versions of deployment files from v0.4.0 specify the 'latest' tag on Mayastor container images. If you wish to deploy earlier versions, your own builds, or a nightly build, you must change the image tags accordingly. +Most of them are generated from helm templates in `chart/` directory. diff --git a/deploy/csi-daemonset.yaml b/deploy/csi-daemonset.yaml index 1db3104bf..6d340e705 100644 --- a/deploy/csi-daemonset.yaml +++ b/deploy/csi-daemonset.yaml @@ -30,7 +30,7 @@ spec: # the same. containers: - name: mayastor-csi - image: mayadata/mayastor-csi:v0.7.0 + image: mayadata/mayastor-csi:v0.7.1 imagePullPolicy: Always # we need privileged because we mount filesystems and use mknod securityContext: @@ -73,15 +73,10 @@ spec: cpu: "100m" memory: "50Mi" - name: csi-driver-registrar - image: quay.io/k8scsi/csi-node-driver-registrar:v1.3.0 + image: quay.io/k8scsi/csi-node-driver-registrar:v2.1.0 args: - "--csi-address=/csi/csi.sock" - "--kubelet-registration-path=/var/lib/kubelet/plugins/mayastor.openebs.io/csi.sock" - lifecycle: - preStop: - exec: - # this is needed in order for CSI to detect that the plugin is gone - command: ["/bin/sh", "-c", "rm -f /registration/io.openebs.csi-mayastor-reg.sock /csi/csi.sock"] volumeMounts: - name: plugin-dir mountPath: /csi diff --git a/deploy/fio.yaml b/deploy/fio.yaml index 44a3dbfd0..f754a1c2c 100644 --- a/deploy/fio.yaml +++ b/deploy/fio.yaml @@ -17,3 +17,6 @@ spec: volumeMounts: - mountPath: "/volume" name: ms-volume + #volumeDevices: + # - devicePath: /dev/xvda + # name: ms-volume \ No newline at end of file diff --git a/deploy/mayastor-daemonset-config.yaml b/deploy/mayastor-daemonset-config.yaml deleted file mode 100644 index 8c8742ac9..000000000 --- a/deploy/mayastor-daemonset-config.yaml +++ /dev/null @@ -1,106 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - namespace: mayastor - name: mayastor - labels: - openebs/engine: mayastor -spec: - selector: - matchLabels: - app: mayastor - updateStrategy: - type: RollingUpdate - rollingUpdate: - maxUnavailable: 1 - minReadySeconds: 10 - template: - metadata: - labels: - app: mayastor - spec: - hostNetwork: true - # To resolve services from mayastor namespace - dnsPolicy: ClusterFirstWithHostNet - nodeSelector: - openebs.io/engine: mayastor - kubernetes.io/arch: amd64 - # NOTE: Each container must have mem/cpu limits defined in order to - # belong to Guaranteed QoS class, hence can never get evicted in case of - # pressure unless they exceed those limits. limits and requests must be - # the same. - initContainers: - - name: message-bus-probe - image: busybox:latest - command: ['sh', '-c', 'until nc -vz nats 4222; do echo "Waiting for message bus..."; sleep 1; done;'] - containers: - - name: mayastor - image: mayadata/mayastor:v0.7.0 - imagePullPolicy: Always - env: - - name: MY_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: IMPORT_NEXUSES - value: "false" - args: - - "-N$(MY_NODE_NAME)" - - "-g$(MY_POD_IP)" - - "-nnats" - - "-y/var/local/mayastor/config.yaml" - - "-C/var/local/mayastor/child-status-config.yaml" - securityContext: - privileged: true - volumeMounts: - - name: device - mountPath: /dev - - name: dshm - mountPath: /dev/shm - - name: configlocation - mountPath: /var/local/mayastor/ - - name: config - mountPath: /var/local/mayastor/config.yaml - - name: child-status-config - mountPath: /var/local/mayastor/child-status-config.yaml - resources: - limits: - cpu: "1" - memory: "512Mi" - hugepages-2Mi: "1Gi" - requests: - cpu: "1" - memory: "512Mi" - hugepages-2Mi: "1Gi" - ports: - - containerPort: 10124 - protocol: TCP - name: mayastor - volumes: - - name: device - hostPath: - path: /dev - type: Directory - - name: dshm - emptyDir: - medium: Memory - sizeLimit: "1Gi" - - name: hugepage - emptyDir: - medium: HugePages - - name: configlocation - hostPath: - path: /var/local/mayastor/ - type: DirectoryOrCreate - - name: config - hostPath: - path: /var/local/mayastor/config.yaml - type: FileOrCreate - - name: child-status-config - hostPath: - path: /var/local/mayastor/child-status-config.yaml - type: FileOrCreate diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml index d92dee673..0982779e9 100644 --- a/deploy/mayastor-daemonset.yaml +++ b/deploy/mayastor-daemonset.yaml @@ -22,7 +22,7 @@ spec: app: mayastor spec: hostNetwork: true - # To resolve services from mayastor namespace + # To resolve services from mayastor's namespace dnsPolicy: ClusterFirstWithHostNet nodeSelector: openebs.io/engine: mayastor @@ -33,7 +33,7 @@ spec: command: ['sh', '-c', 'until nc -vz nats 4222; do echo "Waiting for message bus..."; sleep 1; done;'] containers: - name: mayastor - image: mayadata/mayastor:v0.7.0 + image: mayadata/mayastor:v0.7.1 imagePullPolicy: Always env: - name: MY_NODE_NAME @@ -61,7 +61,7 @@ spec: - "-g$(MY_POD_IP)" - "-nnats" - "-y/var/local/mayastor/config.yaml" - - "-m0x3" + - "-l1,2" securityContext: privileged: true volumeMounts: diff --git a/deploy/moac-deployment.yaml b/deploy/moac-deployment.yaml index ca215367e..3d09e47e3 100644 --- a/deploy/moac-deployment.yaml +++ b/deploy/moac-deployment.yaml @@ -1,19 +1,5 @@ --- # Source: mayastor/templates/moac-deployment.yaml -kind: Service -apiVersion: v1 -metadata: - name: moac - namespace: mayastor -spec: - selector: - app: moac - ports: - - protocol: TCP - port: 4000 - targetPort: 4000 ---- -# Source: mayastor/templates/moac-deployment.yaml kind: Deployment apiVersion: apps/v1 metadata: @@ -59,7 +45,7 @@ spec: mountPath: /var/lib/csi/sockets/pluginproxy/ - name: moac - image: mayadata/moac:v0.7.0 + image: mayadata/moac:v0.7.1 imagePullPolicy: Always args: - "--csi-address=$(CSI_ENDPOINT)" @@ -77,10 +63,6 @@ spec: volumeMounts: - name: socket-dir mountPath: /var/lib/csi/sockets/pluginproxy/ - ports: - - containerPort: 4000 - protocol: TCP - name: "rest-api" volumes: - name: socket-dir emptyDir: diff --git a/deploy/monitor/README.md b/deploy/monitor/README.md deleted file mode 100644 index 3729ebab1..000000000 --- a/deploy/monitor/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Monitoring extension for MayaStor - -Currently it shows two graphs: iops and bandwidth for arbitrary replica. -The monitoring stack consists of: - -* telegraf: gathering stats from mayastor REST API endpoint -* influxdb: database for the stats -* grafana: graphical frontend - -Note that this is just a proof of concept for showing "something" at -events like KubeCon. Monitoring for MayaStor will need to be designed -from scratch at some point in future when requirements are clear. - -Metrics in influxDB don't reside on persistent volume so when the pod -is restarted, all saved measurements are gone. - -# Deployment - -We assume that mayastor (including moac) has been already deployed to -`mayastor` namespace. - -1. Create configmap holding configuration files for grafana: - ```bash - kubectl -n mayastor create configmap grafana-config \ - --from-file=datasources.yaml=grafana/datasources.yaml \ - --from-file=dashboards.yaml=grafana/dashboards.yaml \ - --from-file=mayastor-dashboard.json=grafana/mayastor-dashboard.json - ``` - -2. Create configmap holding configuration of telegraf: - ```bash - kubectl create -f telegraf-config.yaml - ``` - -3. Deploy all three components: telegraf, influxdb and grafana: - ```bash - kubectl create -f monitor-deployment.yaml - ``` - -4. Get port of grafana to be used for external access (in this case 30333): - ```bash - kubectl -n mayastor get svc - ``` - ``` - NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE - grafana NodePort 10.0.0.88 80:30333/TCP 3m10s - moac ClusterIP 10.0.0.184 4000/TCP 113m - ``` - -5. Put URL in following form to your web browser: - `http://:/` (user/password is "admin"). - Choose mayastor dashboard. diff --git a/deploy/monitor/grafana/dashboards.yaml b/deploy/monitor/grafana/dashboards.yaml deleted file mode 100644 index 3b978e625..000000000 --- a/deploy/monitor/grafana/dashboards.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: 1 - -providers: -- name: 'default' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards - options: - path: /var/lib/grafana/dashboards diff --git a/deploy/monitor/grafana/datasources.yaml b/deploy/monitor/grafana/datasources.yaml deleted file mode 100644 index e8053ac00..000000000 --- a/deploy/monitor/grafana/datasources.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# config file version -apiVersion: 1 - -# list of datasources to insert/update depending -# what's available in the database -datasources: - # name of the datasource. Required -- name: influxdb - # datasource type. Required - type: influxdb - # access mode. proxy or direct (Server or Browser in the UI). Required - access: proxy - # org id. will default to orgId 1 if not specified - orgId: 1 - # url - url: http://127.0.0.1:8086 - # database password, if used - password: telegraf - # database user, if used - user: telegraf - # database name, if used - database: mayastor - # version - version: 1 - # allow users to edit datasources from the UI. - editable: true diff --git a/deploy/monitor/grafana/mayastor-dashboard.json b/deploy/monitor/grafana/mayastor-dashboard.json deleted file mode 100644 index 60df97c50..000000000 --- a/deploy/monitor/grafana/mayastor-dashboard.json +++ /dev/null @@ -1,440 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "iteration": 1578992203638, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "influxdb", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "alias": "read", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT non_negative_derivative(max(\"num_read_ops\"), 1s) FROM \"replica\" WHERE (\"uuid\" =~ /^$uuid$/ AND \"node\" =~ /^$node$/) AND time >= now() - 15m GROUP BY time(500ms) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "alias": "write", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT non_negative_derivative(max(\"num_write_ops\"), 1s) FROM \"replica\" WHERE (\"uuid\" =~ /^$uuid$/ AND \"node\" =~ /^$node$/) AND time >= now() - 15m GROUP BY time(500ms) fill(null);", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IOPS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "influxdb", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 9 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "alias": "read", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT non_negative_derivative(max(\"bytes_read\"), 1s) FROM \"replica\" WHERE (\"uuid\" =~ /^$uuid$/ AND \"node\" =~ /^$node$/) AND time >= now() - 15m GROUP BY time(500ms) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "alias": "write", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT non_negative_derivative(max(\"bytes_written\"), 1s) FROM \"replica\" WHERE (\"uuid\" =~ /^$uuid$/ AND \"node\" =~ /^$node$/) AND time >= now() - 15m GROUP BY time(500ms) fill(null);", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Bandwidth", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "2s", - "schemaVersion": 21, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "tags": [], - "text": "node1", - "value": "node1" - }, - "datasource": "influxdb", - "definition": "SHOW TAG VALUES WITH KEY = \"node\"", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "node", - "options": [], - "query": "SHOW TAG VALUES WITH KEY = \"node\"", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "1b232114-7c71-40a9-8f35-004fc3878187", - "value": "1b232114-7c71-40a9-8f35-004fc3878187" - }, - "datasource": "influxdb", - "definition": "SHOW TAG VALUES WITH KEY = \"uuid\"", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "uuid", - "options": [], - "query": "SHOW TAG VALUES WITH KEY = \"uuid\"", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "2s", - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "MayaStor dashboard", - "uid": "EXv5v7PWk", - "version": 1 -} diff --git a/deploy/monitor/monitor-deployment.yaml b/deploy/monitor/monitor-deployment.yaml deleted file mode 100644 index abaf84ec9..000000000 --- a/deploy/monitor/monitor-deployment.yaml +++ /dev/null @@ -1,106 +0,0 @@ ---- -apiVersion: v1 -kind: Secret -metadata: - name: influxdb-secrets - namespace: mayastor -type: Opaque -stringData: - INFLUXDB_DB: mayastor - INFLUXDB_USER: telegraf - INFLUXDB_USER_PASSWORD: telegraf ---- -apiVersion: v1 -kind: Secret -metadata: - name: grafana-secrets - namespace: mayastor -type: Opaque -stringData: - GF_SECURITY_ADMIN_USER: admin - GF_SECURITY_ADMIN_PASSWORD: admin ---- -kind: Deployment -apiVersion: apps/v1 -metadata: - name: monitor - namespace: mayastor - labels: - app: monitor -spec: - replicas: 1 - selector: - matchLabels: - app: monitor - template: - metadata: - labels: - app: monitor - spec: - containers: - - name: telegraf - image: telegraf:latest - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /etc/telegraf/telegraf.conf - name: telegraf-config - subPath: telegraf.conf - readOnly: true - envFrom: - - secretRef: - name: influxdb-secrets - - name: influxdb - image: influxdb:latest - imagePullPolicy: IfNotPresent - envFrom: - - secretRef: - name: influxdb-secrets - - name: grafana - image: grafana/grafana:latest - imagePullPolicy: IfNotPresent - envFrom: - - secretRef: - name: grafana-secrets - env: - - name: INFLUXDB_HOST - value: "127.0.0.1" - - name: GF_SERVER_HTTP_PORT - value: "3000" - ports: - - containerPort: 3000 - protocol: TCP - volumeMounts: - - mountPath: /etc/grafana/provisioning/datasources/datasources.yaml - name: grafana-config - readOnly: true - subPath: datasources.yaml - - mountPath: /etc/grafana/provisioning/dashboards/dashboards.yaml - name: grafana-config - readOnly: true - subPath: dashboards.yaml - - mountPath: /var/lib/grafana/dashboards/mayastor-dashboard.json - name: grafana-config - readOnly: true - subPath: mayastor-dashboard.json - volumes: - - name: grafana-config - configMap: - name: grafana-config - - name: telegraf-config - configMap: - name: telegraf-config ---- -kind: Service -apiVersion: v1 -metadata: - name: grafana - namespace: mayastor -spec: - selector: - app: monitor - type: NodePort - ports: - - protocol: TCP - port: 80 - targetPort: 3000 - nodePort: 30333 diff --git a/deploy/monitor/telegraf-config.yaml b/deploy/monitor/telegraf-config.yaml deleted file mode 100644 index 1c449d251..000000000 --- a/deploy/monitor/telegraf-config.yaml +++ /dev/null @@ -1,78 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: telegraf-config - namespace: mayastor -data: - telegraf.conf: |+ - # Configuration for telegraf agent - [agent] - ## Default data collection interval for all inputs - interval = "5s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. - round_interval = true - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. - metric_batch_size = 1000 - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). - metric_buffer_limit = 10000 - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. - collection_jitter = "0s" - ## Default flushing interval for all outputs. Maximum flush_interval will be - ## flush_interval + flush_jitter - flush_interval = "5s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. - ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s - flush_jitter = "0s" - ## By default or when set to "0s", precision will be set to the same - ## timestamp order as the collection interval, with the maximum being 1s. - ## ie, when interval = "10s", precision will be "1s" - ## when interval = "250ms", precision will be "1ms" - ## Precision will NOT be used for service inputs. It is up to each individual - ## service input to set the timestamp at the appropriate precision. - ## Valid time units are "ns", "us" (or "µs"), "ms", "s". - precision = "" - ## Logging configuration: - ## Run telegraf with debug log messages. - debug = false - ## Run telegraf in quiet mode (error log messages only). - quiet = false - ## Specify the log file name. The empty string means to log to stderr. - logfile = "" - ## Override default hostname, if empty use os.Hostname() - hostname = "" - ## If set to true, do no set the "host" tag in the telegraf agent. - omit_hostname = true - - # Configuration for sending metrics to InfluxDB - [[outputs.influxdb]] - ## The full HTTP or UDP URL for your InfluxDB instance. - urls = ["http://127.0.0.1:8086"] - ## The target database for metrics; will be created as needed. - database = "$INFLUXDB_DB" - timeout = "5s" - ## HTTP Basic Auth - username = "$INFLUXDB_USER" - password = "$INFLUXDB_USER_PASSWORD" - - # Configuration for gathering stats data from mayastor - [[inputs.http]] - name_override = "replica" - urls = ["http://moac:4000/stats"] - timeout = "5s" - data_format = "json" - json_query = "" - tag_keys = ["uuid", "pool", "node"] - json_time_key = "timestamp" - json_time_format = "2006-01-02T15:04:05Z07:00" - # default is UTC - json_timezone = "" diff --git a/deploy/namespace.yaml b/deploy/namespace.yaml deleted file mode 100644 index 1e426a7eb..000000000 --- a/deploy/namespace.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Namespace -apiVersion: v1 -metadata: - name: mayastor - labels: - name: mayastor diff --git a/deploy/pool.yaml b/deploy/pool.yaml index 6886ad33f..04ad4387d 100644 --- a/deploy/pool.yaml +++ b/deploy/pool.yaml @@ -1,10 +1,14 @@ +--- +# Source: mayastor/templates/pool.yaml apiVersion: "openebs.io/v1alpha1" kind: MayastorPool metadata: - #name: unique - generateName: pool- + # Name can be anything as long as it is unique + name: pool-on-NODE_NAME + # or let k8s to generate a unique pool name + #generateName: pool- namespace: mayastor spec: - node: MYHOSTNAME - disks: ["MYBLOCKDEVICE"] - + node: NODE_NAME + # ATM only one disk device is supported (i.e. /dev/nvme1n1) + disks: ["DEVICE"] diff --git a/deploy/pvc.yaml b/deploy/pvc.yaml index 769ffdad7..92eb66b6a 100644 --- a/deploy/pvc.yaml +++ b/deploy/pvc.yaml @@ -5,7 +5,9 @@ metadata: spec: accessModes: - ReadWriteOnce + # Change to "Block" for raw block device + volumeMode: Filesystem resources: requests: storage: 64Mi - storageClassName: mayastor + storageClassName: mayastor-nvmf diff --git a/doc/build.md b/doc/build.md index b129ed728..80c4ee56e 100644 --- a/doc/build.md +++ b/doc/build.md @@ -1,190 +1,230 @@ -## RUST +# Building Mayastor -We make use of async/await and therefor we *need* a compiler that supports that. We currently -use the nightly compiler. Nightly is required in all of the provided build possibilities listed below +> You will not need to always build Mayastor. We will provide official x86_64 +> & aarch64 binaries and images in future releases. -Build options -================== -- [Building with Nix (recommended)](#Building-the-sources-with-nixpkg) -- [Build inside docker](#Build-inside-docker) -- [Building the hard way](#Build-it-the-hard-way) +Mayastor is a multi-component [Rust][rust-lang] project that makes heavy use use of +[Nix][nix-explore] for our development and build process. -## Building the sources with nixpkg +If you're coming from a non-Rust (or non-Nix) background, **building Mayastor may be a bit +different than you're used to.** There is no `Makefile`, you won't need a build toolchain, +you won't need to worry about cross compiler toolchains, and all builds are reproducable. -As the underlaying distribution you can use nixos or any other linux -distribution if you install a nix package manager on top of it. -Example of nixos system configuration `/etc/nixos/configuration.nix` -suitable for a dev box: +## Table of Contents -```nix -{ config, pkgs, ... }: - -{ - imports = - [ # Include the results of the hardware scan. - ./hardware-configuration.nix - ]; - boot.loader.grub.enable = true; - boot.loader.grub.version = 2; - boot.loader.grub.device = "/dev/sda"; # or whatever is appropriate - boot.kernelModules = ["nbd" "xfs" "nvme_tcp"]; - boot.kernelParams = ["hugepages=512" "hugepagesz=2MB"]; - services.openssh.enable = true; - virtualisation.docker.enable = true; - users.users.your_username = { - isNormalUser = true; - extraGroups = [ "wheel" "docker" ]; - }; - system.stateVersion = "19.03"; - security.sudo.enable = true; - security.sudo.wheelNeedsPassword = false; -} -``` +* [Prerequisites](#Prerequisites) +* [Iterative Builds](#Iterative-Builds) +* [Artifacts](#Artifacts) + +## Prerequisites + +Mayastor **only** builds on modern Linuxes. We'd adore contributions to add support for +Windows, FreeBSD, OpenWRT, or other server platforms. + +If you do not have Linux system: + +* **Windows:** We recommend using [WSL2][windows-wsl2] if you only need to + build Mayastor. You'll need a [Hyper-V VM][windows-hyperv] if you want to use it. +* **Mac:** We recommend you use [Docker for Mac][docker-install] + and follow the Docker process described. Please let us know if you find a way to + run it! +* **FreeBSD:** We *think* this might actually work, SPDK is compatible! But, we haven't + tried it yet. +* **Others:** This is kind of a "Do-it-yourself" situation. Sorry, we can't be more help! -Installation of a [nix package manager](https://nixos.org/nix/download.html) on -other distros: +The only thing your system needs to build Mayastor is [**Nix**][nix-install]. + +Usually [Nix][nix-install] can be installed via (Do **not** use `sudo`!): ```bash curl -L https://nixos.org/nix/install | sh ``` -We have provided a `shell.nix` file that can be used to build and compile -MayaStor from source without impacting your system. -Follow the short instruction and you should be all set! +> **Can't install Nix?** +> +> That's totally fine. You can use [`docker`][docker-install] just fine for one-off or occasional PRs! +> +> This flow will get you a pre-fetched `nix` store: +> ```bash +> docker run --name mayastor-nix-prefetch -it -v $(pwd):/scratch:rw --privileged --workdir /scratch nixos/nix nix-shell --run "exit 0" +> docker commit mayastor-nix-prefetch mayastor/dev-env:latest +> docker rm mayastor-nix-prefetch +> docker run --rm -it -v $(pwd):/scratch:rw --workdir /scratch mayastor/dev-env:latest nix-shell +> ``` +> +> To re-enter, just run the last command again. + + +* Some of our team uses [NixOS][nixos] which has `nix` baked in, but you don't need to. +* Some of our team uses [`direnv][direnv], but you don't need to. + +For some tasks, we use features from `nixUnstable`. You can use `nixos-unstable` +**(or `nixpkgs-unstable` for `nix` users)** by [changing your channel][nix-channel]. + +First, setting the following: + +```nix +{ pkgs, ... }: { + nix.extraOptions = '' + experimental-features = nix-command flakes + ''; + nix.package = pkgs.nixUnstable; +} +``` + +Then, updating the channel: ```bash -cd MayaStor -nix-shell -git submodule update --init -cargo build --all +$ sudo nix-channel --list +nixos https://nixos.org/channels/nixos-20.09 +$ sudo nix-channel --remove nixos +$ sudo nix-channel --add https://nixos.org/channels/nixos-unstable nixos +$ sudo nixos-rebuild switch --update ``` -Binaries will be installed in `$(CURDIR)/target/release` after running the build you can use -`$(CURDIR)/target/release/mayastor-client` to create a Nexus. +> If you don't want, you can drop into a +`nixUnstable` supporting shell with: +> +> ```bash +> nix-shell -I nixpkgs=channel:nixpkgs-unstable -p nixUnstable --command "nix --experimental-features 'nix-command flakes' develop -f . mayastor" +> ``` +> +> Don't want to use `nixUnstable`? **That's ok!** Use `nix-shell` and `nix-build` as you normally would. + +**Want to run or hack on Mayastor?** *You need more configuration!* See +[running][doc-run], then [testing][doc-test]. -## Build inside docker -This is most close to the environment used for building and testing of -MayaStor in our github CI/CD pipeline. But could be used for other purposes -as well. The following command starts a container with build environment -for MayaStor. The sources are mounted from the host at /code/MayaStor. -That is optional. You can clone the sources from github instead. +You can use a tool like [`direnv`][direnv] to automate `nix shell` entry. + +## Iterative Builds + +Contributors often build Mayastor repeatedly during the development process. +Using [`nix develop`][nix-develop] to enter a more persistent development shell can help improve +iteration time: ```bash -docker run -it --privileged -v /dev:/dev:rw -v /dev/shm:/dev/shm:rw \ - -v /dev/hugepages:/dev/hugepages:rw --network host \ - -v /code/MayaStor:/MayaStor mayadata/ms-buildenv:latest /bin/sh +nix develop -f . mayastor ``` -Docker image is essentially just a nixos image with a couple of -utilities and pre-built mayastor dependencies. You can just -enter the nix shell and build the mayastor. +Once entered, you can start any tooling (eg `code .`) to ensure the correct resources are available. +The project can then be interacted with like any other Rust project. + +Building: ```bash -cd MayaStor -nix-shell -git submodule update --init -cargo build --all +cargo build +cargo build --release ``` -After that you should be able to start MayaStor: +**Want to run or hack on Mayastor?** *You need more configuration!* See +[running][doc-running], then [testing][doc-testing]. + +## Artifacts + +There are a few ways to build Mayastor! If you're hacking on Mayastor, it's best to use +[`nix develop`][nix-develop] (above) then turn to traditional Rust tools. If you're looking for releases, use [`nix build`][nix-build] or [`nix bundle`][nix-bundle] depending on your needs. + +> **Why is the build process this way?** +> +> Mayastor creates [*reproducable builds*][reproducable-builds], it won't use any of your +> local system dependencies (other than `nix`). This is a component of the best practices of the +> [Core Infrastructure Initiative][cii-best-practices]. More on how Nix works can be found in the +> [Nix paper][nix-paper]. +### Building non-portable Nix derivations + +You can build release binaries of Mayastor with [`nix build`](nix-build): ```bash -$ ./target/debug/mayastor -main.rs: 28:: *NOTICE*: free_pages: 658 nr_pages: 1024 -Starting SPDK v19.07 / DPDK 19.05.0 initialization... -[ DPDK EAL parameters: MayaStor --no-shconf -c 0x1 --log-level=lib.eal:6 --log-level=lib.cryptodev:5 --log-level=user1:6 --base-virtaddr=0x200000000000 --match-allocations --file-prefix=spdk_pid57086 ] -app.c: 627:spdk_app_start: *NOTICE*: Total cores available: 1 -reactor.c: 251:_spdk_reactor_run: *NOTICE*: Reactor started on core 0 -nexus_module.rs: 105:: *NOTICE*: Initializing Nexus CAS Module -cryptodev_aesni_mb_create() line 1304: IPSec Multi-buffer library version used: 0.52.0 - -executor.rs: 94:: *INFO*: Started future executor on thread ThreadId(1) -iscsi_target.rs: 85:: *INFO*: Created default iscsi portal group -iscsi_target.rs: 100:: *INFO*: Created default iscsi initiator group -nvmf_target.rs: 294:: *NOTICE*: Created nvmf target at 127.0.0.1:4401 -tcp.c: 535:spdk_nvmf_tcp_create: *NOTICE*: *** TCP Transport Init *** -nvmf_target.rs: 344:: *NOTICE*: Added tcp nvmf transport 127.0.0.1:4401 -tcp.c: 730:spdk_nvmf_tcp_listen: *NOTICE*: *** NVMe/TCP Target Listening on 127.0.0.1 port 4401 *** -nvmf_target.rs: 364:: *NOTICE*: nvmf target listens on 127.0.0.1:4401 -nvmf_target.rs: 415:: *NOTICE*: nvmf target 127.0.0.1:4401 accepts new connections -main.rs: 31:: *NOTICE*: MayaStor started (fcaf10b-modified)... +for PKG in moac mayastor; do + echo "Building ${PKG} to artifacts/pkgs/${PKG}"; \ + nix build -f . -o artifacts/pkgs/${PKG} ${PKG}; +done +``` + +Try them as if they were installed: + +```rust +nix shell -f . moac mayastor ``` -Feel free to change the [DockerFile](../Dockerfile) to your convenience. +### Building portable Nix bundles -### Justifications for the volume mounts: +In order to make an artifact which can be distributed, we use [`nix bundle`][nix-bundle]. -- `/dev` is needed to get access to any raw device you might want to consume as local storage and huge pages -- `/dev/shm` is needed as for a circular buffer that can trace any IO operations as they happen -- `--network host` is needed because we dont not use virtual networks (to reduce latency impact) -- `/code/MayaStor` the host path to your checked out source code +> **TODO:** We currently don't generate bundles some executables, such as +> `mayastor-client`. This is coming. -## Running tests within the container +```bash +for BUNDLE in mayastor; do + echo "Bundling ${BUNDLE} to artifacts/bundle/${BUNDLE}"; \ + nix bundle -f . -o artifacts/bundles/${BUNDLE} ${BUNDLE}; +done +``` -If you wish to run some of our higher-level test cases (like for example CSI), you need to make sure you have the -proper kernel modules loaded (nbd, xfs and nvme_tcp) as well as allocate at least some 2MB hugepages. +Test them: ```bash -modprobe {nbd,xfs,nvme_tcp} -echo 512 | sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages +for FILE in artifacts/bundles/*; do + echo "Testing bundle ${FILE}..." + ${FILE} --version +done ``` -Then, for example: +### Building Docker images + +Build the Docker images with [`nix build`](nix-build): ```bash -root@gilakubuntu:/MayaStor/test/grpc# ./node_modules/mocha/bin/mocha test_csi.js - csi - identity - ✓ probe - ✓ get plugin info - ✓ get plugin capabilities - node - ✓ get info - ✓ get capabilities - stage and unstage xfs volume - ✓ should be able to stage volume (69ms) - ✓ get volume stats (62ms) - ✓ staging the same volume again should return ok (idempotent) - ✓ staging a volume with a non existing bdev should fail with Internal Error (57ms) - ✓ staging a volume with the same staging path but with a different bdev should fail - ✓ should fail to stage a volume with the bdev using a different target path - ✓ should not unstage a volume with an unknown volumeid and return NOTFOUND error (51ms) - ✓ should fail to stage a volume with a missing volume ID - ✓ should fail to stage a volume with a missing stage target path - ✓ should fail to stage a volume with a missing access type - ✓ should fail to stage a volume with a missing accces mode - ✓ should fail to stage a volume with missing volume_capability section - ✓ should be able to unstage volume - stage and unstage ext4 volume - ✓ should be able to stage volume (59ms) - ✓ should be able to unstage volume (38ms) - stage misc - ✓ should fail to stage unsupported fs - ✓ should clean up nbd dev upon mount failure (47ms) - publish and unpublish - MULTI_NODE_READER_ONLY staged volume - ✓ should publish a volume in ro mode and test it is idempotent op - ✓ should fail when re-publishing with a different staging path - ✓ should fail with a missing target path - ✓ should fail to publish the volume as rw - ✓ should be able to unpublish ro volume (56ms) - ✓ should be able to unpublish rw volume - MULTI_NODE_SINGLE_WRITER staged volume - ✓ should publish ro volume - ✓ should publish rw volume (58ms) - ✓ should be able to unpublish ro volume - ✓ should be able to unpublish rw volume +for IMAGE in \ + moac mayastor-client mayastor mayastor-csi moac mayastor-client kiiss-service \ + node-service volume-service pool-service rest-service node-operator; \ +do + echo "Building ${IMAGE} to artifacts/docker/${IMAGE}.tar"; \ + nix build -f . -o artifacts/docker/${IMAGE}.tar images.${IMAGE}; +done +``` + +**Optionally,** the generated Docker images will **not** tag to `latest`. You may wish to do that if +you want to run them locally: +```bash +for FILE in artifacts/docker/*.tar; do + echo "Loading ${FILE}..." + docker load --quiet --input ${FILE} \ + | awk '{ print $3 }' \ + | ( \ + read IMAGE; \ + LATEST=$(echo ${IMAGE} | awk '{split($0,a,":"); print a[1]}'):latest; \ + echo "Tagging ${IMAGE} to ${LATEST} (from ${FILE})."; \ + docker tag ${IMAGE} ${LATEST}; \ + ); +done ``` -If you wish to run the MayaStor data path tests, make sure you specify `test-threads=1` +Then, to test the images: ```bash -cargo test -- --test-threads=1 +for FILE in artifacts/docker/*.tar; do + echo "Loading ${FILE}..." + docker load --quiet --input ${FILE} \ + | awk '{ print $3 }' \ + | ( \ + read IMAGE; \ + echo "Testing ${IMAGE} (from ${FILE})."; \ + docker run --rm --interactive ${IMAGE} --version; \ + docker rmi ${IMAGE} > /dev/null + ); +done ``` -## Build it the hard way +### Building KVM images + +> **TODO:** We're still writing this! Sorry! + +### Building Artifacts the Hard Way + +> This isn't really the 'hard way', you'll still use `cargo`. When you really want to build everything manually, the biggest hurdle to overcome is to install the SPDK/DPDK. As these are not packaged (or not recent) by most distro's its a manual step. We have provided scripts to make this as easy as @@ -196,8 +236,8 @@ The basic steps are: git submodule update --init --recursive sudo ./spdk-sys/spdk/scripts/pkgdep ./spdk-sys/build.sh --enable-debug --without-isal --with-iscsi-initiator --with-rdma \ - --with-internal-vhost-lib --disable-tests \ - --with-crypto + --with-internal-vhost-lib --disable-tests \ + --with-crypto ``` At this point you will have a .so file in `spdk-sys/build` you can leave it there and set the run path flag for rustc to find it: @@ -209,50 +249,22 @@ Or, you can copy over the .so to `/usr/local/lib` or something similar. One this is done, you should be able to run `cargo build --all` -## Building docker images - -Use NIX to build the images. Note that the images are based on NIX packages -that are built as part of building the image. The tag of the image will be -short commit hash of the top-most commit or a tag name of the commit if -present. Example of building a moac package: - -```bash -nix-build -A images.moac-image -``` - -At the end of the build is printed path to docker image tar archive. Load the -image into Docker (don't use _import_ command) and run bash to poke around: - -```bash -docker load -i /nix/store/hash-docker-image-moac.tar.gz -docker run --rm -it image-hash /bin/bash -``` - -Mayastor and csi plugin images can have multiple flavours. Production image -name does not contain the flavour name (i.e. `mayastor-image`). Debug image -contains the `dev` in its name (i.e. `mayastor-dev-image`). Mayastor package -has additional flavour called `adhoc` (`mayastor-adhoc-image`), that is handy -for testing because it is not based on mayastor package but rather on whatever -binaries are present in `target/debug` directory. - -## Some background information - -MayaStor makes use of subsystems that are not yet part of major distributions, for example: - - - nvme-tcp initiator (Linux 5.x and newer) - - Recent DPDK version (i.e 19.x) - -Fortunately, this is something that will be solved over time automatically. In the meantime, we have -tried to make it as simple as possible by providing several options for you. - -Mayastor, in all cases, **requires the nightly rust compiler with async support**. -You don't need to have a 5.x kernel unless you want to use NVMF. - -If you already have rust installed but not nightly, use rustup to install it before continuing. - -### spdk-sys - -The crate that provides the glue between SPDK and Mayastor is hosted in this [repo](https://github.com/openebs/spdk-sys) -feel free to go through it and determine if you want to install libspdk using those instructions or directly from -[here](https://github.com/openebs/spdk). If you chose either of these methods, make sure you install such that -during linking, it can be found. +[doc-run]: ./run.md +[doc-test]: ./test.md +[direnv]: https://direnv.net/ +[nix-explore]: https://nixos.org/explore.html +[nix-install]: https://nixos.org/download.html +[nix-develop]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-develop.html +[nix-paper]: https://edolstra.github.io/pubs/nixos-jfp-final.pdf +[nix-build]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-build.html +[nix-bundle]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-bundle.html +[nix-shell]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-shell.html +[nix-channel]: https://nixos.wiki/wiki/Nix_channels +[nixos]: https://nixos.org/ +[rust-lang]: https://www.rust-lang.org/ +[windows-wsl2]: https://wiki.ubuntu.com/WSL#Ubuntu_on_WSL +[windows-hyperv]: https://wiki.ubuntu.com/Hyper-V +[docker-install]: https://docs.docker.com/get-docker/ +[reproducable-builds]: https://reproducible-builds.org/ +[cii-best-practices]: https://www.coreinfrastructure.org/programs/best-practices-program/ +[direnv]: https://direnv.net/ diff --git a/doc/contribute.md b/doc/contribute.md deleted file mode 100644 index f9f844399..000000000 --- a/doc/contribute.md +++ /dev/null @@ -1,25 +0,0 @@ -## How to contribute - -## pre-commit - -We make use of pre-commit to run lint and style checks prior to pushing code. - -## Commit messages - -Try to keep the first line short and refer to a part that you are making changes to. For example: - -```bash -rpc: Want support for creating snaphots -``` - -or - -```bash -nexus: add metadata for resuming rebuild -``` - -Followed by a longer explanation. - -## Bors - -We are using bors bot to automate testing and merging of PRs in scalable way. diff --git a/doc/run.md b/doc/run.md new file mode 100644 index 000000000..59d375f9b --- /dev/null +++ b/doc/run.md @@ -0,0 +1,276 @@ +# Running Mayastor + +## Hard Requirements + +Mayastor supports the following [Instruction Set Architectures (ISA)][isa]: + + + x86_64 (Nehalem or later) + + aarch64 support (**Early access on [`aarch64`][aarch64-branch] -- Use caution**) + +Your system will need several [control groups][control-groups] configured. + +```nix +# /etc/nixos/configuration.nix +boot.kernelParams = [ "cgroup_enable=cpuset" "cgroup_memory=1" "cgroup_enable=memory" ]; +``` + +It will also need at least 512 2 MB Hugepages configured. + +> Learn more about hugepages: [parts 1][hugepages-lwn-one], [2][hugepages-lwn-two], +> [3][hugepages-lwn-three], [4][hugepages-lwn-four], [5][hugepages-lwn-five]. + +In NixOS: + +```nix +# /etc/nixos/configuration.nix +boot.kernelParams = [ "hugepagez=2M" "hugepages=4096" ]; +boot.kernel.sysctl = { + "vm.hugetlb_shm_group" = "6969"; +}; +systemd.mounts = [ + # disable mounting hugepages by systemd, + # it doesn't know about 1G pagesize + { where = "/dev/hugepages"; + enable = false; + } + { where = "/dev/hugepages/hugepages-2MB"; + enable = true; + what = "hugetlbfs"; + type = "hugetlbfs"; + options = "pagesize=2M,gid=6969,mode=0775"; + requiredBy = [ "basic.target" ]; + } + { where = "/dev/hugepages/hugepages-1G"; + enable = true; + what = "hugetlbfs"; + type = "hugetlbfs"; + options = "pagesize=1G,gid=6969,mode=0775"; + requiredBy = [ "basic.target" ]; + } +]; +users = { + users.${MAYASTOR_USER} = { + # ... + extraGroups = [ /* ... */ "hugepages" ]; + }; + groups = { + hugepages = { + gid = 6969; # Any ID works. + }; + }; +}; +``` + +**If you changed any of these, reboot after.** + +## Optional Prerequisites + +In order to use the full feature set of Mayastor, some or all of the following can be met: + +* A Linux Kernel 5.1+ (with [`io-uring`][io_uring-intro] support) +* The following kernel modules loaded: + + `nbd`: Network Block Device support + + `nvmet`: NVMe Target support + + `nvmet_rdma`: NVMe Target (rDMA) support + + `nvme_fabrics`: NVMe over Fabric support + + `nvme_tcp`: NVMe over TCP support + + `nvme_rdma`: NVMe (rDMA) support + + `nvme_loop`: NVMe Loop Device support + + To load these on NixOS: + + ```nix + # /etc/nixos/configuration.nix + boot.kernelModules = [ + "nbd" "xfs" "nvmet" "nvme_fabrics" "nvmet_rdma" "nvme_tcp" "nvme_rdma" "nvme_loop" + ]; + ``` + + To load these on non-NixOS machines: + + ```bash + modprobe nbd nvmet nvmet_rdma nvme_fabrics nvme_tcp nvme_rdma nvme_loop + ``` +* An NVMe device. (Typically via PCI-E through an standard slot or [M.2][m-dot-2] port) +* A version of [`nix`][nix-install] configured as in the [build guide.][doc-build] + +## Running binaries directly + +Like in [the build guide's *Iterative Build*][doc-build-iterative-builds] section invoke +[`nix shell`][nix-shell]. This time, don't pass `--derivation`, this will cause `nix` to +evaluate the output, instead of the derivation. + +```bash +nix shell -f . mayastor moac +mayastor-client --version +mayastor --version +``` + +Running `mayastor` should proceed without issue: + +```bash +❯ mayastor +[2021-01-28T14:40:30.659032358-08:00 INFO mayastor:mayastor.rs:46] Starting Mayastor .. +[2021-01-28T14:40:30.659087313-08:00 INFO mayastor:mayastor.rs:47] kernel io_uring support: yes +[2021-01-28T14:40:30.659108924-08:00 INFO mayastor:mayastor.rs:51] free_pages: 4096 nr_pages: 4096 +[2021-01-28T14:40:30.659132238-08:00 INFO mayastor::subsys::config:mod.rs:361] Applying Mayastor configuration settings +# ... +[2021-01-28T14:40:30.902100398-08:00 INFO mayastor:mayastor.rs:59] Mayastor started 🚀 ... +# ... +``` + +## Running Docker images directly + +[**After building the images**][doc-build-building-docker-images], load them: + +```bash +for FILE in artifacts/docker/*.tar; do + echo "Loading ${FILE}..." + docker load --quiet --input ${FILE} +done +``` + +**Notice the generated tags.** The [build guide][doc-build-building-docker-images] shows how to +retag these to `latest` if required. If you forget, check `docker images`. + +**`mayastor-client`** and **`mayastor-csi`**: + +```bash +docker run --interactive --rm mayadata/mayastor-client:${TAG} +docker run --interactive --rm mayadata/mayastor-csi:${TAG} +``` + +**`mayastor`** requires some special parameters: + +```bash +docker run \ + --rm \ + --interactive \ + --privileged \ + --volume /dev:/dev:rw \ + --volume /dev/shm:/dev/shm:rw \ + --volume /dev/hugepages:/dev/hugepages:rw \ + --network host \ + mayadata/mayastor:${TAG} +``` + +Why these parameters? +- `--privileged` to allow controlling memory policies. + + > **TODO:** We can use [control groups][control-groups] for this! +- `-v /dev:/dev:rw` is needed to get access to any raw device you might want to consume as local + storage and huge pages +- `-v /dev/shm:/dev/shm:rw` is needed as for a circular buffer that can trace any IO operations + as they happen +- `--network host` to bypass virtual networks which might impact latency. + +The **`moac`** container should not be directly launched. + +## Running as a systemd service + +> **TODO:** Mayastor currently depends on [Mozilla's Rust Overlay][mozilla-rust-overlay], +> so these instructions don't work, yet! + +On NixOS: + +```nix +# /etc/nixos/configuration.nix +nixpkgs.overlays = [ + (import "${(builtins.fetchGit { + url = "https://github.com/openebs/Mayastor.git"; + ref = "master"; + rev = "a9fc77f2ae30e909244556fc797451931dab3dd5"; + }).outPath}/nix/mayastor-overlay.nix") +]; + +systemd.services.mayastor = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + description = "A cloud native declarative data plane."; + serviceConfig = { + Type = "forking"; + User = "nobody"; + ExecStart = "${pkgs.mayastor}/bin/mayastor"; + AmbientCapabilities = "CAP_SETPCAP CAP_SYS_ADMIN CAP_IPC_LOCK CAP_SYS_NICE"; + }; +}; +``` + +On a non-NixOS system, create the following, and point `ExecStart` at your [`mayastor` +bundle][doc-build-building-portable-nix-bundles]: + +```systemd +# /etc/systemd/user/mayastor.service +[Unit] +After=network.target +Description=A cloud native declarative data plane. + +[Service] +AmbientCapabilities=CAP_SETPCAP CAP_SYS_ADMIN CAP_IPC_LOCK CAP_SYS_NICE +ExecStart=/usr/bin/mayastor +Type=simple +User=nobody +``` + +## Running as a NixOS Service + +**TODO:** This is not supported yet! This is an aspiration: + +```nix +# /etc/nixos/configuration.nix +services.mayastor.enable = true; +services.mayastor.grpc-bind = "0.0.0.0:10124"; +services.mayastor.config = /etc/mayastor/config.yml; +services.mayastor.huge-dir = /dev/hugepages/; +# Etc... +``` + +## Running in a VM + +> **TODO:** We're still writing this! Sorry! Let us know if you want us to prioritize this! + +## Running on a scratch Kubernetes cluster + +First, get your k8s cluster working, our team likes to use the scripts in `terraform/` for this. +That will use [`lxd`][lxd] and [`libvirtd`][libvirtd]. See [`terraform/README`][terraform-readme] +for detailed instructions. + +Otherwise, you can follow [the production guide][running-on-a-real-kubernetes-cluster] on your favourite. + +Our testing showed some common Kubernetes development environments are not sufficient for +Mayastor development. + +Here are the ones known to not work: + +* [`kind`][kind] + + +## Running on a real Kubernetes cluster + +We have a [production deployment manual & user manual][manual] prepared. Please refer to that for +production Mayastor deployment and operation instructions. + +[running-on-a-real-kubernetes-cluster]: #running-on-a-real-kubernetes-cluster +[doc-build]: ./build.md +[doc-build-iterative-builds]: ./build.md#Iterative-Builds +[doc-build-building-docker-images]: ./build.md#Building-Docker-images +[doc-build-building-portable-nix-bundles]: ./build.md#Building-portable-Nix-bundles +[doc-test]: ./test.md +[io_uring-intro]: https://unixism.net/loti/what_is_io_uring.html +[hugepages-lwn-one]: https://lwn.net/Articles/374424/ +[hugepages-lwn-two]: https://lwn.net/Articles/375096/ +[hugepages-lwn-three]: https://lwn.net/Articles/376606/ +[hugepages-lwn-four]: https://lwn.net/Articles/378641/ +[hugepages-lwn-five]: https://lwn.net/Articles/379748/ +[m-dot-2]: https://en.wikipedia.org/wiki/M.2 +[nix-install]: https://nixos.org/download.html +[nix-shell]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-shell.html +[control-groups]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/cgroups.html +[mozilla-rust-overlay]: https://github.com/mozilla/nixpkgs-mozilla/blob/master/rust-overlay.nix +[isa]: https://en.wikipedia.org/wiki/Instruction_set_architecture +[kind]: https://kind.sigs.k8s.io/ +[manual]: https://mayastor.gitbook.io/ +[lxd]: https://linuxcontainers.org/ +[libvirtd]: https://libvirt.org/index.html +[terraform-readme]: ./terraform/readme.adoc +[aarch64-branch]: \ No newline at end of file diff --git a/doc/test.md b/doc/test.md new file mode 100644 index 000000000..fd1ca759b --- /dev/null +++ b/doc/test.md @@ -0,0 +1,60 @@ +# Testing Mayastor + +In order to test Mayastor, you'll need to be able to [**run Mayastor**][doc-run], follow that guide for persistent hugepages & kernel module setup. + +Or, for ad-hoc: + +* Ensure at least 512 2MB hugepages. + + ```bash + echo 512 | sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + ``` +* Ensure several kernel modules are installed: + + ```bash + modprobe nbd xfs nvmet nvme_fabrics nvmet_rdma nvme_tcp nvme_rdma nvme_loop + ``` + +## Running the test suite + +Mayastor's unit tests, integration tests, and documentation tests via the conventional `cargo test`. + +> **An important note**: Mayastor tests need to run on the host with [`SYS_ADMIN` capabilities][sys-admin-capabilities]. +> +> You can see in `mayastor/.cargo/config` we override the test runner to execute as root, take this capability, then drop privileges. + +Mayastor uses [spdk][spdk] which is quite senistive to threading. This means tests need to run one at a time: + +```bash +cargo test -- --test-threads 1 +``` + + +## Running the end-to-end test suite + +Mayastor does more complete, end-to-end testing testing with [`mocha`][mocha]. It requires some extra setup. + +> **TODO:** We're still writing this! Sorry! Let us know if you want us to prioritize this! + + +## Running the gRPC test suite + +There is a bit of extra setup to the gRPC tests, you need to set up the node modules. + +To prepare: + +```bash +cd test/grpc/ +npm install +``` + +Then, to run the tests: + +```bash +./node_modules/mocha/bin/mocha test_csi.js +``` + +[spdk]: https://spdk.io/ +[doc-run]: ./run.md +[mocha]: https://mochajs.org/ +[sys-admin-capabilities]: https://man7.org/linux/man-pages/man7/capabilities.7.html \ No newline at end of file diff --git a/mayastor/.cargo/config b/mayastor/.cargo/config index ba5922101..3cce93847 100644 --- a/mayastor/.cargo/config +++ b/mayastor/.cargo/config @@ -1,4 +1,4 @@ # we need elevated privileges to run mayastor related tests # cargo will ask you nicely to type your password [target.x86_64-unknown-linux-gnu] -runner = 'sudo -E' +runner = ".cargo/runner.sh" diff --git a/mayastor/.cargo/runner.sh b/mayastor/.cargo/runner.sh new file mode 100755 index 000000000..5ae299df6 --- /dev/null +++ b/mayastor/.cargo/runner.sh @@ -0,0 +1,20 @@ +#! /usr/bin/env bash + +# Grab the arguments passed to the runner. +ARGS="${@}" + +if [[ $EUID -ne 0 ]]; then + MAYBE_SUDO='sudo -E' +else + MAYBE_SUDO='' +fi + +# Elevate to sudo so we can set some capabilities via `capsh`, then execute the args with the required capabilities: +# +# * Set `cap_setpcap` to be able to set [ambient capabilities](https://lwn.net/Articles/636533/) which can be inherited +# by children. +# * Set `cap_sys_admin,cap_ipc_lock,cap_sys_nice` as they are required by `mayastor`. +${MAYBE_SUDO} capsh \ + --caps="cap_setpcap+iep cap_sys_admin,cap_ipc_lock,cap_sys_nice+iep" \ + --addamb=cap_sys_admin --addamb=cap_ipc_lock --addamb=cap_sys_nice \ + -- -c "${ARGS}" diff --git a/mayastor/Cargo.toml b/mayastor/Cargo.toml index 4b6c0fc0b..11f6f7a50 100644 --- a/mayastor/Cargo.toml +++ b/mayastor/Cargo.toml @@ -3,10 +3,11 @@ authors = ["Jeffry Molanus "] edition = "2018" name = "mayastor" version = "0.1.0" +default-run = "mayastor-client" [[bin]] name = "mayastor" -path = "src/bin/main.rs" +path = "src/bin/mayastor.rs" [[bin]] name = "spdk" @@ -22,7 +23,7 @@ path = "src/bin/uring-support.rs" [[bin]] name = "mayastor-client" -path = "src/bin/cli/cli.rs" +path = "src/bin/mayastor-client/main.rs" [[bin]] name = "jsonrpc" @@ -82,7 +83,7 @@ smol = "1.0.0" dns-lookup = "1.0.4" ipnetwork = "0.17.0" bollard = "0.8.0" -mbus_api = { path = "../mbus-api" } +mbus_api = { path = "../control-plane/mbus-api" } nvmeadm = {path = "../nvmeadm", version = "0.1.0"} [dependencies.rpc] diff --git a/mayastor/src/bdev/nexus/mod.rs b/mayastor/src/bdev/nexus/mod.rs index 5c35d365a..712b274bd 100644 --- a/mayastor/src/bdev/nexus/mod.rs +++ b/mayastor/src/bdev/nexus/mod.rs @@ -59,3 +59,15 @@ pub fn nexus_instance_new(name: String, size: u64, children: Vec) { let list = instances(); list.push(Nexus::new(&name, size, None, Some(&children))); } + +/// called during shutdown so that all nexus children are in Destroying state +/// so that a possible remove event from SPDK also results in bdev removal +pub async fn nexus_children_to_destroying_state() { + info!("setting all nexus children to destroying state..."); + for nexus in instances() { + for child in nexus.children.iter() { + child.set_state(nexus_child::ChildState::Destroying); + } + } + info!("set all nexus children to destroying state"); +} diff --git a/mayastor/src/bdev/nexus/nexus_child.rs b/mayastor/src/bdev/nexus/nexus_child.rs index 0cbdefd8a..e05fe1995 100644 --- a/mayastor/src/bdev/nexus/nexus_child.rs +++ b/mayastor/src/bdev/nexus/nexus_child.rs @@ -32,6 +32,8 @@ pub enum ChildError { ChildNotClosed {}, #[snafu(display("Child is faulted, it cannot be reopened"))] ChildFaulted {}, + #[snafu(display("Child is being destroyed"))] + ChildBeingDestroyed {}, #[snafu(display( "Child is smaller than parent {} vs {}", child_size, @@ -99,6 +101,8 @@ pub enum ChildState { ConfigInvalid, /// the child is open for RW Open, + /// the child is being destroyed + Destroying, /// the child has been closed by the nexus Closed, /// the child is faulted @@ -112,6 +116,7 @@ impl Display for ChildState { Self::Init => write!(f, "Init"), Self::ConfigInvalid => write!(f, "Config parameters are invalid"), Self::Open => write!(f, "Child is open"), + Self::Destroying => write!(f, "Child is being destroyed"), Self::Closed => write!(f, "Closed"), } } @@ -132,6 +137,9 @@ pub struct NexusChild { /// current state of the child #[serde(skip_serializing)] pub state: AtomicCell, + /// previous state of the child + #[serde(skip_serializing)] + pub prev_state: AtomicCell, /// record of most-recent IO errors #[serde(skip_serializing)] pub(crate) err_store: Option, @@ -159,15 +167,15 @@ impl Display for NexusChild { impl NexusChild { pub(crate) fn set_state(&self, state: ChildState) { + let prev_state = self.state.swap(state); + self.prev_state.store(prev_state); trace!( "{}: child {}: state change from {} to {}", self.parent, self.name, - self.state.load().to_string(), + prev_state.to_string(), state.to_string(), ); - - self.state.store(state); } /// Open the child in RW mode and claim the device to be ours. If the child @@ -179,6 +187,7 @@ impl NexusChild { /// A child can only be opened if: /// - it's not faulted /// - it's not already opened + /// - it's not being destroyed pub(crate) fn open( &mut self, parent_size: u64, @@ -201,6 +210,13 @@ impl NexusChild { info!("called open on an already opened child"); return Ok(self.name.clone()); } + ChildState::Destroying => { + error!( + "{}: cannot open child {} being destroyed", + self.parent, self.name + ); + return Err(ChildError::ChildBeingDestroyed {}); + } _ => {} } @@ -353,7 +369,10 @@ impl NexusChild { // Only wait for bdev removal if the child has been initialised. // An uninitialized child won't have an underlying bdev. - if self.state.load() != ChildState::Init { + // Also check previous state as remove event may not have occurred + if self.state.load() != ChildState::Init + && self.prev_state.load() != ChildState::Init + { self.remove_channel.1.next().await; } @@ -369,11 +388,18 @@ impl NexusChild { pub(crate) fn remove(&mut self) { info!("Removing child {}", self.name); - // The bdev is being removed, so ensure we don't use it again. - self.bdev = None; + let mut state = self.state(); - let state = self.state(); + let mut destroying = false; + // Only remove the bdev if the child is being destroyed instead of + // a hot remove event + if state == ChildState::Destroying { + // The bdev is being removed, so ensure we don't use it again. + self.bdev = None; + destroying = true; + state = self.prev_state.load(); + } match state { ChildState::Open | Faulted(Reason::OutOfSync) => { // Change the state of the child to ensure it is taken out of @@ -381,11 +407,20 @@ impl NexusChild { self.set_state(ChildState::Closed) } // leave the state into whatever we found it as - _ => {} + _ => { + if destroying { + // Restore the previous state + info!( + "Restoring previous child state {}", + state.to_string() + ); + self.set_state(state); + } + } } // Remove the child from the I/O path. If we had an IO error the bdev, - // the channels where already reconfigured so we dont have to do + // the channels were already reconfigured so we don't have to do // that twice. if state != ChildState::Faulted(Reason::IoError) { let nexus_name = self.parent.clone(); @@ -397,10 +432,11 @@ impl NexusChild { }); } - // Dropping the last descriptor results in the bdev being removed. - // This must be performed in this function. - let desc = self.desc.take(); - drop(desc); + if destroying { + // Dropping the last descriptor results in the bdev being removed. + // This must be performed in this function. + self.desc.take(); + } self.remove_complete(); info!("Child {} removed", self.name); @@ -428,6 +464,7 @@ impl NexusChild { parent, desc: None, state: AtomicCell::new(ChildState::Init), + prev_state: AtomicCell::new(ChildState::Init), err_store: None, remove_channel: mpsc::channel(0), } @@ -436,7 +473,8 @@ impl NexusChild { /// destroy the child bdev pub(crate) async fn destroy(&self) -> Result<(), NexusBdevError> { trace!("destroying child {:?}", self); - if let Some(_bdev) = &self.bdev { + if self.bdev.is_some() { + self.set_state(ChildState::Destroying); bdev_destroy(&self.name).await } else { warn!("Destroy child without bdev"); diff --git a/mayastor/src/bin/cli/bdev_cli.rs b/mayastor/src/bin/mayastor-client/bdev_cli.rs similarity index 100% rename from mayastor/src/bin/cli/bdev_cli.rs rename to mayastor/src/bin/mayastor-client/bdev_cli.rs diff --git a/mayastor/src/bin/cli/context.rs b/mayastor/src/bin/mayastor-client/context.rs similarity index 100% rename from mayastor/src/bin/cli/context.rs rename to mayastor/src/bin/mayastor-client/context.rs diff --git a/mayastor/src/bin/cli/device_cli.rs b/mayastor/src/bin/mayastor-client/device_cli.rs similarity index 100% rename from mayastor/src/bin/cli/device_cli.rs rename to mayastor/src/bin/mayastor-client/device_cli.rs diff --git a/mayastor/src/bin/cli/jsonrpc_cli.rs b/mayastor/src/bin/mayastor-client/jsonrpc_cli.rs similarity index 100% rename from mayastor/src/bin/cli/jsonrpc_cli.rs rename to mayastor/src/bin/mayastor-client/jsonrpc_cli.rs diff --git a/mayastor/src/bin/cli/cli.rs b/mayastor/src/bin/mayastor-client/main.rs similarity index 100% rename from mayastor/src/bin/cli/cli.rs rename to mayastor/src/bin/mayastor-client/main.rs diff --git a/mayastor/src/bin/cli/nexus_child_cli.rs b/mayastor/src/bin/mayastor-client/nexus_child_cli.rs similarity index 100% rename from mayastor/src/bin/cli/nexus_child_cli.rs rename to mayastor/src/bin/mayastor-client/nexus_child_cli.rs diff --git a/mayastor/src/bin/cli/nexus_cli.rs b/mayastor/src/bin/mayastor-client/nexus_cli.rs similarity index 100% rename from mayastor/src/bin/cli/nexus_cli.rs rename to mayastor/src/bin/mayastor-client/nexus_cli.rs diff --git a/mayastor/src/bin/cli/perf_cli.rs b/mayastor/src/bin/mayastor-client/perf_cli.rs similarity index 100% rename from mayastor/src/bin/cli/perf_cli.rs rename to mayastor/src/bin/mayastor-client/perf_cli.rs diff --git a/mayastor/src/bin/cli/pool_cli.rs b/mayastor/src/bin/mayastor-client/pool_cli.rs similarity index 100% rename from mayastor/src/bin/cli/pool_cli.rs rename to mayastor/src/bin/mayastor-client/pool_cli.rs diff --git a/mayastor/src/bin/cli/rebuild_cli.rs b/mayastor/src/bin/mayastor-client/rebuild_cli.rs similarity index 100% rename from mayastor/src/bin/cli/rebuild_cli.rs rename to mayastor/src/bin/mayastor-client/rebuild_cli.rs diff --git a/mayastor/src/bin/cli/replica_cli.rs b/mayastor/src/bin/mayastor-client/replica_cli.rs similarity index 100% rename from mayastor/src/bin/cli/replica_cli.rs rename to mayastor/src/bin/mayastor-client/replica_cli.rs diff --git a/mayastor/src/bin/cli/snapshot_cli.rs b/mayastor/src/bin/mayastor-client/snapshot_cli.rs similarity index 100% rename from mayastor/src/bin/cli/snapshot_cli.rs rename to mayastor/src/bin/mayastor-client/snapshot_cli.rs diff --git a/mayastor/src/bin/main.rs b/mayastor/src/bin/mayastor.rs similarity index 100% rename from mayastor/src/bin/main.rs rename to mayastor/src/bin/mayastor.rs diff --git a/mayastor/src/core/bdev.rs b/mayastor/src/core/bdev.rs index 23a7041a6..b123295fc 100644 --- a/mayastor/src/core/bdev.rs +++ b/mayastor/src/core/bdev.rs @@ -161,7 +161,7 @@ impl Bdev { bdev: *mut spdk_bdev, _ctx: *mut c_void, ) { - let bdev = Bdev(NonNull::new(bdev).unwrap()); + let bdev = Bdev::from_ptr(bdev).unwrap(); // Take the appropriate action for the given event type match event { spdk_sys::SPDK_BDEV_EVENT_REMOVE => { @@ -171,9 +171,9 @@ impl Bdev { } } spdk_sys::SPDK_BDEV_EVENT_RESIZE => { - info!("Received resize event for bdev {}", bdev.name()) + warn!("Received resize event for bdev {}", bdev.name()) } - spdk_sys::SPDK_BDEV_EVENT_MEDIA_MANAGEMENT => info!( + spdk_sys::SPDK_BDEV_EVENT_MEDIA_MANAGEMENT => warn!( "Received media management event for bdev {}", bdev.name() ), diff --git a/mayastor/src/core/env.rs b/mayastor/src/core/env.rs index 72703a4c5..e66243f76 100644 --- a/mayastor/src/core/env.rs +++ b/mayastor/src/core/env.rs @@ -40,7 +40,7 @@ use spdk_sys::{ }; use crate::{ - bdev::nexus::nexus_child_status_config::ChildStatusConfig, + bdev::{nexus, nexus::nexus_child_status_config::ChildStatusConfig}, core::{ reactor::{Reactor, ReactorState, Reactors}, Cores, @@ -279,6 +279,7 @@ async fn do_shutdown(arg: *mut c_void) { } iscsi::fini(); + nexus::nexus_children_to_destroying_state().await; unsafe { spdk_rpc_finish(); spdk_subsystem_fini(Some(reactors_stop), arg); diff --git a/mayastor/src/grpc/mayastor_grpc.rs b/mayastor/src/grpc/mayastor_grpc.rs index 525861117..41a400c5e 100644 --- a/mayastor/src/grpc/mayastor_grpc.rs +++ b/mayastor/src/grpc/mayastor_grpc.rs @@ -117,7 +117,7 @@ impl mayastor_server::Mayastor for MayastorSvc { sync_config(pool_grpc::share_replica(args)).await } - #[instrument(level = "debug", err)] + #[instrument(level = "info", err)] async fn create_nexus( &self, request: Request, @@ -128,8 +128,7 @@ impl mayastor_server::Mayastor for MayastorSvc { let name = uuid_to_name(&args.uuid)?; locally! { async move { nexus_create(&name, args.size, Some(&args.uuid), &args.children).await - }} - ; + }}; let nexus = nexus_lookup(&uuid)?; info!("Created nexus {}", uuid); Ok(Response::new(nexus.to_grpc())) diff --git a/mayastor/src/grpc/nexus_grpc.rs b/mayastor/src/grpc/nexus_grpc.rs index f45cbd6cf..693322e6b 100644 --- a/mayastor/src/grpc/nexus_grpc.rs +++ b/mayastor/src/grpc/nexus_grpc.rs @@ -21,6 +21,7 @@ impl From for rpc::ChildState { ChildState::Init => rpc::ChildState::ChildDegraded, ChildState::ConfigInvalid => rpc::ChildState::ChildFaulted, ChildState::Open => rpc::ChildState::ChildOnline, + ChildState::Destroying => rpc::ChildState::ChildDegraded, ChildState::Closed => rpc::ChildState::ChildDegraded, ChildState::Faulted(reason) => match reason { Reason::OutOfSync => rpc::ChildState::ChildDegraded, diff --git a/nix/mayastor-overlay.nix b/nix/mayastor-overlay.nix index d49a99804..e2382a474 100644 --- a/nix/mayastor-overlay.nix +++ b/nix/mayastor-overlay.nix @@ -8,6 +8,7 @@ self: super: { mayastor-adhoc = (super.callPackage ./pkgs/mayastor { }).adhoc; moac = (import ./../csi/moac { pkgs = super; }).package; images = super.callPackage ./pkgs/images { }; + control-plane = super.callPackage ./pkgs/control-plane { }; ms-buildenv = super.callPackage ./pkgs/ms-buildenv { }; mkContainerEnv = super.callPackage ./lib/mkContainerEnv.nix { }; diff --git a/nix/pkgs/control-plane/cargo-project.nix b/nix/pkgs/control-plane/cargo-project.nix new file mode 100644 index 000000000..aa700c632 --- /dev/null +++ b/nix/pkgs/control-plane/cargo-project.nix @@ -0,0 +1,65 @@ +{ stdenv +, clang +, git +, lib +, llvmPackages +, makeRustPlatform +, openssl +, pkg-config +, protobuf +, sources +, pkgs +}: +let + channel = import ../../lib/rust.nix { inherit sources; }; + rustPlatform = makeRustPlatform { + rustc = channel.stable.rust; + cargo = channel.stable.cargo; + }; + whitelistSource = src: allowedPrefixes: + builtins.filterSource + (path: type: + lib.any + (allowedPrefix: + lib.hasPrefix (toString (src + "/${allowedPrefix}")) path) + allowedPrefixes) + src; + version_drv = import ../../lib/version.nix { inherit lib stdenv git; }; + version = builtins.readFile "${version_drv}"; + buildProps = rec { + name = "control-plane"; + #cargoSha256 = "0000000000000000000000000000000000000000000000000000"; + cargoSha256 = "02qf9pnja4cn31qnzawbrqhny88ja19sqm68zy12ly4vmg6dd3lf"; + inherit version; + src = whitelistSource ../../../. (pkgs.callPackage ../mayastor { }).src_list; + cargoBuildFlags = [ "-p mbus_api" "-p agents" "-p rest" "-p operators" ]; + + LIBCLANG_PATH = "${llvmPackages.libclang}/lib"; + PROTOC = "${protobuf}/bin/protoc"; + PROTOC_INCLUDE = "${protobuf}/include"; + + nativeBuildInputs = [ + clang + pkg-config + ]; + buildInputs = [ + llvmPackages.libclang + openssl + ]; + verifyCargoDeps = false; + doCheck = false; + meta = { platforms = stdenv.lib.platforms.linux; }; + }; +in +{ + release = rustPlatform.buildRustPackage + (buildProps // { + buildType = "release"; + buildInputs = buildProps.buildInputs; + }); + debug = rustPlatform.buildRustPackage + (buildProps // { + buildType = "debug"; + buildInputs = buildProps.buildInputs; + }); +} diff --git a/nix/pkgs/control-plane/default.nix b/nix/pkgs/control-plane/default.nix new file mode 100644 index 000000000..412ec78c7 --- /dev/null +++ b/nix/pkgs/control-plane/default.nix @@ -0,0 +1,38 @@ +{ stdenv +, git +, lib +, pkgs +}: +let + project-builder = pkgs.callPackage ../control-plane/cargo-project.nix { }; + versionDrv = import ../../lib/version.nix { inherit lib stdenv git; }; + version = builtins.readFile "${versionDrv}"; + agent = { name, src }: stdenv.mkDerivation { + inherit src; + name = "${name}-${version}"; + installPhase = '' + mkdir -p $out/bin + cp $src/bin/${name} $out/bin/${name}-agent + ''; + }; + operator = { name, src }: stdenv.mkDerivation { + inherit src; + name = "${name}-${version}"; + installPhase = '' + mkdir -p $out/bin + cp $src/bin/${name}-op $out/bin/${name}-operator + ''; + }; + components = { src }: { + kiiss = agent { inherit src; name = "kiiss"; }; + node = agent { inherit src; name = "node"; }; + pool = agent { inherit src; name = "pool"; }; + volume = agent { inherit src; name = "volume"; }; + rest = agent { inherit src; name = "rest"; }; + node-op = operator { inherit src; name = "node"; }; + }; +in +{ + release = components { src = project-builder.release; }; + debug = components { src = project-builder.debug; }; +} diff --git a/nix/pkgs/images/default.nix b/nix/pkgs/images/default.nix index 9068bb3cf..98db721eb 100644 --- a/nix/pkgs/images/default.nix +++ b/nix/pkgs/images/default.nix @@ -19,6 +19,8 @@ , mayastor-dev , mayastor-adhoc , utillinux +, control-plane +, tini }: let versionDrv = import ../../lib/version.nix { inherit lib stdenv git; }; @@ -51,7 +53,7 @@ let mkdir -p var/tmp ''; }; - servicesImageProps = { + clientImageProps = { tag = version; created = "now"; config = { @@ -62,35 +64,78 @@ let mkdir -p var/tmp ''; }; + operatorImageProps = { + tag = version; + created = "now"; + config = { + Env = [ "PATH=${env}" ]; + }; + }; + agentImageProps = { + tag = version; + created = "now"; + config = { + Env = [ "PATH=${env}" ]; + }; + }; + build-control-plane-image = { build, name, binary, config ? { } }: dockerTools.buildImage { + tag = version; + created = "now"; + name = "mayadata/mayastor-${name}"; + contents = [ tini busybox control-plane.${build}.${name} ]; + config = { Entrypoint = [ "tini" "--" "${binary}" ]; } // config; + }; + build-agent-image = { build, name, config ? { } }: build-control-plane-image { + inherit build name; + binary = "${name}-agent"; + }; + build-operator-image = { build, name, config ? { } }: build-control-plane-image { + inherit build; + name = "${name}-op"; + binary = "${name}-operator"; + }; + + operator-images = { build }: { + node = build-operator-image { inherit build; name = "node"; }; + }; + agent-images = { build }: { + kiiss = build-agent-image { inherit build; name = "kiiss"; }; + node = build-agent-image { inherit build; name = "node"; }; + pool = build-agent-image { inherit build; name = "pool"; }; + volume = build-agent-image { inherit build; name = "volume"; }; + rest = build-agent-image { + inherit build; name = "rest"; + config = { ExposedPorts = { "8080/tcp" = { }; "8081/tcp" = { }; }; }; + }; + }; + mayastorIscsiadm = writeScriptBin "mayastor-iscsiadm" '' + #!${stdenv.shell} + chroot /host /usr/bin/env -i PATH="/sbin:/bin:/usr/bin" iscsiadm "$@" + ''; in -rec { - mayastor-image = dockerTools.buildImage (mayastorImageProps // { +{ + mayastor = dockerTools.buildImage (mayastorImageProps // { name = "mayadata/mayastor"; contents = [ busybox mayastor ]; }); - mayastor-dev-image = dockerTools.buildImage (mayastorImageProps // { + mayastor-dev = dockerTools.buildImage (mayastorImageProps // { name = "mayadata/mayastor-dev"; contents = [ busybox mayastor-dev ]; }); - mayastor-adhoc-image = dockerTools.buildImage (mayastorImageProps // { + mayastor-adhoc = dockerTools.buildImage (mayastorImageProps // { name = "mayadata/mayastor-adhoc"; contents = [ busybox mayastor-adhoc ]; }); - mayastorIscsiadm = writeScriptBin "mayastor-iscsiadm" '' - #!${stdenv.shell} - chroot /host /usr/bin/env -i PATH="/sbin:/bin:/usr/bin" iscsiadm "$@" - ''; - - mayastor-csi-image = dockerTools.buildLayeredImage (mayastorCsiImageProps // { + mayastor-csi = dockerTools.buildLayeredImage (mayastorCsiImageProps // { name = "mayadata/mayastor-csi"; contents = [ busybox mayastor mayastorIscsiadm ]; maxLayers = 42; }); - mayastor-csi-dev-image = dockerTools.buildImage (mayastorCsiImageProps // { + mayastor-csi-dev = dockerTools.buildImage (mayastorCsiImageProps // { name = "mayadata/mayastor-csi-dev"; contents = [ busybox mayastor-dev mayastorIscsiadm ]; }); @@ -98,7 +143,7 @@ rec { # The algorithm for placing packages into the layers is not optimal. # There are a couple of layers with negligable size and then there is one # big layer with everything else. That defeats the purpose of layering. - moac-image = dockerTools.buildLayeredImage { + moac = dockerTools.buildLayeredImage { name = "mayadata/moac"; tag = version; created = "now"; @@ -122,23 +167,15 @@ rec { maxLayers = 42; }; - services-kiiss-image = dockerTools.buildLayeredImage (servicesImageProps // { - name = "mayadata/services-kiiss"; - contents = [ busybox mayastor ]; - config = { Entrypoint = [ "/bin/kiiss" ]; }; - maxLayers = 42; - }); - - services-kiiss-dev-image = dockerTools.buildImage (servicesImageProps // { - name = "mayadata/services-kiiss-dev"; - contents = [ busybox mayastor ]; - config = { Entrypoint = [ "/bin/kiiss" ]; }; - }); - - mayastor-client-image = dockerTools.buildImage (servicesImageProps // { + mayastor-client = dockerTools.buildImage (clientImageProps // { name = "mayadata/mayastor-client"; contents = [ busybox mayastor ]; config = { Entrypoint = [ "/bin/mayastor-client" ]; }; }); + agents = agent-images { build = "release"; }; + agents-dev = agent-images { build = "debug"; }; + + operators = operator-images { build = "release"; }; + operators-dev = operator-images { build = "debug"; }; } diff --git a/nix/pkgs/mayastor/default.nix b/nix/pkgs/mayastor/default.nix index 4c62e6793..94b78b47d 100644 --- a/nix/pkgs/mayastor/default.nix +++ b/nix/pkgs/mayastor/default.nix @@ -19,6 +19,8 @@ , sources , xfsprogs , utillinux +, rustup +, docker-compose }: let channel = import ../../lib/rust.nix { inherit sources; }; @@ -36,34 +38,36 @@ let src; version_drv = import ../../lib/version.nix { inherit lib stdenv git; }; version = builtins.readFile "${version_drv}"; + src_list = [ + "Cargo.lock" + "Cargo.toml" + "cli" + "csi" + "devinfo" + "jsonrpc" + "mayastor" + "nvmeadm" + "rpc" + "spdk-sys" + "sysfs" + "control-plane" + "composer" + ]; buildProps = rec { name = "mayastor"; #cargoSha256 = "0000000000000000000000000000000000000000000000000000"; - cargoSha256 = "1c93jzly0pa2k7h40m4fn86v39n8a9kra2087rxnqa9nk0gw0lha"; + cargoSha256 = "1c5zwaivwsx7gznjvsd0gfhbvjji5q1qbjacdm6vfapqv9i79yfn"; inherit version; - src = whitelistSource ../../../. [ - "Cargo.lock" - "Cargo.toml" - "cli" - "csi" - "devinfo" - "jsonrpc" - "mayastor" - "nvmeadm" - "rpc" - "spdk-sys" - "sysfs" - "mbus-api" - "services" - "rest" - "operators" - "composer" - ]; - + src = whitelistSource ../../../. src_list; LIBCLANG_PATH = "${llvmPackages.libclang}/lib"; PROTOC = "${protobuf}/bin/protoc"; PROTOC_INCLUDE = "${protobuf}/include"; + # Before editing dependencies, consider: + # https://nixos.org/manual/nixpkgs/stable/#ssec-cross-dependency-implementation + # https://nixos.org/manual/nixpkgs/stable/#ssec-stdenv-dependencies + basePackages = [ + ]; nativeBuildInputs = [ clang pkg-config @@ -85,6 +89,7 @@ let }; in { + inherit src_list; release = rustPlatform.buildRustPackage (buildProps // { buildType = "release"; @@ -106,7 +111,6 @@ in ../../../target/debug/mayastor-csi ../../../target/debug/mayastor-client ../../../target/debug/jsonrpc - ../../../target/debug/kiiss ]; buildInputs = [ diff --git a/nvmeadm/src/nvmf_discovery.rs b/nvmeadm/src/nvmf_discovery.rs index 891efdd6d..858280430 100644 --- a/nvmeadm/src/nvmf_discovery.rs +++ b/nvmeadm/src/nvmf_discovery.rs @@ -529,5 +529,5 @@ pub fn disconnect(nqn: &str) -> Result { Ok(e) }) .collect(); - Ok(subsys.unwrap().len()) + Ok(subsys?.len()) } diff --git a/nvmeadm/tests/discovery_test.rs b/nvmeadm/tests/discovery_test.rs index 9bf04afb1..c36cee935 100644 --- a/nvmeadm/tests/discovery_test.rs +++ b/nvmeadm/tests/discovery_test.rs @@ -117,7 +117,8 @@ impl NvmfTarget { impl Drop for NvmfTarget { fn drop(&mut self) { // Ensure we end with no connected disk - let _ = disconnect(SERVED_DISK_NQN); + disconnect(SERVED_DISK_NQN) + .expect("Should disconnect from the target device"); // Kill the spdk nvmf target self.spdk_proc.kill().expect("Failed to kill SPDK process"); @@ -156,7 +157,7 @@ fn connect_test() { #[test] fn disconnect_test() { - let _ = disconnect("mynqn"); + disconnect("mynqn").expect("Should disconnect from the target device"); } #[test] diff --git a/rest/service/src/main.rs b/rest/service/src/main.rs deleted file mode 100644 index 106f9dd19..000000000 --- a/rest/service/src/main.rs +++ /dev/null @@ -1,89 +0,0 @@ -mod v0; - -use actix_web::{middleware, App, HttpServer}; -use rustls::{ - internal::pemfile::{certs, rsa_private_keys}, - NoClientAuth, - ServerConfig, -}; -use std::io::BufReader; -use structopt::StructOpt; - -#[derive(Debug, StructOpt)] -struct CliArgs { - /// The Rest Server address to bind to - /// Default: 0.0.0.0:8080 - #[structopt(long, short, default_value = "0.0.0.0:8080")] - rest: String, - /// The Nats Server URL or address to connect to - /// Default: nats://0.0.0.0:4222 - #[structopt(long, short, default_value = "nats://0.0.0.0:4222")] - nats: String, - - /// Trace rest requests to the Jaeger endpoint agent - #[structopt(long, short)] - jaeger: Option, -} - -use actix_web_opentelemetry::RequestTracing; -use opentelemetry::{ - global, - sdk::{propagation::TraceContextPropagator, trace::Tracer}, -}; -use opentelemetry_jaeger::Uninstall; - -fn init_tracing() -> Option<(Tracer, Uninstall)> { - if let Ok(filter) = tracing_subscriber::EnvFilter::try_from_default_env() { - tracing_subscriber::fmt().with_env_filter(filter).init(); - } else { - tracing_subscriber::fmt().with_env_filter("info").init(); - } - if let Some(agent) = CliArgs::from_args().jaeger { - tracing::info!("Starting jaeger trace pipeline at {}...", agent); - // Start a new jaeger trace pipeline - global::set_text_map_propagator(TraceContextPropagator::new()); - let (_tracer, _uninstall) = opentelemetry_jaeger::new_pipeline() - .with_agent_endpoint(agent) - .with_service_name("rest-server") - .install() - .expect("Jaeger pipeline install error"); - Some((_tracer, _uninstall)) - } else { - None - } -} - -#[actix_web::main] -async fn main() -> std::io::Result<()> { - // need to keep the jaeger pipeline tracer alive, if enabled - let _tracer = init_tracing(); - - mbus_api::message_bus_init(CliArgs::from_args().nats).await; - - // dummy certificates - let mut config = ServerConfig::new(NoClientAuth::new()); - let cert_file = &mut BufReader::new( - &std::include_bytes!("../../certs/rsa/user.chain")[..], - ); - let key_file = &mut BufReader::new( - &std::include_bytes!("../../certs/rsa/user.rsa")[..], - ); - let cert_chain = certs(cert_file).unwrap(); - let mut keys = rsa_private_keys(key_file).unwrap(); - config.set_single_cert(cert_chain, keys.remove(0)).unwrap(); - - HttpServer::new(move || { - App::new() - .wrap(RequestTracing::new()) - .wrap(middleware::Logger::default()) - .service(v0::nodes::factory()) - .service(v0::pools::factory()) - .service(v0::replicas::factory()) - .service(v0::nexuses::factory()) - .service(v0::children::factory()) - .service(v0::volumes::factory()) - }) - .bind_rustls(CliArgs::from_args().rest, config)? - .run() - .await -} diff --git a/rest/service/src/v0/mod.rs b/rest/service/src/v0/mod.rs deleted file mode 100644 index acf3f4569..000000000 --- a/rest/service/src/v0/mod.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Version 0 of the URI's -//! Ex: /v0/nodes - -pub mod children; -pub mod nexuses; -pub mod nodes; -pub mod pools; -pub mod replicas; -pub mod volumes; - -use mbus_api::{ - message_bus::v0::{MessageBus, *}, - v0::Filter, -}; -use rest_client::versions::v0::*; - -use actix_web::{ - delete, - dev::{AppService, HttpServiceFactory}, - get, - put, - web, - HttpRequest, - HttpResponse, - Responder, -}; diff --git a/rest/service/src/v0/nodes.rs b/rest/service/src/v0/nodes.rs deleted file mode 100644 index e49e68e3f..000000000 --- a/rest/service/src/v0/nodes.rs +++ /dev/null @@ -1,21 +0,0 @@ -use super::*; - -struct Factory {} -impl HttpServiceFactory for Factory { - fn register(self, config: &mut AppService) { - get_node.register(config); - get_nodes.register(config); - } -} -pub(crate) fn factory() -> impl HttpServiceFactory { - Factory {} -} - -#[get("/v0/nodes")] -async fn get_nodes() -> impl Responder { - RestRespond::result(MessageBus::get_nodes().await) -} -#[get("/v0/nodes/{id}")] -async fn get_node(web::Path(node_id): web::Path) -> impl Responder { - RestRespond::result(MessageBus::get_node(&node_id).await) -} diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 000000000..870bbe4e5 --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +stable \ No newline at end of file diff --git a/scripts/cargo-test.sh b/scripts/cargo-test.sh index faa090eb2..242149a95 100755 --- a/scripts/cargo-test.sh +++ b/scripts/cargo-test.sh @@ -18,7 +18,8 @@ export PATH=$PATH:${HOME}/.cargo/bin ( cd jsonrpc && cargo test ) # test dependencies cargo build --bins -for test in composer mayastor services rest; do - ( cd ${test} && cargo test -- --test-threads=1 ) +( cd mayastor && cargo test -- --test-threads=1 ) +for test in composer agents rest; do + cargo test -p ${test} -- --test-threads=1 done ( cd nvmeadm && cargo test ) diff --git a/scripts/e2e-cluster-dump.sh b/scripts/e2e-cluster-dump.sh index 3a0521a44..912777cdf 100755 --- a/scripts/e2e-cluster-dump.sh +++ b/scripts/e2e-cluster-dump.sh @@ -20,7 +20,7 @@ EOF function cluster-get { echo "-- PODS mayastor* --------------------" - # csi tests creates relevant namespaces containing mayastor + # The CSI tests creates namespaces containing the text mayastor mns=$(kubectl get ns | grep mayastor | sed -e "s/ .*//") for ns in $mns do @@ -42,11 +42,16 @@ function cluster-get { kubectl -n mayastor get msn --sort-by=.metadata.creationTimestamp echo "-- K8s Nodes -----------------------------" kubectl get nodes -o wide --show-labels + echo "-- K8s Deployments -------------------" + kubectl -n mayastor get deployments + echo "-- K8s Daemonsets --------------------" + kubectl -n mayastor get daemonsets + } function cluster-describe { echo "-- PODS mayastor* --------------------" - # csi tests creates relevant namespaces containing mayastor + # The CSI tests creates namespaces containing the text mayastor mns=$(kubectl get ns | grep mayastor | sed -e "s/ .*//") for ns in $mns do @@ -66,100 +71,133 @@ function cluster-describe { kubectl -n mayastor describe msv echo "-- Mayastor Nodes --------------------" kubectl -n mayastor describe msn - echo "-- K8s Nodes -----------------------------" + echo "-- K8s Nodes -------------------------" kubectl describe nodes + echo "-- K8s Deployments -------------------" + kubectl -n mayastor describe deployments + echo "-- K8s Daemonsets --------------------" + kubectl -n mayastor describe daemonsets } -function logs-csi-containers { - mayastor_csipods=$(kubectl -n mayastor get pods | grep mayastor-csi | sed -e 's/ .*//') - for pod in $mayastor_csipods - do - echo "# $pod csi-driver-registrar $* ---------------------------------" - kubectl -n mayastor logs "$@" "$pod" csi-driver-registrar - done +function podHasRestarts { + rst=$(kubectl -n mayastor get pods "$1" | grep -v NAME | awk '{print $4}') - moacpod=$(kubectl -n mayastor get pods | grep moac | sed -e 's/ .*//') - echo "# $moacpod csi-provisioner $* ---------------------------------" - kubectl -n mayastor logs "$@" "$moacpod" csi-provisioner - echo "# $moacpod csi-attacher $* ---------------------------------" - kubectl -n mayastor logs "$@" "$moacpod" csi-attacher + # Adjust the return value, to yield readable statements, like: + # if podHasRestarts $podname ; then + # handle_restarted_pods + # fi + if [ $((rst)) -ne 0 ]; then + return 0 + else + return 1 + fi +} + +# args filename kubectlargs +# filename == "" -> stdout +function kubectlEmitLogs { + fname=$1 + shift + + if [ -n "$fname" ]; then + kubectl -n mayastor logs "$@" >& "$fname" + else + kubectl -n mayastor logs "$@" + fi +} + +# args = destdir podname containername +# if $destdir != "" then log files are generate in $destdir +# with the name of the pod and container. +function emitPodContainerLogs { + destdir=$1 + podname=$2 + containername=$3 + + if [ -z "$podname" ] || [ -z "$containername" ]; then + echo "ERROR calling emitPodContainerLogs" + return + fi + + if podHasRestarts "$podname" ; then + if [ -z "$destdir" ]; then + echo "# $podname $containername previous -------------------" + logfile="" + else + logfile="$destdir/$podname.$containername.previous.log" + fi + + kubectlEmitLogs "$logfile" -p "$podname" "$containername" + fi + + if [ -z "$destdir" ]; then + echo "# $podname $containername ----------------------------" + logfile="" + else + logfile="$destdir/$podname.$containername.log" + fi + + kubectlEmitLogs "$logfile" "$podname" "$containername" } -function logs-csi-mayastor { +# arg1 = destdir or "" for stdout +function getLogsMayastorCSI { mayastor_csipods=$(kubectl -n mayastor get pods | grep mayastor-csi | sed -e 's/ .*//') for pod in $mayastor_csipods do - echo "# $pod mayastor-csi $* ---------------------------------" - kubectl -n mayastor logs "$@" "$pod" mayastor-csi + # emitPodContainerLogs destdir podname containername + emitPodContainerLogs "$1" "$pod" mayastor-csi + emitPodContainerLogs "$1" "$pod" csi-driver-registrar done } -function logs-mayastor { +# arg1 = destdir or "" for stdout +function getLogsMayastor { mayastor_pods=$(kubectl -n mayastor get pods | grep mayastor | grep -v mayastor-csi | sed -e 's/ .*//') for pod in $mayastor_pods do - echo "# $pod mayastor $* ---------------------------------" - kubectl -n mayastor logs "$@" "$pod" mayastor + # emitPodContainerLogs destdir podname containername + emitPodContainerLogs "$1" "$pod" mayastor done } -function logs-moac { +# arg1 = destdir or "" for stdout +function getLogsMOAC { moacpod=$(kubectl -n mayastor get pods | grep moac | sed -e 's/ .*//') - echo "# $moacpod moac $* ---------------------------------" - kubectl -n mayastor logs "$@" "$moacpod" moac + # emitPodContainerLogs destdir podname containername + emitPodContainerLogs "$1" "$moacpod" moac + emitPodContainerLogs "$1" "$moacpod" csi-provisioner + emitPodContainerLogs "$1" "$moacpod" csi-attacher } # $1 = podlogs, 0 => do not generate pod logs -function dump-to-stdout { - echo "# Cluster ---------------------------------" - cluster-get - cluster-describe - - if [ "$1" -ne 0 ]; then - logs-moac - logs-mayastor - logs-csi-mayastor - logs-csi-containers - - logs-moac -p - logs-mayastor -p - logs-csi-mayastor -p - logs-csi-containers -p +# $2 = [destdir] undefined => dump to stdout, +# otherwise generate log files in $destdir +function getLogs { + podlogs="$1" + shift + dest="$1" + shift + + if [ -n "$dest" ]; + then + mkdir -p "$dest" fi - echo "# END ---------------------------------" -} -# $1 = podlogs, 0 => do not generate pod logs -# $2 = dest mkdir $dest and generate logs there. -function dump-to-dir { - dest="$2" - echo "Generating logs in $dest" - mkdir -p "$dest" - - cluster-get >& "$dest/cluster.get.txt" - cluster-describe >& "$dest/cluster.describe.txt" - - if [ "$1" -ne 0 ]; then - logs-moac >& "$dest/moac.log" - logs-mayastor >& "$dest/mayastor.log" - logs-csi-mayastor >& "$dest/csi-mayastor.log" - logs-csi-containers >& "$dest/csi-containers.log" - - logs-moac -p >& "$dest/moac.previous.log" - logs-mayastor -p >& "$dest/mayastor.previous.log" - logs-csi-mayastor -p >& "$dest/csi-mayastor.previous.log" - logs-csi-containers -p >& "$dest/csi-containers.previous.log" + if [ "$podlogs" -ne 0 ]; then + getLogsMOAC "$dest" + getLogsMayastor "$dest" + getLogsMayastorCSI "$dest" fi -} -# $1 = podlogs, 0 => do not generate pod logs -# $2 = [destdir] undefined => dump to stdout, -# otherwise generate log files in $destdir -function dump { - if [ -z "$2" ]; then - dump-to-stdout "$1" + if [ -n "$dest" ]; + then + cluster-get >& "$dest/cluster.get.txt" + cluster-describe >& "$dest/cluster.describe.txt" + echo "logfiles generated in $dest" else - dump-to-dir "$1" "$2" + cluster-get + cluster-describe fi } @@ -185,5 +223,5 @@ while [ "$#" -gt 0 ]; do shift done -# @here dump to stdout -dump "$podlogs" "$destdir" +# getLogs podlogs destdir +getLogs "$podlogs" "$destdir" diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index 38e744f11..52de8e059 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -1,18 +1,27 @@ #!/usr/bin/env bash -set -eux +set -eu SCRIPTDIR=$(dirname "$(realpath "$0")") -# new tests should be added before the replica_pod_remove test -#TESTS="install basic_volume_io csi replica rebuild node_disconnect/replica_pod_remove uninstall" -TESTS="install basic_volume_io csi uninstall" -DEVICE= -REGISTRY= -TAG= TESTDIR=$(realpath "$SCRIPTDIR/../test/e2e") REPORTSDIR=$(realpath "$SCRIPTDIR/..") -GENERATE_LOGS=0 -ON_FAIL="continue" + +# List and Sequence of tests. +#tests="install basic_volume_io csi replica rebuild node_disconnect/replica_pod_remove uninstall" +# Restrictions: +# 1. resource_check MUST follow csi +# resource_check is a follow up check for the 3rd party CSI test suite. +# 2. replicas_pod_remove SHOULD be the last test before uninstall +# this is a disruptive test. +tests="install basic_volume_io csi resource_check uninstall" + +device= +registry= +tag="ci" +generate_logs=0 +on_fail="stop" +uninstall_cleanup="n" +logsdir="" help() { cat < Device path to use for storage pools. --registry Registry to pull the mayastor images from. --tag Docker image tag of mayastor images (default "ci") - --tests Lists of tests to run, delimited by spaces (default: "$TESTS") + --tests Lists of tests to run, delimited by spaces (default: "$tests") Note: the last 2 tests should be (if they are to be run) node_disconnect/replica_pod_remove uninstall --reportsdir Path to use for junit xml test reports (default: repo root) - --logs Generate logs and cluster state dump at the end of successful test run. - --onfail On fail, stop immediately or continue default($ON_FAIL) + --logs Generate logs and cluster state dump at the end of successful test run, + prior to uninstall. + --logsdir Location to generate logs (default: emit to stdout). + --onfail On fail, stop immediately or continue default($on_fail) Behaviour for "continue" only differs if uninstall is in the list of tests (the default). + --uninstall_cleanup On uninstall cleanup for reusable cluster. default($uninstall_cleanup) Examples: - $0 --registry 127.0.0.1:5000 --tag a80ce0c + $0 --device /dev/nvme0n1 --registry 127.0.0.1:5000 --tag a80ce0c EOF } @@ -39,19 +51,19 @@ while [ "$#" -gt 0 ]; do case "$1" in -d|--device) shift - DEVICE=$1 + device=$1 ;; -r|--registry) shift - REGISTRY=$1 + registry=$1 ;; -t|--tag) shift - TAG=$1 + tag=$1 ;; -T|--tests) shift - TESTS="$1" + tests="$1" ;; -R|--reportsdir) shift @@ -61,23 +73,38 @@ while [ "$#" -gt 0 ]; do help exit 0 ;; - -l|--logs) - GENERATE_LOGS=1 + --logs) + generate_logs=1 + ;; + --logsdir) + shift + logsdir="$1" ;; --onfail) shift case $1 in continue) - ON_FAIL=$1 + on_fail=$1 ;; stop) - ON_FAIL=$1 + on_fail=$1 + ;; + *) + help + exit 2 + esac + ;; + --uninstall_cleanup) + shift + case $1 in + y|n) + uninstall_cleanup=$1 ;; *) help exit 2 esac - ;; + ;; *) echo "Unknown option: $1" help @@ -87,19 +114,22 @@ while [ "$#" -gt 0 ]; do shift done -if [ -z "$DEVICE" ]; then +if [ -z "$device" ]; then echo "Device for storage pools must be specified" help exit 1 fi -export e2e_pool_device=$DEVICE +export e2e_pool_device=$device -if [ -n "$TAG" ]; then - export e2e_image_tag="$TAG" +if [ -z "$registry" ]; then + echo "Registry to pull the mayastor images from, must be specified" + help + exit 1 fi +export e2e_docker_registry="$registry" -if [ -n "$REGISTRY" ]; then - export e2e_docker_registry="$REGISTRY" +if [ -n "$tag" ]; then + export e2e_image_tag="$tag" fi export e2e_reports_dir="$REPORTSDIR" @@ -108,6 +138,12 @@ if [ ! -d "$e2e_reports_dir" ] ; then exit 1 fi +if [ "$uninstall_cleanup" == 'n' ] ; then + export e2e_uninstall_cleanup=0 +else + export e2e_uninstall_cleanup=1 +fi + test_failed=0 # Run go test in directory specified as $1 (relative path) @@ -115,11 +151,13 @@ function runGoTest { cd "$TESTDIR" echo "Running go test in $PWD/\"$1\"" if [ -z "$1" ] || [ ! -d "$1" ]; then + echo "Unable to locate test directory $PWD/\"$1\"" return 1 fi cd "$1" if ! go test -v . -ginkgo.v -ginkgo.progress -timeout 0; then + generate_logs=1 return 1 fi @@ -131,38 +169,59 @@ contains() { [[ $1 =~ (^|[[:space:]])$2($|[[:space:]]) ]] && return 0 || return 1 } -echo "list of tests: $TESTS" -for dir in $TESTS; do +echo "Environment:" +echo " e2e_pool_device=$e2e_pool_device" +echo " e2e_image_tag=$e2e_image_tag" +echo " e2e_docker_registry=$e2e_docker_registry" +echo " e2e_reports_dir=$e2e_reports_dir" +echo " e2e_uninstall_cleanup=$e2e_uninstall_cleanup" + + +echo "list of tests: $tests" +for testname in $tests; do # defer uninstall till after other tests have been run. - if [ "$dir" != "uninstall" ] ; then - if ! runGoTest "$dir" ; then + if [ "$testname" != "uninstall" ] ; then + if ! runGoTest "$testname" ; then + echo "Test \"$testname\" Failed!!" test_failed=1 break fi if ! ("$SCRIPTDIR"/e2e_check_pod_restarts.sh) ; then + echo "Test \"$testname\" Failed!! mayastor pods were restarted." test_failed=1 + generate_logs=1 break fi fi done -if [ "$test_failed" -ne 0 ]; then - if ! "$SCRIPTDIR"/e2e-cluster-dump.sh ; then - # ignore failures in the dump script - : +if [ "$generate_logs" -ne 0 ]; then + if [ -n "$logsdir" ]; then + if ! "$SCRIPTDIR"/e2e-cluster-dump.sh --destdir "$logsdir" ; then + # ignore failures in the dump script + : + fi + else + if ! "$SCRIPTDIR"/e2e-cluster-dump.sh ; then + # ignore failures in the dump script + : + fi fi +fi - if [ "$ON_FAIL" == "stop" ]; then - exit 3 - fi +if [ "$test_failed" -ne 0 ] && [ "$on_fail" == "stop" ]; then + exit 3 fi # Always run uninstall test if specified -if contains "$TESTS" "uninstall" ; then +if contains "$tests" "uninstall" ; then if ! runGoTest "uninstall" ; then + echo "Test \"uninstall\" Failed!!" test_failed=1 + # Dump to the screen only, we do NOT want to overwrite + # logfiles that may have been generated. if ! "$SCRIPTDIR"/e2e-cluster-dump.sh --clusteronly ; then # ignore failures in the dump script : @@ -172,14 +231,7 @@ fi if [ "$test_failed" -ne 0 ]; then echo "At least one test has FAILED!" - exit 1 -fi - -if [ "$GENERATE_LOGS" -ne 0 ]; then - if ! "$SCRIPTDIR"/e2e-cluster-dump.sh ; then - # ignore failures in the dump script - : - fi + exit 1 fi echo "All tests have PASSED!" diff --git a/scripts/generate-deploy-yamls.sh b/scripts/generate-deploy-yamls.sh index 21fb5d7b5..ea2e47b58 100755 --- a/scripts/generate-deploy-yamls.sh +++ b/scripts/generate-deploy-yamls.sh @@ -43,10 +43,10 @@ tmpd=$(mktemp -d /tmp/generate-deploy.sh.XXXXXXXX) # shellcheck disable=SC2064 trap "rm -fr '$tmpd'" HUP QUIT EXIT TERM INT -if [ "$mayastor_images_repo" = "NONE" ]; then - helm template --set "mayastorImagesTag=$1" mayastor "$SCRIPTDIR/../chart" --output-dir="$tmpd" --namespace mayastor -else - helm template --set "mayastorImagesTag=$1,mayastorImagesRepo=$mayastor_images_repo" mayastor "$SCRIPTDIR/../chart" --output-dir="$tmpd" --namespace mayastor +template_params="mayastorImagesTag=$1" +if [ "$mayastor_images_repo" != "NONE" ]; then + template_params="$template_params,mayastorImagesRepo=$mayastor_images_repo" fi +helm template --set "$template_params" mayastor "$SCRIPTDIR/../chart" --output-dir="$tmpd" --namespace mayastor mv "$tmpd"/mayastor/templates/*.yaml "$TARGET_DIR" diff --git a/scripts/release.sh b/scripts/release.sh index 7b68d51a4..297a4ac83 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -134,15 +134,15 @@ for name in $IMAGES; do # If we're skipping the build, then we just want to upload # the images we already have locally. if [ -z $SKIP_BUILD ]; then - archive=${name}-image + archive=${name} if [ -z "$REGISTRY" ] && dockerhub_tag_exists $image $TAG; then echo "Skipping $image:$TAG that already exists" continue fi echo "Building $image:$TAG ..." - $NIX_BUILD --out-link $archive -A images.$archive - $DOCKER load -i $archive - $RM $archive + $NIX_BUILD --out-link $archive-image -A images.$archive + $DOCKER load -i $archive-image + $RM $archive-image if [ "$image" != "$image_basename" ]; then echo "Renaming $image_basename:$TAG to $image:$TAG" $DOCKER tag "${image_basename}:$TAG" "$image:$TAG" diff --git a/shell.nix b/shell.nix index 6c574895a..a16644e31 100644 --- a/shell.nix +++ b/shell.nix @@ -22,6 +22,8 @@ mkShell { hardeningDisable = [ "fortify" ]; buildInputs = [ docker-compose + kubectl + kind clang cowsay e2fsprogs @@ -29,6 +31,7 @@ mkShell { envsubst # for e2e tests gdb go + git gptfdisk kubernetes-helm libaio diff --git a/terraform/README.adoc b/terraform/README.adoc index 75318206d..5a8ffeebd 100644 --- a/terraform/README.adoc +++ b/terraform/README.adoc @@ -272,7 +272,7 @@ mayastor-csi images. Replace "hostname" by the name of your registry host. [source,bash] ---- -nix-build '' -A images.moac-image +nix-build '' -A images.moac docker load ' -A images.moac-image +nix-build '' -A images.moac skopeo copy --dest-tls-verify=false docker-archive:result docker://hostname:5000/mayadata/moac:latest ---- diff --git a/test/e2e/common/util.go b/test/e2e/common/util.go index b796b3653..ab6a5ccd4 100644 --- a/test/e2e/common/util.go +++ b/test/e2e/common/util.go @@ -388,11 +388,7 @@ func RunFio(podName string, duration int) { } func FioReadyPod() bool { - var fioPod corev1.Pod - if gTestEnv.K8sClient.Get(context.TODO(), types.NamespacedName{Name: "fio", Namespace: "default"}, &fioPod) != nil { - return false - } - return fioPod.Status.Phase == v1.PodRunning + return IsPodRunning("fio") } func IsPodRunning(podName string) bool { @@ -443,7 +439,7 @@ func DeleteAllPods() (bool, int) { numPods := 0 pods, err := gTestEnv.KubeInt.CoreV1().Pods("default").List(context.TODO(), metav1.ListOptions{}) if err != nil { - logf.Log.Error(err, "DeleteAllPods: list pods failed.") + logf.Log.Info("DeleteAllPods: list pods failed.", "error", err) success = false } if err == nil && pods != nil { @@ -893,6 +889,64 @@ func IsVolumePublished(uuid string) bool { return true } +func CheckForPVCs() (bool, error) { + logf.Log.Info("CheckForPVCs") + foundResources := false + + pvcs, err := gTestEnv.KubeInt.CoreV1().PersistentVolumeClaims("default").List(context.TODO(), metav1.ListOptions{}) + if err == nil && pvcs != nil && len(pvcs.Items) != 0 { + logf.Log.Info("CheckForVolumeResources: found PersistentVolumeClaims", + "PersistentVolumeClaims", pvcs.Items) + foundResources = true + } + return foundResources, err +} + +func CheckForPVs() (bool, error) { + logf.Log.Info("CheckForPVs") + foundResources := false + + pvs, err := gTestEnv.KubeInt.CoreV1().PersistentVolumes().List(context.TODO(), metav1.ListOptions{}) + if err == nil && pvs != nil && len(pvs.Items) != 0 { + logf.Log.Info("CheckForVolumeResources: found PersistentVolumes", + "PersistentVolumes", pvs.Items) + foundResources = true + } + return foundResources, err +} + +func CheckForMSVs() (bool, error) { + logf.Log.Info("CheckForMSVs") + foundResources := false + + msvGVR := schema.GroupVersionResource{ + Group: "openebs.io", + Version: "v1alpha1", + Resource: "mayastorvolumes", + } + + msvs, err := gTestEnv.DynamicClient.Resource(msvGVR).Namespace("mayastor").List(context.TODO(), metav1.ListOptions{}) + if err == nil && msvs != nil && len(msvs.Items) != 0 { + logf.Log.Info("CheckForVolumeResources: found MayastorVolumes", + "MayastorVolumes", msvs.Items) + foundResources = true + } + return foundResources, err +} + +func CheckForTestPods() (bool, error) { + logf.Log.Info("CheckForTestPods") + foundPods := false + + pods, err := gTestEnv.KubeInt.CoreV1().Pods("default").List(context.TODO(), metav1.ListOptions{}) + if err == nil && pods != nil && len(pods.Items) != 0 { + logf.Log.Info("CheckForTestPods", + "Pods", pods.Items) + foundPods = true + } + return foundPods, err +} + // Make best attempt to delete PVCs, PVs and MSVs func DeleteAllVolumeResources() (bool, bool) { logf.Log.Info("DeleteAllVolumeResources") @@ -903,7 +957,7 @@ func DeleteAllVolumeResources() (bool, bool) { // Phase 1 to delete dangling resources pvcs, err := gTestEnv.KubeInt.CoreV1().PersistentVolumeClaims("default").List(context.TODO(), metav1.ListOptions{}) if err != nil { - logf.Log.Error(err, "DeleteAllVolumeResources: list PVCs failed.") + logf.Log.Info("DeleteAllVolumeResources: list PVCs failed.", "error", err) success = false } if err == nil && pvcs != nil && len(pvcs.Items) != 0 { @@ -919,7 +973,7 @@ func DeleteAllVolumeResources() (bool, bool) { // Delete all PVs found pvs, err := gTestEnv.KubeInt.CoreV1().PersistentVolumes().List(context.TODO(), metav1.ListOptions{}) if err != nil { - logf.Log.Error(err, "DeleteAllVolumeResources: list PVs failed.") + logf.Log.Info("DeleteAllVolumeResources: list PVs failed.", "error", err) } if err == nil && pvs != nil && len(pvs.Items) != 0 { logf.Log.Info("DeleteAllVolumeResources: deleting PersistentVolumes") @@ -988,6 +1042,48 @@ func DeleteAllVolumeResources() (bool, bool) { return success, foundResources } +func DeletePools() { + poolGVR := schema.GroupVersionResource{ + Group: "openebs.io", + Version: "v1alpha1", + Resource: "mayastorpools", + } + + pools, err := gTestEnv.DynamicClient.Resource(poolGVR).Namespace("mayastor").List(context.TODO(), metav1.ListOptions{}) + if err != nil { + // This function may be called by AfterSuite by uninstall test so listing MSVs may fail correctly + logf.Log.Info("DeletePools: list MSPs failed.", "Error", err) + } + if err == nil && pools != nil && len(pools.Items) != 0 { + logf.Log.Info("DeletePools: deleting MayastorPools") + for _, pool := range pools.Items { + logf.Log.Info("DeletePools: deleting", "pool", pool.GetName()) + err = gTestEnv.DynamicClient.Resource(poolGVR).Namespace("mayastor").Delete(context.TODO(), pool.GetName(), metav1.DeleteOptions{}) + if err != nil { + logf.Log.Error(err, "Failed to delete pool", pool.GetName()) + } + } + } + + numPools := 0 + // Wait 2 minutes for resources to be deleted + for attempts := 0; attempts < 120; attempts++ { + pools, err := gTestEnv.DynamicClient.Resource(poolGVR).Namespace("mayastor").List(context.TODO(), metav1.ListOptions{}) + if err == nil && pools != nil { + numPools = len(pools.Items) + } + if numPools == 0 { + break + } + time.Sleep(1 * time.Second) + } + + logf.Log.Info("DeletePools: ", "Pool count", numPools) + if numPools != 0 { + logf.Log.Info("DeletePools: ", "Pools", pools.Items) + } +} + func AfterSuiteCleanup() { logf.Log.Info("AfterSuiteCleanup") _, _ = DeleteAllVolumeResources() diff --git a/test/e2e/install/install_test.go b/test/e2e/install/install_test.go index 49128aa3d..7e5906318 100644 --- a/test/e2e/install/install_test.go +++ b/test/e2e/install/install_test.go @@ -114,6 +114,13 @@ func getDeployYamlDir() string { return path.Clean(filename + "/../../../../deploy") } +// Create mayastor namespace +func createNamespace() { + cmd := exec.Command("kubectl", "create", "namespace", "mayastor") + out, err := cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred(), "%s", out) +} + // Helper for passing yaml from the deploy directory to kubectl func applyDeployYaml(filename string) { cmd := exec.Command("kubectl", "apply", "-f", filename) @@ -128,10 +135,6 @@ func getTemplateYamlDir() string { return path.Clean(filename + "/../deploy") } -func makeImageName(registryAddress string, imagename string, imageversion string) string { - return registryAddress + "/mayadata/" + imagename + ":" + imageversion -} - func generateYamls(imageTag string, registryAddress string) { bashcmd := fmt.Sprintf("../../../scripts/generate-deploy-yamls.sh -t ../../../test-yamls %s %s", imageTag, registryAddress) cmd := exec.Command("bash", "-c", bashcmd) @@ -150,13 +153,41 @@ func mayastorReadyPodCount() int { return int(mayastorDaemonSet.Status.NumberAvailable) } -func moacReadyPodCount() int { +func moacReady() bool { var moacDeployment appsV1.Deployment if k8sClient.Get(context.TODO(), types.NamespacedName{Name: "moac", Namespace: "mayastor"}, &moacDeployment) != nil { - fmt.Println("Failed to get MOAC deployment") - return -1 + logf.Log.Info("Failed to get MOAC deployment") + return false } - return int(moacDeployment.Status.AvailableReplicas) + + // { Remove/Reduce verbosity once we have fixed install test occasional failure. + logf.Log.Info("moacDeployment.Status", + "ObservedGeneration", moacDeployment.Status.ObservedGeneration, + "Replicas", moacDeployment.Status.Replicas, + "UpdatedReplicas", moacDeployment.Status.UpdatedReplicas, + "ReadyReplicas", moacDeployment.Status.ReadyReplicas, + "AvailableReplicas", moacDeployment.Status.AvailableReplicas, + "UnavailableReplicas", moacDeployment.Status.UnavailableReplicas, + "CollisionCount", moacDeployment.Status.CollisionCount) + for ix, condition := range moacDeployment.Status.Conditions { + logf.Log.Info("Condition", "ix", ix, + "Status", condition.Status, + "Type", condition.Type, + "Message", condition.Message, + "Reason", condition.Reason) + } + // } + + for _, condition := range moacDeployment.Status.Conditions { + if condition.Type == appsV1.DeploymentAvailable { + if condition.Status == coreV1.ConditionTrue { + logf.Log.Info("MOAC is Available") + return true + } + } + } + logf.Log.Info("MOAC is Not Available") + return false } // create pools for the cluster @@ -204,7 +235,7 @@ func installMayastor() { fmt.Printf("tag %v, registry %v, # of mayastor instances=%v\n", imageTag, registryAddress, numMayastorInstances) // FIXME use absolute paths, do not depend on CWD - applyDeployYaml("namespace.yaml") + createNamespace() applyDeployYaml("storage-class.yaml") applyDeployYaml("moac-rbac.yaml") applyDeployYaml("mayastorpoolcrd.yaml") @@ -214,20 +245,27 @@ func installMayastor() { applyDeployYaml("../test-yamls/moac-deployment.yaml") applyDeployYaml("../test-yamls/mayastor-daemonset.yaml") - // Given the yamls and the environment described in the test readme, + // Given the yaml files and the environment described in the test readme, // we expect mayastor to be running on exactly numMayastorInstances nodes. - Eventually(mayastorReadyPodCount, + Eventually(func() int { + return mayastorReadyPodCount() + }, "180s", // timeout "1s", // polling interval ).Should(Equal(numMayastorInstances)) - Eventually(moacReadyPodCount(), + // Wait for MOAC to be ready before creating the pools, + Eventually(func() bool { + return moacReady() + }, "360s", // timeout - "1s", // polling interval - ).Should(Equal(1)) + "2s", // polling interval + ).Should(Equal(true)) // Now create pools on all nodes. createPools(mayastorNodes) + + // Mayastor has been installed and is now ready for use. } func TestInstallSuite(t *testing.T) { diff --git a/test/e2e/node_disconnect/lib/io_connect_node.sh b/test/e2e/node_disconnect/lib/io_connect_node.sh deleted file mode 100755 index 05e26f94e..000000000 --- a/test/e2e/node_disconnect/lib/io_connect_node.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# Script to disconnect a node from another node using iptables -# $1 is the hostname of the node to change -# $2 is the target IP address of the connection to change -# $3 is "DISCONNECT" or "RECONNECT" -# $4 is "DROP" or "REJECT" - -# edit the line below, if necessary, or set KUBESPRAY_REPO when calling -KUBESPRAY_REPO="${KUBESPRAY_REPO:-$HOME/work/kubespray}" - -if [ $# -ne 4 ]; - then echo "specify node-name, target node-ip-address, action (DISCONNECT or RECONNECT), and (DROP or REJECT)" - exit 1 -fi - -if [ "$3" = "DISCONNECT" ]; then - action="I" -elif [ "$3" = "RECONNECT" ]; then - action="D" -else - echo "specify action (DISCONNECT or RECONNECT)" - exit 1 -fi - -if [ "$4" != "DROP" ] && [ "$4" != "REJECT" ]; then - echo "specify DROP or REJECT" - exit 1 -fi - - -cd ${KUBESPRAY_REPO} - -node_name=$1 -other_ip=$2 - -# apply the rule to block/unblock it -vagrant ssh ${node_name} -c "sh -c 'sudo iptables -${action} INPUT -s ${other_ip} -j $4'" -vagrant ssh ${node_name} -c "sh -c 'sudo iptables -${action} OUTPUT -s ${other_ip} -j $4'" - diff --git a/test/e2e/node_disconnect/lib/node_disconnect_lib.go b/test/e2e/node_disconnect/lib/node_disconnect_lib.go index 9006503c8..73cf5f380 100644 --- a/test/e2e/node_disconnect/lib/node_disconnect_lib.go +++ b/test/e2e/node_disconnect/lib/node_disconnect_lib.go @@ -2,8 +2,8 @@ package node_disconnect_lib import ( "e2e-basic/common" - "fmt" - "os/exec" + + logf "sigs.k8s.io/controller-runtime/pkg/log" . "github.com/onsi/gomega" ) @@ -17,10 +17,10 @@ const ( ) type DisconnectEnv struct { - nodeToIsolate string - otherNodes []string + replicaToRemove string + allMayastorNodes []string + unusedNodes []string uuid string - disconnectMethod string volToDelete string storageClass string fioPodName string @@ -34,142 +34,76 @@ func createFioOnRefugeNode(podName string, volClaimName string) { Expect(err).ToNot(HaveOccurred()) } -// disconnect a node from the other nodes in the cluster -func DisconnectNode(nodeName string, otherNodes []string, method string) { - for _, targetIP := range otherNodes { - cmd := exec.Command("bash", "../lib/io_connect_node.sh", nodeName, targetIP, "DISCONNECT", method) - cmd.Dir = "./" - _, err := cmd.CombinedOutput() - Expect(err).ToNot(HaveOccurred()) - } -} - -// reconnect a node to the other nodes in the cluster -func (env *DisconnectEnv) ReconnectNode(checkError bool) { - for _, targetIP := range env.otherNodes { - cmd := exec.Command("bash", "../lib/io_connect_node.sh", env.nodeToIsolate, targetIP, "RECONNECT", env.disconnectMethod) - cmd.Dir = "./" - _, err := cmd.CombinedOutput() - if checkError { - Expect(err).ToNot(HaveOccurred()) - } - } - env.nodeToIsolate = "" - env.disconnectMethod = "" -} - +// prevent mayastor pod from running on the given node func SuppressMayastorPodOn(nodeName string) { common.UnlabelNode(nodeName, engineLabel) err := common.WaitForPodNotRunningOnNode(mayastorRegexp, namespace, nodeName, podUnscheduleTimeoutSecs) Expect(err).ToNot(HaveOccurred()) } -// reconnect a node to the other nodes in the cluster +// allow mayastor pod to run on the given node +func UnsuppressMayastorPodOn(nodeName string) { + // add the mayastor label to the node + common.LabelNode(nodeName, engineLabel, mayastorLabel) + err := common.WaitForPodRunningOnNode(mayastorRegexp, namespace, nodeName, podRescheduleTimeoutSecs) + Expect(err).ToNot(HaveOccurred()) +} + +// allow mayastor pod to run on the suppressed node func (env *DisconnectEnv) UnsuppressMayastorPod() { - if env.nodeToIsolate != "" { - // add the mayastor label to the node - common.LabelNode(env.nodeToIsolate, engineLabel, mayastorLabel) - err := common.WaitForPodRunningOnNode(mayastorRegexp, namespace, env.nodeToIsolate, podRescheduleTimeoutSecs) - Expect(err).ToNot(HaveOccurred()) - env.nodeToIsolate = "" + if env.replicaToRemove != "" { + UnsuppressMayastorPodOn(env.replicaToRemove) + env.replicaToRemove = "" } } -// return the node name to isolate and a vector of IP addresses to isolate -func getNodes(uuid string) (string, []string) { +// return the node of the replica to remove, the nodes in the +// volume and a vector of the mayastor-hosting nodes in the cluster +func getNodes(uuid string) (string, []string, []string) { nodeList, err := common.GetNodeLocs() Expect(err).ToNot(HaveOccurred()) - var nodeToIsolate = "" - nexusNode, replicas := common.GetMsvNodes(uuid) + var replicaToRemove = "" + nexusNode, replicaNodes := common.GetMsvNodes(uuid) Expect(nexusNode).NotTo(Equal("")) - fmt.Printf("nexus node is \"%s\"\n", nexusNode) - - var otherAddresses []string // find a node which is not the nexus and is a replica - for _, node := range replicas { + for _, node := range replicaNodes { if node != nexusNode { - nodeToIsolate = node + replicaToRemove = node break } } - Expect(nodeToIsolate).NotTo(Equal("")) + Expect(replicaToRemove).NotTo(Equal("")) - // get a list of the other ip addresses in the cluster + // get a list of all of the mayastor nodes in the cluster + var allMayastorNodes []string for _, node := range nodeList { - if node.NodeName != nodeToIsolate { - otherAddresses = append(otherAddresses, node.IPAddress) + if node.MayastorNode { + allMayastorNodes = append(allMayastorNodes, node.NodeName) } } - Expect(len(otherAddresses)).To(BeNumerically(">", 0)) - - fmt.Printf("node to isolate is \"%s\"\n", nodeToIsolate) - return nodeToIsolate, otherAddresses -} - -// Run fio against the cluster while a replica is being removed and reconnected to the network -func (env *DisconnectEnv) LossTest() { - fmt.Printf("disconnecting \"%s\"\n", env.nodeToIsolate) - DisconnectNode(env.nodeToIsolate, env.otherNodes, env.disconnectMethod) - - fmt.Printf("running fio\n") - common.RunFio(env.fioPodName, 20) - - fmt.Printf("waiting up to %s for disconnection to affect the nexus\n", disconnectionTimeoutSecs) - Eventually(func() string { - return common.GetMsvState(env.uuid) - }, - disconnectionTimeoutSecs, // timeout - "1s", // polling interval - ).Should(Equal("degraded")) - - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) - - fmt.Printf("running fio while node is disconnected\n") - common.RunFio(env.fioPodName, 20) - - fmt.Printf("reconnecting \"%s\"\n", env.nodeToIsolate) - env.ReconnectNode(true) - - fmt.Printf("running fio when node is reconnected\n") - common.RunFio(env.fioPodName, 20) -} - -// Remove the replica without running IO and verify that the volume becomes degraded but is still functional -func (env *DisconnectEnv) LossWhenIdleTest() { - fmt.Printf("disconnecting \"%s\"\n", env.nodeToIsolate) - - DisconnectNode(env.nodeToIsolate, env.otherNodes, env.disconnectMethod) - - fmt.Printf("waiting up to %s for disconnection to affect the nexus\n", disconnectionTimeoutSecs) - Eventually(func() string { - return common.GetMsvState(env.uuid) - }, - disconnectionTimeoutSecs, // timeout - "1s", // polling interval - ).Should(Equal("degraded")) - - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) - - fmt.Printf("running fio while node is disconnected\n") - common.RunFio(env.fioPodName, 20) - - fmt.Printf("reconnecting \"%s\"\n", env.nodeToIsolate) - env.ReconnectNode(true) - - fmt.Printf("running fio when node is reconnected\n") - common.RunFio(env.fioPodName, 20) + logf.Log.Info("identified nodes", "nexus", nexusNode, "node of replica to remove", replicaToRemove) + return replicaToRemove, replicaNodes, allMayastorNodes } // Run fio against the cluster while a replica mayastor pod is unscheduled and then rescheduled func (env *DisconnectEnv) PodLossTest() { - fmt.Printf("removing mayastor pod from node \"%s\"\n", env.nodeToIsolate) - SuppressMayastorPodOn(env.nodeToIsolate) + Expect(len(env.allMayastorNodes)).To(BeNumerically(">=", 2)) // must support >= 2 replicas + + // disable mayastor on the spare nodes so that moac cannot assign + // them to the volume to replace the faulted one. We want to keep + // the volume degraded before restoring the suppressed node. + for _, node := range env.unusedNodes { + logf.Log.Info("suppressing mayastor on unused node", "node", node) + SuppressMayastorPodOn(node) + } + logf.Log.Info("removing mayastor replica", "node", env.replicaToRemove) + SuppressMayastorPodOn(env.replicaToRemove) - fmt.Printf("waiting up to %s for pod removal to affect the nexus\n", disconnectionTimeoutSecs) + logf.Log.Info("waiting for pod removal to affect the nexus", "timeout", disconnectionTimeoutSecs) Eventually(func() string { - fmt.Printf("running fio against volume\n") + logf.Log.Info("running fio against the volume") common.RunFio(env.fioPodName, 5) return common.GetMsvState(env.uuid) }, @@ -177,17 +111,17 @@ func (env *DisconnectEnv) PodLossTest() { "1s", // polling interval ).Should(Equal("degraded")) - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) + logf.Log.Info("volume condition", "state", common.GetMsvState(env.uuid)) - fmt.Printf("running fio against the degraded volume\n") + logf.Log.Info("running fio against the degraded volume") common.RunFio(env.fioPodName, 20) - fmt.Printf("enabling mayastor pod on node \"%s\"\n", env.nodeToIsolate) + logf.Log.Info("enabling mayastor pod", "node", env.replicaToRemove) env.UnsuppressMayastorPod() - fmt.Printf("waiting up to %s for the volume to be repaired\n", repairTimeoutSecs) + logf.Log.Info("waiting for the volume to be repaired", "timeout", repairTimeoutSecs) Eventually(func() string { - fmt.Printf("running fio while volume is being repaired\n") + logf.Log.Info("running fio while volume is being repaired") common.RunFio(env.fioPodName, 5) return common.GetMsvState(env.uuid) }, @@ -195,119 +129,70 @@ func (env *DisconnectEnv) PodLossTest() { "1s", // polling interval ).Should(Equal("healthy")) - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) - - fmt.Printf("running fio against the repaired volume\n") - common.RunFio(env.fioPodName, 20) -} - -// Run fio against the cluster while a replica node is being removed, -// wait for the volume to become degraded, then wait for it to be repaired. -// Run fio against repaired volume, and again after node is reconnected. -func (env *DisconnectEnv) ReplicaReassignTest() { - // This test needs at least 4 nodes, a refuge node, a mayastor node to isolate, and 2 other mayastor nodes - Expect(len(env.otherNodes)).To(BeNumerically(">=", 3)) - - fmt.Printf("disconnecting \"%s\"\n", env.nodeToIsolate) - DisconnectNode(env.nodeToIsolate, env.otherNodes, env.disconnectMethod) - - fmt.Printf("running fio against the volume\n") - common.RunFio(env.fioPodName, 20) - - fmt.Printf("waiting up to %s for disconnection to affect the nexus\n", disconnectionTimeoutSecs) - Eventually(func() string { - return common.GetMsvState(env.uuid) - }, - disconnectionTimeoutSecs, // timeout - "1s", // polling interval - ).Should(Equal("degraded")) - - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) - - fmt.Printf("waiting up to %s for the volume to be repaired\n", repairTimeoutSecs) - Eventually(func() string { - return common.GetMsvState(env.uuid) - }, - repairTimeoutSecs, // timeout - "1s", // polling interval - ).Should(Equal("healthy")) - - fmt.Printf("volume is in state \"%s\"\n", common.GetMsvState(env.uuid)) + logf.Log.Info("volume condition", "state", common.GetMsvState(env.uuid)) - fmt.Printf("running fio while node is disconnected\n") - common.RunFio(env.fioPodName, 20) - - fmt.Printf("reconnecting \"%s\"\n", env.nodeToIsolate) - env.ReconnectNode(true) - - fmt.Printf("running fio when node is reconnected\n") + logf.Log.Info("running fio against the repaired volume") common.RunFio(env.fioPodName, 20) } -// Common steps required when setting up the test when using a refuge node. -// Creates the PVC, deploys fio on the refuge node, determines the nodes -// used by the volume and selects a non-nexus replica node to isolate. -func SetupWithRefuge(pvcName string, storageClassName string, fioPodName string, disconnectMethod string) DisconnectEnv { - env := DisconnectEnv{} - - env.uuid = common.MkPVC(pvcName, storageClassName) - env.volToDelete = pvcName - env.storageClass = storageClassName - env.disconnectMethod = disconnectMethod - - createFioOnRefugeNode(fioPodName, pvcName) - - fmt.Printf("waiting for fio\n") - Eventually(func() bool { - return common.FioReadyPod() - }, - defTimeoutSecs, // timeout - "1s", // polling interval - ).Should(Equal(true)) - env.fioPodName = fioPodName - - env.nodeToIsolate, env.otherNodes = getNodes(env.uuid) - return env -} - // Common steps required when setting up the test. -// Creates the PVC, deploys fio, determines the nodes used by the volume -// and selects a non-nexus replica node to isolate +// Creates the PVC, deploys fio, and records variables needed for the +// test in the DisconnectEnv structure func Setup(pvcName string, storageClassName string, fioPodName string) DisconnectEnv { env := DisconnectEnv{} - env.uuid = common.MkPVC(pvcName, storageClassName) env.volToDelete = pvcName env.storageClass = storageClassName - env.disconnectMethod = "" + env.uuid = common.MkPVC(pvcName, storageClassName) podObj := common.CreateFioPodDef(fioPodName, pvcName) _, err := common.CreatePod(podObj) Expect(err).ToNot(HaveOccurred()) - fmt.Printf("waiting for fio\n") + env.fioPodName = fioPodName + logf.Log.Info("waiting for pod", "name", env.fioPodName) Eventually(func() bool { - return common.FioReadyPod() + return common.IsPodRunning(env.fioPodName) }, defTimeoutSecs, // timeout "1s", // polling interval ).Should(Equal(true)) - env.fioPodName = fioPodName - env.nodeToIsolate, env.otherNodes = getNodes(env.uuid) + var replicaNodes []string + env.replicaToRemove, replicaNodes, env.allMayastorNodes = getNodes(env.uuid) + + // Identify mayastor nodes not currently part of the volume + for _, node := range env.allMayastorNodes { + unused := true + for _, replica := range replicaNodes { + if node == replica { // part of the current volume + unused = false + break + } + } + if unused { + env.unusedNodes = append(env.unusedNodes, node) + } + } return env } // Common steps required when tearing down the test func (env *DisconnectEnv) Teardown() { + var err error + + env.UnsuppressMayastorPod() + + for _, node := range env.unusedNodes { + UnsuppressMayastorPodOn(node) + } if env.fioPodName != "" { - fmt.Printf("removing fio pod\n") - err := common.DeletePod(env.fioPodName) - Expect(err).ToNot(HaveOccurred()) + err = common.DeletePod(env.fioPodName) env.fioPodName = "" } if env.volToDelete != "" { common.RmPVC(env.volToDelete, env.storageClass) env.volToDelete = "" } + Expect(err).ToNot(HaveOccurred()) } diff --git a/test/e2e/node_disconnect/replica_disconnect/replica_disconnection_test.go b/test/e2e/node_disconnect/replica_disconnect/replica_disconnection_test.go deleted file mode 100644 index ca58766fb..000000000 --- a/test/e2e/node_disconnect/replica_disconnect/replica_disconnection_test.go +++ /dev/null @@ -1,106 +0,0 @@ -package replica_disconnection_test - -import ( - "e2e-basic/common" - disconnect_lib "e2e-basic/node_disconnect/lib" - - "os" - "testing" - - . "github.com/onsi/ginkgo" - "github.com/onsi/ginkgo/reporters" - . "github.com/onsi/gomega" - - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" -) - -var gStorageClasses []string - -var env disconnect_lib.DisconnectEnv - -const reject = "REJECT" -const drop = "DROP" -const run_drop = false - -func TestNodeLoss(t *testing.T) { - RegisterFailHandler(Fail) - reportDir := os.Getenv("e2e_reports_dir") - junitReporter := reporters.NewJUnitReporter(reportDir + "/replica-disconnect-junit.xml") - RunSpecsWithDefaultAndCustomReporters(t, "Replica disconnection tests", - []Reporter{junitReporter}) -} - -var _ = Describe("Mayastor replica disconnection test", func() { - - It("should create a refuge node and wait for the pods to re-deploy", func() { - disconnect_lib.DisconnectSetup() - }) - - It("should define the storage classes to use", func() { - common.MkStorageClass("mayastor-iscsi-2", 2, "iscsi", "io.openebs.csi-mayastor") - gStorageClasses = append(gStorageClasses, "mayastor-iscsi-2") - common.MkStorageClass("mayastor-nvmf-2", 2, "nvmf", "io.openebs.csi-mayastor") - gStorageClasses = append(gStorageClasses, "mayastor-nvmf-2") - }) - - It("should verify nvmf nexus behaviour when a node becomes inaccessible (iptables REJECT)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-nvmf", "mayastor-nvmf-2", "fio", reject) - env.LossTest() - env.Teardown() - }) - - It("should verify iscsi nexus behaviour when a node becomes inaccessible (iptables REJECT)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-iscsi", "mayastor-iscsi-2", "fio", reject) - env.LossTest() - env.Teardown() - }) - - if run_drop { - It("should verify nvmf nexus behaviour when a node becomes inaccessible (iptables DROP)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-nvmf", "mayastor-nvmf-2", "fio", drop) - env.LossTest() - env.Teardown() - }) - - It("should verify iscsi nexus behaviour when a node becomes inaccessible (iptables DROP)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-iscsi", "mayastor-iscsi-2", "fio", drop) - env.LossTest() - env.Teardown() - }) - } - - It("should verify nvmf nexus behaviour when a node becomes inaccessible when no IO is received (iptables REJECT)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-nvmf", "mayastor-nvmf-2", "fio", reject) - env.LossWhenIdleTest() - env.Teardown() - }) - - It("should verify iscsi nexus behaviour when a node becomes inaccessible when no IO is received (iptables REJECT)", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-iscsi", "mayastor-iscsi-2", "fio", reject) - env.LossWhenIdleTest() - env.Teardown() - }) -}) - -var _ = BeforeSuite(func(done Done) { - logf.SetLogger(zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter))) - common.SetupTestEnv() - close(done) -}, 60) - -var _ = AfterSuite(func() { - // NB This only tears down the local structures for talking to the cluster, - // not the kubernetes cluster itself. - By("tearing down the test environment") - - // ensure node is reconnected in the event of a test failure - env.ReconnectNode(false) - env.Teardown() - - for _, sc := range gStorageClasses { - common.RmStorageClass(sc) - } - disconnect_lib.DisconnectTeardown() - common.TeardownTestEnv() -}) diff --git a/test/e2e/node_disconnect/replica_pod_remove/replica_pod_remove_test.go b/test/e2e/node_disconnect/replica_pod_remove/replica_pod_remove_test.go index 8a8776473..2e3d92a44 100644 --- a/test/e2e/node_disconnect/replica_pod_remove/replica_pod_remove_test.go +++ b/test/e2e/node_disconnect/replica_pod_remove/replica_pod_remove_test.go @@ -3,6 +3,7 @@ package replica_pod_remove_test import ( "e2e-basic/common" disconnect_lib "e2e-basic/node_disconnect/lib" + logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -15,7 +16,8 @@ import ( ) var env disconnect_lib.DisconnectEnv -var gStorageClass string = "" + +const gStorageClass = "mayastor-nvmf-pod-remove-test-sc" func TestMayastorPodLoss(t *testing.T) { RegisterFailHandler(Fail) @@ -26,14 +28,19 @@ func TestMayastorPodLoss(t *testing.T) { } var _ = Describe("Mayastor replica pod removal test", func() { + AfterEach(func() { + logf.Log.Info("AfterEach") + env.Teardown() // removes fio pod and volume + common.RmStorageClass(gStorageClass) - It("should define the storage class to use", func() { - common.MkStorageClass("mayastor-nvmf-3", 3, "nvmf", "io.openebs.csi-mayastor") - gStorageClass = "mayastor-nvmf-3" + // Check resource leakage. + err := common.AfterEachCheck() + Expect(err).ToNot(HaveOccurred()) }) It("should verify nvmf nexus behaviour when a mayastor pod is removed", func() { - env = disconnect_lib.Setup("loss-test-pvc-nvmf", "mayastor-nvmf-3", "fio") + common.MkStorageClass(gStorageClass, 2, "nvmf", "io.openebs.csi-mayastor") + env = disconnect_lib.Setup("loss-test-pvc-nvmf", gStorageClass, "fio-pod-remove-test") env.PodLossTest() }) }) @@ -46,12 +53,5 @@ var _ = BeforeSuite(func(done Done) { var _ = AfterSuite(func() { By("tearing down the test environment") - - env.UnsuppressMayastorPod() - env.Teardown() // removes fio pod and volume - - if gStorageClass != "" { - common.RmStorageClass(gStorageClass) - } common.TeardownTestEnv() }) diff --git a/test/e2e/node_disconnect/replica_reassign/replica_reassign_test.go b/test/e2e/node_disconnect/replica_reassign/replica_reassign_test.go deleted file mode 100644 index 2a223c826..000000000 --- a/test/e2e/node_disconnect/replica_reassign/replica_reassign_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package replica_reassignment_test - -import ( - "e2e-basic/common" - disconnect_lib "e2e-basic/node_disconnect/lib" - - "os" - "testing" - - . "github.com/onsi/ginkgo" - "github.com/onsi/ginkgo/reporters" - . "github.com/onsi/gomega" - - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" -) - -var gStorageClass string - -var env disconnect_lib.DisconnectEnv - -const reject = "REJECT" - -func TestReplicaReassign(t *testing.T) { - RegisterFailHandler(Fail) - reportDir := os.Getenv("e2e_reports_dir") - junitReporter := reporters.NewJUnitReporter(reportDir + "/replica-reassign-junit.xml") - RunSpecsWithDefaultAndCustomReporters(t, "Replica reassignment test", - []Reporter{junitReporter}) -} - -var _ = Describe("Mayastor replica reassignment test", func() { - - It("should create a refuge node and wait for the pods to re-deploy", func() { - disconnect_lib.DisconnectSetup() - }) - - It("should define the storage class to use", func() { - common.MkStorageClass("mayastor-nvmf-2", 2, "nvmf", "io.openebs.csi-mayastor") - gStorageClass = "mayastor-nvmf-2" - }) - - It("should verify nvmf nexus repair of volume when a node becomes inaccessible", func() { - env = disconnect_lib.SetupWithRefuge("loss-test-pvc-nvmf", "mayastor-nvmf-2", "fio", reject) - env.ReplicaReassignTest() - env.Teardown() - }) -}) - -var _ = BeforeSuite(func(done Done) { - logf.SetLogger(zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter))) - common.SetupTestEnv() - close(done) -}, 60) - -var _ = AfterSuite(func() { - // NB This only tears down the local structures for talking to the cluster, - // not the kubernetes cluster itself. - By("tearing down the test environment") - - // ensure node is reconnected in the event of a test failure - env.ReconnectNode(false) - env.Teardown() - - if gStorageClass != "" { - common.RmStorageClass(gStorageClass) - } - disconnect_lib.DisconnectTeardown() - common.TeardownTestEnv() -}) diff --git a/test/e2e/nightly/README.md b/test/e2e/pvc_stress_fio/README.md similarity index 90% rename from test/e2e/nightly/README.md rename to test/e2e/pvc_stress_fio/README.md index 0a1dbef1a..b648a22b4 100644 --- a/test/e2e/nightly/README.md +++ b/test/e2e/pvc_stress_fio/README.md @@ -1,11 +1,3 @@ -## About -Long running stress e2e tests for mayastor - -To run the tests use the `test.sh` file. - -When adding a test make sure to bump the timeout value suitably. - -## Tests ### pvc_stress_fio ``` Do { @@ -62,8 +54,3 @@ Then: The PVC and its corresponding PV should be removed Note: For development purposes the number of cycles for each test can be changed through environment variables. 1 `e2e_pvc_stress_cd_cycles` 2 `e2e_pvc_stress_crud_cycles` - -To run tests from here use the command line below. -``` -go test -v ./... -ginkgo.v -ginkgo.progress -timeout 0 -``` diff --git a/test/e2e/nightly/pvc_stress_fio/pvc_stress_fio_test.go b/test/e2e/pvc_stress_fio/pvc_stress_fio_test.go similarity index 100% rename from test/e2e/nightly/pvc_stress_fio/pvc_stress_fio_test.go rename to test/e2e/pvc_stress_fio/pvc_stress_fio_test.go diff --git a/test/e2e/resource_check/resource_check_test.go b/test/e2e/resource_check/resource_check_test.go new file mode 100644 index 000000000..728a402c3 --- /dev/null +++ b/test/e2e/resource_check/resource_check_test.go @@ -0,0 +1,72 @@ +package basic_test + +import ( + "e2e-basic/common" + "os" + "testing" + + . "github.com/onsi/ginkgo" + "github.com/onsi/ginkgo/reporters" + . "github.com/onsi/gomega" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +// Check that there are no artefacts left over from +// the previous 3rd party test. +func resourceCheck() { + + found, err := common.CheckForTestPods() + if err != nil { + logf.Log.Error(err, "Failed to check for test pods.") + } else { + Expect(found).To(BeFalse()) + } + + found, err = common.CheckForPVCs() + if err != nil { + logf.Log.Error(err, "Failed to check for PVCs") + } + Expect(found).To(BeFalse()) + + found, err = common.CheckForPVs() + if err != nil { + logf.Log.Error(err, "Failed to check PVs") + } + Expect(found).To(BeFalse()) + + found, err = common.CheckForMSVs() + if err != nil { + logf.Log.Error(err, "Failed to check MSVs") + } + Expect(found).To(BeFalse()) +} + +func TestResourceCheck(t *testing.T) { + RegisterFailHandler(Fail) + + reportDir := os.Getenv("e2e_reports_dir") + junitReporter := reporters.NewJUnitReporter(reportDir + "/resource_check-junit.xml") + RunSpecsWithDefaultAndCustomReporters(t, "Resource Check Suite", + []Reporter{junitReporter}) +} + +var _ = Describe("Mayastor resource check", func() { + It("should have no resources allocated", func() { + resourceCheck() + }) +}) + +var _ = BeforeSuite(func(done Done) { + logf.SetLogger(zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter))) + common.SetupTestEnv() + + close(done) +}, 60) + +var _ = AfterSuite(func() { + // NB This only tears down the local structures for talking to the cluster, + // not the kubernetes cluster itself. + By("tearing down the test environment") + common.TeardownTestEnv() +}) diff --git a/test/e2e/uninstall/uninstall_test.go b/test/e2e/uninstall/uninstall_test.go index fd2ae6977..3d5aa389b 100644 --- a/test/e2e/uninstall/uninstall_test.go +++ b/test/e2e/uninstall/uninstall_test.go @@ -16,6 +16,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" ) +var cleanup = false + // Encapsulate the logic to find where the deploy yamls are func getDeployYamlDir() string { _, filename, _, _ := runtime.Caller(0) @@ -33,41 +35,88 @@ func deleteDeployYaml(filename string) { // Helper for deleting mayastor CRDs func deleteCRD(crdName string) { cmd := exec.Command("kubectl", "delete", "crd", crdName) - _, err := cmd.CombinedOutput() - Expect(err).ToNot(HaveOccurred()) + _ = cmd.Run() +} + +// Create mayastor namespace +func deleteNamespace() { + cmd := exec.Command("kubectl", "delete", "namespace", "mayastor") + out, err := cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred(), "%s", out) } // Teardown mayastor on the cluster under test. // We deliberately call out to kubectl, rather than constructing the client-go // objects, so that we can verfiy the local deploy yamls are correct. func teardownMayastor() { - // The correct sequence for a reusable cluster is - // Delete all pods in the default namespace - // Delete all pvcs - // Then uninstall mayastor - podsDeleted, podCount := common.DeleteAllPods() - pvcsDeleted, pvcsFound := common.DeleteAllVolumeResources() + var podsDeleted bool + var pvcsDeleted bool + var podCount int + var pvcsFound bool + + logf.Log.Info("Settings:", "cleanup", cleanup) + if !cleanup { + found, err := common.CheckForTestPods() + if err != nil { + logf.Log.Error(err, "Failed to checking for test pods.") + } else { + Expect(found).To(BeFalse()) + } + + found, err = common.CheckForPVCs() + if err != nil { + logf.Log.Error(err, "Failed to check for PVCs") + } + Expect(found).To(BeFalse()) + + found, err = common.CheckForPVs() + if err != nil { + logf.Log.Error(err, "Failed to check PVs") + } + Expect(found).To(BeFalse()) + + found, err = common.CheckForMSVs() + if err != nil { + logf.Log.Error(err, "Failed to check MSVs") + } + Expect(found).To(BeFalse()) + + } else { + // The correct sequence for a reusable cluster is + // Delete all pods in the default namespace + // Delete all pvcs + // Delete all mayastor pools + // Then uninstall mayastor + podsDeleted, podCount = common.DeleteAllPods() + pvcsDeleted, pvcsFound = common.DeleteAllVolumeResources() + } + + common.DeletePools() logf.Log.Info("Cleanup done, Uninstalling mayastor") // Deletes can stall indefinitely, try to mitigate this - // by running the deletes in different threads + // by running the deletes on different threads go deleteDeployYaml("csi-daemonset.yaml") - time.Sleep(10 * time.Second) go deleteDeployYaml("mayastor-daemonset.yaml") - time.Sleep(5 * time.Second) go deleteDeployYaml("moac-deployment.yaml") - time.Sleep(5 * time.Second) go deleteDeployYaml("nats-deployment.yaml") - time.Sleep(5 * time.Second) { - iters := 18 - logf.Log.Info("Waiting for Mayastor pods to be deleted", "timeout seconds", iters*10) + const timeOutSecs = 240 + const sleepSecs = 10 + maxIters := (timeOutSecs + sleepSecs - 1) / sleepSecs numMayastorPods := common.MayastorUndeletedPodCount() - for attempts := 0; attempts < iters && numMayastorPods != 0; attempts++ { - time.Sleep(10 * time.Second) + if numMayastorPods != 0 { + logf.Log.Info("Waiting for Mayastor pods to be deleted", + "timeout", timeOutSecs) + } + for iter := 0; iter < maxIters && numMayastorPods != 0; iter++ { + logf.Log.Info("\tWaiting ", + "seconds", sleepSecs, + "numMayastorPods", numMayastorPods, + "iter", iter) numMayastorPods = common.MayastorUndeletedPodCount() - logf.Log.Info("", "numMayastorPods", numMayastorPods) + time.Sleep(sleepSecs * time.Second) } } @@ -78,21 +127,44 @@ func teardownMayastor() { deleteDeployYaml("storage-class.yaml") deleteCRD("mayastornodes.openebs.io") deleteCRD("mayastorvolumes.openebs.io") - // Attempt to forcefully delete pods - // TODO replace this function call when a single cluster is used for a single test run, with a check. - forceDeleted := common.ForceDeleteMayastorPods() - deleteDeployYaml("namespace.yaml") - Expect(forceDeleted).To(BeFalse()) - - Expect(podsDeleted).To(BeTrue()) - Expect(podCount).To(BeZero()) - Expect(pvcsFound).To(BeFalse()) - Expect(pvcsDeleted).To(BeTrue()) - Expect(common.MayastorUndeletedPodCount()).To(Equal(0)) + + if cleanup { + // Attempt to forcefully delete mayastor pods + forceDeleted := common.ForceDeleteMayastorPods() + // FIXME: Temporarily disable this assert CAS-651 has been fixed + // Expect(forceDeleted).To(BeFalse()) + if forceDeleted { + logf.Log.Info("WARNING: Mayastor pods were force deleted at uninstall!!!") + } + deleteNamespace() + // delete the namespace prior to possibly failing the uninstall + // to yield a reusable cluster on fail. + Expect(podsDeleted).To(BeTrue()) + Expect(podCount).To(BeZero()) + Expect(pvcsFound).To(BeFalse()) + Expect(pvcsDeleted).To(BeTrue()) + } else { + // FIXME: Temporarily disable this assert CAS-651 has been fixed + // and force delete lingering mayastor pods. + // Expect(common.MayastorUndeletedPodCount()).To(Equal(0)) + if common.MayastorUndeletedPodCount() != 0 { + logf.Log.Info("WARNING: Mayastor pods not deleted at uninstall, forcing deletion.") + common.ForceDeleteMayastorPods() + } + // More verbose here as deleting the namespace is often where this + // test hangs. + logf.Log.Info("Deleting the mayastor namespace") + deleteNamespace() + logf.Log.Info("Deleted the mayastor namespace") + } } func TestTeardownSuite(t *testing.T) { RegisterFailHandler(Fail) + + if os.Getenv("e2e_uninstall_cleanup") != "0" { + cleanup = true + } reportDir := os.Getenv("e2e_reports_dir") junitReporter := reporters.NewJUnitReporter(reportDir + "/uninstall-junit.xml") RunSpecsWithDefaultAndCustomReporters(t, "Basic Teardown Suite", diff --git a/test/grpc/test_cli.js b/test/grpc/test_cli.js index 3e1561711..d5b6e5dd0 100644 --- a/test/grpc/test_cli.js +++ b/test/grpc/test_cli.js @@ -54,7 +54,7 @@ function runMockServer (rules) { mayastorMockServer.listen('127.0.0.1:' + EGRESS_PORT); } -describe('cli', function () { +describe('mayastor-client', function () { describe('success', function () { before(() => { process.env.RUST_BACKTRACE = '1'; diff --git a/test/grpc/test_nexus.js b/test/grpc/test_nexus.js index 139df9f8b..2b2e09fc0 100644 --- a/test/grpc/test_nexus.js +++ b/test/grpc/test_nexus.js @@ -827,7 +827,17 @@ describe('nexus', function () { } }); - // must be last nvmf test as it removes ns + it('should create nexus with nvmf target as child', async () => { + const args = { + uuid: UUID, + size: diskSize, + children: [ + `nvmf://127.0.0.1:8420/nqn.2019-05.io.openebs:${TGTUUID}` + ] + }; + await createNexus(args); + }); + it('should remove namespace from nvmf subsystem', (done) => { const args = { nqn: `nqn.2019-05.io.openebs:${TGTUUID}`, @@ -836,6 +846,39 @@ describe('nexus', function () { common.jsonrpcCommand('/tmp/target.sock', 'nvmf_subsystem_remove_ns', args, done); }); + it('should still have bdev of removed child after remove event', (done) => { + common.jsonrpcCommand(null, 'bdev_get_bdevs', (err, out) => { + if (err) return done(err); + const bdevs = JSON.parse(out); + const match = `127.0.0.1:8420/nqn.2019-05.io.openebs:${TGTUUID}n1`; + var i; + for (i in bdevs) { + if (bdevs[i].name === match) { + return done(); + } + } + done(new Error('bdev not found')); + }); + }); + + it('should have nexus in faulted state and its child in degraded state', (done) => { + client.listNexus({}, (err, res) => { + if (err) return done(err); + assert.lengthOf(res.nexus_list, 1); + const nexus = res.nexus_list[0]; + + assert.equal(nexus.uuid, UUID); + assert.equal(nexus.state, 'NEXUS_FAULTED'); + assert.lengthOf(nexus.children, 1); + assert.equal(nexus.children[0].state, 'CHILD_DEGRADED'); + done(); + }); + }); + + it('should destroy nexus', async () => { + await destroyNexus({ uuid: UUID }); + }); + it('should fail to create nexus with child that has no namespaces', (done) => { const args = { uuid: UUID, @@ -852,6 +895,28 @@ describe('nexus', function () { }); }); + it('should add namespace back to nvmf subsystem', (done) => { + const args = { + nqn: `nqn.2019-05.io.openebs:${TGTUUID}`, + namespace: { + bdev_name: 'Malloc0' + } + }; + common.jsonrpcCommand('/tmp/target.sock', 'nvmf_subsystem_add_ns', args, done); + }); + + it('should create then destroy nexus with previously asynchronously removed nvmf target as child', async () => { + const args = { + uuid: UUID, + size: diskSize, + children: [ + `nvmf://127.0.0.1:8420/nqn.2019-05.io.openebs:${TGTUUID}` + ] + }; + await createNexus(args); + await destroyNexus({ uuid: UUID }); + }); + it('should have zero nexus devices left', (done) => { client.listNexus({}, (err, res) => { if (err) return done(err);