diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json
index 86e769452..5d58b231a 100644
--- a/dev/.documenter-siteinfo.json
+++ b/dev/.documenter-siteinfo.json
@@ -1 +1 @@
-{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-12-12T15:48:22","documenter_version":"1.8.0"}}
\ No newline at end of file
+{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2024-12-13T11:52:44","documenter_version":"1.8.0"}}
\ No newline at end of file
diff --git a/dev/assets/README/index.html b/dev/assets/README/index.html
index 14fc83f40..cf72d04cb 100644
--- a/dev/assets/README/index.html
+++ b/dev/assets/README/index.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Introduction · DataFrames.jl</title><meta name="title" content="Introduction · DataFrames.jl"/><meta property="og:title" content="Introduction · DataFrames.jl"/><meta property="twitter:title" content="Introduction · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><script data-outdated-warner src="../warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" 
rel="stylesheet" type="text/css" href="../themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../themeswap.js"></script><link href="../favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../../man/joins/">Joins</a></li><li><a class="tocitem" 
href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../../lib/types/">Types</a></li><li><a class="tocitem" href="../../lib/functions/">Functions</a></li><li><a class="tocitem" href="../../lib/indexing/">Indexing</a></li><li><a class="tocitem" href="../../lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Introduction</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Introduction</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/assets/README.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a 
class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Introduction"><a class="docs-heading-anchor" href="#Introduction">Introduction</a><a id="Introduction-1"></a><a class="docs-heading-anchor-permalink" href="#Introduction" title="Permalink"></a></h1><p>In this folder we store the following data sets:</p><ul><li>german_credit.csv</li><li>iris.csv</li></ul><h1 id="German-Credit-data-set"><a class="docs-heading-anchor" href="#German-Credit-data-set">German Credit data set</a><a id="German-Credit-data-set-1"></a><a class="docs-heading-anchor-permalink" href="#German-Credit-data-set" title="Permalink"></a></h1><h2 id="License:"><a class="docs-heading-anchor" href="#License:">License:</a><a id="License:-1"></a><a class="docs-heading-anchor-permalink" href="#License:" title="Permalink"></a></h2><p>https://opendatacommons.org/licenses/dbcl/1-0/</p><h2 id="Source:"><a class="docs-heading-anchor" href="#Source:">Source:</a><a id="Source:-1"></a><a class="docs-heading-anchor-permalink" href="#Source:" title="Permalink"></a></h2><p>https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data) Professor Dr. 
Hans Hofmann Institut für Statistik und Ökonometrie Universität Hamburg FB Wirtschaftswissenschaften Von-Melle-Park 5 2000 Hamburg 13</p><p>The original data is from <a href="https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)">UCI</a>, and the file stored here is from <a href="https://www.kaggle.com/uciml/german-credit">Kaggle</a></p><h1 id="Iris-data-set"><a class="docs-heading-anchor" href="#Iris-data-set">Iris data set</a><a id="Iris-data-set-1"></a><a class="docs-heading-anchor-permalink" href="#Iris-data-set" title="Permalink"></a></h1><h2 id="License"><a class="docs-heading-anchor" href="#License">License</a><a id="License-1"></a><a class="docs-heading-anchor-permalink" href="#License" title="Permalink"></a></h2><p>https://creativecommons.org/publicdomain/zero/1.0/</p><h2 id="Source:-2"><a class="docs-heading-anchor" href="#Source:-2">Source:</a><a class="docs-heading-anchor-permalink" href="#Source:-2" title="Permalink"></a></h2><p>https://archive.ics.uci.edu/ml/datasets/Iris Creator: R.A. 
Fisher</p></article><nav class="docs-footer"><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Introduction · DataFrames.jl</title><meta name="title" content="Introduction · DataFrames.jl"/><meta property="og:title" content="Introduction · DataFrames.jl"/><meta property="twitter:title" content="Introduction · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/assets/README/"/><script data-outdated-warner src="../warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" 
rel="stylesheet" type="text/css" href="../themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../themeswap.js"></script><link href="../favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../../man/joins/">Joins</a></li><li><a class="tocitem" 
href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../../lib/types/">Types</a></li><li><a class="tocitem" href="../../lib/functions/">Functions</a></li><li><a class="tocitem" href="../../lib/indexing/">Indexing</a></li><li><a class="tocitem" href="../../lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Introduction</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Introduction</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/assets/README.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a 
class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Introduction"><a class="docs-heading-anchor" href="#Introduction">Introduction</a><a id="Introduction-1"></a><a class="docs-heading-anchor-permalink" href="#Introduction" title="Permalink"></a></h1><p>In this folder we store the following data sets:</p><ul><li>german_credit.csv</li><li>iris.csv</li></ul><h1 id="German-Credit-data-set"><a class="docs-heading-anchor" href="#German-Credit-data-set">German Credit data set</a><a id="German-Credit-data-set-1"></a><a class="docs-heading-anchor-permalink" href="#German-Credit-data-set" title="Permalink"></a></h1><h2 id="License:"><a class="docs-heading-anchor" href="#License:">License:</a><a id="License:-1"></a><a class="docs-heading-anchor-permalink" href="#License:" title="Permalink"></a></h2><p>https://opendatacommons.org/licenses/dbcl/1-0/</p><h2 id="Source:"><a class="docs-heading-anchor" href="#Source:">Source:</a><a id="Source:-1"></a><a class="docs-heading-anchor-permalink" href="#Source:" title="Permalink"></a></h2><p>https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data) Professor Dr. 
Hans Hofmann Institut für Statistik und Ökonometrie Universität Hamburg FB Wirtschaftswissenschaften Von-Melle-Park 5 2000 Hamburg 13</p><p>The original data is from <a href="https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)">UCI</a>, and the file stored here is from <a href="https://www.kaggle.com/uciml/german-credit">Kaggle</a></p><h1 id="Iris-data-set"><a class="docs-heading-anchor" href="#Iris-data-set">Iris data set</a><a id="Iris-data-set-1"></a><a class="docs-heading-anchor-permalink" href="#Iris-data-set" title="Permalink"></a></h1><h2 id="License"><a class="docs-heading-anchor" href="#License">License</a><a id="License-1"></a><a class="docs-heading-anchor-permalink" href="#License" title="Permalink"></a></h2><p>https://creativecommons.org/publicdomain/zero/1.0/</p><h2 id="Source:-2"><a class="docs-heading-anchor" href="#Source:-2">Source:</a><a class="docs-heading-anchor-permalink" href="#Source:-2" title="Permalink"></a></h2><p>https://archive.ics.uci.edu/ml/datasets/Iris Creator: R.A. 
Fisher</p></article><nav class="docs-footer"><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/index.html b/dev/index.html
index c30ae16bb..a0feb67c9 100644
--- a/dev/index.html
+++ b/dev/index.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Introduction · DataFrames.jl</title><meta name="title" content="Introduction · DataFrames.jl"/><meta property="og:title" content="Introduction · DataFrames.jl"/><meta property="twitter:title" content="Introduction · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/"/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" 
href="assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script><link href="assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href><img src="assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href>DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Introduction</a><ul class="internal"><li><a class="tocitem" href="#What-is-DataFrames.jl?"><span>What is DataFrames.jl?</span></a></li><li><a class="tocitem" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem"><span>DataFrames.jl and the Julia Data Ecosystem</span></a></li><li><a class="tocitem" href="#Questions?"><span>Questions?</span></a></li><li><a class="tocitem" href="#Package-Manual"><span>Package Manual</span></a></li><li><a class="tocitem" href="#API"><span>API</span></a></li><li><a class="tocitem" href="#Index"><span>Index</span></a></li></ul></li><li><a class="tocitem" 
href="man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="man/joins/">Joins</a></li><li><a class="tocitem" href="man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="man/sorting/">Sorting</a></li><li><a class="tocitem" href="man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="man/missing/">Missing Data</a></li><li><a class="tocitem" href="man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="lib/types/">Types</a></li><li><a class="tocitem" href="lib/functions/">Functions</a></li><li><a class="tocitem" href="lib/indexing/">Indexing</a></li><li><a class="tocitem" href="lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Introduction</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Introduction</a></li></ul></nav><div class="docs-right"><a 
class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/index.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="DataFrames.jl"><a class="docs-heading-anchor" href="#DataFrames.jl">DataFrames.jl</a><a id="DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#DataFrames.jl" title="Permalink"></a></h1><p>Welcome to the DataFrames.jl documentation!</p><p>This resource aims to teach you everything you need to know to get up and running with tabular data manipulation using the DataFrames.jl package.</p><p>For more illustrations of DataFrames.jl usage, in particular in conjunction with other packages you can check-out the following resources (they are kept up to date with the released version of DataFrames.jl):</p><ul><li><a href="https://www.jstatsoft.org/article/view/v107i04">DataFrames.jl: Flexible and Fast Tabular Data in Julia</a> article published in the <em>Journal of Statistical Software</em></li><li><a href="https://www.ahsmart.com/pub/data-wrangling-with-data-frames-jl-cheat-sheet/">Data Wrangling with DataFrames.jl Cheat Sheet</a></li><li><a href="https://github.com/bkamins/Julia-DataFrames-Tutorial/">DataFrames Tutorial using Jupyter Notebooks</a></li><li><a href="https://github.com/JuliaAcademy/DataFrames">Julia Academy DataFrames.jl tutorial</a></li><li><a href="https://github.com/bkamins/JuliaCon2023-Tutorial">JuliaCon 
2023</a>, <a href="https://github.com/bkamins/JuliaCon2022-DataFrames-Tutorial">JuliaCon 2022</a>, <a href="https://github.com/bkamins/JuliaCon2021-DataFrames-Tutorial">JuliaCon 2021</a>, <a href="https://github.com/bkamins/JuliaCon2020-DataFrames-Tutorial">JuliaCon 2020</a>, <a href="https://github.com/bkamins/JuliaCon2019-DataFrames-Tutorial">JuliaCon 2019</a>, <a href="https://github.com/bkamins/ODSC-EUROPE-2021">ODSC Europe 2021</a> tutorials, and <a href="https://github.com/bkamins/PyDataGlobal2020">PyData Global 2020</a></li><li><a href="https://github.com/bkamins/DataFrames-Showcase">DataFrames.jl showcase</a></li></ul><p>If you prefer to learn DataFrames.jl from a book you can consider reading:</p><ul><li><a href="https://github.com/bkamins/JuliaForDataAnalysis">Julia for Data Analysis</a>;</li><li><a href="https://juliadatascience.io/">Julia Data Science</a>.</li></ul><h2 id="What-is-DataFrames.jl?"><a class="docs-heading-anchor" href="#What-is-DataFrames.jl?">What is DataFrames.jl?</a><a id="What-is-DataFrames.jl?-1"></a><a class="docs-heading-anchor-permalink" href="#What-is-DataFrames.jl?" title="Permalink"></a></h2><p>DataFrames.jl provides a set of tools for working with tabular data in Julia. Its design and functionality are similar to those of <a href="https://pandas.pydata.org/">pandas</a> (in Python) and <code>data.frame</code>, <a href="https://rdatatable.gitlab.io/data.table/"><code>data.table</code></a> and <a href="https://dplyr.tidyverse.org/">dplyr</a> (in R), making it  a great general purpose data science tool.</p><p>DataFrames.jl plays a central role in the Julia Data ecosystem, and has tight integrations with a range of different libraries. 
DataFrames.jl isn&#39;t the only tool for working with tabular data in Julia – as noted below, there are some other great libraries for certain use-cases – but it provides great data wrangling functionality through a familiar interface.</p><p>To understand the toolchain in more detail, have a look at the tutorials in this manual. New users can start with the <a href="man/basics/#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a> section.</p><p>You may find the <a href="https://juliadata.github.io/DataFramesMeta.jl/stable/">DataFramesMeta.jl</a> package or one of the other convenience packages discussed in the <a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a> section of this manual helpful when writing more advanced data transformations, especially if you do not have significant programming experience. These packages provide convenience syntax similar to <a href="https://dplyr.tidyverse.org/">dplyr</a> in R.</p><p>If you use metadata when working with DataFrames.jl you might find the <a href="https://github.com/JuliaData/TableMetadataTools.jl">TableMetadataTools.jl</a> package useful. This package defines several convenience functions for performing typical metadata operations.</p><h2 id="DataFrames.jl-and-the-Julia-Data-Ecosystem"><a class="docs-heading-anchor" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem">DataFrames.jl and the Julia Data Ecosystem</a><a id="DataFrames.jl-and-the-Julia-Data-Ecosystem-1"></a><a class="docs-heading-anchor-permalink" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem" title="Permalink"></a></h2><p>The Julia data ecosystem can be a difficult space for new users to navigate, in part because the Julia ecosystem tends to distribute functionality across different libraries more than some other languages. 
Because many people coming to DataFrames.jl are just starting to explore the Julia data ecosystem, below is a list of well-supported libraries that provide different data science tools, along with a few notes about what makes each library special, and how well integrated they are with DataFrames.jl.</p><ul><li><strong>Statistics</strong><ul><li><a href="https://github.com/JuliaStats/StatsKit.jl">StatsKit.jl</a>: A convenience meta-package which loads a set of essential packages for statistics, including those mentioned below in this section and DataFrames.jl itself.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/Statistics/">Statistics</a>: The Julia standard library comes with a wide range of statistics functionality, but to gain access to these functions you must call <code>using Statistics</code>.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/">LinearAlgebra</a>: Like <code>Statistics</code>, many linear algebra features (factorizations, inversions, etc.) live in a library you have to load to use.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/SparseArrays/">SparseArrays</a> are also in the standard library but must be loaded to be used.</li><li><a href="https://github.com/nalimilan/FreqTables.jl">FreqTables.jl</a>: Create frequency tables / cross-tabulations. Tightly integrated with DataFrames.jl.</li><li><a href="https://juliastats.org/HypothesisTests.jl/stable/">HypothesisTests.jl</a>: A range of hypothesis testing tools.</li><li><a href="https://juliastats.org/GLM.jl/stable/manual/">GLM.jl</a>: Tools for estimating linear and generalized linear models. Tightly integrated with DataFrames.jl.</li><li><a href="https://juliastats.org/StatsModels.jl/stable/">StatsModels.jl</a>: For converting heterogeneous <code>DataFrame</code> into homogeneous matrices for use with linear algebra libraries or machine learning applications that don&#39;t directly support <code>DataFrame</code>s. 
Will do things like convert categorical variables into indicators/one-hot-encodings, create interaction terms, etc.</li><li><a href="https://multivariatestatsjl.readthedocs.io/en/stable/index.html">MultivariateStats.jl</a>: linear regression, ridge regression, PCA, component analyses tools. Not well integrated with DataFrames.jl, but easily used in combination with <code>StatsModels</code>.</li></ul></li><li><strong>Machine Learning</strong><ul><li><a href="https://github.com/alan-turing-institute/MLJ.jl">MLJ.jl</a>: if you&#39;re more of an applied user, there is a single package that pulls from all these different libraries and provides a single, scikit-learn inspired API: MLJ.jl. MLJ.jl provides a common interface for a wide range of machine learning algorithms.</li><li><a href="https://cstjean.github.io/ScikitLearn.jl/stable/">ScikitLearn.jl</a>: A Julia wrapper around the full Python scikit-learn machine learning library. Not well integrated with DataFrames.jl, but can be combined using StatsModels.jl.</li><li><a href="https://github.com/IBM/AutoMLPipeline.jl">AutoMLPipeline</a>: A package that makes it trivial to create complex ML pipeline structures using simple expressions. It leverages the built-in macro programming features of Julia to symbolically process, manipulate pipeline expressions, and makes it easy to discover optimal structures for machine learning regression and classification.</li><li>Deep learning: <a href="https://denizyuret.github.io/Knet.jl/stable/tutorial/#Introduction-to-Knet-1">KNet.jl</a> and <a href="https://github.com/FluxML/Flux.jl">Flux.jl</a>.</li></ul></li><li><strong>Plotting</strong><ul><li><a href="http://docs.juliaplots.org/latest/">Plots.jl</a>: Powerful, modern plotting library with a syntax akin to that of <a href="https://matplotlib.org/">matplotlib</a> (in Python) or <code>plot</code> (in R). 
<a href="http://docs.juliaplots.org/latest/tutorial/#Using-Plot-Recipes-1">StatsPlots.jl</a> provides Plots.jl with recipes for many standard statistical plots.</li><li><a href="http://gadflyjl.org/stable/">Gadfly.jl</a>: High-level plotting library with a &quot;grammar of graphics&quot; syntax akin to that of <a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a> (in R).</li><li><a href="http://juliaplots.org/AlgebraOfGraphics.jl/stable/">AlgebraOfGraphics.jl</a>: A &quot;grammar of graphics&quot; library built upon <a href="https://docs.makie.org/stable/">Makie.jl</a>.</li><li><a href="https://www.queryverse.org/VegaLite.jl/stable/">VegaLite.jl</a>: High-level plotting library that uses a different &quot;grammar of graphics&quot; syntax and has an emphasis on interactive graphics.</li></ul></li><li><strong>Data Wrangling</strong>:<ul><li><a href="https://github.com/invenia/Impute.jl">Impute.jl</a>: various methods for handling missing data in vectors, matrices and tables.</li><li><a href="https://github.com/JuliaData/DataFramesMeta.jl">DataFramesMeta.jl</a>: A range of convenience functions for DataFrames.jl that augment <code>select</code> and <code>transform</code> to provide a user experience similar to that provided by <a href="https://dplyr.tidyverse.org/">dplyr</a> in R.</li><li><a href="https://github.com/jkrumbiegel/DataFrameMacros.jl">DataFrameMacros.jl</a>: Provides macro versions of the common DataFrames.jl functions similar to DataFramesMeta.jl, with convenient syntax for the manipulation of multiple columns at once.</li><li><a href="https://github.com/queryverse/Query.jl">Query.jl</a>: Query.jl provides a single framework for data wrangling that works with a range of libraries, including DataFrames.jl, other tabular data libraries (more on those below), and even non-tabular data. 
Provides many convenience functions analogous to those in dplyr in R or <a href="https://en.wikipedia.org/wiki/Language_Integrated_Query">LINQ</a>.</li><li>You can find more information on these packages in the <a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a> section of this manual.</li></ul></li><li><strong>And More!</strong><ul><li><a href="https://github.com/JuliaGraphs/Graphs.jl">Graphs.jl</a>: A pure-Julia, high performance network analysis library. Edgelists in <code>DataFrame</code>s can be easily converted into graphs using the <a href="https://github.com/JuliaGraphs/GraphDataFrameBridge.jl">GraphDataFrameBridge.jl</a> package.</li></ul></li><li><strong>IO</strong>:<ul><li>DataFrames.jl works well with a range of formats, including:<ul><li>CSV files (using <a href="https://github.com/JuliaData/CSV.jl">CSV.jl</a>),</li><li>Apache Arrow (using <a href="https://github.com/JuliaData/Arrow.jl">Arrow.jl</a>)</li><li>reading Stata, SAS and SPSS files (using <a href="https://github.com/junyuan-chen/ReadStatTables.jl">ReadStatTables.jl</a>; alternatively <a href="https://www.queryverse.org/">Queryverse</a> users can choose <a href="https://github.com/queryverse/StatFiles.jl">StatFiles.jl</a>),</li><li>Parquet files (using <a href="https://gitlab.com/ExpandingMan/Parquet2.jl">Parquet2.jl</a>),</li><li>reading R data files (.rda, .RData) (using <a href="https://github.com/JuliaData/RData.jl">RData.jl</a>).</li></ul></li></ul></li></ul><p>While not all of these libraries are tightly integrated with DataFrames.jl, because <code>DataFrame</code>s are essentially collections of aligned Julia vectors, it is easy to (a) pull out a vector for use with a non-DataFrames-integrated library, or (b) convert your table into a homogeneously-typed matrix using the <code>Matrix</code> constructor or StatsModels.jl.</p><h3 id="Other-Julia-Tabular-Libraries"><a class="docs-heading-anchor" href="#Other-Julia-Tabular-Libraries">Other 
Julia Tabular Libraries</a><a id="Other-Julia-Tabular-Libraries-1"></a><a class="docs-heading-anchor-permalink" href="#Other-Julia-Tabular-Libraries" title="Permalink"></a></h3><p>DataFrames.jl is a great general purpose tool for data manipulation and wrangling, but it&#39;s not ideal for all applications. For users with more specialized needs, consider using:</p><ul><li><a href="https://juliadata.github.io/TypedTables.jl/stable/">TypedTables.jl</a>: Type-stable heterogeneous tables. Useful for improved performance when the structure of your table is relatively stable and does not feature thousands of columns.</li><li><a href="https://juliadata.github.io/JuliaDB.jl/stable/">JuliaDB.jl</a>: For users working with data that is too large to fit in memory, we suggest JuliaDB.jl, which offers better performance for large datasets, and can handle out-of-core data manipulations (Python users can think of JuliaDB.jl as the Julia version of <a href="https://dask.org/">dask</a>).</li></ul><p>Note that most tabular data libraries in the Julia ecosystem (including DataFrames.jl) support a common interface (defined in the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> package). As a result, some libraries are capable of working with a range of tabular data structures, making it easy to move between tabular libraries as your needs change. A user of <a href="https://github.com/queryverse/Query.jl">Query.jl</a>, for example, can use the same code to manipulate data in a <code>DataFrame</code>, a <code>Table</code> (defined by TypedTables.jl), or a JuliaDB table.</p><h2 id="Questions?"><a class="docs-heading-anchor" href="#Questions?">Questions?</a><a id="Questions?-1"></a><a class="docs-heading-anchor-permalink" href="#Questions?" 
title="Permalink"></a></h2><p>If there is something you expect DataFrames to be capable of, but cannot figure out how to do, please reach out with questions in Domains/Data on <a href="https://discourse.julialang.org/new-topic?title=[DataFrames%20Question]:%20&amp;body=%23%20Question:%0A%0A%23%20Dataset%20(if%20applicable):%0A%0A%23%20Minimal%20Working%20Example%20(if%20applicable):%0A&amp;category=Domains/Data&amp;tags=question">Discourse</a>. Additionally you might want to listen to an introduction to DataFrames.jl on <a href="https://juliaacademy.com/p/introduction-to-dataframes-jl">JuliaAcademy</a>.</p><p>Please report bugs by <a href="https://github.com/JuliaData/DataFrames.jl/issues/new">opening an issue</a>.</p><p>You can follow the <strong>source</strong> links throughout the documentation to jump right to the source files on GitHub to make pull requests for improving the documentation and function capabilities.</p><p>Please review <a href="https://github.com/JuliaData/DataFrames.jl/blob/main/CONTRIBUTING.md">DataFrames contributing guidelines</a> before submitting your first PR!</p><p>Information on specific versions can be found on the <a href="https://github.com/JuliaData/DataFrames.jl/releases">Release page</a>.</p><h2 id="Package-Manual"><a class="docs-heading-anchor" href="#Package-Manual">Package Manual</a><a id="Package-Manual-1"></a><a class="docs-heading-anchor-permalink" href="#Package-Manual" title="Permalink"></a></h2><ul><li><a href="man/basics/#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a></li><li class="no-marker"><ul><li><a href="man/basics/#Setting-up-the-Environment">Setting up the Environment</a></li><li><a href="man/basics/#Constructors-and-Basic-Utility-Functions">Constructors and Basic Utility Functions</a></li><li><a href="man/basics/#Getting-and-Setting-Data-in-a-Data-Frame">Getting and Setting Data in a Data Frame</a></li><li><a href="man/basics/#Basic-Usage-of-Transformation-Functions">Basic Usage of 
Transformation Functions</a></li></ul></li><li><a href="man/getting_started/#Getting-Started">Getting Started</a></li><li class="no-marker"><ul><li><a href="man/getting_started/#Installation">Installation</a></li><li><a href="man/getting_started/#The-DataFrame-Type">The <code>DataFrame</code> Type</a></li></ul></li><li><a href="man/joins/#Database-Style-Joins">Database-Style Joins</a></li><li class="no-marker"><ul><li><a href="man/joins/#Introduction-to-joins">Introduction to joins</a></li><li><a href="man/joins/#Key-value-comparisons-and-floating-point-values">Key value comparisons and floating point values</a></li><li><a href="man/joins/#Joining-on-key-columns-with-different-names">Joining on key columns with different names</a></li><li><a href="man/joins/#Handling-of-duplicate-keys-and-tracking-source-data-frame">Handling of duplicate keys and tracking source data frame</a></li><li><a href="man/joins/#Renaming-joined-columns">Renaming joined columns</a></li><li><a href="man/joins/#Matching-missing-values-in-joins">Matching missing values in joins</a></li><li><a href="man/joins/#Specifying-row-order-in-the-join-result">Specifying row order in the join result</a></li><li><a href="man/joins/#In-place-left-join">In-place left join</a></li></ul></li><li><a href="man/split_apply_combine/#The-Split-Apply-Combine-Strategy">The Split-Apply-Combine Strategy</a></li><li class="no-marker"><ul><li><a href="man/split_apply_combine/#Design-of-the-split-apply-combine-support">Design of the split-apply-combine support</a></li><li><a href="man/split_apply_combine/#Examples-of-the-split-apply-combine-operations">Examples of the split-apply-combine operations</a></li><li><a href="man/split_apply_combine/#Using-GroupedDataFrame-as-an-iterable-and-indexable-object">Using <code>GroupedDataFrame</code> as an iterable and indexable object</a></li><li><a href="man/split_apply_combine/#Simulating-the-SQL-where-clause">Simulating the SQL <code>where</code> clause</a></li><li><a 
href="man/split_apply_combine/#Column-independent-operations">Column-independent operations</a></li><li><a href="man/split_apply_combine/#Column-independent-operations-versus-functions">Column-independent operations versus functions</a></li><li><a href="man/split_apply_combine/#Specifying-group-order-in-groupby">Specifying group order in <code>groupby</code></a></li></ul></li><li><a href="man/reshaping_and_pivoting/#Reshaping-and-Pivoting-Data">Reshaping and Pivoting Data</a></li><li><a href="man/sorting/#Sorting">Sorting</a></li><li><a href="man/categorical/#man-categorical">Categorical Data</a></li><li><a href="man/missing/#Missing-Data">Missing Data</a></li><li><a href="man/comparisons/#Comparisons">Comparisons</a></li><li class="no-marker"><ul><li><a href="man/comparisons/#Comparison-with-the-Python-package-pandas">Comparison with the Python package pandas</a></li><li><a href="man/comparisons/#Comparison-with-the-R-package-dplyr">Comparison with the R package dplyr</a></li><li><a href="man/comparisons/#Comparison-with-the-R-package-data.table">Comparison with the R package data.table</a></li><li><a href="man/comparisons/#Comparison-with-Stata-(version-8-and-above)">Comparison with Stata (version 8 and above)</a></li></ul></li><li><a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a></li><li class="no-marker"><ul><li><a href="man/querying_frameworks/#TidierData.jl">TidierData.jl</a></li><li><a href="man/querying_frameworks/#DataFramesMeta.jl">DataFramesMeta.jl</a></li><li><a href="man/querying_frameworks/#DataFrameMacros.jl">DataFrameMacros.jl</a></li><li><a href="man/querying_frameworks/#Query.jl">Query.jl</a></li></ul></li></ul><h2 id="API"><a class="docs-heading-anchor" href="#API">API</a><a id="API-1"></a><a class="docs-heading-anchor-permalink" href="#API" title="Permalink"></a></h2><p>Only exported (i.e. 
available for use without <code>DataFrames.</code> qualifier after loading the DataFrames.jl package with <code>using DataFrames</code>) types and functions are considered a part of the public API of the DataFrames.jl package. In general all such objects are documented in this manual (in case some documentation is missing please kindly report an issue <a href="https://github.com/JuliaData/DataFrames.jl/issues/new">here</a>).</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Breaking changes to public and documented API are avoided in DataFrames.jl where possible.</p><p>The following changes are not considered breaking:</p><ul><li>specific floating point values computed by operations may change at any time; users should rely only on approximate accuracy;</li><li>in functions that use the default random number generator provided by Base Julia the specific random numbers computed may change across Julia versions;</li><li>if the changed functionality is classified as a bug;</li><li>if the changed behavior was not documented; two major cases are:<ol><li>in its implementation some function accepted a wider range of arguments than it was documented to handle - changes in handling of undocumented arguments are not considered as breaking;</li><li>the type of the value returned by a function changes, but it still follows the contract specified in the documentation; for example if a function is documented to return a vector then changing its type from <code>Vector</code> to <code>PooledVector</code> is not considered as breaking;</li></ol></li><li>error behavior: code that threw an exception can change exception type thrown or stop throwing an exception;</li><li>changes in display (how objects are printed);</li><li>changes to the state of global objects from Base Julia whose state normally is considered volatile (e.g. 
state of global random number generator).</li></ul><p>All types and functions that are part of public API are guaranteed to go through a deprecation period before a breaking change is made to them or they would be removed.</p><p>The standard practice is that breaking changes are implemented when a major release of DataFrames.jl is made (e.g. functionalities deprecated in a 1.x release would be changed in the 2.0 release).</p><p>In rare cases a breaking change might be introduced in a minor release. In such a case the changed behavior still goes through one minor release during which it is deprecated. The situations where such a breaking change might be allowed are (still such breaking changes will be avoided if possible):</p><ul><li>the affected functionality was previously clearly identified in the documentation as being subject to changes (for example in DataFrames.jl 1.4 release propagation rules of <code>:note</code>-style metadata are documented as such);</li><li>the change is on the border of being classified as a bug (in rare cases even if a behavior of some function was documented its consequences for certain argument combinations could be decided to be unintended and not wanted);</li><li>the change is needed to adjust DataFrames.jl functionality to changes in Base Julia.</li></ul></div></div><p>Please be warned that while Julia allows you to access internal functions or types of DataFrames.jl these can change without warning between versions of DataFrames.jl. In particular it is not safe to directly access fields of types that are a part of public API of the DataFrames.jl package using e.g. the <code>getfield</code> function. 
Whenever some operation on fields of defined types is considered allowed an appropriate exported function should be used instead.</p><ul><li><a href="lib/types/#Types">Types</a></li><li class="no-marker"><ul><li><a href="lib/types/#Type-hierarchy-design">Type hierarchy design</a></li><li><a href="lib/types/#man-columnhandling">The design of handling of columns of a <code>DataFrame</code></a></li><li><a href="lib/types/#Types-specification">Types specification</a></li></ul></li><li><a href="lib/functions/#Functions">Functions</a></li><li class="no-marker"><ul><li><a href="lib/functions/#Multithreading-support">Multithreading support</a></li><li><a href="lib/functions/#Index">Index</a></li><li><a href="lib/functions/#Constructing-data-frames">Constructing data frames</a></li><li><a href="lib/functions/#Summary-information">Summary information</a></li><li><a href="lib/functions/#Working-with-column-names">Working with column names</a></li><li><a href="lib/functions/#Mutating-and-transforming-data-frames-and-grouped-data-frames">Mutating and transforming data frames and grouped data frames</a></li><li><a href="lib/functions/#Reshaping-data-frames-between-tall-and-wide-formats">Reshaping data frames between tall and wide formats</a></li><li><a href="lib/functions/#Sorting">Sorting</a></li><li><a href="lib/functions/#Joining">Joining</a></li><li><a href="lib/functions/#Grouping">Grouping</a></li><li><a href="lib/functions/#Filtering-rows">Filtering rows</a></li><li><a href="lib/functions/#Working-with-missing-values">Working with missing values</a></li><li><a href="lib/functions/#Iteration">Iteration</a></li><li><a href="lib/functions/#Equality">Equality</a></li><li><a href="lib/functions/#Metadata">Metadata</a></li></ul></li><li><a href="lib/indexing/#Indexing">Indexing</a></li><li class="no-marker"><ul><li><a href="lib/indexing/#General-rules">General rules</a></li><li><a href="lib/indexing/#getindex-and-view"><code>getindex</code> and <code>view</code></a></li><li><a 
href="lib/indexing/#setindex!"><code>setindex!</code></a></li><li><a href="lib/indexing/#Broadcasting">Broadcasting</a></li><li><a href="lib/indexing/#Indexing-GroupedDataFrames">Indexing <code>GroupedDataFrame</code>s</a></li></ul></li><li><a href="lib/indexing/#Common-API-for-types-defined-in-DataFrames.jl">Common API for types defined in DataFrames.jl</a></li></ul><h2 id="Index"><a class="docs-heading-anchor" href="#Index">Index</a><a id="Index-1"></a><a class="docs-heading-anchor-permalink" href="#Index" title="Permalink"></a></h2><ul><li><a href="lib/types/#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a></li><li><a href="lib/types/#DataFrames.AsTable"><code>DataFrames.AsTable</code></a></li><li><a href="lib/types/#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a></li><li><a href="lib/types/#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a></li><li><a href="lib/types/#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a></li><li><a href="lib/types/#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a></li><li><a href="lib/types/#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a></li><li><a href="lib/types/#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a></li><li><a href="lib/types/#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a></li><li><a href="lib/types/#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a></li><li><a href="lib/types/#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a></li><li><a href="lib/types/#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a></li><li><a href="lib/functions/#Base.Iterators.only"><code>Base.Iterators.only</code></a></li><li><a href="lib/functions/#Base.Iterators.partition"><code>Base.Iterators.partition</code></a></li><li><a href="lib/functions/#Base.allunique"><code>Base.allunique</code></a></li><li><a 
href="lib/functions/#Base.append!"><code>Base.append!</code></a></li><li><a href="lib/functions/#Base.copy"><code>Base.copy</code></a></li><li><a href="lib/functions/#Base.deleteat!"><code>Base.deleteat!</code></a></li><li><a href="lib/functions/#Base.eachcol"><code>Base.eachcol</code></a></li><li><a href="lib/functions/#Base.eachrow"><code>Base.eachrow</code></a></li><li><a href="lib/functions/#Base.empty"><code>Base.empty</code></a></li><li><a href="lib/functions/#Base.empty!"><code>Base.empty!</code></a></li><li><a href="lib/functions/#Base.filter"><code>Base.filter</code></a></li><li><a href="lib/functions/#Base.filter!"><code>Base.filter!</code></a></li><li><a href="lib/functions/#Base.first"><code>Base.first</code></a></li><li><a href="lib/functions/#Base.get"><code>Base.get</code></a></li><li><a href="lib/functions/#Base.hcat"><code>Base.hcat</code></a></li><li><a href="lib/functions/#Base.insert!"><code>Base.insert!</code></a></li><li><a href="lib/functions/#Base.invpermute!"><code>Base.invpermute!</code></a></li><li><a href="lib/functions/#Base.isapprox"><code>Base.isapprox</code></a></li><li><a href="lib/functions/#Base.isempty"><code>Base.isempty</code></a></li><li><a href="lib/functions/#Base.issorted"><code>Base.issorted</code></a></li><li><a href="lib/functions/#Base.keepat!"><code>Base.keepat!</code></a></li><li><a href="lib/functions/#Base.keys"><code>Base.keys</code></a></li><li><a href="lib/functions/#Base.last"><code>Base.last</code></a></li><li><a href="lib/functions/#Base.length"><code>Base.length</code></a></li><li><a href="lib/functions/#Base.names"><code>Base.names</code></a></li><li><a href="lib/functions/#Base.ndims"><code>Base.ndims</code></a></li><li><a href="lib/functions/#Base.pairs"><code>Base.pairs</code></a></li><li><a href="lib/functions/#Base.parent"><code>Base.parent</code></a></li><li><a href="lib/functions/#Base.permute!"><code>Base.permute!</code></a></li><li><a 
href="lib/functions/#Base.permutedims"><code>Base.permutedims</code></a></li><li><a href="lib/functions/#Base.pop!"><code>Base.pop!</code></a></li><li><a href="lib/functions/#Base.popat!"><code>Base.popat!</code></a></li><li><a href="lib/functions/#Base.popfirst!"><code>Base.popfirst!</code></a></li><li><a href="lib/functions/#Base.prepend!"><code>Base.prepend!</code></a></li><li><a href="lib/functions/#Base.propertynames"><code>Base.propertynames</code></a></li><li><a href="lib/functions/#Base.push!"><code>Base.push!</code></a></li><li><a href="lib/functions/#Base.pushfirst!"><code>Base.pushfirst!</code></a></li><li><a href="lib/functions/#Base.reduce"><code>Base.reduce</code></a></li><li><a href="lib/functions/#Base.repeat"><code>Base.repeat</code></a></li><li><a href="lib/functions/#Base.resize!"><code>Base.resize!</code></a></li><li><a href="lib/functions/#Base.reverse"><code>Base.reverse</code></a></li><li><a href="lib/functions/#Base.reverse!"><code>Base.reverse!</code></a></li><li><a href="lib/functions/#Base.show"><code>Base.show</code></a></li><li><a href="lib/functions/#Base.similar"><code>Base.similar</code></a></li><li><a href="lib/functions/#Base.size"><code>Base.size</code></a></li><li><a href="lib/functions/#Base.sort"><code>Base.sort</code></a></li><li><a href="lib/functions/#Base.sort!"><code>Base.sort!</code></a></li><li><a href="lib/functions/#Base.sortperm"><code>Base.sortperm</code></a></li><li><a href="lib/functions/#Base.stack"><code>Base.stack</code></a></li><li><a href="lib/functions/#Base.unique"><code>Base.unique</code></a></li><li><a href="lib/functions/#Base.unique!"><code>Base.unique!</code></a></li><li><a href="lib/functions/#Base.values"><code>Base.values</code></a></li><li><a href="lib/functions/#Base.vcat"><code>Base.vcat</code></a></li><li><a href="lib/functions/#DataAPI.allcombinations"><code>DataAPI.allcombinations</code></a></li><li><a href="lib/functions/#DataAPI.antijoin"><code>DataAPI.antijoin</code></a></li><li><a 
href="lib/functions/#DataAPI.colmetadata"><code>DataAPI.colmetadata</code></a></li><li><a href="lib/functions/#DataAPI.colmetadata!"><code>DataAPI.colmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.colmetadatakeys"><code>DataAPI.colmetadatakeys</code></a></li><li><a href="lib/functions/#DataAPI.crossjoin"><code>DataAPI.crossjoin</code></a></li><li><a href="lib/functions/#DataAPI.deletecolmetadata!"><code>DataAPI.deletecolmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.deletemetadata!"><code>DataAPI.deletemetadata!</code></a></li><li><a href="lib/functions/#DataAPI.describe"><code>DataAPI.describe</code></a></li><li><a href="lib/functions/#DataAPI.emptycolmetadata!"><code>DataAPI.emptycolmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.emptymetadata!"><code>DataAPI.emptymetadata!</code></a></li><li><a href="lib/functions/#DataAPI.groupby"><code>DataAPI.groupby</code></a></li><li><a href="lib/functions/#DataAPI.innerjoin"><code>DataAPI.innerjoin</code></a></li><li><a href="lib/functions/#DataAPI.leftjoin"><code>DataAPI.leftjoin</code></a></li><li><a href="lib/functions/#DataAPI.metadata"><code>DataAPI.metadata</code></a></li><li><a href="lib/functions/#DataAPI.metadata!"><code>DataAPI.metadata!</code></a></li><li><a href="lib/functions/#DataAPI.metadatakeys"><code>DataAPI.metadatakeys</code></a></li><li><a href="lib/functions/#DataAPI.ncol"><code>DataAPI.ncol</code></a></li><li><a href="lib/functions/#DataAPI.nrow"><code>DataAPI.nrow</code></a></li><li><a href="lib/functions/#DataAPI.outerjoin"><code>DataAPI.outerjoin</code></a></li><li><a href="lib/functions/#DataAPI.rightjoin"><code>DataAPI.rightjoin</code></a></li><li><a href="lib/functions/#DataAPI.rownumber"><code>DataAPI.rownumber</code></a></li><li><a href="lib/functions/#DataAPI.semijoin"><code>DataAPI.semijoin</code></a></li><li><a href="lib/functions/#DataFrames.allowmissing!"><code>DataFrames.allowmissing!</code></a></li><li><a 
href="lib/functions/#DataFrames.combine"><code>DataFrames.combine</code></a></li><li><a href="lib/functions/#DataFrames.completecases"><code>DataFrames.completecases</code></a></li><li><a href="lib/functions/#DataFrames.disallowmissing!"><code>DataFrames.disallowmissing!</code></a></li><li><a href="lib/functions/#DataFrames.dropmissing"><code>DataFrames.dropmissing</code></a></li><li><a href="lib/functions/#DataFrames.dropmissing!"><code>DataFrames.dropmissing!</code></a></li><li><a href="lib/functions/#DataFrames.fillcombinations"><code>DataFrames.fillcombinations</code></a></li><li><a href="lib/functions/#DataFrames.flatten"><code>DataFrames.flatten</code></a></li><li><a href="lib/functions/#DataFrames.groupcols"><code>DataFrames.groupcols</code></a></li><li><a href="lib/functions/#DataFrames.groupindices"><code>DataFrames.groupindices</code></a></li><li><a href="lib/functions/#DataFrames.insertcols"><code>DataFrames.insertcols</code></a></li><li><a href="lib/functions/#DataFrames.insertcols!"><code>DataFrames.insertcols!</code></a></li><li><a href="lib/functions/#DataFrames.leftjoin!"><code>DataFrames.leftjoin!</code></a></li><li><a href="lib/functions/#DataFrames.mapcols"><code>DataFrames.mapcols</code></a></li><li><a href="lib/functions/#DataFrames.mapcols!"><code>DataFrames.mapcols!</code></a></li><li><a href="lib/functions/#DataFrames.nonunique"><code>DataFrames.nonunique</code></a></li><li><a href="lib/functions/#DataFrames.order"><code>DataFrames.order</code></a></li><li><a href="lib/functions/#DataFrames.proprow"><code>DataFrames.proprow</code></a></li><li><a href="lib/functions/#DataFrames.rename"><code>DataFrames.rename</code></a></li><li><a href="lib/functions/#DataFrames.rename!"><code>DataFrames.rename!</code></a></li><li><a href="lib/functions/#DataFrames.repeat!"><code>DataFrames.repeat!</code></a></li><li><a href="lib/functions/#DataFrames.select"><code>DataFrames.select</code></a></li><li><a 
href="lib/functions/#DataFrames.select!"><code>DataFrames.select!</code></a></li><li><a href="lib/functions/#DataFrames.subset"><code>DataFrames.subset</code></a></li><li><a href="lib/functions/#DataFrames.subset!"><code>DataFrames.subset!</code></a></li><li><a href="lib/functions/#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a></li><li><a href="lib/functions/#DataFrames.transform"><code>DataFrames.transform</code></a></li><li><a href="lib/functions/#DataFrames.transform!"><code>DataFrames.transform!</code></a></li><li><a href="lib/functions/#DataFrames.unstack"><code>DataFrames.unstack</code></a></li><li><a href="lib/functions/#DataFrames.valuecols"><code>DataFrames.valuecols</code></a></li><li><a href="lib/functions/#Missings.allowmissing"><code>Missings.allowmissing</code></a></li><li><a href="lib/functions/#Missings.disallowmissing"><code>Missings.disallowmissing</code></a></li><li><a href="lib/functions/#Random.shuffle"><code>Random.shuffle</code></a></li><li><a href="lib/functions/#Random.shuffle!"><code>Random.shuffle!</code></a></li></ul></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="man/basics/">First Steps with DataFrames.jl »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option 
value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Introduction · DataFrames.jl</title><meta name="title" content="Introduction · DataFrames.jl"/><meta property="og:title" content="Introduction · DataFrames.jl"/><meta property="twitter:title" content="Introduction · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/"/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" 
href="assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script><link href="assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href><img src="assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href>DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Introduction</a><ul class="internal"><li><a class="tocitem" href="#What-is-DataFrames.jl?"><span>What is DataFrames.jl?</span></a></li><li><a class="tocitem" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem"><span>DataFrames.jl and the Julia Data Ecosystem</span></a></li><li><a class="tocitem" href="#Questions?"><span>Questions?</span></a></li><li><a class="tocitem" href="#Package-Manual"><span>Package Manual</span></a></li><li><a class="tocitem" href="#API"><span>API</span></a></li><li><a class="tocitem" href="#Index"><span>Index</span></a></li></ul></li><li><a class="tocitem" 
href="man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="man/joins/">Joins</a></li><li><a class="tocitem" href="man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="man/sorting/">Sorting</a></li><li><a class="tocitem" href="man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="man/missing/">Missing Data</a></li><li><a class="tocitem" href="man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="lib/types/">Types</a></li><li><a class="tocitem" href="lib/functions/">Functions</a></li><li><a class="tocitem" href="lib/indexing/">Indexing</a></li><li><a class="tocitem" href="lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Introduction</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Introduction</a></li></ul></nav><div class="docs-right"><a 
class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/index.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="DataFrames.jl"><a class="docs-heading-anchor" href="#DataFrames.jl">DataFrames.jl</a><a id="DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#DataFrames.jl" title="Permalink"></a></h1><p>Welcome to the DataFrames.jl documentation!</p><p>This resource aims to teach you everything you need to know to get up and running with tabular data manipulation using the DataFrames.jl package.</p><p>For more illustrations of DataFrames.jl usage, in particular in conjunction with other packages you can check-out the following resources (they are kept up to date with the released version of DataFrames.jl):</p><ul><li><a href="https://www.jstatsoft.org/article/view/v107i04">DataFrames.jl: Flexible and Fast Tabular Data in Julia</a> article published in the <em>Journal of Statistical Software</em></li><li><a href="https://www.ahsmart.com/pub/data-wrangling-with-data-frames-jl-cheat-sheet/">Data Wrangling with DataFrames.jl Cheat Sheet</a></li><li><a href="https://github.com/bkamins/Julia-DataFrames-Tutorial/">DataFrames Tutorial using Jupyter Notebooks</a></li><li><a href="https://github.com/JuliaAcademy/DataFrames">Julia Academy DataFrames.jl tutorial</a></li><li><a href="https://github.com/bkamins/JuliaCon2023-Tutorial">JuliaCon 
2023</a>, <a href="https://github.com/bkamins/JuliaCon2022-DataFrames-Tutorial">JuliaCon 2022</a>, <a href="https://github.com/bkamins/JuliaCon2021-DataFrames-Tutorial">JuliaCon 2021</a>, <a href="https://github.com/bkamins/JuliaCon2020-DataFrames-Tutorial">JuliaCon 2020</a>, <a href="https://github.com/bkamins/JuliaCon2019-DataFrames-Tutorial">JuliaCon 2019</a>, <a href="https://github.com/bkamins/ODSC-EUROPE-2021">ODSC Europe 2021</a> tutorials, and <a href="https://github.com/bkamins/PyDataGlobal2020">PyData Global 2020</a></li><li><a href="https://github.com/bkamins/DataFrames-Showcase">DataFrames.jl showcase</a></li></ul><p>If you prefer to learn DataFrames.jl from a book you can consider reading:</p><ul><li><a href="https://github.com/bkamins/JuliaForDataAnalysis">Julia for Data Analysis</a>;</li><li><a href="https://juliadatascience.io/">Julia Data Science</a>.</li></ul><h2 id="What-is-DataFrames.jl?"><a class="docs-heading-anchor" href="#What-is-DataFrames.jl?">What is DataFrames.jl?</a><a id="What-is-DataFrames.jl?-1"></a><a class="docs-heading-anchor-permalink" href="#What-is-DataFrames.jl?" title="Permalink"></a></h2><p>DataFrames.jl provides a set of tools for working with tabular data in Julia. Its design and functionality are similar to those of <a href="https://pandas.pydata.org/">pandas</a> (in Python) and <code>data.frame</code>, <a href="https://rdatatable.gitlab.io/data.table/"><code>data.table</code></a> and <a href="https://dplyr.tidyverse.org/">dplyr</a> (in R), making it  a great general purpose data science tool.</p><p>DataFrames.jl plays a central role in the Julia Data ecosystem, and has tight integrations with a range of different libraries. 
DataFrames.jl isn&#39;t the only tool for working with tabular data in Julia – as noted below, there are some other great libraries for certain use-cases – but it provides great data wrangling functionality through a familiar interface.</p><p>To understand the toolchain in more detail, have a look at the tutorials in this manual. New users can start with the <a href="man/basics/#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a> section.</p><p>You may find the <a href="https://juliadata.github.io/DataFramesMeta.jl/stable/">DataFramesMeta.jl</a> package or one of the other convenience packages discussed in the <a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a> section of this manual helpful when writing more advanced data transformations, especially if you do not have a significant programming experience. These packages provide convenience syntax similar to <a href="https://dplyr.tidyverse.org/">dplyr</a> in R.</p><p>If you use metadata when working with DataFrames.jl you might find the <a href="https://github.com/JuliaData/TableMetadataTools.jl">TableMetadataTools.jl</a> package useful. This package defines several convenience functions for performing typical metadata operations.</p><h2 id="DataFrames.jl-and-the-Julia-Data-Ecosystem"><a class="docs-heading-anchor" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem">DataFrames.jl and the Julia Data Ecosystem</a><a id="DataFrames.jl-and-the-Julia-Data-Ecosystem-1"></a><a class="docs-heading-anchor-permalink" href="#DataFrames.jl-and-the-Julia-Data-Ecosystem" title="Permalink"></a></h2><p>The Julia data ecosystem can be a difficult space for new users to navigate, in part because the Julia ecosystem tends to distribute functionality across different libraries more than some other languages. 
Because many people coming to DataFrames.jl are just starting to explore the Julia data ecosystem, below is a list of well-supported libraries that provide different data science tools, along with a few notes about what makes each library special, and how well integrated they are with DataFrames.jl.</p><ul><li><strong>Statistics</strong><ul><li><a href="https://github.com/JuliaStats/StatsKit.jl">StatsKit.jl</a>: A convenience meta-package which loads a set of essential packages for statistics, including those mentioned below in this section and DataFrames.jl itself.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/Statistics/">Statistics</a>: The Julia standard library comes with a wide range of statistics functionality, but to gain access to these functions you must call <code>using Statistics</code>.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/">LinearAlgebra</a>: Like <code>Statistics</code>, many linear algebra features (factorizations, inversions, etc.) live in a library you have to load to use.</li><li><a href="https://docs.julialang.org/en/v1/stdlib/SparseArrays/">SparseArrays</a> are also in the standard library but must be loaded to be used.</li><li><a href="https://github.com/nalimilan/FreqTables.jl">FreqTables.jl</a>: Create frequency tables / cross-tabulations. Tightly integrated with DataFrames.jl.</li><li><a href="https://juliastats.org/HypothesisTests.jl/stable/">HypothesisTests.jl</a>: A range of hypothesis testing tools.</li><li><a href="https://juliastats.org/GLM.jl/stable/manual/">GLM.jl</a>: Tools for estimating linear and generalized linear models. Tightly integrated with DataFrames.jl.</li><li><a href="https://juliastats.org/StatsModels.jl/stable/">StatsModels.jl</a>: For converting heterogeneous <code>DataFrame</code> into homogeneous matrices for use with linear algebra libraries or machine learning applications that don&#39;t directly support <code>DataFrame</code>s. 
Will do things like convert categorical variables into indicators/one-hot-encodings, create interaction terms, etc.</li><li><a href="https://multivariatestatsjl.readthedocs.io/en/stable/index.html">MultivariateStats.jl</a>: linear regression, ridge regression, PCA, component analyses tools. Not well integrated with DataFrames.jl, but easily used in combination with <code>StatsModels</code>.</li></ul></li><li><strong>Machine Learning</strong><ul><li><a href="https://github.com/alan-turing-institute/MLJ.jl">MLJ.jl</a>: if you&#39;re more of an applied user, there is a single package that pulls from all these different libraries and provides a single, scikit-learn inspired API: MLJ.jl. MLJ.jl provides a common interface for a wide range of machine learning algorithms.</li><li><a href="https://cstjean.github.io/ScikitLearn.jl/stable/">ScikitLearn.jl</a>: A Julia wrapper around the full Python scikit-learn machine learning library. Not well integrated with DataFrames.jl, but can be combined using StatsModels.jl.</li><li><a href="https://github.com/IBM/AutoMLPipeline.jl">AutoMLPipeline</a>: A package that makes it trivial to create complex ML pipeline structures using simple expressions. It leverages the built-in macro programming features of Julia to symbolically process, manipulate pipeline expressions, and makes it easy to discover optimal structures for machine learning regression and classification.</li><li>Deep learning: <a href="https://denizyuret.github.io/Knet.jl/stable/tutorial/#Introduction-to-Knet-1">KNet.jl</a> and <a href="https://github.com/FluxML/Flux.jl">Flux.jl</a>.</li></ul></li><li><strong>Plotting</strong><ul><li><a href="http://docs.juliaplots.org/latest/">Plots.jl</a>: Powerful, modern plotting library with a syntax akin to that of <a href="https://matplotlib.org/">matplotlib</a> (in Python) or <code>plot</code> (in R). 
<a href="http://docs.juliaplots.org/latest/tutorial/#Using-Plot-Recipes-1">StatsPlots.jl</a> provides Plots.jl with recipes for many standard statistical plots.</li><li><a href="http://gadflyjl.org/stable/">Gadfly.jl</a>: High-level plotting library with a &quot;grammar of graphics&quot; syntax akin to that of <a href="https://ggplot2.tidyverse.org/reference/ggplot.html">ggplot</a> (in R).</li><li><a href="http://juliaplots.org/AlgebraOfGraphics.jl/stable/">AlgebraOfGraphics.jl</a>: A &quot;grammar of graphics&quot; library built upon <a href="https://docs.makie.org/stable/">Makie.jl</a>.</li><li><a href="https://www.queryverse.org/VegaLite.jl/stable/">VegaLite.jl</a>: High-level plotting library that uses a different &quot;grammar of graphics&quot; syntax and has an emphasis on interactive graphics.</li></ul></li><li><strong>Data Wrangling</strong>:<ul><li><a href="https://github.com/invenia/Impute.jl">Impute.jl</a>: various methods for handling missing data in vectors, matrices and tables.</li><li><a href="https://github.com/JuliaData/DataFramesMeta.jl">DataFramesMeta.jl</a>: A range of convenience functions for DataFrames.jl that augment <code>select</code> and <code>transform</code> to provide a user experience similar to that provided by <a href="https://dplyr.tidyverse.org/">dplyr</a> in R.</li><li><a href="https://github.com/jkrumbiegel/DataFrameMacros.jl">DataFrameMacros.jl</a>: Provides macro versions of the common DataFrames.jl functions similar to DataFramesMeta.jl, with convenient syntax for the manipulation of multiple columns at once.</li><li><a href="https://github.com/queryverse/Query.jl">Query.jl</a>: Query.jl provides a single framework for data wrangling that works with a range of libraries, including DataFrames.jl, other tabular data libraries (more on those below), and even non-tabular data. 
Provides many convenience functions analogous to those in dplyr in R or <a href="https://en.wikipedia.org/wiki/Language_Integrated_Query">LINQ</a>.</li><li>You can find more information on these packages in the <a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a> section of this manual.</li></ul></li><li><strong>And More!</strong><ul><li><a href="https://github.com/JuliaGraphs/Graphs.jl">Graphs.jl</a>: A pure-Julia, high performance network analysis library. Edgelists in <code>DataFrame</code>s can be easily converted into graphs using the <a href="https://github.com/JuliaGraphs/GraphDataFrameBridge.jl">GraphDataFrameBridge.jl</a> package.</li></ul></li><li><strong>IO</strong>:<ul><li>DataFrames.jl works well with a range of formats, including:<ul><li>CSV files (using <a href="https://github.com/JuliaData/CSV.jl">CSV.jl</a>),</li><li>Apache Arrow (using <a href="https://github.com/JuliaData/Arrow.jl">Arrow.jl</a>)</li><li>reading Stata, SAS and SPSS files (using <a href="https://github.com/junyuan-chen/ReadStatTables.jl">ReadStatTables.jl</a>; alternatively <a href="https://www.queryverse.org/">Queryverse</a> users can choose <a href="https://github.com/queryverse/StatFiles.jl">StatFiles.jl</a>),</li><li>Parquet files (using <a href="https://gitlab.com/ExpandingMan/Parquet2.jl">Parquet2.jl</a>),</li><li>reading R data files (.rda, .RData) (using <a href="https://github.com/JuliaData/RData.jl">RData.jl</a>).</li></ul></li></ul></li></ul><p>While not all of these libraries are tightly integrated with DataFrames.jl, because <code>DataFrame</code>s are essentially collections of aligned Julia vectors, it is easy to (a) pull out a vector for use with a non-DataFrames-integrated library, or (b) convert your table into a homogeneously-typed matrix using the <code>Matrix</code> constructor or StatsModels.jl.</p><h3 id="Other-Julia-Tabular-Libraries"><a class="docs-heading-anchor" href="#Other-Julia-Tabular-Libraries">Other 
Julia Tabular Libraries</a><a id="Other-Julia-Tabular-Libraries-1"></a><a class="docs-heading-anchor-permalink" href="#Other-Julia-Tabular-Libraries" title="Permalink"></a></h3><p>DataFrames.jl is a great general purpose tool for data manipulation and wrangling, but it&#39;s not ideal for all applications. For users with more specialized needs, consider using:</p><ul><li><a href="https://juliadata.github.io/TypedTables.jl/stable/">TypedTables.jl</a>: Type-stable heterogeneous tables. Useful for improved performance when the structure of your table is relatively stable and does not feature thousands of columns.</li><li><a href="https://juliadata.github.io/JuliaDB.jl/stable/">JuliaDB.jl</a>: For users working with data that is too large to fit in memory, we suggest JuliaDB.jl, which offers better performance for large datasets, and can handle out-of-core data manipulations (Python users can think of JuliaDB.jl as the Julia version of <a href="https://dask.org/">dask</a>).</li></ul><p>Note that most tabular data libraries in the Julia ecosystem (including DataFrames.jl) support a common interface (defined in the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> package). As a result, some libraries are capable of working with a range of tabular data structures, making it easy to move between tabular libraries as your needs change. A user of <a href="https://github.com/queryverse/Query.jl">Query.jl</a>, for example, can use the same code to manipulate data in a <code>DataFrame</code>, a <code>Table</code> (defined by TypedTables.jl), or a JuliaDB table.</p><h2 id="Questions?"><a class="docs-heading-anchor" href="#Questions?">Questions?</a><a id="Questions?-1"></a><a class="docs-heading-anchor-permalink" href="#Questions?" 
title="Permalink"></a></h2><p>If there is something you expect DataFrames to be capable of, but cannot figure out how to do, please reach out with questions in Domains/Data on <a href="https://discourse.julialang.org/new-topic?title=[DataFrames%20Question]:%20&amp;body=%23%20Question:%0A%0A%23%20Dataset%20(if%20applicable):%0A%0A%23%20Minimal%20Working%20Example%20(if%20applicable):%0A&amp;category=Domains/Data&amp;tags=question">Discourse</a>. Additionally you might want to listen to an introduction to DataFrames.jl on <a href="https://juliaacademy.com/p/introduction-to-dataframes-jl">JuliaAcademy</a>.</p><p>Please report bugs by <a href="https://github.com/JuliaData/DataFrames.jl/issues/new">opening an issue</a>.</p><p>You can follow the <strong>source</strong> links throughout the documentation to jump right to the source files on GitHub to make pull requests for improving the documentation and function capabilities.</p><p>Please review <a href="https://github.com/JuliaData/DataFrames.jl/blob/main/CONTRIBUTING.md">DataFrames contributing guidelines</a> before submitting your first PR!</p><p>Information on specific versions can be found on the <a href="https://github.com/JuliaData/DataFrames.jl/releases">Release page</a>.</p><h2 id="Package-Manual"><a class="docs-heading-anchor" href="#Package-Manual">Package Manual</a><a id="Package-Manual-1"></a><a class="docs-heading-anchor-permalink" href="#Package-Manual" title="Permalink"></a></h2><ul><li><a href="man/basics/#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a></li><li class="no-marker"><ul><li><a href="man/basics/#Setting-up-the-Environment">Setting up the Environment</a></li><li><a href="man/basics/#Constructors-and-Basic-Utility-Functions">Constructors and Basic Utility Functions</a></li><li><a href="man/basics/#Getting-and-Setting-Data-in-a-Data-Frame">Getting and Setting Data in a Data Frame</a></li><li><a href="man/basics/#Manipulation-Functions">Manipulation Functions</a></li><li><a 
href="man/basics/#Approach-Comparison">Approach Comparison</a></li></ul></li><li><a href="man/getting_started/#Getting-Started">Getting Started</a></li><li class="no-marker"><ul><li><a href="man/getting_started/#Installation">Installation</a></li><li><a href="man/getting_started/#The-DataFrame-Type">The <code>DataFrame</code> Type</a></li></ul></li><li><a href="man/joins/#Database-Style-Joins">Database-Style Joins</a></li><li class="no-marker"><ul><li><a href="man/joins/#Introduction-to-joins">Introduction to joins</a></li><li><a href="man/joins/#Key-value-comparisons-and-floating-point-values">Key value comparisons and floating point values</a></li><li><a href="man/joins/#Joining-on-key-columns-with-different-names">Joining on key columns with different names</a></li><li><a href="man/joins/#Handling-of-duplicate-keys-and-tracking-source-data-frame">Handling of duplicate keys and tracking source data frame</a></li><li><a href="man/joins/#Renaming-joined-columns">Renaming joined columns</a></li><li><a href="man/joins/#Matching-missing-values-in-joins">Matching missing values in joins</a></li><li><a href="man/joins/#Specifying-row-order-in-the-join-result">Specifying row order in the join result</a></li><li><a href="man/joins/#In-place-left-join">In-place left join</a></li></ul></li><li><a href="man/split_apply_combine/#The-Split-Apply-Combine-Strategy">The Split-Apply-Combine Strategy</a></li><li class="no-marker"><ul><li><a href="man/split_apply_combine/#Design-of-the-split-apply-combine-support">Design of the split-apply-combine support</a></li><li><a href="man/split_apply_combine/#Examples-of-the-split-apply-combine-operations">Examples of the split-apply-combine operations</a></li><li><a href="man/split_apply_combine/#Using-GroupedDataFrame-as-an-iterable-and-indexable-object">Using <code>GroupedDataFrame</code> as an iterable and indexable object</a></li><li><a href="man/split_apply_combine/#Simulating-the-SQL-where-clause">Simulating the SQL <code>where</code> 
clause</a></li><li><a href="man/split_apply_combine/#Column-independent-operations">Column-independent operations</a></li><li><a href="man/split_apply_combine/#Column-independent-operations-versus-functions">Column-independent operations versus functions</a></li><li><a href="man/split_apply_combine/#Specifying-group-order-in-groupby">Specifying group order in <code>groupby</code></a></li></ul></li><li><a href="man/reshaping_and_pivoting/#Reshaping-and-Pivoting-Data">Reshaping and Pivoting Data</a></li><li><a href="man/sorting/#Sorting">Sorting</a></li><li><a href="man/categorical/#man-categorical">Categorical Data</a></li><li><a href="man/missing/#Missing-Data">Missing Data</a></li><li><a href="man/comparisons/#Comparisons">Comparisons</a></li><li class="no-marker"><ul><li><a href="man/comparisons/#Comparison-with-the-Python-package-pandas">Comparison with the Python package pandas</a></li><li><a href="man/comparisons/#Comparison-with-the-R-package-dplyr">Comparison with the R package dplyr</a></li><li><a href="man/comparisons/#Comparison-with-the-R-package-data.table">Comparison with the R package data.table</a></li><li><a href="man/comparisons/#Comparison-with-Stata-(version-8-and-above)">Comparison with Stata (version 8 and above)</a></li></ul></li><li><a href="man/querying_frameworks/#Data-manipulation-frameworks">Data manipulation frameworks</a></li><li class="no-marker"><ul><li><a href="man/querying_frameworks/#TidierData.jl">TidierData.jl</a></li><li><a href="man/querying_frameworks/#DataFramesMeta.jl">DataFramesMeta.jl</a></li><li><a href="man/querying_frameworks/#DataFrameMacros.jl">DataFrameMacros.jl</a></li><li><a href="man/querying_frameworks/#Query.jl">Query.jl</a></li></ul></li></ul><h2 id="API"><a class="docs-heading-anchor" href="#API">API</a><a id="API-1"></a><a class="docs-heading-anchor-permalink" href="#API" title="Permalink"></a></h2><p>Only exported (i.e. 
available for use without <code>DataFrames.</code> qualifier after loading the DataFrames.jl package with <code>using DataFrames</code>) types and functions are considered a part of the public API of the DataFrames.jl package. In general all such objects are documented in this manual (in case some documentation is missing please kindly report an issue <a href="https://github.com/JuliaData/DataFrames.jl/issues/new">here</a>).</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Breaking changes to public and documented API are avoided in DataFrames.jl where possible.</p><p>The following changes are not considered breaking:</p><ul><li>specific floating point values computed by operations may change at any time; users should rely only on approximate accuracy;</li><li>in functions that use the default random number generator provided by Base Julia the specific random numbers computed may change across Julia versions;</li><li>if the changed functionality is classified as a bug;</li><li>if the changed behavior was not documented; two major cases are:<ol><li>in its implementation some function accepted a wider range of arguments than it was documented to handle - changes in handling of undocumented arguments are not considered as breaking;</li><li>the type of the value returned by a function changes, but it still follows the contract specified in the documentation; for example if a function is documented to return a vector then changing its type from <code>Vector</code> to <code>PooledVector</code> is not considered as breaking;</li></ol></li><li>error behavior: code that threw an exception can change exception type thrown or stop throwing an exception;</li><li>changes in display (how objects are printed);</li><li>changes to the state of global objects from Base Julia whose state normally is considered volatile (e.g. 
state of global random number generator).</li></ul><p>All types and functions that are part of public API are guaranteed to go through a deprecation period before a breaking change is made to them or they would be removed.</p><p>The standard practice is that breaking changes are implemented when a major release of DataFrames.jl is made (e.g. functionalities deprecated in a 1.x release would be changed in the 2.0 release).</p><p>In rare cases a breaking change might be introduced in a minor release. In such a case the changed behavior still goes through one minor release during which it is deprecated. The situations where such a breaking change might be allowed are (still such breaking changes will be avoided if possible):</p><ul><li>the affected functionality was previously clearly identified in the documentation as being subject to changes (for example in DataFrames.jl 1.4 release propagation rules of <code>:note</code>-style metadata are documented as such);</li><li>the change is on the border of being classified as a bug (in rare cases even if a behavior of some function was documented its consequences for certain argument combinations could be decided to be unintended and not wanted);</li><li>the change is needed to adjust DataFrames.jl functionality to changes in Base Julia.</li></ul></div></div><p>Please be warned that while Julia allows you to access internal functions or types of DataFrames.jl these can change without warning between versions of DataFrames.jl. In particular it is not safe to directly access fields of types that are a part of public API of the DataFrames.jl package using e.g. the <code>getfield</code> function. 
Whenever some operation on fields of defined types is considered allowed an appropriate exported function should be used instead.</p><ul><li><a href="lib/types/#Types">Types</a></li><li class="no-marker"><ul><li><a href="lib/types/#Type-hierarchy-design">Type hierarchy design</a></li><li><a href="lib/types/#man-columnhandling">The design of handling of columns of a <code>DataFrame</code></a></li><li><a href="lib/types/#Types-specification">Types specification</a></li></ul></li><li><a href="lib/functions/#Functions">Functions</a></li><li class="no-marker"><ul><li><a href="lib/functions/#Multithreading-support">Multithreading support</a></li><li><a href="lib/functions/#Index">Index</a></li><li><a href="lib/functions/#Constructing-data-frames">Constructing data frames</a></li><li><a href="lib/functions/#Summary-information">Summary information</a></li><li><a href="lib/functions/#Working-with-column-names">Working with column names</a></li><li><a href="lib/functions/#Mutating-and-transforming-data-frames-and-grouped-data-frames">Mutating and transforming data frames and grouped data frames</a></li><li><a href="lib/functions/#Reshaping-data-frames-between-tall-and-wide-formats">Reshaping data frames between tall and wide formats</a></li><li><a href="lib/functions/#Sorting">Sorting</a></li><li><a href="lib/functions/#Joining">Joining</a></li><li><a href="lib/functions/#Grouping">Grouping</a></li><li><a href="lib/functions/#Filtering-rows">Filtering rows</a></li><li><a href="lib/functions/#Working-with-missing-values">Working with missing values</a></li><li><a href="lib/functions/#Iteration">Iteration</a></li><li><a href="lib/functions/#Equality">Equality</a></li><li><a href="lib/functions/#Metadata">Metadata</a></li></ul></li><li><a href="lib/indexing/#Indexing">Indexing</a></li><li class="no-marker"><ul><li><a href="lib/indexing/#General-rules">General rules</a></li><li><a href="lib/indexing/#getindex-and-view"><code>getindex</code> and <code>view</code></a></li><li><a 
href="lib/indexing/#setindex!"><code>setindex!</code></a></li><li><a href="lib/indexing/#Broadcasting">Broadcasting</a></li><li><a href="lib/indexing/#Indexing-GroupedDataFrames">Indexing <code>GroupedDataFrame</code>s</a></li></ul></li><li><a href="lib/indexing/#Common-API-for-types-defined-in-DataFrames.jl">Common API for types defined in DataFrames.jl</a></li></ul><h2 id="Index"><a class="docs-heading-anchor" href="#Index">Index</a><a id="Index-1"></a><a class="docs-heading-anchor-permalink" href="#Index" title="Permalink"></a></h2><ul><li><a href="lib/types/#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a></li><li><a href="lib/types/#DataFrames.AsTable"><code>DataFrames.AsTable</code></a></li><li><a href="lib/types/#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a></li><li><a href="lib/types/#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a></li><li><a href="lib/types/#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a></li><li><a href="lib/types/#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a></li><li><a href="lib/types/#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a></li><li><a href="lib/types/#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a></li><li><a href="lib/types/#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a></li><li><a href="lib/types/#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a></li><li><a href="lib/types/#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a></li><li><a href="lib/types/#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a></li><li><a href="lib/functions/#Base.Iterators.only"><code>Base.Iterators.only</code></a></li><li><a href="lib/functions/#Base.Iterators.partition"><code>Base.Iterators.partition</code></a></li><li><a href="lib/functions/#Base.allunique"><code>Base.allunique</code></a></li><li><a 
href="lib/functions/#Base.append!"><code>Base.append!</code></a></li><li><a href="lib/functions/#Base.copy"><code>Base.copy</code></a></li><li><a href="lib/functions/#Base.deleteat!"><code>Base.deleteat!</code></a></li><li><a href="lib/functions/#Base.eachcol"><code>Base.eachcol</code></a></li><li><a href="lib/functions/#Base.eachrow"><code>Base.eachrow</code></a></li><li><a href="lib/functions/#Base.empty"><code>Base.empty</code></a></li><li><a href="lib/functions/#Base.empty!"><code>Base.empty!</code></a></li><li><a href="lib/functions/#Base.filter"><code>Base.filter</code></a></li><li><a href="lib/functions/#Base.filter!"><code>Base.filter!</code></a></li><li><a href="lib/functions/#Base.first"><code>Base.first</code></a></li><li><a href="lib/functions/#Base.get"><code>Base.get</code></a></li><li><a href="lib/functions/#Base.hcat"><code>Base.hcat</code></a></li><li><a href="lib/functions/#Base.insert!"><code>Base.insert!</code></a></li><li><a href="lib/functions/#Base.invpermute!"><code>Base.invpermute!</code></a></li><li><a href="lib/functions/#Base.isapprox"><code>Base.isapprox</code></a></li><li><a href="lib/functions/#Base.isempty"><code>Base.isempty</code></a></li><li><a href="lib/functions/#Base.issorted"><code>Base.issorted</code></a></li><li><a href="lib/functions/#Base.keepat!"><code>Base.keepat!</code></a></li><li><a href="lib/functions/#Base.keys"><code>Base.keys</code></a></li><li><a href="lib/functions/#Base.last"><code>Base.last</code></a></li><li><a href="lib/functions/#Base.length"><code>Base.length</code></a></li><li><a href="lib/functions/#Base.names"><code>Base.names</code></a></li><li><a href="lib/functions/#Base.ndims"><code>Base.ndims</code></a></li><li><a href="lib/functions/#Base.pairs"><code>Base.pairs</code></a></li><li><a href="lib/functions/#Base.parent"><code>Base.parent</code></a></li><li><a href="lib/functions/#Base.permute!"><code>Base.permute!</code></a></li><li><a 
href="lib/functions/#Base.permutedims"><code>Base.permutedims</code></a></li><li><a href="lib/functions/#Base.pop!"><code>Base.pop!</code></a></li><li><a href="lib/functions/#Base.popat!"><code>Base.popat!</code></a></li><li><a href="lib/functions/#Base.popfirst!"><code>Base.popfirst!</code></a></li><li><a href="lib/functions/#Base.prepend!"><code>Base.prepend!</code></a></li><li><a href="lib/functions/#Base.propertynames"><code>Base.propertynames</code></a></li><li><a href="lib/functions/#Base.push!"><code>Base.push!</code></a></li><li><a href="lib/functions/#Base.pushfirst!"><code>Base.pushfirst!</code></a></li><li><a href="lib/functions/#Base.reduce"><code>Base.reduce</code></a></li><li><a href="lib/functions/#Base.repeat"><code>Base.repeat</code></a></li><li><a href="lib/functions/#Base.resize!"><code>Base.resize!</code></a></li><li><a href="lib/functions/#Base.reverse"><code>Base.reverse</code></a></li><li><a href="lib/functions/#Base.reverse!"><code>Base.reverse!</code></a></li><li><a href="lib/functions/#Base.show"><code>Base.show</code></a></li><li><a href="lib/functions/#Base.similar"><code>Base.similar</code></a></li><li><a href="lib/functions/#Base.size"><code>Base.size</code></a></li><li><a href="lib/functions/#Base.sort"><code>Base.sort</code></a></li><li><a href="lib/functions/#Base.sort!"><code>Base.sort!</code></a></li><li><a href="lib/functions/#Base.sortperm"><code>Base.sortperm</code></a></li><li><a href="lib/functions/#Base.stack"><code>Base.stack</code></a></li><li><a href="lib/functions/#Base.unique"><code>Base.unique</code></a></li><li><a href="lib/functions/#Base.unique!"><code>Base.unique!</code></a></li><li><a href="lib/functions/#Base.values"><code>Base.values</code></a></li><li><a href="lib/functions/#Base.vcat"><code>Base.vcat</code></a></li><li><a href="lib/functions/#DataAPI.allcombinations"><code>DataAPI.allcombinations</code></a></li><li><a href="lib/functions/#DataAPI.antijoin"><code>DataAPI.antijoin</code></a></li><li><a 
href="lib/functions/#DataAPI.colmetadata"><code>DataAPI.colmetadata</code></a></li><li><a href="lib/functions/#DataAPI.colmetadata!"><code>DataAPI.colmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.colmetadatakeys"><code>DataAPI.colmetadatakeys</code></a></li><li><a href="lib/functions/#DataAPI.crossjoin"><code>DataAPI.crossjoin</code></a></li><li><a href="lib/functions/#DataAPI.deletecolmetadata!"><code>DataAPI.deletecolmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.deletemetadata!"><code>DataAPI.deletemetadata!</code></a></li><li><a href="lib/functions/#DataAPI.describe"><code>DataAPI.describe</code></a></li><li><a href="lib/functions/#DataAPI.emptycolmetadata!"><code>DataAPI.emptycolmetadata!</code></a></li><li><a href="lib/functions/#DataAPI.emptymetadata!"><code>DataAPI.emptymetadata!</code></a></li><li><a href="lib/functions/#DataAPI.groupby"><code>DataAPI.groupby</code></a></li><li><a href="lib/functions/#DataAPI.innerjoin"><code>DataAPI.innerjoin</code></a></li><li><a href="lib/functions/#DataAPI.leftjoin"><code>DataAPI.leftjoin</code></a></li><li><a href="lib/functions/#DataAPI.metadata"><code>DataAPI.metadata</code></a></li><li><a href="lib/functions/#DataAPI.metadata!"><code>DataAPI.metadata!</code></a></li><li><a href="lib/functions/#DataAPI.metadatakeys"><code>DataAPI.metadatakeys</code></a></li><li><a href="lib/functions/#DataAPI.ncol"><code>DataAPI.ncol</code></a></li><li><a href="lib/functions/#DataAPI.nrow"><code>DataAPI.nrow</code></a></li><li><a href="lib/functions/#DataAPI.outerjoin"><code>DataAPI.outerjoin</code></a></li><li><a href="lib/functions/#DataAPI.rightjoin"><code>DataAPI.rightjoin</code></a></li><li><a href="lib/functions/#DataAPI.rownumber"><code>DataAPI.rownumber</code></a></li><li><a href="lib/functions/#DataAPI.semijoin"><code>DataAPI.semijoin</code></a></li><li><a href="lib/functions/#DataFrames.allowmissing!"><code>DataFrames.allowmissing!</code></a></li><li><a 
href="lib/functions/#DataFrames.combine"><code>DataFrames.combine</code></a></li><li><a href="lib/functions/#DataFrames.completecases"><code>DataFrames.completecases</code></a></li><li><a href="lib/functions/#DataFrames.disallowmissing!"><code>DataFrames.disallowmissing!</code></a></li><li><a href="lib/functions/#DataFrames.dropmissing"><code>DataFrames.dropmissing</code></a></li><li><a href="lib/functions/#DataFrames.dropmissing!"><code>DataFrames.dropmissing!</code></a></li><li><a href="lib/functions/#DataFrames.fillcombinations"><code>DataFrames.fillcombinations</code></a></li><li><a href="lib/functions/#DataFrames.flatten"><code>DataFrames.flatten</code></a></li><li><a href="lib/functions/#DataFrames.groupcols"><code>DataFrames.groupcols</code></a></li><li><a href="lib/functions/#DataFrames.groupindices"><code>DataFrames.groupindices</code></a></li><li><a href="lib/functions/#DataFrames.insertcols"><code>DataFrames.insertcols</code></a></li><li><a href="lib/functions/#DataFrames.insertcols!"><code>DataFrames.insertcols!</code></a></li><li><a href="lib/functions/#DataFrames.leftjoin!"><code>DataFrames.leftjoin!</code></a></li><li><a href="lib/functions/#DataFrames.mapcols"><code>DataFrames.mapcols</code></a></li><li><a href="lib/functions/#DataFrames.mapcols!"><code>DataFrames.mapcols!</code></a></li><li><a href="lib/functions/#DataFrames.nonunique"><code>DataFrames.nonunique</code></a></li><li><a href="lib/functions/#DataFrames.order"><code>DataFrames.order</code></a></li><li><a href="lib/functions/#DataFrames.proprow"><code>DataFrames.proprow</code></a></li><li><a href="lib/functions/#DataFrames.rename"><code>DataFrames.rename</code></a></li><li><a href="lib/functions/#DataFrames.rename!"><code>DataFrames.rename!</code></a></li><li><a href="lib/functions/#DataFrames.repeat!"><code>DataFrames.repeat!</code></a></li><li><a href="lib/functions/#DataFrames.select"><code>DataFrames.select</code></a></li><li><a 
href="lib/functions/#DataFrames.select!"><code>DataFrames.select!</code></a></li><li><a href="lib/functions/#DataFrames.subset"><code>DataFrames.subset</code></a></li><li><a href="lib/functions/#DataFrames.subset!"><code>DataFrames.subset!</code></a></li><li><a href="lib/functions/#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a></li><li><a href="lib/functions/#DataFrames.transform"><code>DataFrames.transform</code></a></li><li><a href="lib/functions/#DataFrames.transform!"><code>DataFrames.transform!</code></a></li><li><a href="lib/functions/#DataFrames.unstack"><code>DataFrames.unstack</code></a></li><li><a href="lib/functions/#DataFrames.valuecols"><code>DataFrames.valuecols</code></a></li><li><a href="lib/functions/#Missings.allowmissing"><code>Missings.allowmissing</code></a></li><li><a href="lib/functions/#Missings.disallowmissing"><code>Missings.disallowmissing</code></a></li><li><a href="lib/functions/#Random.shuffle"><code>Random.shuffle</code></a></li><li><a href="lib/functions/#Random.shuffle!"><code>Random.shuffle!</code></a></li></ul></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="man/basics/">First Steps with DataFrames.jl »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option 
value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/lib/functions/index.html b/dev/lib/functions/index.html
index 6a8a9f35f..d252b1c78 100644
--- a/dev/lib/functions/index.html
+++ b/dev/lib/functions/index.html
@@ -22,7 +22,7 @@
    3 │     1  b     const
    4 │     2  b     const
    5 │     1  c     const
-   6 │     2  c     const</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1480-L1526">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.copy" href="#Base.copy"><code>Base.copy</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">copy(df::DataFrame; copycols::Bool=true)</code></pre><p>Copy data frame <code>df</code>. If <code>copycols=true</code> (the default), return a new  <code>DataFrame</code> holding copies of column vectors in <code>df</code>. If <code>copycols=false</code>, return a new <code>DataFrame</code> sharing column vectors with <code>df</code>.</p><p>Metadata: this function preserves all table-level and column-level metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L795-L804">source</a></section><section><div><pre><code class="language-julia hljs">copy(dfr::DataFrameRow)</code></pre><p>Construct a <code>NamedTuple</code> with the same contents as the <a href="../types/#DataFrames.DataFrameRow"><code>DataFrameRow</code></a>. 
This method returns a <code>NamedTuple</code> so that the returned object is not affected by changes to the parent data frame of which <code>dfr</code> is a view.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L433-L440">source</a></section><section><div><pre><code class="language-julia hljs">copy(key::GroupKey)</code></pre><p>Construct a <code>NamedTuple</code> with the same contents as the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L740-L744">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.similar" href="#Base.similar"><code>Base.similar</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">similar(df::AbstractDataFrame, rows::Integer=nrow(df))</code></pre><p>Create a new <code>DataFrame</code> with the same column names and column element types as <code>df</code>. 
An optional second argument can be provided to request a number of rows that is different than the number of rows present in <code>df</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L465-L473">source</a></section></article><h2 id="Summary-information"><a class="docs-heading-anchor" href="#Summary-information">Summary information</a><a id="Summary-information-1"></a><a class="docs-heading-anchor-permalink" href="#Summary-information" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.describe" href="#DataAPI.describe"><code>DataAPI.describe</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">describe(df::AbstractDataFrame; cols=:)
+   6 │     2  c     const</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1480-L1526">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.copy" href="#Base.copy"><code>Base.copy</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">copy(df::DataFrame; copycols::Bool=true)</code></pre><p>Copy data frame <code>df</code>. If <code>copycols=true</code> (the default), return a new  <code>DataFrame</code> holding copies of column vectors in <code>df</code>. If <code>copycols=false</code>, return a new <code>DataFrame</code> sharing column vectors with <code>df</code>.</p><p>Metadata: this function preserves all table-level and column-level metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L795-L804">source</a></section><section><div><pre><code class="language-julia hljs">copy(dfr::DataFrameRow)</code></pre><p>Construct a <code>NamedTuple</code> with the same contents as the <a href="../types/#DataFrames.DataFrameRow"><code>DataFrameRow</code></a>. 
This method returns a <code>NamedTuple</code> so that the returned object is not affected by changes to the parent data frame of which <code>dfr</code> is a view.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L433-L440">source</a></section><section><div><pre><code class="language-julia hljs">copy(key::GroupKey)</code></pre><p>Construct a <code>NamedTuple</code> with the same contents as the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L740-L744">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.similar" href="#Base.similar"><code>Base.similar</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">similar(df::AbstractDataFrame, rows::Integer=nrow(df))</code></pre><p>Create a new <code>DataFrame</code> with the same column names and column element types as <code>df</code>. 
An optional second argument can be provided to request a number of rows that is different than the number of rows present in <code>df</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L465-L473">source</a></section></article><h2 id="Summary-information"><a class="docs-heading-anchor" href="#Summary-information">Summary information</a><a id="Summary-information-1"></a><a class="docs-heading-anchor-permalink" href="#Summary-information" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.describe" href="#DataAPI.describe"><code>DataAPI.describe</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">describe(df::AbstractDataFrame; cols=:)
 describe(df::AbstractDataFrame, stats::Union{Symbol, Pair}...; cols=:)</code></pre><p>Return descriptive statistics for a data frame as a new <code>DataFrame</code> where each row represents a variable and each column a summary statistic.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the <code>AbstractDataFrame</code></li><li><code>stats::Union{Symbol, Pair}...</code> : the summary statistics to report. Arguments can be:<ul><li>A symbol from the list <code>:mean</code>, <code>:std</code>, <code>:min</code>, <code>:q25</code>, <code>:median</code>, <code>:q75</code>, <code>:max</code>, <code>:sum</code>, <code>:eltype</code>, <code>:nunique</code>, <code>:nuniqueall</code>, <code>:first</code>, <code>:last</code>, <code>:nnonmissing</code>, and <code>:nmissing</code>. The default statistics used are <code>:mean</code>, <code>:min</code>, <code>:median</code>, <code>:max</code>, <code>:nmissing</code>, and <code>:eltype</code>.</li><li><code>:detailed</code> as the only <code>Symbol</code> argument to return all statistics except <code>:first</code>, <code>:last</code>, <code>:sum</code>, <code>:nuniqueall</code>, and <code>:nnonmissing</code>.</li><li><code>:all</code> as the only <code>Symbol</code> argument to return all statistics.</li><li>A <code>function =&gt; name</code> pair where <code>name</code> is a <code>Symbol</code> or string. This will create a column of summary statistics with the provided name.</li></ul></li><li><code>cols</code> : a keyword argument allowing to select only a subset or transformation of columns from <code>df</code> to describe. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a>.</li></ul><p><strong>Details</strong></p><p>For <code>Real</code> columns, compute the mean, standard deviation, minimum, first quantile, median, third quantile, and maximum. 
If a column does not derive from <code>Real</code>, <code>describe</code> will attempt to calculate all statistics, using <code>nothing</code> as a fall-back in the case of an error.</p><p>When <code>stats</code> contains <code>:nunique</code>, <code>describe</code> will report the number of unique values in a column. If a column&#39;s base type derives from <code>Real</code>, <code>:nunique</code> will return <code>nothing</code>s. Use <code>:nuniqueall</code> to report the number of unique values in all columns.</p><p>Missing values are filtered in the calculation of all statistics, however the column <code>:nmissing</code> will report the number of missing values of that variable and <code>:nnonmissing</code> the number of non-missing values.</p><p>If custom functions are provided, they are called repeatedly with the vector corresponding to each column as the only argument. For columns allowing for missing values, the vector is wrapped in a call to <code>skipmissing</code>: custom functions must therefore support such objects (and not only vectors), and cannot access missing values.</p><p>Metadata: this function drops all metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:10, x=0.1:0.1:1.0, y=&#39;a&#39;:&#39;j&#39;);
 
 julia&gt; describe(df)
@@ -57,7 +57,7 @@
  Row │ variable  min      sum
      │ Symbol    Float64  Float64
 ─────┼────────────────────────────
-   1 │ x             0.1      5.5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L603-L689">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.isempty" href="#Base.isempty"><code>Base.isempty</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isempty(df::AbstractDataFrame)</code></pre><p>Return <code>true</code> if data frame <code>df</code> has zero rows, and <code>false</code> otherwise.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L429-L433">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.length" href="#Base.length"><code>Base.length</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">length(dfr::DataFrameRow)</code></pre><p>Return the number of elements of <code>dfr</code>.</p><p>See also: <a href="#Base.size"><code>size</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; dfr = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;)[1, :]
+   1 │ x             0.1      5.5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L603-L689">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.isempty" href="#Base.isempty"><code>Base.isempty</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isempty(df::AbstractDataFrame)</code></pre><p>Return <code>true</code> if data frame <code>df</code> has zero rows, and <code>false</code> otherwise.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L429-L433">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.length" href="#Base.length"><code>Base.length</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">length(dfr::DataFrameRow)</code></pre><p>Return the number of elements of <code>dfr</code>.</p><p>See also: <a href="#Base.size"><code>size</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; dfr = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;)[1, :]
 DataFrameRow
  Row │ a      b
      │ Int64  Char
@@ -65,15 +65,15 @@
    1 │     1  a
 
 julia&gt; length(dfr)
-2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L355-L374">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.ncol" href="#DataAPI.ncol"><code>DataAPI.ncol</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">ncol(df::AbstractDataFrame)</code></pre><p>Return the number of columns in an <code>AbstractDataFrame</code> <code>df</code>.</p><p>See also <a href="#DataAPI.nrow"><code>nrow</code></a>, <a href="#Base.size"><code>size</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:10, x=rand(10), y=rand([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], 10));
+2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L355-L374">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.ncol" href="#DataAPI.ncol"><code>DataAPI.ncol</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">ncol(df::AbstractDataFrame)</code></pre><p>Return the number of columns in an <code>AbstractDataFrame</code> <code>df</code>.</p><p>See also <a href="#DataAPI.nrow"><code>nrow</code></a>, <a href="#Base.size"><code>size</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:10, x=rand(10), y=rand([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], 10));
 
 julia&gt; ncol(df)
-3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L411-L426">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.ndims" href="#Base.ndims"><code>Base.ndims</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">ndims(::AbstractDataFrame)
-ndims(::Type{&lt;:AbstractDataFrame})</code></pre><p>Return the number of dimensions of a data frame, which is always <code>2</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L438-L443">source</a></section><section><div><pre><code class="language-julia hljs">ndims(::DataFrameRow)
-ndims(::Type{&lt;:DataFrameRow})</code></pre><p>Return the number of dimensions of a data frame row, which is always <code>1</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L377-L382">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.nrow" href="#DataAPI.nrow"><code>DataAPI.nrow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">nrow(df::AbstractDataFrame)</code></pre><p>Return the number of rows in an <code>AbstractDataFrame</code> <code>df</code>.</p><p>See also: <a href="#DataAPI.ncol"><code>ncol</code></a>, <a href="#Base.size"><code>size</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:10, x=rand(10), y=rand([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], 10));
+3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L411-L426">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.ndims" href="#Base.ndims"><code>Base.ndims</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">ndims(::AbstractDataFrame)
+ndims(::Type{&lt;:AbstractDataFrame})</code></pre><p>Return the number of dimensions of a data frame, which is always <code>2</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L438-L443">source</a></section><section><div><pre><code class="language-julia hljs">ndims(::DataFrameRow)
+ndims(::Type{&lt;:DataFrameRow})</code></pre><p>Return the number of dimensions of a data frame row, which is always <code>1</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L377-L382">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.nrow" href="#DataAPI.nrow"><code>DataAPI.nrow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">nrow(df::AbstractDataFrame)</code></pre><p>Return the number of rows in an <code>AbstractDataFrame</code> <code>df</code>.</p><p>See also: <a href="#DataAPI.ncol"><code>ncol</code></a>, <a href="#Base.size"><code>size</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:10, x=rand(10), y=rand([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], 10));
 
 julia&gt; nrow(df)
-10</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L449-L464">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.rownumber" href="#DataAPI.rownumber"><code>DataAPI.rownumber</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rownumber(dfr::DataFrameRow)</code></pre><p>Return a row number in the <code>AbstractDataFrame</code> that <code>dfr</code> was created from.</p><p>Note that this differs from the first element in the tuple returned by <code>parentindices</code>. The latter gives the row number in the <code>parent(dfr)</code>, which is the source <code>DataFrame</code> where data that <code>dfr</code> gives access to is stored.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(reshape(1:12, 3, 4), :auto)
+10</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L449-L464">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.rownumber" href="#DataAPI.rownumber"><code>DataAPI.rownumber</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rownumber(dfr::DataFrameRow)</code></pre><p>Return a row number in the <code>AbstractDataFrame</code> that <code>dfr</code> was created from.</p><p>Note that this differs from the first element in the tuple returned by <code>parentindices</code>. The latter gives the row number in the <code>parent(dfr)</code>, which is the source <code>DataFrame</code> where data that <code>dfr</code> gives access to is stored.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(reshape(1:12, 3, 4), :auto)
 3×4 DataFrame
  Row │ x1     x2     x3     x4
      │ Int64  Int64  Int64  Int64
@@ -132,7 +132,7 @@
 ─────┼────────────────────────────
    1 │     1      4      7     10
    2 │     2      5      8     11
-   3 │     3      6      9     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L121-L193">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.show" href="#Base.show"><code>Base.show</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">show([io::IO, ]df::AbstractDataFrame;
+   3 │     3      6      9     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L121-L193">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.show" href="#Base.show"><code>Base.show</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">show([io::IO, ]df::AbstractDataFrame;
      allrows::Bool = !get(io, :limit, false),
      allcols::Bool = !get(io, :limit, false),
      allgroups::Bool = !get(io, :limit, false),
@@ -151,7 +151,7 @@
 ───────────────
      1  x
      2  y
-     3  z</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/show.jl#L284-L338">source</a></section><section><div><pre><code class="language-julia hljs">show(io::IO, mime::MIME, df::AbstractDataFrame)</code></pre><p>Render a data frame to an I/O stream in MIME type <code>mime</code>.</p><p><strong>Arguments</strong></p><ul><li><code>io::IO</code>: The I/O stream to which <code>df</code> will be printed.</li><li><code>mime::MIME</code>: supported MIME types are: <code>&quot;text/plain&quot;</code>, <code>&quot;text/html&quot;</code>, <code>&quot;text/latex&quot;</code>, <code>&quot;text/csv&quot;</code>, <code>&quot;text/tab-separated-values&quot;</code> (the last two MIME types do not support  showing <code>#undef</code> values)</li><li><code>df::AbstractDataFrame</code>: The data frame to print.</li></ul><p>Additionally selected MIME types support passing the following keyword arguments:</p><ul><li>MIME type <code>&quot;text/plain&quot;</code> accepts all listed keyword arguments and their behavior is identical as for <code>show(::IO, ::AbstractDataFrame)</code></li><li>MIME type <code>&quot;text/html&quot;</code> accepts the following keyword arguments:<ul><li><code>eltypes::Bool = true</code>: Whether to print the column types under column names.</li><li><code>summary::Bool = true</code>: Whether to print a brief string summary of the data frame.</li><li><code>max_column_width::AbstractString = &quot;&quot;</code>: The maximum column width. It must     be a string containing a valid CSS length. For example, passing     &quot;100px&quot; will limit the width of all columns to 100 pixels. 
If empty,     the columns will be rendered without limits.</li><li><code>kwargs...</code>: Any keyword argument supported by the function <code>pretty_table</code> of PrettyTables.jl can be passed here to customize the output.</li></ul></li></ul><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; show(stdout, MIME(&quot;text/latex&quot;), DataFrame(A=1:3, B=[&quot;x&quot;, &quot;y&quot;, &quot;z&quot;]))
+     3  z</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/show.jl#L284-L338">source</a></section><section><div><pre><code class="language-julia hljs">show(io::IO, mime::MIME, df::AbstractDataFrame)</code></pre><p>Render a data frame to an I/O stream in MIME type <code>mime</code>.</p><p><strong>Arguments</strong></p><ul><li><code>io::IO</code>: The I/O stream to which <code>df</code> will be printed.</li><li><code>mime::MIME</code>: supported MIME types are: <code>&quot;text/plain&quot;</code>, <code>&quot;text/html&quot;</code>, <code>&quot;text/latex&quot;</code>, <code>&quot;text/csv&quot;</code>, <code>&quot;text/tab-separated-values&quot;</code> (the last two MIME types do not support  showing <code>#undef</code> values)</li><li><code>df::AbstractDataFrame</code>: The data frame to print.</li></ul><p>Additionally selected MIME types support passing the following keyword arguments:</p><ul><li>MIME type <code>&quot;text/plain&quot;</code> accepts all listed keyword arguments and their behavior is identical as for <code>show(::IO, ::AbstractDataFrame)</code></li><li>MIME type <code>&quot;text/html&quot;</code> accepts the following keyword arguments:<ul><li><code>eltypes::Bool = true</code>: Whether to print the column types under column names.</li><li><code>summary::Bool = true</code>: Whether to print a brief string summary of the data frame.</li><li><code>max_column_width::AbstractString = &quot;&quot;</code>: The maximum column width. It must     be a string containing a valid CSS length. For example, passing     &quot;100px&quot; will limit the width of all columns to 100 pixels. 
If empty,     the columns will be rendered without limits.</li><li><code>kwargs...</code>: Any keyword argument supported by the function <code>pretty_table</code> of PrettyTables.jl can be passed here to customize the output.</li></ul></li></ul><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; show(stdout, MIME(&quot;text/latex&quot;), DataFrame(A=1:3, B=[&quot;x&quot;, &quot;y&quot;, &quot;z&quot;]))
 \begin{tabular}{r|cc}
 	&amp; A &amp; B\\
 	\hline
@@ -167,13 +167,13 @@
 &quot;A&quot;,&quot;B&quot;
 1,&quot;x&quot;
 2,&quot;y&quot;
-3,&quot;z&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/io.jl#L89-L134">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.size" href="#Base.size"><code>Base.size</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">size(df::AbstractDataFrame[, dim])</code></pre><p>Return a tuple containing the number of rows and columns of <code>df</code>. Optionally a dimension <code>dim</code> can be specified, where <code>1</code> corresponds to rows and <code>2</code> corresponds to columns.</p><p>See also: <a href="#DataAPI.nrow"><code>nrow</code></a>, <a href="#DataAPI.ncol"><code>ncol</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;);
+3,&quot;z&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/io.jl#L89-L134">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.size" href="#Base.size"><code>Base.size</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">size(df::AbstractDataFrame[, dim])</code></pre><p>Return a tuple containing the number of rows and columns of <code>df</code>. Optionally a dimension <code>dim</code> can be specified, where <code>1</code> corresponds to rows and <code>2</code> corresponds to columns.</p><p>See also: <a href="#DataAPI.nrow"><code>nrow</code></a>, <a href="#DataAPI.ncol"><code>ncol</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;);
 
 julia&gt; size(df)
 (3, 2)
 
 julia&gt; size(df, 1)
-3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L379-L399">source</a></section><section><div><pre><code class="language-julia hljs">size(dfr::DataFrameRow[, dim])</code></pre><p>Return a 1-tuple containing the number of elements of <code>dfr</code>. If an optional dimension <code>dim</code> is specified, it must be <code>1</code>, and the number of elements is returned directly as a number.</p><p>See also: <a href="#Base.length"><code>length</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; dfr = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;)[1, :]
+3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L379-L399">source</a></section><section><div><pre><code class="language-julia hljs">size(dfr::DataFrameRow[, dim])</code></pre><p>Return a 1-tuple containing the number of elements of <code>dfr</code>. If an optional dimension <code>dim</code> is specified, it must be <code>1</code>, and the number of elements is returned directly as a number.</p><p>See also: <a href="#Base.length"><code>length</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; dfr = DataFrame(a=1:3, b=&#39;a&#39;:&#39;c&#39;)[1, :]
 DataFrameRow
  Row │ a      b
      │ Int64  Char
@@ -184,7 +184,7 @@
 (2,)
 
 julia&gt; size(dfr, 1)
-2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L327-L351">source</a></section></article><h2 id="Working-with-column-names"><a class="docs-heading-anchor" href="#Working-with-column-names">Working with column names</a><a id="Working-with-column-names-1"></a><a class="docs-heading-anchor-permalink" href="#Working-with-column-names" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.names" href="#Base.names"><code>Base.names</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">names(df::AbstractDataFrame, cols=:)
+2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L327-L351">source</a></section></article><h2 id="Working-with-column-names"><a class="docs-heading-anchor" href="#Working-with-column-names">Working with column names</a><a id="Working-with-column-names-1"></a><a class="docs-heading-anchor-permalink" href="#Working-with-column-names" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.names" href="#Base.names"><code>Base.names</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">names(df::AbstractDataFrame, cols=:)
 names(df::DataFrameRow, cols=:)
 names(df::GroupedDataFrame, cols=:)
 names(df::DataFrameRows, cols=:)
@@ -229,7 +229,7 @@
 julia&gt; names(df, any.(ismissing, eachcol(df))) # pick columns that contain missing values
 2-element Vector{String}:
  &quot;x1&quot;
- &quot;x3&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L29-L98">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.propertynames" href="#Base.propertynames"><code>Base.propertynames</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">propertynames(df::AbstractDataFrame)</code></pre><p>Return a freshly allocated <code>Vector{Symbol}</code> of names of columns contained in <code>df</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L452-L456">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.rename" href="#DataFrames.rename"><code>DataFrames.rename</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};
+ &quot;x3&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L29-L98">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.propertynames" href="#Base.propertynames"><code>Base.propertynames</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">propertynames(df::AbstractDataFrame)</code></pre><p>Return a freshly allocated <code>Vector{Symbol}</code> of names of columns contained in <code>df</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L452-L456">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.rename" href="#DataFrames.rename"><code>DataFrames.rename</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};
        makeunique::Bool=false)
 rename(df::AbstractDataFrame, vals::AbstractVector{&lt;:AbstractString};
        makeunique::Bool=false)
@@ -291,7 +291,7 @@
      │ Int64  Int64  Int64
 ─────┼─────────────────────
    1 │     1      2      3
-</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L269-L371">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.rename!" href="#DataFrames.rename!"><code>DataFrames.rename!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
+</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L269-L371">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.rename!" href="#DataFrames.rename!"><code>DataFrames.rename!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
         makeunique::Bool=false)
 rename!(df::AbstractDataFrame, vals::AbstractVector{&lt;:AbstractString};
         makeunique::Bool=false)
@@ -342,7 +342,7 @@
      │ Int64  Int64  Int64
 ─────┼─────────────────────
    1 │     1      2      3
-</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L119-L209">source</a></section></article><h2 id="Mutating-and-transforming-data-frames-and-grouped-data-frames"><a class="docs-heading-anchor" href="#Mutating-and-transforming-data-frames-and-grouped-data-frames">Mutating and transforming data frames and grouped data frames</a><a id="Mutating-and-transforming-data-frames-and-grouped-data-frames-1"></a><a class="docs-heading-anchor-permalink" href="#Mutating-and-transforming-data-frames-and-grouped-data-frames" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.append!" href="#Base.append!"><code>Base.append!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">append!(df::DataFrame, tables...; cols::Symbol=:setequal,
+</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L119-L209">source</a></section></article><h2 id="Mutating-and-transforming-data-frames-and-grouped-data-frames"><a class="docs-heading-anchor" href="#Mutating-and-transforming-data-frames-and-grouped-data-frames">Mutating and transforming data frames and grouped data frames</a><a id="Mutating-and-transforming-data-frames-and-grouped-data-frames-1"></a><a class="docs-heading-anchor-permalink" href="#Mutating-and-transforming-data-frames-and-grouped-data-frames" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.append!" href="#Base.append!"><code>Base.append!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">append!(df::DataFrame, tables...; cols::Symbol=:setequal,
         promote::Bool=(cols in [:union, :subset]))</code></pre><p>Add the rows of tables passed as <code>tables</code> to the end of <code>df</code>. If the table is not an <code>AbstractDataFrame</code> then it is converted using <code>DataFrame(table, copycols=false)</code> before being appended.</p><p>The exact behavior of <code>append!</code> depends on the <code>cols</code> argument:</p><ul><li>If <code>cols == :setequal</code> (this is the default) then <code>df2</code> must contain exactly the same columns as <code>df</code> (but possibly in a different order).</li><li>If <code>cols == :orderequal</code> then <code>df2</code> must contain the same columns in the same order (for <code>AbstractDict</code> this option requires that <code>keys(row)</code> matches <code>propertynames(df)</code> to allow for support of ordered dicts; however, if <code>df2</code> is a <code>Dict</code> an error is thrown as it is an unordered collection).</li><li>If <code>cols == :intersect</code> then <code>df2</code> may contain more columns than <code>df</code>, but all column names that are present in <code>df</code> must be present in <code>df2</code> and only these are used.</li><li>If <code>cols == :subset</code> then <code>append!</code> behaves like for <code>:intersect</code> but if some column is missing in <code>df2</code> then a <code>missing</code> value is pushed to <code>df</code>.</li><li>If <code>cols == :union</code> then <code>append!</code> adds columns missing in <code>df</code> that are present in <code>df2</code>, for columns present in <code>df</code> but missing in <code>df2</code> a <code>missing</code> value is pushed.</li></ul><p>If <code>promote=true</code> and element type of a column present in <code>df</code> does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in <code>df</code>. 
If <code>promote=false</code> an error is thrown.</p><p>The above rule has the following exceptions:</p><ul><li>If <code>df</code> has no columns then copies of columns from <code>df2</code> are added to it.</li><li>If <code>df2</code> has no columns then calling <code>append!</code> leaves <code>df</code> unchanged.</li></ul><p>Please note that <code>append!</code> must not be used on a <code>DataFrame</code> that contains columns that are aliases (equal when compared with <code>===</code>).</p><p>Metadata: table-level <code>:note</code>-style metadata and column-level <code>:note</code>-style metadata for columns present in <code>df</code> are preserved. If new columns are added their <code>:note</code>-style metadata is copied from the appended table. Other metadata is dropped.</p><p>See also: use <a href="#Base.push!"><code>push!</code></a> to add individual rows to a data frame, <a href="#Base.prepend!"><code>prepend!</code></a> to add a table at the beginning, and <a href="#Base.vcat"><code>vcat</code></a> to vertically concatenate data frames.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(A=1:3, B=1:3)
 3×2 DataFrame
  Row │ A      B
@@ -385,7 +385,7 @@
    3 │       6.0        6  missing
    4 │       1.0  missing  missing
    5 │ missing    missing        1
-   6 │ missing    missing        2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/insertion.jl#L1-L92">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.combine" href="#DataFrames.combine"><code>DataFrames.combine</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">combine(df::AbstractDataFrame, args...;
+   6 │ missing    missing        2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/insertion.jl#L1-L92">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.combine" href="#DataFrames.combine"><code>DataFrames.combine</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">combine(df::AbstractDataFrame, args...;
         renamecols::Bool=true, threads::Bool=true)
 combine(f::Callable, df::AbstractDataFrame;
         renamecols::Bool=true, threads::Bool=true)
@@ -625,7 +625,7 @@
    5 │     3      2      3      5
    6 │     3      2      7      9
    7 │     4      1      4      5
-   8 │     4      1      8      9</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selection.jl#L1396-L1673">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.fillcombinations" href="#DataFrames.fillcombinations"><code>DataFrames.fillcombinations</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">fillcombinations(df::AbstractDataFrame, indexcols;
+   8 │     4      1      8      9</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selection.jl#L1396-L1673">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.fillcombinations" href="#DataFrames.fillcombinations"><code>DataFrames.fillcombinations</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">fillcombinations(df::AbstractDataFrame, indexcols;
                      allowduplicates::Bool=false,
                      fill=missing)</code></pre><p>Generate all combinations of levels of column(s) <code>indexcols</code> in data frame <code>df</code>. Levels and their order are determined by the <code>levels</code> function (i.e. unique values sorted lexicographically by default, or a custom set of levels for e.g. <code>CategoricalArray</code> columns), in addition to <code>missing</code> if present.</p><p>For combinations of <code>indexcols</code> not present in <code>df</code> these columns are filled with the <code>fill</code> value (<code>missing</code> by default).</p><p>If <code>allowduplicates=false</code> (the default) <code>indexcols</code> may only contain unique combinations of <code>indexcols</code> values. If <code>allowduplicates=true</code> duplicates are allowed.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:2, y=&#39;a&#39;:&#39;b&#39;, z=[&quot;x&quot;, &quot;y&quot;])
 2×3 DataFrame
@@ -653,7 +653,7 @@
    1 │      1  a     x
    2 │      0  b     x
    3 │      0  a     y
-   4 │      2  b     y</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1421-L1471">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.flatten" href="#DataFrames.flatten"><code>DataFrames.flatten</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">flatten(df::AbstractDataFrame, cols; scalar::Type=Union{})</code></pre><p>When columns <code>cols</code> of data frame <code>df</code> have iterable elements that define <code>length</code> (for example a <code>Vector</code> of <code>Vector</code>s), return a <code>DataFrame</code> where each element of each <code>col</code> in <code>cols</code> is flattened, meaning the column corresponding to <code>col</code> becomes a longer vector where the original entries are concatenated. Elements of row <code>i</code> of <code>df</code> in columns other than <code>cols</code> will be repeated according to the length of <code>df[i, col]</code>. These lengths must therefore be the same for each <code>col</code> in <code>cols</code>, or else an error is raised. 
Note that these elements are not copied, and thus if they are mutable changing them in the returned <code>DataFrame</code> will affect <code>df</code>.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>scalar</code> is passed then values that have this type in flattened columns are treated as scalars and broadcasted as many times as is needed to match lengths of values stored in other columns. If all values in a row are scalars, a single row is produced.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
+   4 │      2  b     y</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1421-L1471">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.flatten" href="#DataFrames.flatten"><code>DataFrames.flatten</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">flatten(df::AbstractDataFrame, cols; scalar::Type=Union{})</code></pre><p>When columns <code>cols</code> of data frame <code>df</code> have iterable elements that define <code>length</code> (for example a <code>Vector</code> of <code>Vector</code>s), return a <code>DataFrame</code> where each element of each <code>col</code> in <code>cols</code> is flattened, meaning the column corresponding to <code>col</code> becomes a longer vector where the original entries are concatenated. Elements of row <code>i</code> of <code>df</code> in columns other than <code>cols</code> will be repeated according to the length of <code>df[i, col]</code>. These lengths must therefore be the same for each <code>col</code> in <code>cols</code>, or else an error is raised. 
Note that these elements are not copied, and thus if they are mutable changing them in the returned <code>DataFrame</code> will affect <code>df</code>.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>scalar</code> is passed then values that have this type in flattened columns are treated as scalars and broadcasted as many times as is needed to match lengths of values stored in other columns. If all values in a row are scalars, a single row is produced.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
 2×3 DataFrame
  Row │ a      b       c
      │ Int64  Array…  Array…
@@ -730,7 +730,7 @@
    2 │     1        2        6
    3 │     2  missing  missing
    4 │     3  missing        7
-   5 │     3  missing        8</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2054-L2158">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.hcat" href="#Base.hcat"><code>Base.hcat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">hcat(df::AbstractDataFrame...;
+   5 │     3  missing        8</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2054-L2158">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.hcat" href="#Base.hcat"><code>Base.hcat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">hcat(df::AbstractDataFrame...;
      makeunique::Bool=false, copycols::Bool=true)</code></pre><p>Horizontally concatenate data frames.</p><p>If <code>makeunique=false</code> (the default) column names of passed objects must be unique. If <code>makeunique=true</code> then duplicate column names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</p><p>If <code>copycols=true</code> (the default) then the <code>DataFrame</code> returned by <code>hcat</code> will contain copied columns from the source data frames. If <code>copycols=false</code> then it will contain columns as they are stored in the source (without copying). This option should be used with caution as mutating either the columns in sources or in the returned <code>DataFrame</code> might lead to the corruption of the other object.</p><p>Metadata: <code>hcat</code> propagates table-level <code>:note</code>-style metadata for keys that are present in all passed data frames and have the same value; it propagates column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(A=1:3, B=1:3)
 3×2 DataFrame
  Row │ A      B
@@ -764,7 +764,7 @@
 julia&gt; df3 = hcat(df1, df2, makeunique=true, copycols=false);
 
 julia&gt; df3.A === df1.A
-true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1574-L1632">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.insert!" href="#Base.insert!"><code>Base.insert!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};
+true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1574-L1632">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.insert!" href="#Base.insert!"><code>Base.insert!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};
         cols::Symbol=:setequal, promote::Bool=false)
 insert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple,
                                                   AbstractDict, Tables.AbstractRow};
@@ -835,7 +835,7 @@
    5 │ b              2  missing
    6 │ c              3  missing
    7 │ a              1  missing
-   8 │ 1.0      missing        1.0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/insertion.jl#L653-L738">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.insertcols" href="#DataFrames.insertcols"><code>DataFrames.insertcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insertcols(df::AbstractDataFrame[, col], (name=&gt;val)::Pair...;
+   8 │ 1.0      missing        1.0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/insertion.jl#L653-L738">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.insertcols" href="#DataFrames.insertcols"><code>DataFrames.insertcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insertcols(df::AbstractDataFrame[, col], (name=&gt;val)::Pair...;
            after::Bool=false, makeunique::Bool=false, copycols::Bool=true)</code></pre><p>Insert a column into a copy of <code>df</code> data frame using the <a href="#DataFrames.insertcols!"><code>insertcols!</code></a> function and return the newly created data frame.</p><p>If <code>col</code> is omitted it is set to <code>ncol(df)+1</code> (the column is inserted as the last column).</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the data frame to which we want to add columns</li><li><code>col</code> : a position at which we want to insert a column, passed as an integer or a column name (a string or a <code>Symbol</code>); the column selected with <code>col</code> and columns following it are shifted to the right in <code>df</code> after the operation</li><li><code>name</code> : the name of the new column</li><li><code>val</code> : an <code>AbstractVector</code> giving the contents of the new column or a value of any type other than <code>AbstractArray</code> which will be repeated to fill a new vector; As a particular rule a values stored in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and treated in the same way</li><li><code>after</code> : if <code>true</code> columns are inserted after <code>col</code></li><li><code>makeunique</code> : defines what to do if <code>name</code> already exists in <code>df</code>; if it is <code>false</code> an error will be thrown; if it is <code>true</code> a new unique name will be generated by adding a suffix</li><li><code>copycols</code> : whether vectors passed as columns should be copied</li></ul><p>If <code>val</code> is an <code>AbstractRange</code> then the result of <code>collect(val)</code> is inserted.</p><p>If <code>df</code> is a <code>SubDataFrame</code> then it must have been created with <code>:</code> as column selector (otherwise an error is thrown). In this case the <code>copycols</code> keyword argument is ignored (i.e. 
the added column is always copied) and the parent data frame&#39;s column is filled with <code>missing</code> in rows that are filtered out by <code>df</code>.</p><p>If <code>df</code> isa <code>DataFrame</code> that has no columns and only values other than <code>AbstractVector</code> are passed then it is used to create a one-element column. If <code>df</code> isa <code>DataFrame</code> that has no columns and at least one <code>AbstractVector</code> is passed then its length is used to determine the number of elements in all created columns. In all other cases the number of rows in all created columns must match <code>nrow(df)</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also <a href="#DataFrames.insertcols!"><code>insertcols!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3)
 3×1 DataFrame
  Row │ a
@@ -870,7 +870,7 @@
 ─────┼──────────────
    1 │     1      7
    2 │     2      8
-   3 │     3      9</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2647-L2698">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.insertcols!" href="#DataFrames.insertcols!"><code>DataFrames.insertcols!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insertcols!(df::AbstractDataFrame[, col], (name=&gt;val)::Pair...;
+   3 │     3      9</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2647-L2698">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.insertcols!" href="#DataFrames.insertcols!"><code>DataFrames.insertcols!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">insertcols!(df::AbstractDataFrame[, col], (name=&gt;val)::Pair...;
             after::Bool=false, makeunique::Bool=false, copycols::Bool=true)</code></pre><p>Insert a column into a data frame in place. Return the updated data frame.</p><p>If <code>col</code> is omitted it is set to <code>ncol(df)+1</code> (the column is inserted as the last column).</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the data frame to which we want to add columns</li><li><code>col</code> : a position at which we want to insert a column, passed as an integer or a column name (a string or a <code>Symbol</code>); the column selected with <code>col</code> and columns following it are shifted to the right in <code>df</code> after the operation</li><li><code>name</code> : the name of the new column</li><li><code>val</code> : an <code>AbstractVector</code> giving the contents of the new column or a value of any type other than <code>AbstractArray</code> which will be repeated to fill a new vector; As a particular rule a values stored in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and treated in the same way</li><li><code>after</code> : if <code>true</code> columns are inserted after <code>col</code></li><li><code>makeunique</code> : defines what to do if <code>name</code> already exists in <code>df</code>; if it is <code>false</code> an error will be thrown; if it is <code>true</code> a new unique name will be generated by adding a suffix</li><li><code>copycols</code> : whether vectors passed as columns should be copied</li></ul><p>If <code>val</code> is an <code>AbstractRange</code> then the result of <code>collect(val)</code> is inserted.</p><p>If <code>df</code> is a <code>SubDataFrame</code> then it must have been created with <code>:</code> as column selector (otherwise an error is thrown). In this case the <code>copycols</code> keyword argument is ignored (i.e. 
the added column is always copied) and the parent data frame&#39;s column is filled with <code>missing</code> in rows that are filtered out by <code>df</code>.</p><p>If <code>df</code> isa <code>DataFrame</code> that has no columns and only values other than <code>AbstractVector</code> are passed then it is used to create a one-element column. If <code>df</code> isa <code>DataFrame</code> that has no columns and at least one <code>AbstractVector</code> is passed then its length is used to determine the number of elements in all created columns. In all other cases the number of rows in all created columns must match <code>nrow(df)</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p>See also <a href="#DataFrames.insertcols"><code>insertcols</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3)
 3×1 DataFrame
  Row │ a
@@ -905,7 +905,7 @@
 ─────┼──────────────────────────────────
    1 │ a         7      2      3      1
    2 │ b         8      3      4      2
-   3 │ c         9      4      5      3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2704-L2755">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.invpermute!" href="#Base.invpermute!"><code>Base.invpermute!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">invpermute!(df::AbstractDataFrame, p)</code></pre><p>Like <a href="#Base.permute!"><code>permute!</code></a>, but the inverse of the given permutation is applied.</p><p><code>invpermute!</code> will produce a correct result even if some columns of passed data frame or permutation <code>p</code> are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>invpermute!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
+   3 │ c         9      4      5      3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2704-L2755">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.invpermute!" href="#Base.invpermute!"><code>Base.invpermute!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">invpermute!(df::AbstractDataFrame, p)</code></pre><p>Like <a href="#Base.permute!"><code>permute!</code></a>, but the inverse of the given permutation is applied.</p><p><code>invpermute!</code> will produce a correct result even if some columns of passed data frame or permutation <code>p</code> are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>invpermute!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
 5×3 DataFrame
  Row │ a      b      c
      │ Int64  Int64  Int64
@@ -936,7 +936,7 @@
    2 │     2      7     12
    3 │     3      8     13
    4 │     4      9     14
-   5 │     5     10     15</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2486-L2535">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.mapcols" href="#DataFrames.mapcols"><code>DataFrames.mapcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())</code></pre><p>Return a <code>DataFrame</code> where each column of <code>df</code> selected by <code>cols</code> (by default, all columns) is transformed using function <code>f</code>. Columns not selected by <code>cols</code> are copied.</p><p><code>f</code> must return <code>AbstractVector</code> objects all with the same length or scalars (all values other than <code>AbstractVector</code> are considered to be a scalar).</p><p>The <code>cols</code> column selector can be any value accepted as column selector by the <code>names</code> function.</p><p>Note that <code>mapcols</code> guarantees not to reuse the columns from <code>df</code> in the returned <code>DataFrame</code>. If <code>f</code> returns its argument then it gets copied before being stored.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
+   5 │     5     10     15</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2486-L2535">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.mapcols" href="#DataFrames.mapcols"><code>DataFrames.mapcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())</code></pre><p>Return a <code>DataFrame</code> where each column of <code>df</code> selected by <code>cols</code> (by default, all columns) is transformed using function <code>f</code>. Columns not selected by <code>cols</code> are copied.</p><p><code>f</code> must return <code>AbstractVector</code> objects all with the same length or scalars (all values other than <code>AbstractVector</code> are considered to be a scalar).</p><p>The <code>cols</code> column selector can be any value accepted as column selector by the <code>names</code> function.</p><p>Note that <code>mapcols</code> guarantees not to reuse the columns from <code>df</code> in the returned <code>DataFrame</code>. If <code>f</code> returns its argument then it gets copied before being stored.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
 4×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -964,7 +964,7 @@
    1 │     1    121
    2 │     2    144
    3 │     3    169
-   4 │     4    196</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L413-L462">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.mapcols!" href="#DataFrames.mapcols!"><code>DataFrames.mapcols!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())</code></pre><p>Update a <code>DataFrame</code> in-place where each column of <code>df</code> selected by <code>cols</code> (by default, all columns) is transformed using function <code>f</code>. Columns not selected by <code>cols</code> are left unchanged.</p><p><code>f</code> must return <code>AbstractVector</code> objects all with the same length or scalars (all values other than <code>AbstractVector</code> are considered to be a scalar).</p><p>Note that <code>mapcols!</code> reuses the columns from <code>df</code> if they are returned by <code>f</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
+   4 │     4    196</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L413-L462">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.mapcols!" href="#DataFrames.mapcols!"><code>DataFrames.mapcols!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())</code></pre><p>Update a <code>DataFrame</code> in-place where each column of <code>df</code> selected by <code>cols</code> (by default, all columns) is transformed using function <code>f</code>. Columns not selected by <code>cols</code> are left unchanged.</p><p><code>f</code> must return <code>AbstractVector</code> objects all with the same length or scalars (all values other than <code>AbstractVector</code> are considered to be a scalar).</p><p>Note that <code>mapcols!</code> reuses the columns from <code>df</code> if they are returned by <code>f</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
 4×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -996,7 +996,7 @@
    1 │     2    121
    2 │     8    144
    3 │    18    169
-   4 │    32    196</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L497-L547">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.permute!" href="#Base.permute!"><code>Base.permute!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">permute!(df::AbstractDataFrame, p)</code></pre><p>Permute data frame <code>df</code> in-place, according to permutation <code>p</code>. Throws <code>ArgumentError</code> if <code>p</code> is not a permutation.</p><p>To return a new data frame instead of permuting <code>df</code> in-place, use <code>df[p, :]</code>.</p><p><code>permute!</code> will produce a correct result even if some columns of passed data frame or permutation <code>p</code> are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>permute!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
+   4 │    32    196</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L497-L547">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.permute!" href="#Base.permute!"><code>Base.permute!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">permute!(df::AbstractDataFrame, p)</code></pre><p>Permute data frame <code>df</code> in-place, according to permutation <code>p</code>. Throws <code>ArgumentError</code> if <code>p</code> is not a permutation.</p><p>To return a new data frame instead of permuting <code>df</code> in-place, use <code>df[p, :]</code>.</p><p><code>permute!</code> will produce a correct result even if some columns of passed data frame or permutation <code>p</code> are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>permute!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
 5×3 DataFrame
  Row │ a      b      c
      │ Int64  Int64  Int64
@@ -1016,7 +1016,7 @@
    2 │     3      8     13
    3 │     1      6     11
    4 │     2      7     12
-   5 │     4      9     14</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2441-L2482">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.prepend!" href="#Base.prepend!"><code>Base.prepend!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,
+   5 │     4      9     14</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2441-L2482">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.prepend!" href="#Base.prepend!"><code>Base.prepend!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,
          promote::Bool=(cols in [:union, :subset]))</code></pre><p>Add the rows of tables passed as <code>tables</code> to the beginning of <code>df</code>. If the table is not an <code>AbstractDataFrame</code> then it is converted using <code>DataFrame(table, copycols=false)</code> before being appended.</p><p>Add the rows of <code>df2</code> to the beginning of <code>df</code>. If the second argument <code>table</code> is not an <code>AbstractDataFrame</code> then it is converted using <code>DataFrame(table, copycols=false)</code> before being prepended.</p><p>The exact behavior of <code>prepend!</code> depends on the <code>cols</code> argument:</p><ul><li>If <code>cols == :setequal</code> (this is the default) then <code>df2</code> must contain exactly the same columns as <code>df</code> (but possibly in a different order).</li><li>If <code>cols == :orderequal</code> then <code>df2</code> must contain the same columns in the same order (for <code>AbstractDict</code> this option requires that <code>keys(row)</code> matches <code>propertynames(df)</code> to allow for support of ordered dicts; however, if <code>df2</code> is a <code>Dict</code> an error is thrown as it is an unordered collection).</li><li>If <code>cols == :intersect</code> then <code>df2</code> may contain more columns than <code>df</code>, but all column names that are present in <code>df</code> must be present in <code>df2</code> and only these are used.</li><li>If <code>cols == :subset</code> then <code>append!</code> behaves like for <code>:intersect</code> but if some column is missing in <code>df2</code> then a <code>missing</code> value is pushed to <code>df</code>.</li><li>If <code>cols == :union</code> then <code>append!</code> adds columns missing in <code>df</code> that are present in <code>df2</code>, for columns present in <code>df</code> but missing in <code>df2</code> a <code>missing</code> value is pushed.</li></ul><p>If <code>promote=true</code> and element type of a column 
present in <code>df</code> does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in <code>df</code>. If <code>promote=false</code> an error is thrown.</p><p>The above rule has the following exceptions:</p><ul><li>If <code>df</code> has no columns then copies of columns from <code>df2</code> are added to it.</li><li>If <code>df2</code> has no columns then calling <code>prepend!</code> leaves <code>df</code> unchanged.</li></ul><p>Please note that <code>prepend!</code> must not be used on a <code>DataFrame</code> that contains columns that are aliases (equal when compared with <code>===</code>).</p><p>Metadata: table-level <code>:note</code>-style metadata and column-level <code>:note</code>-style metadata for columns present in <code>df</code> are preserved. If new columns are added their <code>:note</code>-style metadata is copied from the appended table. Other metadata is dropped.</p><p>See also: use <a href="#Base.pushfirst!"><code>pushfirst!</code></a> to add individual rows at the beginning of a data frame, <a href="#Base.append!"><code>append!</code></a> to add a table at the end, and <a href="#Base.vcat"><code>vcat</code></a> to vertically concatenate data frames.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(A=1:3, B=1:3)
 3×2 DataFrame
  Row │ A      B
@@ -1059,7 +1059,7 @@
    3 │ missing    missing        2
    4 │       4.0        4  missing
    5 │       5.0        5  missing
-   6 │       6.0        6  missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/insertion.jl#L118-L213">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.push!" href="#Base.push!"><code>Base.push!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
+   6 │       6.0        6  missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/insertion.jl#L118-L213">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.push!" href="#Base.push!"><code>Base.push!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
       cols::Symbol=:setequal, promote::Bool=false)
 push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
                                 Tables.AbstractRow}...;
@@ -1139,7 +1139,7 @@
 ─────┼──────────────
    1 │     1      2
    2 │     3      4
-   3 │     5      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/insertion.jl#L443-L537">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pushfirst!" href="#Base.pushfirst!"><code>Base.pushfirst!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
+   3 │     5      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/insertion.jl#L443-L537">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pushfirst!" href="#Base.pushfirst!"><code>Base.pushfirst!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
            cols::Symbol=:setequal, promote::Bool=false)
 pushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
                                      Tables.AbstractRow}...;
@@ -1219,7 +1219,7 @@
 ─────┼──────────────
    1 │     3      4
    2 │     5      6
-   3 │     1      2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/insertion.jl#L548-L642">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reduce" href="#Base.reduce"><code>Base.reduce</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reduce(::typeof(vcat),
+   3 │     1      2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/insertion.jl#L548-L642">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reduce" href="#Base.reduce"><code>Base.reduce</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reduce(::typeof(vcat),
        dfs::Union{AbstractVector{&lt;:AbstractDataFrame},
                   Tuple{AbstractDataFrame, Vararg{AbstractDataFrame}}};
        cols::Union{Symbol, AbstractVector{Symbol},
@@ -1278,7 +1278,7 @@
    6 │     6        6  missing       2
    7 │     7  missing        7       3
    8 │     8  missing        8       3
-   9 │     9  missing        9       3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L607-L693">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.repeat" href="#Base.repeat"><code>Base.repeat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">repeat(df::AbstractDataFrame; inner::Integer = 1, outer::Integer = 1)</code></pre><p>Construct a data frame by repeating rows in <code>df</code>. <code>inner</code> specifies how many times each row is repeated, and <code>outer</code> specifies how many times the full set of rows is repeated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
+   9 │     9  missing        9       3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L607-L693">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.repeat" href="#Base.repeat"><code>Base.repeat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">repeat(df::AbstractDataFrame; inner::Integer = 1, outer::Integer = 1)</code></pre><p>Construct a data frame by repeating rows in <code>df</code>. <code>inner</code> specifies how many times each row is repeated, and <code>outer</code> specifies how many times the full set of rows is repeated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
 2×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -1302,7 +1302,7 @@
    9 │     1      3
   10 │     1      3
   11 │     2      4
-  12 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1818-L1855">source</a></section><section><div><pre><code class="language-julia hljs">repeat(df::AbstractDataFrame, count::Integer)</code></pre><p>Construct a data frame by repeating each row in <code>df</code> the number of times specified by <code>count</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
+  12 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1818-L1855">source</a></section><section><div><pre><code class="language-julia hljs">repeat(df::AbstractDataFrame, count::Integer)</code></pre><p>Construct a data frame by repeating each row in <code>df</code> the number of times specified by <code>count</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
 2×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -1318,7 +1318,7 @@
    1 │     1      3
    2 │     2      4
    3 │     1      3
-   4 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1862-L1890">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.repeat!" href="#DataFrames.repeat!"><code>DataFrames.repeat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">repeat!(df::DataFrame; inner::Integer=1, outer::Integer=1)</code></pre><p>Update a data frame <code>df</code> in-place by repeating its rows. <code>inner</code> specifies how many times each row is repeated, and <code>outer</code> specifies how many times the full set of rows is repeated. Columns of <code>df</code> are freshly allocated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
+   4 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1862-L1890">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.repeat!" href="#DataFrames.repeat!"><code>DataFrames.repeat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">repeat!(df::DataFrame; inner::Integer=1, outer::Integer=1)</code></pre><p>Update a data frame <code>df</code> in-place by repeating its rows. <code>inner</code> specifies how many times each row is repeated, and <code>outer</code> specifies how many times the full set of rows is repeated. Columns of <code>df</code> are freshly allocated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
 2×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -1344,7 +1344,7 @@
    9 │     1      3
   10 │     1      3
   11 │     2      4
-  12 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1356-L1395">source</a></section><section><div><pre><code class="language-julia hljs">repeat!(df::DataFrame, count::Integer)</code></pre><p>Update a data frame <code>df</code> in-place by repeating its rows the number of times specified by <code>count</code>. Columns of <code>df</code> are freshly allocated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
+  12 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1356-L1395">source</a></section><section><div><pre><code class="language-julia hljs">repeat!(df::DataFrame, count::Integer)</code></pre><p>Update a data frame <code>df</code> in-place by repeating its rows the number of times specified by <code>count</code>. Columns of <code>df</code> are freshly allocated.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Example</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
 2×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -1360,7 +1360,7 @@
    1 │     1      3
    2 │     2      4
    3 │     1      3
-   4 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1409-L1437">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reverse" href="#Base.reverse"><code>Base.reverse</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reverse(df::AbstractDataFrame, start=1, stop=nrow(df))</code></pre><p>Return a data frame containing the rows in <code>df</code> in reversed order. If <code>start</code> and <code>stop</code> are provided, only rows in the <code>start:stop</code> range are affected.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
+   4 │     2      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1409-L1437">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reverse" href="#Base.reverse"><code>Base.reverse</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reverse(df::AbstractDataFrame, start=1, stop=nrow(df))</code></pre><p>Return a data frame containing the rows in <code>df</code> in reversed order. If <code>start</code> and <code>stop</code> are provided, only rows in the <code>start:stop</code> range are affected.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
 5×3 DataFrame
  Row │ a      b      c
      │ Int64  Int64  Int64
@@ -1391,7 +1391,7 @@
    2 │     3      8     13
    3 │     2      7     12
    4 │     4      9     14
-   5 │     5     10     15</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2241-L2285">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reverse!" href="#Base.reverse!"><code>Base.reverse!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reverse!(df::AbstractDataFrame, start=1, stop=nrow(df))</code></pre><p>Mutate data frame in-place to reverse its row order. If <code>start</code> and <code>stop</code> are provided, only rows in the <code>start:stop</code> range are affected.</p><p><code>reverse!</code> will produce a correct result even if some columns of passed data frame are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>reverse!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
+   5 │     5     10     15</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2241-L2285">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.reverse!" href="#Base.reverse!"><code>Base.reverse!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">reverse!(df::AbstractDataFrame, start=1, stop=nrow(df))</code></pre><p>Mutate data frame in-place to reverse its row order. If <code>start</code> and <code>stop</code> are provided, only rows in the <code>start:stop</code> range are affected.</p><p><code>reverse!</code> will produce a correct result even if some columns of passed data frame are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>reverse!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:5, b=6:10, c=11:15)
 5×3 DataFrame
  Row │ a      b      c
      │ Int64  Int64  Int64
@@ -1422,7 +1422,7 @@
    2 │     3      8     13
    3 │     4      9     14
    4 │     2      7     12
-   5 │     1      6     11</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2289-L2339">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.select" href="#DataFrames.select"><code>DataFrames.select</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">select(df::AbstractDataFrame, args...;
+   5 │     1      6     11</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2289-L2339">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.select" href="#DataFrames.select"><code>DataFrames.select</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">select(df::AbstractDataFrame, args...;
        copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)
 select(args::Callable, df::DataFrame;
        renamecols::Bool=true, threads::Bool=true)
@@ -1658,14 +1658,14 @@
    5 │     2      3    0.375             2          2
    6 │     1      5    0.625             1          4
    7 │     1      5    0.625             1          5
-   8 │     2      3    0.375             2          3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selection.jl#L1016-L1302">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.select!" href="#DataFrames.select!"><code>DataFrames.select!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">select!(df::AbstractDataFrame, args...;
+   8 │     2      3    0.375             2          3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selection.jl#L1016-L1302">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.select!" href="#DataFrames.select!"><code>DataFrames.select!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">select!(df::AbstractDataFrame, args...;
         renamecols::Bool=true, threads::Bool=true)
 select!(args::Base.Callable, df::DataFrame;
         renamecols::Bool=true, threads::Bool=true)
 select!(gd::GroupedDataFrame, args...; ungroup::Bool=true,
         renamecols::Bool=true, threads::Bool=true)
 select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true,
-        renamecols::Bool=true, threads::Bool=true)</code></pre><p>Mutate <code>df</code> or <code>gd</code> in place to retain only columns or transformations specified by <code>args...</code> and return it. The result is guaranteed to have the same number of rows as <code>df</code> or parent of <code>gd</code>, except when no columns are selected (in which case the result has zero rows).</p><p>If a <code>SubDataFrame</code> or <code>GroupedDataFrame{SubDataFrame}</code> is passed, the parent data frame is updated using columns generated by <code>args...</code>, following the same rules as indexing:</p><ul><li>for existing columns filtered-out rows are filled with values present in the old columns</li><li>for new columns (which is only allowed if <code>SubDataFrame</code> was created with <code>:</code> as column selector) filtered-out rows are filled with <code>missing</code></li><li>dropped columns (which are only allowed if <code>SubDataFrame</code> was created with <code>:</code> as column selector) are removed</li><li>if <code>SubDataFrame</code> was not created with <code>:</code> as column selector then <code>select!</code> is only allowed if the transformations keep exactly the same sequence of column names as is in the passed <code>df</code></li></ul><p>If a <code>GroupedDataFrame</code> is passed then it is updated to reflect the new rows of its updated parent. If there are independent <code>GroupedDataFrame</code> objects constructed using the same parent data frame they might get corrupt.</p><p>Below detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.</p><p>All these operations are supported both for <code>AbstractDataFrame</code> (when split and combine steps are skipped) and <code>GroupedDataFrame</code>. Technically, <code>AbstractDataFrame</code> is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the <code>keepkeys</code> and <code>ungroup</code> keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.</p><p>In order to perform operations by groups you first need to create a <code>GroupedDataFrame</code> object from your data frame using the <code>groupby</code> function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.</p><p>Operations can then be applied on each group using one of the following functions:</p><ul><li><code>combine</code>: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in <code>GroupedDataFrame</code>; it is typically used to compute summary statistics by group; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>select</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; <code>select!</code> is an in-place version of <code>select</code>; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>transform</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; <code>transform!</code> is an in-place version of <code>transform</code>; existing columns in the source data frame are put as first columns in the result;</li></ul><p>As a special case, if a <code>GroupedDataFrame</code> that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. 
The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.</p><p>All these functions take a specification of one or more functions to apply to each subset of the <code>DataFrame</code>. This specification can be of the following forms:</p><ol><li>standard column selectors (integers, <code>Symbol</code>s, strings, vectors of integers, vectors of <code>Symbol</code>s, vectors of strings, <code>All</code>, <code>Cols</code>, <code>:</code>, <code>Between</code>, <code>Not</code> and regular expressions)</li><li>a <code>cols =&gt; function</code> pair indicating that <code>function</code> should be called with positional arguments holding columns <code>cols</code>, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that <code>function</code> returns a single value or a vector; the generated name is created by concatenating source column name and <code>function</code> name by default (see examples below).</li><li>a <code>cols =&gt; function =&gt; target_cols</code> form additionally explicitly specifying the target column or columns, which must be a single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>. 
Additionally it can be a <code>Function</code> which takes a string or a vector of strings as an argument containing names of columns selected by <code>cols</code>, and returns the target columns names (all accepted types except <code>AsTable</code> are allowed).</li><li>a <code>col =&gt; target_cols</code> pair, which renames the column <code>col</code> to <code>target_cols</code>, which must be single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>.</li><li>column-independent operations <code>function =&gt; target_cols</code> or just <code>function</code> for specific <code>function</code>s where the input columns are omitted; without <code>target_cols</code> the new column has the same name as <code>function</code>, otherwise it must be single name (as a <code>Symbol</code> or a string). Supported <code>function</code>s are:<ul><li><code>nrow</code> to efficiently compute the number of rows in each group.</li><li><code>proprow</code> to efficiently compute the proportion of rows in each group.</li><li><code>eachindex</code> to return a vector holding the number of each row within each group.</li><li><code>groupindices</code> to return the group number.</li></ul></li><li>vectors or matrices containing transformations specified by the <code>Pair</code> syntax described in points 2 to 5</li><li>a function which will be called with a <code>SubDataFrame</code> corresponding to each group if a <code>GroupedDataFrame</code> is processed, or with the data frame itself if an <code>AbstractDataFrame</code> is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case <code>SubDataFrame</code> avoids excessive compilation)</li></ol><p>Note! 
If the expression of the form <code>x =&gt; y</code> is passed then except for the special convenience form <code>nrow =&gt; target_cols</code> it is always interpreted as <code>cols =&gt; function</code>. In particular the following expression <code>function =&gt; target_cols</code> is not a valid transformation specification.</p><p>Note! If <code>cols</code> or <code>target_cols</code> are one of <code>All</code>, <code>Cols</code>, <code>Between</code>, or <code>Not</code>, broadcasting using <code>.=&gt;</code> is supported and is equivalent to broadcasting the result of <code>names(df, cols)</code> or <code>names(df, target_cols)</code>. This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.</p><p>All functions have two types of signatures. One of them takes a <code>GroupedDataFrame</code> as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a <code>Function</code> or a <code>Type</code> is passed as the first argument and a <code>GroupedDataFrame</code> as the second argument (similar to <code>map</code>).</p><p>As a special rule, with the <code>cols =&gt; function</code> and <code>cols =&gt; function =&gt; target_cols</code> syntaxes, if <code>cols</code> is wrapped in an <code>AsTable</code> object then a <code>NamedTuple</code> containing columns selected by <code>cols</code> is passed to <code>function</code>. 
The documentation of <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> provides more information about this functionality, in particular covering performance considerations.</p><p>What is allowed for <code>function</code> to return is determined by the <code>target_cols</code> value:</p><ol><li>If both <code>cols</code> and <code>target_cols</code> are omitted (so only a <code>function</code> is passed), then returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code> or a <code>DataFrameRow</code> will produce multiple columns in the result. Returning any other value produces a single column.</li><li>If <code>target_cols</code> is a <code>Symbol</code> or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code>, or a <code>DataFrameRow</code> raises an error.</li><li>If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings or <code>AsTable</code> it is assumed that <code>function</code> returns multiple columns. If <code>function</code> returns one of <code>AbstractDataFrame</code>, <code>NamedTuple</code>, <code>DataFrameRow</code>, <code>Tables.AbstractRow</code>, <code>AbstractMatrix</code> then rules described in point 1 above apply. If <code>function</code> returns an <code>AbstractVector</code> then each element of this vector must support the <code>keys</code> function, which must return a collection of <code>Symbol</code>s, strings or integers; the return value of <code>keys</code> must be identical for all elements. Then as many columns are created as there are elements in the return value of the <code>keys</code> function. 
If <code>target_cols</code> is <code>AsTable</code> then their names are set to be equal to the key names except if <code>keys</code> returns integers, in which case they are prefixed by <code>x</code> (so the column names are e.g. <code>x1</code>, <code>x2</code>, ...). If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings then column names produced using the rules above are ignored and replaced by <code>target_cols</code> (the number of columns must be the same as the length of <code>target_cols</code> in this case). If <code>fun</code> returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the <code>Tables.columntable</code> function is called on it to get the resulting columns and their names. The names are retained when <code>target_cols</code> is <code>AsTable</code> and are replaced if <code>target_cols</code> is a vector of <code>Symbol</code>s or strings.</li></ol><p>In all of these cases, <code>function</code> can return either a single row or multiple rows. As a particular rule, values wrapped in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and then treated as a single row.</p><p><code>select</code>/<code>select!</code> and <code>transform</code>/<code>transform!</code> always return a data frame with the same number and order of rows as the source (even if <code>GroupedDataFrame</code> had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).</p><p>For <code>combine</code>, rows in the returned object appear in the order of groups in the <code>GroupedDataFrame</code>. 
The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a <code>DataFrame()</code> or <code>NamedTuple()</code> is returned, in which case a given group is skipped.</p><p>It is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.</p><p>To apply <code>function</code> to each row instead of whole columns, it can be wrapped in a <code>ByRow</code> struct. <code>cols</code> can be any column indexing syntax, in which case <code>function</code> will be passed one argument for each of the columns specified by <code>cols</code> or a <code>NamedTuple</code> of them if specified columns are wrapped in <code>AsTable</code>. If <code>ByRow</code> is used it is allowed for <code>cols</code> to select an empty set of columns, in which case <code>function</code> is called for each row without any arguments and an empty <code>NamedTuple</code> is passed if empty set of columns is wrapped in <code>AsTable</code>.</p><p>If a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. <code>select!(df, [:a], :, r&quot;a&quot;)</code> is allowed) and only the first occurrence is used. In particular a syntax to move column <code>:col</code> to the first position in the data frame is <code>select!(df, :col, :)</code>. On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. <code>select!(df, :a, :a =&gt; :a)</code> or <code>select!(df, :a, :a =&gt; ByRow(sin) =&gt; :a)</code> are not allowed.</p><p>In general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. 
This can happen via expressions like: <code>:x1</code>, <code>[:x1, :x2]</code>, <code>:x1 =&gt; :x2</code>, <code>:x1 =&gt; identity =&gt; :x2</code>, or <code>:x1 =&gt; (x -&gt; @view x[inds])</code> (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the <code>copycols</code> keyword argument:</p><ul><li>if <code>copycols=true</code> then results of such transformations always perform a copy of the source column or its view;</li><li>if <code>copycols=false</code> then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using <code>===</code>, which excludes views of source columns) performs a copy;</li></ul><p>Note that performing <code>transform!</code> or <code>select!</code> assumes that <code>copycols=false</code>.</p><p>If <code>df</code> is a <code>SubDataFrame</code> and <code>copycols=true</code> then a <code>DataFrame</code> is returned and the same copying rules apply as for a <code>DataFrame</code> input: this means in particular that selected columns will be copied. If <code>copycols=false</code>, a <code>SubDataFrame</code> is returned without copying columns and in this case transforming or renaming columns is not allowed.</p><p>If a <code>GroupedDataFrame</code> is passed and <code>threads=true</code> (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like <code>sum</code> and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. 
Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or <code>threads=false</code> must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for <code>DataFrame</code> inputs.</p><p>In order to improve the performance of the operations some transformations invoke optimized implementation, see <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> for details.</p><p><strong>Keyword arguments</strong></p><ul><li><code>renamecols::Bool=true</code> : whether in the <code>cols =&gt; function</code> form automatically generated column names should include the name of transformation functions or not.</li><li><code>ungroup::Bool=true</code> : whether the return value of the operation on <code>gd</code> should be a data frame or a <code>GroupedDataFrame</code>.</li><li><code>threads::Bool=true</code> : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to <code>false</code> if some transformations require serial execution or are not thread-safe.</li></ul><p>Metadata: this function propagates table-level <code>:note</code>-style metadata. Column-level <code>:note</code>-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).    
As a special case for <code>GroupedDataFrame</code> if the output has the same name    as a grouping column and <code>keepkeys=true</code>, metadata is taken from    original grouping column.</p><p>See <a href="#DataFrames.select"><code>select</code></a> for examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selection.jl#L903-L949">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Random.shuffle" href="#Random.shuffle"><code>Random.shuffle</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">shuffle([rng=GLOBAL_RNG,] df::AbstractDataFrame)</code></pre><p>Return a copy of <code>df</code> with randomly permuted rows. The optional <code>rng</code> argument specifies a random number generator.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; using Random, StableRNGs
+        renamecols::Bool=true, threads::Bool=true)</code></pre><p>Mutate <code>df</code> or <code>gd</code> in place to retain only columns or transformations specified by <code>args...</code> and return it. The result is guaranteed to have the same number of rows as <code>df</code> or parent of <code>gd</code>, except when no columns are selected (in which case the result has zero rows).</p><p>If a <code>SubDataFrame</code> or <code>GroupedDataFrame{SubDataFrame}</code> is passed, the parent data frame is updated using columns generated by <code>args...</code>, following the same rules as indexing:</p><ul><li>for existing columns filtered-out rows are filled with values present in the old columns</li><li>for new columns (which are only allowed if <code>SubDataFrame</code> was created with <code>:</code> as column selector) filtered-out rows are filled with <code>missing</code></li><li>dropped columns (which are only allowed if <code>SubDataFrame</code> was created with <code>:</code> as column selector) are removed</li><li>if <code>SubDataFrame</code> was not created with <code>:</code> as column selector then <code>select!</code> is only allowed if the transformations keep exactly the same sequence of column names as is in the passed <code>df</code></li></ul><p>If a <code>GroupedDataFrame</code> is passed then it is updated to reflect the new rows of its updated parent. If there are independent <code>GroupedDataFrame</code> objects constructed using the same parent data frame they might become corrupted.</p><p>The common rules for all transformation functions supported by DataFrames.jl are explained and compared in detail below.</p><p>All these operations are supported both for <code>AbstractDataFrame</code> (when split and combine steps are skipped) and <code>GroupedDataFrame</code>. Technically, <code>AbstractDataFrame</code> is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the <code>keepkeys</code> and <code>ungroup</code> keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.</p><p>In order to perform operations by groups you first need to create a <code>GroupedDataFrame</code> object from your data frame using the <code>groupby</code> function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.</p><p>Operations can then be applied on each group using one of the following functions:</p><ul><li><code>combine</code>: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in <code>GroupedDataFrame</code>; it is typically used to compute summary statistics by group; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>select</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; <code>select!</code> is an in-place version of <code>select</code>; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>transform</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; <code>transform!</code> is an in-place version of <code>transform</code>; existing columns in the source data frame are put as first columns in the result;</li></ul><p>As a special case, if a <code>GroupedDataFrame</code> that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. 
The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.</p><p>All these functions take a specification of one or more functions to apply to each subset of the <code>DataFrame</code>. This specification can be of the following forms:</p><ol><li>standard column selectors (integers, <code>Symbol</code>s, strings, vectors of integers, vectors of <code>Symbol</code>s, vectors of strings, <code>All</code>, <code>Cols</code>, <code>:</code>, <code>Between</code>, <code>Not</code> and regular expressions)</li><li>a <code>cols =&gt; function</code> pair indicating that <code>function</code> should be called with positional arguments holding columns <code>cols</code>, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that <code>function</code> returns a single value or a vector; the generated name is created by concatenating source column name and <code>function</code> name by default (see examples below).</li><li>a <code>cols =&gt; function =&gt; target_cols</code> form additionally explicitly specifying the target column or columns, which must be a single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>. 
Additionally it can be a <code>Function</code> which takes a string or a vector of strings as an argument containing names of columns selected by <code>cols</code>, and returns the target column names (all accepted types except <code>AsTable</code> are allowed).</li><li>a <code>col =&gt; target_cols</code> pair, which renames the column <code>col</code> to <code>target_cols</code>, which must be a single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>.</li><li>column-independent operations <code>function =&gt; target_cols</code> or just <code>function</code> for specific <code>function</code>s where the input columns are omitted; without <code>target_cols</code> the new column has the same name as <code>function</code>, otherwise it must be a single name (as a <code>Symbol</code> or a string). Supported <code>function</code>s are:<ul><li><code>nrow</code> to efficiently compute the number of rows in each group.</li><li><code>proprow</code> to efficiently compute the proportion of rows in each group.</li><li><code>eachindex</code> to return a vector holding the number of each row within each group.</li><li><code>groupindices</code> to return the group number.</li></ul></li><li>vectors or matrices containing transformations specified by the <code>Pair</code> syntax described in points 2 to 5</li><li>a function which will be called with a <code>SubDataFrame</code> corresponding to each group if a <code>GroupedDataFrame</code> is processed, or with the data frame itself if an <code>AbstractDataFrame</code> is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case <code>SubDataFrame</code> avoids excessive compilation)</li></ol><p>Note! 
If the expression of the form <code>x =&gt; y</code> is passed then except for the special convenience form <code>nrow =&gt; target_cols</code> it is always interpreted as <code>cols =&gt; function</code>. In particular the following expression <code>function =&gt; target_cols</code> is not a valid transformation specification.</p><p>Note! If <code>cols</code> or <code>target_cols</code> are one of <code>All</code>, <code>Cols</code>, <code>Between</code>, or <code>Not</code>, broadcasting using <code>.=&gt;</code> is supported and is equivalent to broadcasting the result of <code>names(df, cols)</code> or <code>names(df, target_cols)</code>. This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.</p><p>All functions have two types of signatures. One of them takes a <code>GroupedDataFrame</code> as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a <code>Function</code> or a <code>Type</code> is passed as the first argument and a <code>GroupedDataFrame</code> as the second argument (similar to <code>map</code>).</p><p>As a special rule, with the <code>cols =&gt; function</code> and <code>cols =&gt; function =&gt; target_cols</code> syntaxes, if <code>cols</code> is wrapped in an <code>AsTable</code> object then a <code>NamedTuple</code> containing columns selected by <code>cols</code> is passed to <code>function</code>. 
The documentation of <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> provides more information about this functionality, in particular covering performance considerations.</p><p>What is allowed for <code>function</code> to return is determined by the <code>target_cols</code> value:</p><ol><li>If both <code>cols</code> and <code>target_cols</code> are omitted (so only a <code>function</code> is passed), then returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code> or a <code>DataFrameRow</code> will produce multiple columns in the result. Returning any other value produces a single column.</li><li>If <code>target_cols</code> is a <code>Symbol</code> or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code>, or a <code>DataFrameRow</code> raises an error.</li><li>If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings or <code>AsTable</code> it is assumed that <code>function</code> returns multiple columns. If <code>function</code> returns one of <code>AbstractDataFrame</code>, <code>NamedTuple</code>, <code>DataFrameRow</code>, <code>Tables.AbstractRow</code>, <code>AbstractMatrix</code> then rules described in point 1 above apply. If <code>function</code> returns an <code>AbstractVector</code> then each element of this vector must support the <code>keys</code> function, which must return a collection of <code>Symbol</code>s, strings or integers; the return value of <code>keys</code> must be identical for all elements. Then as many columns are created as there are elements in the return value of the <code>keys</code> function. 
If <code>target_cols</code> is <code>AsTable</code> then their names are set to be equal to the key names except if <code>keys</code> returns integers, in which case they are prefixed by <code>x</code> (so the column names are e.g. <code>x1</code>, <code>x2</code>, ...). If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings then column names produced using the rules above are ignored and replaced by <code>target_cols</code> (the number of columns must be the same as the length of <code>target_cols</code> in this case). If <code>fun</code> returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the <code>Tables.columntable</code> function is called on it to get the resulting columns and their names. The names are retained when <code>target_cols</code> is <code>AsTable</code> and are replaced if <code>target_cols</code> is a vector of <code>Symbol</code>s or strings.</li></ol><p>In all of these cases, <code>function</code> can return either a single row or multiple rows. As a particular rule, values wrapped in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and then treated as a single row.</p><p><code>select</code>/<code>select!</code> and <code>transform</code>/<code>transform!</code> always return a data frame with the same number and order of rows as the source (even if <code>GroupedDataFrame</code> had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).</p><p>For <code>combine</code>, rows in the returned object appear in the order of groups in the <code>GroupedDataFrame</code>. 
The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a <code>DataFrame()</code> or <code>NamedTuple()</code> is returned, in which case a given group is skipped.</p><p>It is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.</p><p>To apply <code>function</code> to each row instead of whole columns, it can be wrapped in a <code>ByRow</code> struct. <code>cols</code> can be any column indexing syntax, in which case <code>function</code> will be passed one argument for each of the columns specified by <code>cols</code> or a <code>NamedTuple</code> of them if specified columns are wrapped in <code>AsTable</code>. If <code>ByRow</code> is used it is allowed for <code>cols</code> to select an empty set of columns, in which case <code>function</code> is called for each row without any arguments and an empty <code>NamedTuple</code> is passed if empty set of columns is wrapped in <code>AsTable</code>.</p><p>If a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. <code>select!(df, [:a], :, r&quot;a&quot;)</code> is allowed) and only the first occurrence is used. In particular a syntax to move column <code>:col</code> to the first position in the data frame is <code>select!(df, :col, :)</code>. On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. <code>select!(df, :a, :a =&gt; :a)</code> or <code>select!(df, :a, :a =&gt; ByRow(sin) =&gt; :a)</code> are not allowed.</p><p>In general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. 
This can happen via expressions like: <code>:x1</code>, <code>[:x1, :x2]</code>, <code>:x1 =&gt; :x2</code>, <code>:x1 =&gt; identity =&gt; :x2</code>, or <code>:x1 =&gt; (x -&gt; @view x[inds])</code> (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the <code>copycols</code> keyword argument:</p><ul><li>if <code>copycols=true</code> then results of such transformations always perform a copy of the source column or its view;</li><li>if <code>copycols=false</code> then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using <code>===</code>, which excludes views of source columns) performs a copy;</li></ul><p>Note that performing <code>transform!</code> or <code>select!</code> assumes that <code>copycols=false</code>.</p><p>If <code>df</code> is a <code>SubDataFrame</code> and <code>copycols=true</code> then a <code>DataFrame</code> is returned and the same copying rules apply as for a <code>DataFrame</code> input: this means in particular that selected columns will be copied. If <code>copycols=false</code>, a <code>SubDataFrame</code> is returned without copying columns and in this case transforming or renaming columns is not allowed.</p><p>If a <code>GroupedDataFrame</code> is passed and <code>threads=true</code> (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like <code>sum</code> and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. 
Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or <code>threads=false</code> must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for <code>DataFrame</code> inputs.</p><p>In order to improve the performance of the operations some transformations invoke optimized implementation, see <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> for details.</p><p><strong>Keyword arguments</strong></p><ul><li><code>renamecols::Bool=true</code> : whether in the <code>cols =&gt; function</code> form automatically generated column names should include the name of transformation functions or not.</li><li><code>ungroup::Bool=true</code> : whether the return value of the operation on <code>gd</code> should be a data frame or a <code>GroupedDataFrame</code>.</li><li><code>threads::Bool=true</code> : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to <code>false</code> if some transformations require serial execution or are not thread-safe.</li></ul><p>Metadata: this function propagates table-level <code>:note</code>-style metadata. Column-level <code>:note</code>-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).    
As a special case for <code>GroupedDataFrame</code> if the output has the same name    as a grouping column and <code>keepkeys=true</code>, metadata is taken from    original grouping column.</p><p>See <a href="#DataFrames.select"><code>select</code></a> for examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selection.jl#L903-L949">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Random.shuffle" href="#Random.shuffle"><code>Random.shuffle</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">shuffle([rng=GLOBAL_RNG,] df::AbstractDataFrame)</code></pre><p>Return a copy of <code>df</code> with randomly permuted rows. The optional <code>rng</code> argument specifies a random number generator.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; using Random, StableRNGs
 
 julia&gt; rng = StableRNG(1234);
 
@@ -1678,7 +1678,7 @@
    2 │     1      1
    3 │     3      3
    4 │     5      5
-   5 │     4      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2539-L2565">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Random.shuffle!" href="#Random.shuffle!"><code>Random.shuffle!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">shuffle!([rng=GLOBAL_RNG,] df::AbstractDataFrame)</code></pre><p>Randomly permute rows of <code>df</code> in-place. The optional <code>rng</code> argument specifies a random number generator.</p><p><code>shuffle!</code> will produce a correct result even if some columns of passed data frame are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>shuffle!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; using Random, StableRNGs
+   5 │     4      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2539-L2565">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Random.shuffle!" href="#Random.shuffle!"><code>Random.shuffle!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">shuffle!([rng=GLOBAL_RNG,] df::AbstractDataFrame)</code></pre><p>Randomly permute rows of <code>df</code> in-place. The optional <code>rng</code> argument specifies a random number generator.</p><p><code>shuffle!</code> will produce a correct result even if some columns of passed data frame are identical (checked with <code>===</code>). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then <code>shuffle!</code> result might be incorrect.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>Metadata having other styles is dropped (from parent data frame when <code>df</code> is a <code>SubDataFrame</code>).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; using Random, StableRNGs
 
 julia&gt; rng = StableRNG(1234);
 
@@ -1691,11 +1691,11 @@
    2 │     1      1
    3 │     3      3
    4 │     5      5
-   5 │     4      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2571-L2603">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.table_transformation" href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">table_transformation(df_sel::AbstractDataFrame, fun)</code></pre><p>This is the function called when <code>AsTable(...) =&gt; fun</code> is requested. The <code>df_sel</code> argument is a data frame storing columns selected by the <code>AsTable(...)</code> selector.</p><p>By default it calls <code>default_table_transformation</code>. 
However, it is allowed to add special methods for specific types of <code>fun</code>, as long as the result matches what would be produced by <code>default_table_transformation</code>, except that it is allowed to perform <code>eltype</code> conversion of the resulting vectors or value type promotions that are consistent with <code>promote_type</code>.</p><p>It is guaranteed that <code>df_sel</code> has at least one column.</p><p>The main use of special <code>table_transformation</code> methods is to provide more efficient than the default implementations of requested <code>fun</code> transformation.</p><p>This function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental.</p><p>Fast paths are implemented within DataFrames.jl for the following functions <code>fun</code>:</p><ul><li><code>sum</code>, <code>ByRow(sum)</code>, <code>ByRow(sum∘skipmissing)</code></li><li><code>length</code>, <code>ByRow(length)</code>, <code>ByRow(length∘skipmissing)</code></li><li><code>mean</code>, <code>ByRow(mean)</code>, <code>ByRow(mean∘skipmissing)</code></li><li><code>ByRow(var)</code>, <code>ByRow(var∘skipmissing)</code></li><li><code>ByRow(std)</code>, <code>ByRow(std∘skipmissing)</code></li><li><code>ByRow(median)</code>, <code>ByRow(median∘skipmissing)</code></li><li><code>minimum</code>, <code>ByRow(minimum)</code>, <code>ByRow(minimum∘skipmissing)</code></li><li><code>maximum</code>, <code>ByRow(maximum)</code>, <code>ByRow(maximum∘skipmissing)</code></li><li><code>fun∘collect</code> and <code>ByRow(fun∘collect)</code> where <code>fun</code> is any function</li></ul><p>Note that in order to improve the performance <code>ByRow(sum)</code>, <code>ByRow(sum∘skipmissing)</code>, <code>ByRow(mean)</code>, and <code>ByRow(mean∘skipmissing)</code> perform all operations in the target element type. 
In some very rare cases (like mixing very large <code>Int64</code> values and <code>Float64</code> values) it can lead to a result different from the one that would be obtained by calling the function outside of DataFrames.jl. The way to avoid this precision loss is to use an anonymous function, e.g. instead of <code>ByRow(sum)</code> use <code>ByRow(x -&gt; sum(x))</code>. However, in general for such scenarios even standard aggregation functions should not be considered to provide reliable output, and users are recommended to switch to higher precision calculations. An example of a case when standard <code>sum</code> is affected by the situation discussed is:</p><pre><code class="nohighlight hljs">julia&gt; sum(Any[typemax(Int), typemax(Int), 1.0])
+   5 │     4      4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2571-L2603">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.table_transformation" href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">table_transformation(df_sel::AbstractDataFrame, fun)</code></pre><p>This is the function called when <code>AsTable(...) =&gt; fun</code> is requested. The <code>df_sel</code> argument is a data frame storing columns selected by the <code>AsTable(...)</code> selector.</p><p>By default it calls <code>default_table_transformation</code>. 
However, it is allowed to add special methods for specific types of <code>fun</code>, as long as the result matches what would be produced by <code>default_table_transformation</code>, except that it is allowed to perform <code>eltype</code> conversion of the resulting vectors or value type promotions that are consistent with <code>promote_type</code>.</p><p>It is guaranteed that <code>df_sel</code> has at least one column.</p><p>The main use of special <code>table_transformation</code> methods is to provide more efficient than the default implementations of requested <code>fun</code> transformation.</p><p>This function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental.</p><p>Fast paths are implemented within DataFrames.jl for the following functions <code>fun</code>:</p><ul><li><code>sum</code>, <code>ByRow(sum)</code>, <code>ByRow(sum∘skipmissing)</code></li><li><code>length</code>, <code>ByRow(length)</code>, <code>ByRow(length∘skipmissing)</code></li><li><code>mean</code>, <code>ByRow(mean)</code>, <code>ByRow(mean∘skipmissing)</code></li><li><code>ByRow(var)</code>, <code>ByRow(var∘skipmissing)</code></li><li><code>ByRow(std)</code>, <code>ByRow(std∘skipmissing)</code></li><li><code>ByRow(median)</code>, <code>ByRow(median∘skipmissing)</code></li><li><code>minimum</code>, <code>ByRow(minimum)</code>, <code>ByRow(minimum∘skipmissing)</code></li><li><code>maximum</code>, <code>ByRow(maximum)</code>, <code>ByRow(maximum∘skipmissing)</code></li><li><code>fun∘collect</code> and <code>ByRow(fun∘collect)</code> where <code>fun</code> is any function</li></ul><p>Note that in order to improve the performance <code>ByRow(sum)</code>, <code>ByRow(sum∘skipmissing)</code>, <code>ByRow(mean)</code>, and <code>ByRow(mean∘skipmissing)</code> perform all operations in the target element type. 
In some very rare cases (like mixing very large <code>Int64</code> values and <code>Float64</code> values) it can lead to a result different from the one that would be obtained by calling the function outside of DataFrames.jl. The way to avoid this precision loss is to use an anonymous function, e.g. instead of <code>ByRow(sum)</code> use <code>ByRow(x -&gt; sum(x))</code>. However, in general for such scenarios even standard aggregation functions should not be considered to provide reliable output, and users are recommended to switch to higher precision calculations. An example of a case when standard <code>sum</code> is affected by the situation discussed is:</p><pre><code class="nohighlight hljs">julia&gt; sum(Any[typemax(Int), typemax(Int), 1.0])
 -1.0
 
 julia&gt; sum(Any[1.0, typemax(Int), typemax(Int)])
-1.8446744073709552e19</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selectionfast.jl#L1-L53">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.transform" href="#DataFrames.transform"><code>DataFrames.transform</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">transform(df::AbstractDataFrame, args...;
+1.8446744073709552e19</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selectionfast.jl#L1-L53">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.transform" href="#DataFrames.transform"><code>DataFrames.transform</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">transform(df::AbstractDataFrame, args...;
           copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)
 transform(f::Callable, df::DataFrame;
           renamecols::Bool=true, threads::Bool=true)
@@ -1727,14 +1727,14 @@
    2 │    10
 
 julia&gt; transform(gdf, x -&gt; (x=10,), keepkeys=true)
-ERROR: ArgumentError: column :x in returned data frame is not equal to grouping key :x</code></pre><p>See <a href="#DataFrames.select"><code>select</code></a> for more examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selection.jl#L1316-L1382">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.transform!" href="#DataFrames.transform!"><code>DataFrames.transform!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">transform!(df::AbstractDataFrame, args...;
+ERROR: ArgumentError: column :x in returned data frame is not equal to grouping key :x</code></pre><p>See <a href="#DataFrames.select"><code>select</code></a> for more examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selection.jl#L1316-L1382">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.transform!" href="#DataFrames.transform!"><code>DataFrames.transform!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">transform!(df::AbstractDataFrame, args...;
            renamecols::Bool=true, threads::Bool=true)
 transform!(args::Callable, df::AbstractDataFrame;
            renamecols::Bool=true, threads::Bool=true)
 transform!(gd::GroupedDataFrame, args...;
            ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)
 transform!(f::Base.Callable, gd::GroupedDataFrame;
-           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)</code></pre><p>Mutate <code>df</code> or <code>gd</code> in place to add columns specified by <code>args...</code> and return it. The result is guaranteed to have the same number of rows as <code>df</code>. Equivalent to <code>select!(df, :, args...)</code> or <code>select!(gd, :, args...)</code>, except that column renaming performs a copy.</p><p>Below detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.</p><p>All these operations are supported both for <code>AbstractDataFrame</code> (when split and combine steps are skipped) and <code>GroupedDataFrame</code>. Technically, <code>AbstractDataFrame</code> is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the <code>keepkeys</code> and <code>ungroup</code> keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.</p><p>In order to perform operations by groups you first need to create a <code>GroupedDataFrame</code> object from your data frame using the <code>groupby</code> function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.</p><p>Operations can then be applied on each group using one of the following functions:</p><ul><li><code>combine</code>: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in <code>GroupedDataFrame</code>; it is typically used to compute summary statistics by group; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>select</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; 
<code>select!</code> is an in-place version of <code>select</code>; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>transform</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; <code>transform!</code> is an in-place version of <code>transform</code>; existing columns in the source data frame are put as first columns in the result;</li></ul><p>As a special case, if a <code>GroupedDataFrame</code> that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.</p><p>All these functions take a specification of one or more functions to apply to each subset of the <code>DataFrame</code>. 
This specification can be of the following forms:</p><ol><li>standard column selectors (integers, <code>Symbol</code>s, strings, vectors of integers, vectors of <code>Symbol</code>s, vectors of strings, <code>All</code>, <code>Cols</code>, <code>:</code>, <code>Between</code>, <code>Not</code> and regular expressions)</li><li>a <code>cols =&gt; function</code> pair indicating that <code>function</code> should be called with positional arguments holding columns <code>cols</code>, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that <code>function</code> returns a single value or a vector; the generated name is created by concatenating source column name and <code>function</code> name by default (see examples below).</li><li>a <code>cols =&gt; function =&gt; target_cols</code> form additionally explicitly specifying the target column or columns, which must be a single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>. Additionally it can be a <code>Function</code> which takes a string or a vector of strings as an argument containing names of columns selected by <code>cols</code>, and returns the target columns names (all accepted types except <code>AsTable</code> are allowed).</li><li>a <code>col =&gt; target_cols</code> pair, which renames the column <code>col</code> to <code>target_cols</code>, which must be single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>.</li><li>column-independent operations <code>function =&gt; target_cols</code> or just <code>function</code> for specific <code>function</code>s where the input columns are omitted; without <code>target_cols</code> the new column has the same name as <code>function</code>, otherwise it must be single name (as a <code>Symbol</code> or a string). 
Supported <code>function</code>s are:<ul><li><code>nrow</code> to efficiently compute the number of rows in each group.</li><li><code>proprow</code> to efficiently compute the proportion of rows in each group.</li><li><code>eachindex</code> to return a vector holding the number of each row within each group.</li><li><code>groupindices</code> to return the group number.</li></ul></li><li>vectors or matrices containing transformations specified by the <code>Pair</code> syntax described in points 2 to 5</li><li>a function which will be called with a <code>SubDataFrame</code> corresponding to each group if a <code>GroupedDataFrame</code> is processed, or with the data frame itself if an <code>AbstractDataFrame</code> is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case <code>SubDataFrame</code> avoids excessive compilation)</li></ol><p>Note! If the expression of the form <code>x =&gt; y</code> is passed then except for the special convenience form <code>nrow =&gt; target_cols</code> it is always interpreted as <code>cols =&gt; function</code>. In particular the following expression <code>function =&gt; target_cols</code> is not a valid transformation specification.</p><p>Note! If <code>cols</code> or <code>target_cols</code> are one of <code>All</code>, <code>Cols</code>, <code>Between</code>, or <code>Not</code>, broadcasting using <code>.=&gt;</code> is supported and is equivalent to broadcasting the result of <code>names(df, cols)</code> or <code>names(df, target_cols)</code>. This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.</p><p>All functions have two types of signatures. One of them takes a <code>GroupedDataFrame</code> as the first argument and an arbitrary number of transformations described above as following arguments. 
The second type of signature is when a <code>Function</code> or a <code>Type</code> is passed as the first argument and a <code>GroupedDataFrame</code> as the second argument (similar to <code>map</code>).</p><p>As a special rule, with the <code>cols =&gt; function</code> and <code>cols =&gt; function =&gt; target_cols</code> syntaxes, if <code>cols</code> is wrapped in an <code>AsTable</code> object then a <code>NamedTuple</code> containing columns selected by <code>cols</code> is passed to <code>function</code>. The documentation of <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> provides more information about this functionality, in particular covering performance considerations.</p><p>What is allowed for <code>function</code> to return is determined by the <code>target_cols</code> value:</p><ol><li>If both <code>cols</code> and <code>target_cols</code> are omitted (so only a <code>function</code> is passed), then returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code> or a <code>DataFrameRow</code> will produce multiple columns in the result. Returning any other value produces a single column.</li><li>If <code>target_cols</code> is a <code>Symbol</code> or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code>, or a <code>DataFrameRow</code> raises an error.</li><li>If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings or <code>AsTable</code> it is assumed that <code>function</code> returns multiple columns. If <code>function</code> returns one of <code>AbstractDataFrame</code>, <code>NamedTuple</code>, <code>DataFrameRow</code>, <code>Tables.AbstractRow</code>, <code>AbstractMatrix</code> then rules described in point 1 above apply. 
If <code>function</code> returns an <code>AbstractVector</code> then each element of this vector must support the <code>keys</code> function, which must return a collection of <code>Symbol</code>s, strings or integers; the return value of <code>keys</code> must be identical for all elements. Then as many columns are created as there are elements in the return value of the <code>keys</code> function. If <code>target_cols</code> is <code>AsTable</code> then their names are set to be equal to the key names except if <code>keys</code> returns integers, in which case they are prefixed by <code>x</code> (so the column names are e.g. <code>x1</code>, <code>x2</code>, ...). If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings then column names produced using the rules above are ignored and replaced by <code>target_cols</code> (the number of columns must be the same as the length of <code>target_cols</code> in this case). If <code>fun</code> returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the <code>Tables.columntable</code> function is called on it to get the resulting columns and their names. The names are retained when <code>target_cols</code> is <code>AsTable</code> and are replaced if <code>target_cols</code> is a vector of <code>Symbol</code>s or strings.</li></ol><p>In all of these cases, <code>function</code> can return either a single row or multiple rows. 
As a particular rule, values wrapped in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and then treated as a single row.</p><p><code>select</code>/<code>select!</code> and <code>transform</code>/<code>transform!</code> always return a data frame with the same number and order of rows as the source (even if <code>GroupedDataFrame</code> had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).</p><p>For <code>combine</code>, rows in the returned object appear in the order of groups in the <code>GroupedDataFrame</code>. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a <code>DataFrame()</code> or <code>NamedTuple()</code> is returned, in which case a given group is skipped.</p><p>It is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.</p><p>To apply <code>function</code> to each row instead of whole columns, it can be wrapped in a <code>ByRow</code> struct. <code>cols</code> can be any column indexing syntax, in which case <code>function</code> will be passed one argument for each of the columns specified by <code>cols</code> or a <code>NamedTuple</code> of them if specified columns are wrapped in <code>AsTable</code>. If <code>ByRow</code> is used it is allowed for <code>cols</code> to select an empty set of columns, in which case <code>function</code> is called for each row without any arguments and an empty <code>NamedTuple</code> is passed if empty set of columns is wrapped in <code>AsTable</code>.</p><p>If a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. 
<code>select!(df, [:a], :, r&quot;a&quot;)</code> is allowed) and only the first occurrence is used. In particular a syntax to move column <code>:col</code> to the first position in the data frame is <code>select!(df, :col, :)</code>. On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. <code>select!(df, :a, :a =&gt; :a)</code> or <code>select!(df, :a, :a =&gt; ByRow(sin) =&gt; :a)</code> are not allowed.</p><p>In general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: <code>:x1</code>, <code>[:x1, :x2]</code>, <code>:x1 =&gt; :x2</code>, <code>:x1 =&gt; identity =&gt; :x2</code>, or <code>:x1 =&gt; (x -&gt; @view x[inds])</code> (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the <code>copycols</code> keyword argument:</p><ul><li>if <code>copycols=true</code> then results of such transformations always perform a copy of the source column or its view;</li><li>if <code>copycols=false</code> then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using <code>===</code>, which excludes views of source columns) performs a copy;</li></ul><p>Note that performing <code>transform!</code> or <code>select!</code> assumes that <code>copycols=false</code>.</p><p>If <code>df</code> is a <code>SubDataFrame</code> and <code>copycols=true</code> then a <code>DataFrame</code> is returned and the same copying rules apply as for a <code>DataFrame</code> input: this means in particular that selected columns will be copied. 
If <code>copycols=false</code>, a <code>SubDataFrame</code> is returned without copying columns and in this case transforming or renaming columns is not allowed.</p><p>If a <code>GroupedDataFrame</code> is passed and <code>threads=true</code> (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like <code>sum</code> and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or <code>threads=false</code> must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for <code>DataFrame</code> inputs.</p><p>In order to improve the performance of the operations some transformations invoke optimized implementation, see <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> for details.</p><p><strong>Keyword arguments</strong></p><ul><li><code>renamecols::Bool=true</code> : whether in the <code>cols =&gt; function</code> form automatically generated column names should include the name of transformation functions or not.</li><li><code>ungroup::Bool=true</code> : whether the return value of the operation on <code>gd</code> should be a data frame or a <code>GroupedDataFrame</code>.</li><li><code>threads::Bool=true</code> : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to <code>false</code> if some transformations require serial execution or are not thread-safe.</li></ul><p>Metadata: this function propagates table-level <code>:note</code>-style metadata. Column-level <code>:note</code>-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).    As a special case for <code>GroupedDataFrame</code> if the output has the same name    as a grouping column and <code>keepkeys=true</code>, metadata is taken from    original grouping column.</p><p>See <a href="#DataFrames.select"><code>select</code></a> for examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selection.jl#L969-L999">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.vcat" href="#Base.vcat"><code>Base.vcat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">vcat(dfs::AbstractDataFrame...;
+           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)</code></pre><p>Mutate <code>df</code> or <code>gd</code> in place to add columns specified by <code>args...</code> and return it. The result is guaranteed to have the same number of rows as <code>df</code>. Equivalent to <code>select!(df, :, args...)</code> or <code>select!(gd, :, args...)</code>, except that column renaming performs a copy.</p><p>Below detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.</p><p>All these operations are supported both for <code>AbstractDataFrame</code> (when split and combine steps are skipped) and <code>GroupedDataFrame</code>. Technically, <code>AbstractDataFrame</code> is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the <code>keepkeys</code> and <code>ungroup</code> keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.</p><p>In order to perform operations by groups you first need to create a <code>GroupedDataFrame</code> object from your data frame using the <code>groupby</code> function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.</p><p>Operations can then be applied on each group using one of the following functions:</p><ul><li><code>combine</code>: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in <code>GroupedDataFrame</code>; it is typically used to compute summary statistics by group; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>select</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; 
<code>select!</code> is an in-place version of <code>select</code>; for <code>GroupedDataFrame</code> if grouping columns are kept they are put as first columns in the result;</li><li><code>transform</code>: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; <code>transform!</code> is an in-place version of <code>transform</code>; existing columns in the source data frame are put as first columns in the result;</li></ul><p>As a special case, if a <code>GroupedDataFrame</code> that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.</p><p>All these functions take a specification of one or more functions to apply to each subset of the <code>DataFrame</code>. 
This specification can be of the following forms:</p><ol><li>standard column selectors (integers, <code>Symbol</code>s, strings, vectors of integers, vectors of <code>Symbol</code>s, vectors of strings, <code>All</code>, <code>Cols</code>, <code>:</code>, <code>Between</code>, <code>Not</code> and regular expressions)</li><li>a <code>cols =&gt; function</code> pair indicating that <code>function</code> should be called with positional arguments holding columns <code>cols</code>, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that <code>function</code> returns a single value or a vector; the generated name is created by concatenating source column name and <code>function</code> name by default (see examples below).</li><li>a <code>cols =&gt; function =&gt; target_cols</code> form additionally explicitly specifying the target column or columns, which must be a single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>. Additionally it can be a <code>Function</code> which takes a string or a vector of strings as an argument containing names of columns selected by <code>cols</code>, and returns the target columns names (all accepted types except <code>AsTable</code> are allowed).</li><li>a <code>col =&gt; target_cols</code> pair, which renames the column <code>col</code> to <code>target_cols</code>, which must be single name (as a <code>Symbol</code> or a string), a vector of names or <code>AsTable</code>.</li><li>column-independent operations <code>function =&gt; target_cols</code> or just <code>function</code> for specific <code>function</code>s where the input columns are omitted; without <code>target_cols</code> the new column has the same name as <code>function</code>, otherwise it must be single name (as a <code>Symbol</code> or a string). 
Supported <code>function</code>s are:<ul><li><code>nrow</code> to efficiently compute the number of rows in each group.</li><li><code>proprow</code> to efficiently compute the proportion of rows in each group.</li><li><code>eachindex</code> to return a vector holding the number of each row within each group.</li><li><code>groupindices</code> to return the group number.</li></ul></li><li>vectors or matrices containing transformations specified by the <code>Pair</code> syntax described in points 2 to 5</li><li>a function which will be called with a <code>SubDataFrame</code> corresponding to each group if a <code>GroupedDataFrame</code> is processed, or with the data frame itself if an <code>AbstractDataFrame</code> is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case <code>SubDataFrame</code> avoids excessive compilation)</li></ol><p>Note! If the expression of the form <code>x =&gt; y</code> is passed then except for the special convenience form <code>nrow =&gt; target_cols</code> it is always interpreted as <code>cols =&gt; function</code>. In particular the following expression <code>function =&gt; target_cols</code> is not a valid transformation specification.</p><p>Note! If <code>cols</code> or <code>target_cols</code> are one of <code>All</code>, <code>Cols</code>, <code>Between</code>, or <code>Not</code>, broadcasting using <code>.=&gt;</code> is supported and is equivalent to broadcasting the result of <code>names(df, cols)</code> or <code>names(df, target_cols)</code>. This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.</p><p>All functions have two types of signatures. One of them takes a <code>GroupedDataFrame</code> as the first argument and an arbitrary number of transformations described above as following arguments. 
The second type of signature is when a <code>Function</code> or a <code>Type</code> is passed as the first argument and a <code>GroupedDataFrame</code> as the second argument (similar to <code>map</code>).</p><p>As a special rule, with the <code>cols =&gt; function</code> and <code>cols =&gt; function =&gt; target_cols</code> syntaxes, if <code>cols</code> is wrapped in an <code>AsTable</code> object then a <code>NamedTuple</code> containing columns selected by <code>cols</code> is passed to <code>function</code>. The documentation of <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> provides more information about this functionality, in particular covering performance considerations.</p><p>What is allowed for <code>function</code> to return is determined by the <code>target_cols</code> value:</p><ol><li>If both <code>cols</code> and <code>target_cols</code> are omitted (so only a <code>function</code> is passed), then returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code> or a <code>DataFrameRow</code> will produce multiple columns in the result. Returning any other value produces a single column.</li><li>If <code>target_cols</code> is a <code>Symbol</code> or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a <code>NamedTuple</code>, a <code>Tables.AbstractRow</code>, or a <code>DataFrameRow</code> raises an error.</li><li>If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings or <code>AsTable</code> it is assumed that <code>function</code> returns multiple columns. If <code>function</code> returns one of <code>AbstractDataFrame</code>, <code>NamedTuple</code>, <code>DataFrameRow</code>, <code>Tables.AbstractRow</code>, <code>AbstractMatrix</code> then rules described in point 1 above apply. 
If <code>function</code> returns an <code>AbstractVector</code> then each element of this vector must support the <code>keys</code> function, which must return a collection of <code>Symbol</code>s, strings or integers; the return value of <code>keys</code> must be identical for all elements. Then as many columns are created as there are elements in the return value of the <code>keys</code> function. If <code>target_cols</code> is <code>AsTable</code> then their names are set to be equal to the key names except if <code>keys</code> returns integers, in which case they are prefixed by <code>x</code> (so the column names are e.g. <code>x1</code>, <code>x2</code>, ...). If <code>target_cols</code> is a vector of <code>Symbol</code>s or strings then column names produced using the rules above are ignored and replaced by <code>target_cols</code> (the number of columns must be the same as the length of <code>target_cols</code> in this case). If <code>fun</code> returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the <code>Tables.columntable</code> function is called on it to get the resulting columns and their names. The names are retained when <code>target_cols</code> is <code>AsTable</code> and are replaced if <code>target_cols</code> is a vector of <code>Symbol</code>s or strings.</li></ol><p>In all of these cases, <code>function</code> can return either a single row or multiple rows. 
As a particular rule, values wrapped in a <code>Ref</code> or a <code>0</code>-dimensional <code>AbstractArray</code> are unwrapped and then treated as a single row.</p><p><code>select</code>/<code>select!</code> and <code>transform</code>/<code>transform!</code> always return a data frame with the same number and order of rows as the source (even if <code>GroupedDataFrame</code> had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).</p><p>For <code>combine</code>, rows in the returned object appear in the order of groups in the <code>GroupedDataFrame</code>. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a <code>DataFrame()</code> or <code>NamedTuple()</code> is returned, in which case a given group is skipped.</p><p>It is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.</p><p>To apply <code>function</code> to each row instead of whole columns, it can be wrapped in a <code>ByRow</code> struct. <code>cols</code> can be any column indexing syntax, in which case <code>function</code> will be passed one argument for each of the columns specified by <code>cols</code> or a <code>NamedTuple</code> of them if specified columns are wrapped in <code>AsTable</code>. If <code>ByRow</code> is used it is allowed for <code>cols</code> to select an empty set of columns, in which case <code>function</code> is called for each row without any arguments and an empty <code>NamedTuple</code> is passed if empty set of columns is wrapped in <code>AsTable</code>.</p><p>If a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. 
<code>select!(df, [:a], :, r&quot;a&quot;)</code> is allowed) and only the first occurrence is used. In particular a syntax to move column <code>:col</code> to the first position in the data frame is <code>select!(df, :col, :)</code>. On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. <code>select!(df, :a, :a =&gt; :a)</code> or <code>select!(df, :a, :a =&gt; ByRow(sin) =&gt; :a)</code> are not allowed.</p><p>In general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: <code>:x1</code>, <code>[:x1, :x2]</code>, <code>:x1 =&gt; :x2</code>, <code>:x1 =&gt; identity =&gt; :x2</code>, or <code>:x1 =&gt; (x -&gt; @view x[inds])</code> (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the <code>copycols</code> keyword argument:</p><ul><li>if <code>copycols=true</code> then results of such transformations always perform a copy of the source column or its view;</li><li>if <code>copycols=false</code> then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using <code>===</code>, which excludes views of source columns) performs a copy;</li></ul><p>Note that performing <code>transform!</code> or <code>select!</code> assumes that <code>copycols=false</code>.</p><p>If <code>df</code> is a <code>SubDataFrame</code> and <code>copycols=true</code> then a <code>DataFrame</code> is returned and the same copying rules apply as for a <code>DataFrame</code> input: this means in particular that selected columns will be copied. 
If <code>copycols=false</code>, a <code>SubDataFrame</code> is returned without copying columns and in this case transforming or renaming columns is not allowed.</p><p>If a <code>GroupedDataFrame</code> is passed and <code>threads=true</code> (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like <code>sum</code> and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or <code>threads=false</code> must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for <code>DataFrame</code> inputs.</p><p>In order to improve the performance of the operations some transformations invoke optimized implementation, see <a href="#DataFrames.table_transformation"><code>DataFrames.table_transformation</code></a> for details.</p><p><strong>Keyword arguments</strong></p><ul><li><code>renamecols::Bool=true</code> : whether in the <code>cols =&gt; function</code> form automatically generated column names should include the name of transformation functions or not.</li><li><code>ungroup::Bool=true</code> : whether the return value of the operation on <code>gd</code> should be a data frame or a <code>GroupedDataFrame</code>.</li><li><code>threads::Bool=true</code> : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to <code>false</code> if some transformations require serial execution or are not thread-safe.</li></ul><p>Metadata: this function propagates table-level <code>:note</code>-style metadata. Column-level <code>:note</code>-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).    As a special case for <code>GroupedDataFrame</code> if the output has the same name    as a grouping column and <code>keepkeys=true</code>, metadata is taken from    original grouping column.</p><p>See <a href="#DataFrames.select"><code>select</code></a> for examples.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selection.jl#L969-L999">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.vcat" href="#Base.vcat"><code>Base.vcat</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">vcat(dfs::AbstractDataFrame...;
      cols::Union{Symbol, AbstractVector{Symbol},
                  AbstractVector{&lt;:AbstractString}}=:setequal,
      source::Union{Nothing, Symbol, AbstractString,
@@ -1841,7 +1841,7 @@
    6 │     6        6  missing  b
    7 │     7  missing        7  d
    8 │     8  missing        8  d
-   9 │     9  missing        9  d</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1654-L1810">source</a></section></article><h2 id="Reshaping-data-frames-between-tall-and-wide-formats"><a class="docs-heading-anchor" href="#Reshaping-data-frames-between-tall-and-wide-formats">Reshaping data frames between tall and wide formats</a><a id="Reshaping-data-frames-between-tall-and-wide-formats-1"></a><a class="docs-heading-anchor-permalink" href="#Reshaping-data-frames-between-tall-and-wide-formats" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.stack" href="#Base.stack"><code>Base.stack</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">stack(df::AbstractDataFrame[, measure_vars[, id_vars] ];
+   9 │     9  missing        9  d</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1654-L1810">source</a></section></article><h2 id="Reshaping-data-frames-between-tall-and-wide-formats"><a class="docs-heading-anchor" href="#Reshaping-data-frames-between-tall-and-wide-formats">Reshaping data frames between tall and wide formats</a><a id="Reshaping-data-frames-between-tall-and-wide-formats-1"></a><a class="docs-heading-anchor-permalink" href="#Reshaping-data-frames-between-tall-and-wide-formats" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.stack" href="#Base.stack"><code>Base.stack</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">stack(df::AbstractDataFrame[, measure_vars[, id_vars] ];
       variable_name=:variable, value_name=:value,
       view::Bool=false, variable_eltype::Type=String)</code></pre><p>Stack a data frame <code>df</code>, i.e. convert it from wide to long format.</p><p>Return the long-format <code>DataFrame</code> with: columns for each of the <code>id_vars</code>, column <code>value_name</code> (<code>:value</code> by default) holding the values of the stacked columns (<code>measure_vars</code>), and column <code>variable_name</code> (<code>:variable</code> by default) a vector holding the name of the corresponding <code>measure_vars</code> variable.</p><p>If <code>view=true</code> then return a stacked view of a data frame (long format). The result is a view because the columns are special <code>AbstractVectors</code> that return views into the original data frame.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the AbstractDataFrame to be stacked</li><li><code>measure_vars</code> : the columns to be stacked (the measurement variables), as a column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers). If neither <code>measure_vars</code> or <code>id_vars</code> are given, <code>measure_vars</code> defaults to all floating point columns.</li><li><code>id_vars</code> : the identifier columns that are repeated during stacking, as a column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers). 
Defaults to all variables that are not <code>measure_vars</code></li><li><code>variable_name</code> : the name (<code>Symbol</code> or string) of the new stacked column that shall hold the names of each of <code>measure_vars</code></li><li><code>value_name</code> : the name (<code>Symbol</code> or string) of the new stacked column containing the values from each of <code>measure_vars</code></li><li><code>view</code> : whether the stacked data frame should be a view rather than contain freshly allocated vectors.</li><li><code>variable_eltype</code> : determines the element type of column <code>variable_name</code>. By default a <code>PooledArray{String}</code> is created. If <code>variable_eltype=Symbol</code> a <code>PooledVector{Symbol}</code> is created, and if <code>variable_eltype=CategoricalValue{String}</code> a <code>CategoricalArray{String}</code> is produced (call <code>using CategoricalArrays</code> first if needed) Passing any other type <code>T</code> will produce a <code>PooledVector{T}</code> column as long as it supports conversion from <code>String</code>. When <code>view=true</code>, a <code>RepeatedVector{T}</code> is produced.</li></ul><p>Metadata: table-level <code>:note</code>-style metadata and column-level <code>:note</code>-style metadata for identifier columns are preserved.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat(1:3, inner=2),
                       b=repeat(1:2, inner=3),
@@ -1929,7 +1929,7 @@
    9 │     2      1  c       d                3
   10 │     2      2  d       d                4
   11 │     3      2  e       d                5
-  12 │     3      2  f       d                6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/reshape.jl#L1-L135">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.unstack" href="#DataFrames.unstack"><code>DataFrames.unstack</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unstack(df::AbstractDataFrame, rowkeys, colkey, value;
+  12 │     3      2  f       d                6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/reshape.jl#L1-L135">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.unstack" href="#DataFrames.unstack"><code>DataFrames.unstack</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unstack(df::AbstractDataFrame, rowkeys, colkey, value;
         renamecols::Function=identity, allowmissing::Bool=false,
         combine=only, fill=missing, threads::Bool=true)
 unstack(df::AbstractDataFrame, colkey, value;
@@ -2074,7 +2074,7 @@
  Row │ a       b
      │ Int64?  Int64?
 ─────┼────────────────
-   1 │      3       4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/reshape.jl#L215-L419">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.permutedims" href="#Base.permutedims"><code>Base.permutedims</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">permutedims(df::AbstractDataFrame,
+   1 │      3       4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/reshape.jl#L215-L419">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.permutedims" href="#Base.permutedims"><code>Base.permutedims</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">permutedims(df::AbstractDataFrame,
             [src_namescol::Union{Int, Symbol, AbstractString}],
             [dest_namescol::Union{Symbol, AbstractString}];
             makeunique::Bool=false, strict::Bool=true)</code></pre><p>Turn <code>df</code> on its side such that rows become columns and values in the column indexed by <code>src_namescol</code> become the names of new columns. In the resulting <code>DataFrame</code>, column names of <code>df</code> will become the first column with name specified by <code>dest_namescol</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the <code>AbstractDataFrame</code></li><li><code>src_namescol</code> : the column that will become the new header.  If omitted then column names <code>:x1</code>, <code>:x2</code>, ... are generated automatically.</li><li><code>dest_namescol</code> : the name of the first column in the returned <code>DataFrame</code>. Defaults to the same name as <code>src_namescol</code>. Not supported when <code>src_namescol</code> is a vector or is omitted.</li><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate). Not supported when <code>src_namescol</code> is omitted.</li><li><code>strict</code> : if <code>true</code> (the default), an error will be raised if the values contained in the <code>src_namescol</code> are not all <code>Symbol</code> or all <code>AbstractString</code>, or can all be converted to <code>String</code> using <code>convert</code>. If <code>false</code> then any values are accepted and the will be changed to strings using the <code>string</code> function. 
Not supported when <code>src_namescol</code> is a vector or is omitted.</li></ul><p>Note: The element types of columns in resulting <code>DataFrame</code> (other than the first column if it is created from <code>df</code> column names, which always has element type <code>String</code>) will depend on the element types of <em>all</em> input columns based on the result of <code>promote_type</code>. That is, if the source data frame contains <code>Int</code> and <code>Float64</code> columns, resulting columns will have element type <code>Float64</code>. If the source has <code>Int</code> and <code>String</code> columns, resulting columns will have element type <code>Any</code>.</p><p>Metadata: table-level <code>:note</code>-style metadata is preserved and column-level metadata is dropped.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:2, b=3:4)
@@ -2133,7 +2133,7 @@
 ─────┼─────────────────────────────
    1 │ b               1     two
    2 │ c               3     4
-   3 │ d               true  false</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/reshape.jl#L722-L823">source</a></section></article><h2 id="Sorting"><a class="docs-heading-anchor" href="#Sorting">Sorting</a><a id="Sorting-1"></a><a class="docs-heading-anchor-permalink" href="#Sorting" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.issorted" href="#Base.issorted"><code>Base.issorted</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">issorted(df::AbstractDataFrame, cols=All();
+   3 │ d               true  false</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/reshape.jl#L722-L823">source</a></section></article><h2 id="Sorting"><a class="docs-heading-anchor" href="#Sorting">Sorting</a><a id="Sorting-1"></a><a class="docs-heading-anchor-permalink" href="#Sorting" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.issorted" href="#Base.issorted"><code>Base.issorted</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">issorted(df::AbstractDataFrame, cols=All();
          lt::Union{Function, AbstractVector{&lt;:Function}}=isless,
          by::Union{Function, AbstractVector{&lt;:Function}}=identity,
          rev::Union{Bool, AbstractVector{Bool}}=false,
@@ -2158,7 +2158,7 @@
 false
 
 julia&gt; issorted(df, :b, rev=true)
-true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/sort.jl#L367-L408">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.order" href="#DataFrames.order"><code>DataFrames.order</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">order(col::ColumnIndex; kwargs...)</code></pre><p>Specify sorting order for a column <code>col</code> in a data frame. <code>kwargs</code> can be <code>lt</code>, <code>by</code>, <code>rev</code>, and <code>order</code> with values following the rules defined in <a href="#Base.sort!"><code>sort!</code></a>.</p><p>See also: <a href="#Base.sort!"><code>sort!</code></a>, <a href="#Base.sort"><code>sort</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=[-3, -1, 0, 2, 4], y=1:5)
+true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/sort.jl#L367-L408">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.order" href="#DataFrames.order"><code>DataFrames.order</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">order(col::ColumnIndex; kwargs...)</code></pre><p>Specify sorting order for a column <code>col</code> in a data frame. <code>kwargs</code> can be <code>lt</code>, <code>by</code>, <code>rev</code>, and <code>order</code> with values following the rules defined in <a href="#Base.sort!"><code>sort!</code></a>.</p><p>See also: <a href="#Base.sort!"><code>sort!</code></a>, <a href="#Base.sort"><code>sort</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=[-3, -1, 0, 2, 4], y=1:5)
 5×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -2189,7 +2189,7 @@
    2 │    -1      2
    3 │     2      4
    4 │    -3      1
-   5 │     4      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/sort.jl#L25-L69">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sort" href="#Base.sort"><code>Base.sort</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sort(df::AbstractDataFrame, cols=All();
+   5 │     4      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/sort.jl#L25-L69">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sort" href="#Base.sort"><code>Base.sort</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sort(df::AbstractDataFrame, cols=All();
      alg::Union{Algorithm, Nothing}=nothing,
      lt::Union{Function, AbstractVector{&lt;:Function}}=isless,
      by::Union{Function, AbstractVector{&lt;:Function}}=identity,
@@ -2244,7 +2244,7 @@
    1 │     1  c
    2 │     1  b
    3 │     2  a
-   4 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/sort.jl#L438-L518">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sort!" href="#Base.sort!"><code>Base.sort!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sort!(df::AbstractDataFrame, cols=All();
+   4 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/sort.jl#L438-L518">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sort!" href="#Base.sort!"><code>Base.sort!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sort!(df::AbstractDataFrame, cols=All();
       alg::Union{Algorithm, Nothing}=nothing,
       lt::Union{Function, AbstractVector{&lt;:Function}}=isless,
       by::Union{Function, AbstractVector{&lt;:Function}}=identity,
@@ -2298,7 +2298,7 @@
    1 │     1  c
    2 │     1  b
    3 │     2  a
-   4 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/sort.jl#L619-L701">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sortperm" href="#Base.sortperm"><code>Base.sortperm</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sortperm(df::AbstractDataFrame, cols=All();
+   4 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/sort.jl#L619-L701">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.sortperm" href="#Base.sortperm"><code>Base.sortperm</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">sortperm(df::AbstractDataFrame, cols=All();
          alg::Union{Algorithm, Nothing}=nothing,
          lt::Union{Function, AbstractVector{&lt;:Function}}=isless,
          by::Union{Function, AbstractVector{&lt;:Function}}=identity,
@@ -2340,7 +2340,7 @@
  2
  4
  3
- 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/sort.jl#L532-L595">source</a></section></article><h2 id="Joining"><a class="docs-heading-anchor" href="#Joining">Joining</a><a id="Joining-1"></a><a class="docs-heading-anchor-permalink" href="#Joining" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.antijoin" href="#DataAPI.antijoin"><code>DataAPI.antijoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">antijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)</code></pre><p>Perform an anti join of two data frame objects and return a <code>DataFrame</code> containing the result. An anti join returns the subset of rows of <code>df1</code> that do not match with the keys in <code>df2</code>.</p><p>The order of rows in the result is kept from <code>df1</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. 
<code>on</code> is a required argument.</li><li><code>makeunique</code> : ignored as no columns are added to <code>df1</code> columns (it is provided for consistency with other functions).</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument  define unique keys in each input data frame (according to <code>isequal</code>).  Can be a tuple or a pair, with the first element indicating whether to  run check for <code>df1</code> and the second element for <code>df2</code>.  By default no check is performed.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li></ul><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata are taken from <code>df1</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>, <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
+ 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/sort.jl#L532-L595">source</a></section></article><h2 id="Joining"><a class="docs-heading-anchor" href="#Joining">Joining</a><a id="Joining-1"></a><a class="docs-heading-anchor-permalink" href="#Joining" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.antijoin" href="#DataAPI.antijoin"><code>DataAPI.antijoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">antijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)</code></pre><p>Perform an anti join of two data frame objects and return a <code>DataFrame</code> containing the result. An anti join returns the subset of rows of <code>df1</code> that do not match with the keys in <code>df2</code>.</p><p>The order of rows in the result is kept from <code>df1</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. 
<code>on</code> is a required argument.</li><li><code>makeunique</code> : ignored as no columns are added to <code>df1</code> columns (it is provided for consistency with other functions).</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument  define unique keys in each input data frame (according to <code>isequal</code>).  Can be a tuple or a pair, with the first element indicating whether to  run check for <code>df1</code> and the second element for <code>df2</code>.  By default no check is performed.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li></ul><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata are taken from <code>df1</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>, <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
 3×2 DataFrame
  Row │ ID     Name
      │ Int64  String
@@ -2386,7 +2386,7 @@
  Row │ ID     Name
      │ Int64  String
 ─────┼──────────────────
-   1 │     3  Joe Blogs</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L1395-L1489">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.crossjoin" href="#DataAPI.crossjoin"><code>DataAPI.crossjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
+   1 │     3  Joe Blogs</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L1395-L1489">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.crossjoin" href="#DataAPI.crossjoin"><code>DataAPI.crossjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
           makeunique::Bool=false, renamecols=identity =&gt; identity)
 crossjoin(df1, df2, dfs...; makeunique = false)</code></pre><p>Perform a cross join of two or more data frame objects and return a <code>DataFrame</code> containing the result. A cross join returns the cartesian product of rows from all passed data frames, where the first passed data frame is assigned to the dimension that changes the slowest and the last data frame is assigned to the dimension that changes the fastest.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>, <code>dfs...</code> : the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found in columns not joined on; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</li><li><code>renamecols</code> : a <code>Pair</code> specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a <code>Symbol</code> can be passed in which case it is appended to the original column name; alternatively a function can be passed in which case it is applied to each column name, which is passed to it as a <code>String</code>.</li></ul><p>If more than two data frames are passed, the join is performed recursively with left associativity.</p><p>Metadata: table-level <code>:note</code>-style metadata is preserved only for keys which are defined in all passed tables and have the same value. 
Column-level <code>:note</code>-style metadata is preserved from both tables.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>, <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(X=1:3)
 3×1 DataFrame
@@ -2415,7 +2415,7 @@
    3 │     2  a
    4 │     2  b
    5 │     3  a
-   6 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L1500-L1566">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.innerjoin" href="#DataAPI.innerjoin"><code>DataAPI.innerjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">innerjoin(df1, df2; on, makeunique=false, validate=(false, false),
+   6 │     3  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L1500-L1566">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.innerjoin" href="#DataAPI.innerjoin"><code>DataAPI.innerjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">innerjoin(df1, df2; on, makeunique=false, validate=(false, false),
           renamecols=(identity =&gt; identity), matchmissing=:error,
           order=:undefined)
 innerjoin(df1, df2, dfs...; on, makeunique=false,
@@ -2469,7 +2469,7 @@
      │ Int64  String    String
 ─────┼─────────────────────────
    1 │     1  John Doe  Lawyer
-   2 │     2  Jane Doe  Doctor</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L630-L755">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.leftjoin" href="#DataAPI.leftjoin"><code>DataAPI.leftjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">leftjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),
+   2 │     2  Jane Doe  Doctor</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L630-L755">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.leftjoin" href="#DataAPI.leftjoin"><code>DataAPI.leftjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">leftjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),
          renamecols=(identity =&gt; identity), matchmissing=:error, order=:undefined)</code></pre><p>Perform a left join of two data frame objects and return a <code>DataFrame</code> containing the result. A left join includes all rows from <code>df1</code>.</p><p>In the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in <code>df1</code>. This behavior may change in future releases.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found in columns not joined on; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</li><li><code>source</code> : Default: <code>nothing</code>. If a <code>Symbol</code> or string, adds indicator column with the given name, for whether a row appeared in only <code>df1</code> (<code>&quot;left_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). If the name is already in use, the column name will be modified if <code>makeunique=true</code>.</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument define unique keys in each input data frame (according to <code>isequal</code>). 
Can be a tuple or a pair, with the first element indicating whether to run check for <code>df1</code> and the second element for <code>df2</code>. By default no check is performed.</li><li><code>renamecols</code> : a <code>Pair</code> specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a <code>Symbol</code>, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a <code>String</code>. Note that <code>renamecols</code> does not affect <code>on</code> columns, whose names are always taken from the left data frame and left unchanged.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li><li><code>order</code> : if <code>:undefined</code> (the default) the order of rows in the result is  undefined and may change in future releases. If <code>:left</code> then the order of  rows from the left data frame is retained. If <code>:right</code> then the order of rows  from the right data frame is retained (non-matching rows are put at the end).</li></ul><p>All columns of the returned data frame will support missing values.</p><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata is taken from <code>df1</code> (including key columns), except for columns added to it from <code>df2</code>, whose column-level <code>:note</code>-style metadata is taken from <code>df2</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>, <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>,           <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
 3×2 DataFrame
  Row │ ID     Name
@@ -2522,7 +2522,7 @@
 ─────┼───────────────────────────
    1 │     1  John Doe   Lawyer
    2 │     2  Jane Doe   Doctor
-   3 │     3  Joe Blogs  missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L793-L916">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.leftjoin!" href="#DataFrames.leftjoin!"><code>DataFrames.leftjoin!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">leftjoin!(df1, df2; on, makeunique=false, source=nothing,
+   3 │     3  Joe Blogs  missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L793-L916">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.leftjoin!" href="#DataFrames.leftjoin!"><code>DataFrames.leftjoin!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">leftjoin!(df1, df2; on, makeunique=false, source=nothing,
           matchmissing=:error)</code></pre><p>Perform a left join of two data frame objects by updating the <code>df1</code> with the joined columns from <code>df2</code>.</p><p>A left join includes all rows from <code>df1</code> and leaves all rows and columns from <code>df1</code> untouched. Note that each row in <code>df1</code> must have at most one match in <code>df2</code>. Otherwise, this function would not be able to execute the join in-place since new rows would need to be added to <code>df1</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found in columns not joined on; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</li><li><code>source</code> : Default: <code>nothing</code>. If a <code>Symbol</code> or string, adds indicator column with the given name, for whether a row appeared in only <code>df1</code> (<code>&quot;left_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). 
If the name is already in use, the column name will be modified if <code>makeunique=true</code>.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li></ul><p>The columns added to <code>df1</code> from <code>df2</code> will support missing values.</p><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata are taken from <code>df1</code> (including key columns), except for columns added to it from <code>df2</code>, whose column-level <code>:note</code>-style metadata is taken from <code>df2</code>.</p><p>See also: <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
 3×2 DataFrame
  Row │ ID     Name
@@ -2566,7 +2566,7 @@
 ─────┼───────────────────────────────────────────────
    1 │     1  John Doe   Lawyer   Lawyer   both
    2 │     2  Jane Doe   Doctor   Doctor   both
-   3 │     3  Joe Blogs  missing  missing  left_only</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/inplace.jl#L1-L96">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.outerjoin" href="#DataAPI.outerjoin"><code>DataAPI.outerjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">outerjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),
+   3 │     3  Joe Blogs  missing  missing  left_only</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/inplace.jl#L1-L96">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.outerjoin" href="#DataAPI.outerjoin"><code>DataAPI.outerjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">outerjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),
           renamecols=(identity =&gt; identity), matchmissing=:error, order=:undefined)
 outerjoin(df1, df2, dfs...; on, makeunique = false,
           validate = (false, false), matchmissing=:error, order=:undefined)</code></pre><p>Perform an outer join of two or more data frame objects and return a <code>DataFrame</code> containing the result. An outer join includes rows with keys that appear in any of the passed data frames.</p><p>The order of rows in the result is undefined and may change in future releases.</p><p>In the returned data frame the type of the columns on which the data frames are joined is determined by the element type of these columns in both <code>df1</code> and <code>df2</code>. This behavior may change in future releases.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>, <code>dfs...</code> : the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). When joining only two data frames, a <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found in columns not joined on; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</li><li><code>source</code> : Default: <code>nothing</code>. If a <code>Symbol</code> or string, adds indicator column with the given name for whether a row appeared in only <code>df1</code> (<code>&quot;left_only&quot;</code>), only <code>df2</code> (<code>&quot;right_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). 
If the name is already in use, the column name will be modified if <code>makeunique=true</code>. This argument is only supported when joining exactly two data frames.</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument define unique keys in each input data frame (according to <code>isequal</code>). Can be a tuple or a pair, with the first element indicating whether to run check for <code>df1</code> and the second element for <code>df2</code>. By default no check is performed.</li><li><code>renamecols</code> : a <code>Pair</code> specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a <code>Symbol</code>, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a <code>String</code>. Note that <code>renamecols</code> does not affect <code>on</code> columns, whose names are always taken from the left data frame and left unchanged.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched.</li><li><code>order</code> : if <code>:undefined</code> (the default) the order of rows in the result is  undefined and may change in future releases. If <code>:left</code> then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If <code>:right</code> then the order of rows from the right data frame is retained  (non-matching rows are put at the end).</li></ul><p>All columns of the returned data frame will support missing values.</p><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>If more than two data frames are passed, the join is performed recursively with left associativity. In this case the <code>source</code> keyword argument is not supported and the <code>validate</code> keyword argument is applied recursively with left associativity.</p><p>Metadata: table-level <code>:note</code>-style metadata and column-level <code>:note</code>-style metadata for key columns is preserved only for keys which are defined in all passed tables and have the same value. Column-level <code>:note</code>-style metadata is preserved for all other columns.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
@@ -2624,7 +2624,7 @@
    1 │     1  John Doe   Lawyer
    2 │     2  Jane Doe   Doctor
    3 │     3  Joe Blogs  missing
-   4 │     4  missing    Farmer</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L1102-L1241">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.rightjoin" href="#DataAPI.rightjoin"><code>DataAPI.rightjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rightjoin(df1, df2; on, makeunique=false, source=nothing,
+   4 │     4  missing    Farmer</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L1102-L1241">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.rightjoin" href="#DataAPI.rightjoin"><code>DataAPI.rightjoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">rightjoin(df1, df2; on, makeunique=false, source=nothing,
           validate=(false, false), renamecols=(identity =&gt; identity),
           matchmissing=:error, order=:undefined)</code></pre><p>Perform a right join on two data frame objects and return a <code>DataFrame</code> containing the result. A right join includes all rows from <code>df2</code>.</p><p>The order of rows in the result is undefined and may change in future releases.</p><p>In the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in <code>df2</code>. This behavior may change in future releases.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : if <code>false</code> (the default), an error will be raised if duplicate names are found in columns not joined on; if <code>true</code>, duplicate names will be suffixed with <code>_i</code> (<code>i</code> starting at 1 for the first duplicate).</li><li><code>source</code> : Default: <code>nothing</code>. If a <code>Symbol</code> or string, adds indicator column with the given name for whether a row appeared in only <code>df2</code> (<code>&quot;right_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). 
If the name is already in use, the column name will be modified if <code>makeunique=true</code>.</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument define unique keys in each input data frame (according to <code>isequal</code>). Can be a tuple or a pair, with the first element indicating whether to run check for <code>df1</code> and the second element for <code>df2</code>. By default no check is performed.</li><li><code>renamecols</code> : a <code>Pair</code> specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a <code>Symbol</code>, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a <code>String</code>. Note that <code>renamecols</code> does not affect <code>on</code> columns, whose names are always taken from the left data frame and left unchanged.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df1</code> <code>on</code> columns.</li><li><code>order</code> : if <code>:undefined</code> (the default) the order of rows in the result is  undefined and may change in future releases. If <code>:left</code> then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If <code>:right</code> then the order of rows from the right data frame is retained.</li></ul><p>All columns of the returned data frame will support missing values.</p><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata is taken from <code>df2</code> (including key columns), except for columns added to it from <code>df1</code>, whose column-level <code>:note</code>-style metadata is taken from <code>df1</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>,           <a href="#DataAPI.semijoin"><code>semijoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
 3×2 DataFrame
@@ -2678,7 +2678,7 @@
 ─────┼─────────────────────────
    1 │     1  John Doe  Lawyer
    2 │     2  Jane Doe  Doctor
-   3 │     4  missing   Farmer</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L946-L1072">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.semijoin" href="#DataAPI.semijoin"><code>DataAPI.semijoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">semijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)</code></pre><p>Perform a semi join of two data frame objects and return a <code>DataFrame</code> containing the result. A semi join returns the subset of rows of <code>df1</code> that match with the keys in <code>df2</code>.</p><p>The order of rows in the result is kept from <code>df1</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : ignored as no columns are added to <code>df1</code> columns (it is provided for consistency with other functions).</li><li><code>indicator</code> : Default: <code>nothing</code>. 
If a <code>Symbol</code> or string, adds categorical indicator  column with the given name for whether a row appeared in only <code>df1</code> (<code>&quot;left_only&quot;</code>),  only <code>df2</code> (<code>&quot;right_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). If the name is already in use,  the column name will be modified if <code>makeunique=true</code>.</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument  define unique keys in each input data frame (according to <code>isequal</code>).  Can be a tuple or a pair, with the first element indicating whether to  run check for <code>df1</code> and the second element for <code>df2</code>.  By default no check is performed.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li></ul><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata are taken from <code>df1</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
+   3 │     4  missing   Farmer</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L946-L1072">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.semijoin" href="#DataAPI.semijoin"><code>DataAPI.semijoin</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">semijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)</code></pre><p>Perform a semi join of two data frame objects and return a <code>DataFrame</code> containing the result. A semi join returns the subset of rows of <code>df1</code> that match with the keys in <code>df2</code>.</p><p>The order of rows in the result is kept from <code>df1</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df1</code>, <code>df2</code>: the <code>AbstractDataFrames</code> to be joined</li></ul><p><strong>Keyword Arguments</strong></p><ul><li><code>on</code> : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A <code>left=&gt;right</code> pair of names can be used instead of a name, for the case where a key has different names in <code>df1</code> and <code>df2</code> (it is allowed to mix names and name pairs in a vector). Key values are compared using <code>isequal</code>. <code>on</code> is a required argument.</li><li><code>makeunique</code> : ignored as no columns are added to <code>df1</code> columns (it is provided for consistency with other functions).</li><li><code>indicator</code> : Default: <code>nothing</code>. 
If a <code>Symbol</code> or string, adds categorical indicator  column with the given name for whether a row appeared in only <code>df1</code> (<code>&quot;left_only&quot;</code>),  only <code>df2</code> (<code>&quot;right_only&quot;</code>) or in both (<code>&quot;both&quot;</code>). If the name is already in use,  the column name will be modified if <code>makeunique=true</code>.</li><li><code>validate</code> : whether to check that columns passed as the <code>on</code> argument  define unique keys in each input data frame (according to <code>isequal</code>).  Can be a tuple or a pair, with the first element indicating whether to  run check for <code>df1</code> and the second element for <code>df2</code>.  By default no check is performed.</li><li><code>matchmissing</code> : if equal to <code>:error</code> throw an error if <code>missing</code> is present in <code>on</code> columns; if equal to <code>:equal</code> then <code>missing</code> is allowed and missings are matched; if equal to <code>:notequal</code> then missings are dropped in <code>df2</code> <code>on</code> columns.</li></ul><p>It is not allowed to join on columns that contain <code>NaN</code> or <code>-0.0</code> in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a <code>CategoricalVector</code>.</p><p>When merging <code>on</code> categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.</p><p>Metadata: table-level and column-level <code>:note</code>-style metadata are taken from <code>df1</code>.</p><p>See also: <a href="#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="#DataAPI.leftjoin"><code>leftjoin</code></a>, <a href="#DataAPI.rightjoin"><code>rightjoin</code></a>,           <a href="#DataAPI.outerjoin"><code>outerjoin</code></a>, <a href="#DataAPI.antijoin"><code>antijoin</code></a>, <a href="#DataAPI.crossjoin"><code>crossjoin</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; name = DataFrame(ID=[1, 2, 3], Name=[&quot;John Doe&quot;, &quot;Jane Doe&quot;, &quot;Joe Blogs&quot;])
 3×2 DataFrame
  Row │ ID     Name
      │ Int64  String
@@ -2727,7 +2727,7 @@
      │ Int64  String
 ─────┼─────────────────
    1 │     1  John Doe
-   2 │     2  Jane Doe</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/join/composer.jl#L1284-L1385">source</a></section></article><h2 id="Grouping"><a class="docs-heading-anchor" href="#Grouping">Grouping</a><a id="Grouping-1"></a><a class="docs-heading-anchor-permalink" href="#Grouping" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.get" href="#Base.get"><code>Base.get</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">get(gd::GroupedDataFrame, key, default)</code></pre><p>Get a group based on the values of the grouping columns.</p><p><code>key</code> may be a <code>GroupKey</code>, <code>NamedTuple</code> or <code>Tuple</code> of grouping column values (in the same order as the <code>cols</code> argument to <code>groupby</code>). It may also be an <code>AbstractDict</code>, in which case the order of the arguments does not matter.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[2]),
+   2 │     2  Jane Doe</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/join/composer.jl#L1284-L1385">source</a></section></article><h2 id="Grouping"><a class="docs-heading-anchor" href="#Grouping">Grouping</a><a id="Grouping-1"></a><a class="docs-heading-anchor-permalink" href="#Grouping" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.get" href="#Base.get"><code>Base.get</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">get(gd::GroupedDataFrame, key, default)</code></pre><p>Get a group based on the values of the grouping columns.</p><p><code>key</code> may be a <code>GroupKey</code>, <code>NamedTuple</code> or <code>Tuple</code> of grouping column values (in the same order as the <code>cols</code> argument to <code>groupby</code>). It may also be an <code>AbstractDict</code>, in which case the order of the arguments does not matter.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[2]),
                       b=repeat([2, 1], outer=[3]),
                       c=1:6);
 
@@ -2763,7 +2763,7 @@
    1 │ baz         2      3
    2 │ baz         1      6
 
-julia&gt; get(gd, (:qux,), nothing)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L1055-L1105">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.groupby" href="#DataAPI.groupby"><code>DataAPI.groupby</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupby(df::AbstractDataFrame, cols;
+julia&gt; get(gd, (:qux,), nothing)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L1055-L1105">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.groupby" href="#DataAPI.groupby"><code>DataAPI.groupby</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupby(df::AbstractDataFrame, cols;
         sort::Union{Bool, Nothing, NamedTuple}=nothing,
         skipmissing::Bool=false)</code></pre><p>Return a <code>GroupedDataFrame</code> representing a view of an <code>AbstractDataFrame</code> split into row groups.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : an <code>AbstractDataFrame</code> to split</li><li><code>cols</code> : data frame columns to group by. Can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers). In particular if the selector picks no columns then a single-group <code>GroupedDataFrame</code> is created. As a special case, if <code>cols</code> is a single column or a vector of columns then it can contain columns wrapped in <a href="#DataFrames.order"><code>order</code></a> that will be used to determine the order of groups if <code>sort</code> is <code>true</code> or a <code>NamedTuple</code> (if <code>sort</code> is <code>false</code>, then passing <code>order</code> is an error; if <code>sort</code> is <code>nothing</code> then it is set to <code>true</code> when <code>order</code> is passed).</li><li><code>sort</code> : if <code>sort=true</code> sort groups according to the values of the grouping columns <code>cols</code>; if <code>sort=false</code> groups are created in their order of appearance in <code>df</code>; if <code>sort=nothing</code> (the default) then the fastest available grouping algorithm is picked and in consequence the order of groups in the result is undefined and may change in future releases; below a description of the current implementation is provided. Additionally <code>sort</code> can be a <code>NamedTuple</code> having some or all of <code>alg</code>, <code>lt</code>, <code>by</code>, <code>rev</code>, and <code>order</code> fields. 
In this case the groups are sorted and their order follows the <a href="#Base.sortperm"><code>sortperm</code></a> order.</li><li><code>skipmissing</code> : whether to skip groups with <code>missing</code> values in one of the grouping columns <code>cols</code></li></ul><p><strong>Details</strong></p><p>An iterator over a <code>GroupedDataFrame</code> returns a <code>SubDataFrame</code> view for each grouping into <code>df</code>. Within each group, the order of rows in <code>df</code> is preserved.</p><p>A <code>GroupedDataFrame</code> also supports indexing by groups, <code>select</code>, <code>transform</code>, and <code>combine</code> (which applies a function to each group and combines the result into a data frame).</p><p><code>GroupedDataFrame</code> also supports the dictionary interface. The keys are <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a> objects returned by <a href="#Base.keys"><code>keys(::GroupedDataFrame)</code></a>, which can also be used to get the values of the grouping columns for each group. <code>Tuples</code> and <code>NamedTuple</code>s containing the values of the grouping columns (in the same order as the <code>cols</code> argument) are also accepted as indices. Finally, an <code>AbstractDict</code> can be used to index into a grouped data frame where the keys are column names of the data frame. The order of the keys does not matter in this case.</p><p>In the current implementation if <code>sort=nothing</code> groups are ordered following the order of appearance of values in the grouping columns, except when all grouping columns provide non-<code>nothing</code> <code>DataAPI.refpool</code>, in which case the order of groups follows the order of values returned by <code>DataAPI.refpool</code>. As a particular application of this rule if all <code>cols</code> are <code>CategoricalVector</code>s then groups are always sorted. 
Integer columns with a narrow range also use this optimization, so the order of groups when grouping on integer columns is undefined. A column is considered to be an integer column when deciding on the grouping algorithm choice if its <code>eltype</code> is a subtype of <code>Union{Missing, Real}</code>, all its elements are either <code>missing</code> or pass <code>isinteger</code> test, and none of them is equal to <code>-0.0</code>.</p><p><strong>See also</strong></p><p><a href="#DataFrames.combine"><code>combine</code></a>, <a href="#DataFrames.select"><code>select</code></a>, <a href="#DataFrames.select!"><code>select!</code></a>, <a href="#DataFrames.transform"><code>transform</code></a>, <a href="#DataFrames.transform!"><code>transform!</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),
                       b=repeat([2, 1], outer=[4]),
@@ -2862,7 +2862,7 @@
      │ Int64  Int64  Int64
 ─────┼─────────────────────
    1 │     4      1      4
-   2 │     4      1      8</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L50-L217">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.groupcols" href="#DataFrames.groupcols"><code>DataFrames.groupcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupcols(gd::GroupedDataFrame)</code></pre><p>Return a vector of <code>Symbol</code> column names in <code>parent(gd)</code> used for grouping.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L493-L497">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.groupindices" href="#DataFrames.groupindices"><code>DataFrames.groupindices</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupindices(gd::GroupedDataFrame)</code></pre><p>Return a vector of group indices for each row of <code>parent(gd)</code>.</p><p>Rows appearing in group <code>gd[i]</code> are attributed index <code>i</code>. 
Rows not present in any group are attributed <code>missing</code> (this can happen if <code>skipmissing=true</code> was passed when creating <code>gd</code>, or if <code>gd</code> is a subset from a larger <a href="../types/#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>).</p><p>The <code>groupindices =&gt; target_col_name</code> syntax (or just <code>groupindices</code> without specifying the target column name) is also supported in the transformation mini-language when passing a <code>GroupedDataFrame</code> to transformation functions (<a href="#DataFrames.combine"><code>combine</code></a>, <a href="#DataFrames.select"><code>select</code></a>, etc.).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(id=[&quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;b&quot;, &quot;a&quot;])
+   2 │     4      1      8</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L50-L217">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.groupcols" href="#DataFrames.groupcols"><code>DataFrames.groupcols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupcols(gd::GroupedDataFrame)</code></pre><p>Return a vector of <code>Symbol</code> column names in <code>parent(gd)</code> used for grouping.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L493-L497">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.groupindices" href="#DataFrames.groupindices"><code>DataFrames.groupindices</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">groupindices(gd::GroupedDataFrame)</code></pre><p>Return a vector of group indices for each row of <code>parent(gd)</code>.</p><p>Rows appearing in group <code>gd[i]</code> are attributed index <code>i</code>. 
Rows not present in any group are attributed <code>missing</code> (this can happen if <code>skipmissing=true</code> was passed when creating <code>gd</code>, or if <code>gd</code> is a subset from a larger <a href="../types/#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>).</p><p>The <code>groupindices =&gt; target_col_name</code> syntax (or just <code>groupindices</code> without specifying the target column name) is also supported in the transformation mini-language when passing a <code>GroupedDataFrame</code> to transformation functions (<a href="#DataFrames.combine"><code>combine</code></a>, <a href="#DataFrames.select"><code>select</code></a>, etc.).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(id=[&quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;b&quot;, &quot;a&quot;])
 5×1 DataFrame
  Row │ id
      │ String
@@ -2893,7 +2893,7 @@
    2 │ c           2
    3 │ b           3
    4 │ b           3
-   5 │ a           1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L379-L430">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.keys" href="#Base.keys"><code>Base.keys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">keys(gd::GroupedDataFrame)</code></pre><p>Get the set of keys for each group of the <code>GroupedDataFrame</code> <code>gd</code> as a <a href="../types/#DataFrames.GroupKeys"><code>GroupKeys</code></a> object. Each key is a <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a>, which behaves like a <code>NamedTuple</code> holding the values of the grouping columns for a given group. Unlike the equivalent <code>Tuple</code>, <code>NamedTuple</code>, and <code>AbstractDict</code>, these keys can be used to index into <code>gd</code> efficiently. The ordering of the keys is identical to the ordering of the groups of <code>gd</code> under iteration and integer indexing.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[4]),
+   5 │ a           1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L379-L430">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.keys" href="#Base.keys"><code>Base.keys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">keys(gd::GroupedDataFrame)</code></pre><p>Get the set of keys for each group of the <code>GroupedDataFrame</code> <code>gd</code> as a <a href="../types/#DataFrames.GroupKeys"><code>GroupKeys</code></a> object. Each key is a <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a>, which behaves like a <code>NamedTuple</code> holding the values of the grouping columns for a given group. Unlike the equivalent <code>Tuple</code>, <code>NamedTuple</code>, and <code>AbstractDict</code>, these keys can be used to index into <code>gd</code> efficiently. The ordering of the keys is identical to the ordering of the groups of <code>gd</code> under iteration and integer indexing.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[4]),
                       b=repeat([2, 1], outer=[6]),
                       c=1:12);
 
@@ -2952,7 +2952,7 @@
    2 │ foo         2      7
 
 julia&gt; gd[keys(gd)[1]] == gd[1]
-true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L929-L1010">source</a></section><section><div><pre><code class="language-julia hljs">keys(dfc::DataFrameColumns)</code></pre><p>Get a vector of column names of <code>dfc</code> as <code>Symbol</code>s.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L290-L294">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.parent" href="#Base.parent"><code>Base.parent</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">parent(gd::GroupedDataFrame)</code></pre><p>Return the parent data frame of <code>gd</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L329-L333">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.proprow" href="#DataFrames.proprow"><code>DataFrames.proprow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">proprow</code></pre><p>Compute the proportion of rows which belong to each group, i.e. 
its number of rows divided by the total number of rows in a <code>GroupedDataFrame</code>.</p><p>This function can only be used in the transformation mini-language via the <code>proprow =&gt; target_col_name</code> syntax (or just <code>proprow</code> without specifying the target column name), when passing a <code>GroupedDataFrame</code> to transformation functions (<a href="#DataFrames.combine"><code>combine</code></a>, <a href="#DataFrames.select"><code>select</code></a>, etc.).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(id=[&quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;b&quot;, &quot;a&quot;, &quot;b&quot;])
+true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L929-L1010">source</a></section><section><div><pre><code class="language-julia hljs">keys(dfc::DataFrameColumns)</code></pre><p>Get a vector of column names of <code>dfc</code> as <code>Symbol</code>s.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L290-L294">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.parent" href="#Base.parent"><code>Base.parent</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">parent(gd::GroupedDataFrame)</code></pre><p>Return the parent data frame of <code>gd</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L329-L333">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.proprow" href="#DataFrames.proprow"><code>DataFrames.proprow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">proprow</code></pre><p>Compute the proportion of rows which belong to each group, i.e. 
its number of rows divided by the total number of rows in a <code>GroupedDataFrame</code>.</p><p>This function can only be used in the transformation mini-language via the <code>proprow =&gt; target_col_name</code> syntax (or just <code>proprow</code> without specifying the target column name), when passing a <code>GroupedDataFrame</code> to transformation functions (<a href="#DataFrames.combine"><code>combine</code></a>, <a href="#DataFrames.select"><code>select</code></a>, etc.).</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(id=[&quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;b&quot;, &quot;a&quot;, &quot;b&quot;])
 6×1 DataFrame
  Row │ id
      │ String
@@ -2985,7 +2985,7 @@
    3 │ b       0.5
    4 │ b       0.5
    5 │ a       0.333333
-   6 │ b       0.5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L438-L487">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.valuecols" href="#DataFrames.valuecols"><code>DataFrames.valuecols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">valuecols(gd::GroupedDataFrame)</code></pre><p>Return a vector of <code>Symbol</code> column names in <code>parent(gd)</code> not used for grouping.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L504-L508">source</a></section></article><h2 id="Filtering-rows"><a class="docs-heading-anchor" href="#Filtering-rows">Filtering rows</a><a id="Filtering-rows-1"></a><a class="docs-heading-anchor-permalink" href="#Filtering-rows" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.allunique" href="#Base.allunique"><code>Base.allunique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allunique(df::AbstractDataFrame, cols=:)</code></pre><p>Return <code>true</code> if none of the rows of <code>df</code> are duplicated. 
Two rows are duplicates if all their columns contain equal values (according to <code>isequal</code>) for all columns in <code>cols</code> (by default, all columns).</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : <code>AbstractDataFrame</code></li><li><code>cols</code> : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a>.</li></ul><p>See also <a href="#Base.unique"><code>unique</code></a> and <a href="#DataFrames.nonunique"><code>nonunique</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:4, x=[1, 2, 1, 2])
+   6 │ b       0.5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L438-L487">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.valuecols" href="#DataFrames.valuecols"><code>DataFrames.valuecols</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">valuecols(gd::GroupedDataFrame)</code></pre><p>Return a vector of <code>Symbol</code> column names in <code>parent(gd)</code> not used for grouping.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L504-L508">source</a></section></article><h2 id="Filtering-rows"><a class="docs-heading-anchor" href="#Filtering-rows">Filtering rows</a><a id="Filtering-rows-1"></a><a class="docs-heading-anchor-permalink" href="#Filtering-rows" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.allunique" href="#Base.allunique"><code>Base.allunique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allunique(df::AbstractDataFrame, cols=:)</code></pre><p>Return <code>true</code> if none of the rows of <code>df</code> are duplicated. 
Two rows are duplicates if all their columns contain equal values (according to <code>isequal</code>) for all columns in <code>cols</code> (by default, all columns).</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : <code>AbstractDataFrame</code></li><li><code>cols</code> : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a>.</li></ul><p>See also <a href="#Base.unique"><code>unique</code></a> and <a href="#DataFrames.nonunique"><code>nonunique</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:4, x=[1, 2, 1, 2])
 4×2 DataFrame
  Row │ i      x
      │ Int64  Int64
@@ -3002,7 +3002,7 @@
 false
 
 julia&gt; allunique(df, :i =&gt; ByRow(isodd))
-false</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/unique.jl#L165-L202">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.deleteat!" href="#Base.deleteat!"><code>Base.deleteat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deleteat!(df::DataFrame, inds)</code></pre><p>Delete rows specified by <code>inds</code> from a <code>DataFrame</code> <code>df</code> in place and return it.</p><p>Internally <code>deleteat!</code> is called for all columns so <code>inds</code> must be: a vector of sorted and unique integers, a boolean vector, an integer, or <code>Not</code> wrapping any valid selector.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+false</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/unique.jl#L165-L202">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.deleteat!" href="#Base.deleteat!"><code>Base.deleteat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deleteat!(df::DataFrame, inds)</code></pre><p>Delete rows specified by <code>inds</code> from a <code>DataFrame</code> <code>df</code> in place and return it.</p><p>Internally <code>deleteat!</code> is called for all columns so <code>inds</code> must be: a vector of sorted and unique integers, a boolean vector, an integer, or <code>Not</code> wrapping any valid selector.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3017,7 +3017,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      4
-   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L823-L853">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.empty" href="#Base.empty"><code>Base.empty</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">empty(df::AbstractDataFrame)</code></pre><p>Create a new <code>DataFrame</code> with the same column names and column element types as <code>df</code> but with zero rows.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L482-L489">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.empty!" href="#Base.empty!"><code>Base.empty!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">empty!(df::DataFrame)</code></pre><p>Remove all rows from <code>df</code>, making each of its columns empty.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L823-L853">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.empty" href="#Base.empty"><code>Base.empty</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">empty(df::AbstractDataFrame)</code></pre><p>Create a new <code>DataFrame</code> with the same column names and column element types as <code>df</code> but with zero rows.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L482-L489">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.empty!" href="#Base.empty!"><code>Base.empty!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">empty!(df::DataFrame)</code></pre><p>Remove all rows from <code>df</code>, making each of its columns empty.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3033,7 +3033,7 @@
 ─────┴──────────────
 
 julia&gt; df.a, df.b
-(Int64[], Int64[])</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1011-L1039">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.filter" href="#Base.filter"><code>Base.filter</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">filter(fun, df::AbstractDataFrame; view::Bool=false)
+(Int64[], Int64[])</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1011-L1039">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.filter" href="#Base.filter"><code>Base.filter</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">filter(fun, df::AbstractDataFrame; view::Bool=false)
 filter(cols =&gt; fun, df::AbstractDataFrame; view::Bool=false)</code></pre><p>Return a data frame containing only rows from <code>df</code> for which <code>fun</code> returns <code>true</code>.</p><p>If <code>cols</code> is not specified then the predicate <code>fun</code> is passed <code>DataFrameRow</code>s. Elements of a <code>DataFrameRow</code> may be accessed with dot syntax or column indexing inside <code>fun</code>.</p><p>If <code>cols</code> is specified then the predicate <code>fun</code> is passed elements of the corresponding columns as separate positional arguments, unless <code>cols</code> is an <code>AsTable</code> selector, in which case a <code>NamedTuple</code> of these arguments is passed. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers), and column duplicates are allowed if a vector of <code>Symbol</code>s, strings, or integers is passed.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. 
If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p>Passing <code>cols</code> leads to a more efficient execution of the operation for large data frames.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>This method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the <a href="#DataFrames.subset"><code>subset</code></a> function instead as it is consistent with other DataFrames.jl functions (as opposed to <code>filter</code>).</p></div></div><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Due to type stability the <code>filter(cols =&gt; fun, df::AbstractDataFrame; view::Bool=false)</code> call is preferred in performance critical applications.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#Base.filter!"><code>filter!</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=[3, 1, 2, 1], y=[&quot;b&quot;, &quot;c&quot;, &quot;a&quot;, &quot;b&quot;])
 4×2 DataFrame
  Row │ x      y
@@ -3084,7 +3084,7 @@
 ─────┼───────────────
    1 │     3  b
    2 │     1  c
-   3 │     1  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1125-L1219">source</a></section><section><div><pre><code class="language-julia hljs">filter(fun, gdf::GroupedDataFrame; ungroup::Bool=false)
+   3 │     1  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1125-L1219">source</a></section><section><div><pre><code class="language-julia hljs">filter(fun, gdf::GroupedDataFrame; ungroup::Bool=false)
 filter(cols =&gt; fun, gdf::GroupedDataFrame; ungroup::Bool=false)</code></pre><p>Return only groups in <code>gd</code> for which <code>fun</code> returns <code>true</code> as a <code>GroupedDataFrame</code> if <code>ungroup=false</code> (the default), or as a data frame if <code>ungroup=true</code>.</p><p>If <code>cols</code> is not specified then the predicate <code>fun</code> is called with a <code>SubDataFrame</code> for each group.</p><p>If <code>cols</code> is specified then the predicate <code>fun</code> is called for each group with views of the corresponding columns as separate positional arguments, unless <code>cols</code> is an <code>AsTable</code> selector, in which case a <code>NamedTuple</code> of these arguments is passed. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers), and column duplicates are allowed if a vector of <code>Symbol</code>s, strings, or integers is passed.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>This method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the <a href="#DataFrames.subset"><code>subset</code></a> function instead as it is consistent with other DataFrames.jl functions (as opposed to <code>filter</code>).</p></div></div><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(g=[1, 2], x=[&#39;a&#39;, &#39;b&#39;]);
 
 julia&gt; gd = groupby(df, :g)
@@ -3122,7 +3122,7 @@
  Row │ g      x
      │ Int64  Char
 ─────┼─────────────
-   1 │     1  a</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L1114-L1180">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.filter!" href="#Base.filter!"><code>Base.filter!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">filter!(fun, df::AbstractDataFrame)
+   1 │     1  a</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L1114-L1180">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.filter!" href="#Base.filter!"><code>Base.filter!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">filter!(fun, df::AbstractDataFrame)
 filter!(cols =&gt; fun, df::AbstractDataFrame)</code></pre><p>Remove rows from data frame <code>df</code> for which <code>fun</code> returns <code>false</code>.</p><p>If <code>cols</code> is not specified then the predicate <code>fun</code> is passed <code>DataFrameRow</code>s. Elements of a <code>DataFrameRow</code> may be accessed with dot syntax or column indexing inside <code>fun</code>.</p><p>If <code>cols</code> is specified then the predicate <code>fun</code> is passed elements of the corresponding columns as separate positional arguments, unless <code>cols</code> is an <code>AsTable</code> selector, in which case a <code>NamedTuple</code> of these arguments is passed. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers), and column duplicates are allowed if a vector of <code>Symbol</code>s, strings, or integers is passed.</p><p>Passing <code>cols</code> leads to a more efficient execution of the operation for large data frames.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>This method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the <a href="#DataFrames.subset!"><code>subset!</code></a> function instead as it is consistent with other DataFrames.jl functions (as opposed to <code>filter!</code>).</p></div></div><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Due to type stability the <code>filter!(cols =&gt; fun, df::AbstractDataFrame)</code> call is preferred in performance critical applications.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a 
href="#Base.filter"><code>filter</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=[3, 1, 2, 1], y=[&quot;b&quot;, &quot;c&quot;, &quot;a&quot;, &quot;b&quot;])
 4×2 DataFrame
  Row │ x      y
@@ -3176,7 +3176,7 @@
 ─────┼───────────────
    1 │     3  b
    2 │     1  c
-   3 │     1  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1264-L1357">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.keepat!" href="#Base.keepat!"><code>Base.keepat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">keepat!(df::DataFrame, inds)</code></pre><p>Delete rows at all indices not specified by <code>inds</code> from a <code>DataFrame</code> <code>df</code> in place and return it.</p><p>Internally <code>deleteat!</code> is called for all columns so <code>inds</code> must be: a vector of sorted and unique integers, a boolean vector, an integer, or <code>Not</code> wrapping any valid selector.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+   3 │     1  b</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1264-L1357">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.keepat!" href="#Base.keepat!"><code>Base.keepat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">keepat!(df::DataFrame, inds)</code></pre><p>Delete rows at all indices not specified by <code>inds</code> from a <code>DataFrame</code> <code>df</code> in place and return it.</p><p>Internally <code>deleteat!</code> is called for all columns so <code>inds</code> must be: a vector of sorted and unique integers, a boolean vector, an integer, or <code>Not</code> wrapping any valid selector.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3191,7 +3191,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      4
-   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L945-L976">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.first" href="#Base.first"><code>Base.first</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">first(df::AbstractDataFrame)</code></pre><p>Get the first row of <code>df</code> as a <code>DataFrameRow</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L549-L555">source</a></section><section><div><pre><code class="language-julia hljs">first(df::AbstractDataFrame, n::Integer; view::Bool=false)</code></pre><p>Get a data frame with the <code>n</code> first rows of <code>df</code>. Get all rows if <code>n</code> is greater than the number of rows in <code>df</code>. Error if <code>n</code> is negative.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. 
If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L558-L569">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.last" href="#Base.last"><code>Base.last</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">last(df::AbstractDataFrame)</code></pre><p>Get the last row of <code>df</code> as a <code>DataFrameRow</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L576-L582">source</a></section><section><div><pre><code class="language-julia hljs">last(df::AbstractDataFrame, n::Integer; view::Bool=false)</code></pre><p>Get a data frame with the <code>n</code> last rows of <code>df</code>. Get all rows if <code>n</code> is greater than the number of rows in <code>df</code>. Error if <code>n</code> is negative.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. 
If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L585-L596">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.nonunique" href="#DataFrames.nonunique"><code>DataFrames.nonunique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">nonunique(df::AbstractDataFrame; keep::Symbol=:first)
+   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L945-L976">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.first" href="#Base.first"><code>Base.first</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">first(df::AbstractDataFrame)</code></pre><p>Get the first row of <code>df</code> as a <code>DataFrameRow</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L549-L555">source</a></section><section><div><pre><code class="language-julia hljs">first(df::AbstractDataFrame, n::Integer; view::Bool=false)</code></pre><p>Get a data frame with the <code>n</code> first rows of <code>df</code>. Get all rows if <code>n</code> is greater than the number of rows in <code>df</code>. Error if <code>n</code> is negative.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. 
If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L558-L569">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.last" href="#Base.last"><code>Base.last</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">last(df::AbstractDataFrame)</code></pre><p>Get the last row of <code>df</code> as a <code>DataFrameRow</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L576-L582">source</a></section><section><div><pre><code class="language-julia hljs">last(df::AbstractDataFrame, n::Integer; view::Bool=false)</code></pre><p>Get a data frame with the <code>n</code> last rows of <code>df</code>. Get all rows if <code>n</code> is greater than the number of rows in <code>df</code>. Error if <code>n</code> is negative.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. 
If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L585-L596">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.nonunique" href="#DataFrames.nonunique"><code>DataFrames.nonunique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">nonunique(df::AbstractDataFrame; keep::Symbol=:first)
 nonunique(df::AbstractDataFrame, cols; keep::Symbol=:first)</code></pre><p>Return a <code>Vector{Bool}</code> in which <code>true</code> entries indicate duplicate rows.</p><p>Duplicate rows are those for which at least another row contains equal values (according to <code>isequal</code>) for all columns in <code>cols</code> (by default, all columns). If <code>keep=:first</code> (the default), only the first occurrence of a set of duplicate rows is indicated with a <code>false</code> entry. If <code>keep=:last</code>, only the last occurrence of a set of duplicate rows is indicated with a <code>false</code> entry. If <code>keep=:noduplicates</code>, only rows without any duplicates are indicated with a <code>false</code> entry.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : <code>AbstractDataFrame</code></li><li><code>cols</code> : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a> that returns at least one column if <code>df</code> has at least one column.</li></ul><p>See also <a href="#Base.unique"><code>unique</code></a> and <a href="#Base.unique!"><code>unique!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:4, x=[1, 2, 1, 2])
 4×2 DataFrame
  Row │ i      x
@@ -3247,7 +3247,7 @@
  1
  1
  1
- 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/unique.jl#L1-L85">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.Iterators.only" href="#Base.Iterators.only"><code>Base.Iterators.only</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">only(df::AbstractDataFrame)</code></pre><p>If <code>df</code> has a single row return it as a <code>DataFrameRow</code>; otherwise throw <code>ArgumentError</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L536-L542">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pop!" 
href="#Base.pop!"><code>Base.pop!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pop!(df::DataFrame)</code></pre><p>Remove the last row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+ 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/unique.jl#L1-L85">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.Iterators.only" href="#Base.Iterators.only"><code>Base.Iterators.only</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">only(df::AbstractDataFrame)</code></pre><p>If <code>df</code> has a single row return it as a <code>DataFrameRow</code>; otherwise throw <code>ArgumentError</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L536-L542">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pop!" 
href="#Base.pop!"><code>Base.pop!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pop!(df::DataFrame)</code></pre><p>Remove the last row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3265,7 +3265,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      4
-   2 │     2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1083-L1117">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.popat!" href="#Base.popat!"><code>Base.popat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">popat!(df::DataFrame, i::Integer)</code></pre><p>Remove the <code>i</code>-th row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+   2 │     2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1083-L1117">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.popat!" href="#Base.popat!"><code>Base.popat!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">popat!(df::DataFrame, i::Integer)</code></pre><p>Remove the <code>i</code>-th row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3283,7 +3283,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      4
-   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1156-L1189">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.popfirst!" href="#Base.popfirst!"><code>Base.popfirst!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">popfirst!(df::DataFrame)</code></pre><p>Remove the first row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1156-L1189">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.popfirst!" href="#Base.popfirst!"><code>Base.popfirst!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">popfirst!(df::DataFrame)</code></pre><p>Remove the first row from <code>df</code> and return a <code>NamedTuple</code> created from this row.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using this method for very wide data frames may lead to expensive compilation.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3301,7 +3301,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     2      5
-   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1120-L1153">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.resize!" href="#Base.resize!"><code>Base.resize!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">resize!(df::DataFrame, n::Integer)</code></pre><p>Resize <code>df</code> to have <code>n</code> rows by calling <code>resize!</code> on all columns of <code>df</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
+   2 │     3      6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1120-L1153">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.resize!" href="#Base.resize!"><code>Base.resize!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">resize!(df::DataFrame, n::Integer)</code></pre><p>Resize <code>df</code> to have <code>n</code> rows by calling <code>resize!</code> on all columns of <code>df</code>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1:3, b=4:6)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -3316,7 +3316,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      4
-   2 │     2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1046-L1072">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.subset" href="#DataFrames.subset"><code>DataFrames.subset</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">subset(df::AbstractDataFrame, args...;
+   2 │     2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1046-L1072">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.subset" href="#DataFrames.subset"><code>DataFrames.subset</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">subset(df::AbstractDataFrame, args...;
        skipmissing::Bool=false, view::Bool=false, threads::Bool=true)
 subset(gdf::GroupedDataFrame, args...;
        skipmissing::Bool=false, view::Bool=false,
@@ -3379,7 +3379,7 @@
      │ Int64  Bool   Bool   Bool?    Int64
 ─────┼─────────────────────────────────────
    1 │     3   true  false  missing     11
-   2 │     4  false  false  missing     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/subset.jl#L159-L277">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.subset!" href="#DataFrames.subset!"><code>DataFrames.subset!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">subset!(df::AbstractDataFrame, args...;
+   2 │     4  false  false  missing     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/subset.jl#L159-L277">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.subset!" href="#DataFrames.subset!"><code>DataFrames.subset!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">subset!(df::AbstractDataFrame, args...;
         skipmissing::Bool=false, threads::Bool=true)
 subset!(gdf::GroupedDataFrame{DataFrame}, args...;
         skipmissing::Bool=false, ungroup::Bool=true, threads::Bool=true)</code></pre><p>Update data frame <code>df</code> or the parent of <code>gdf</code> in place to contain only rows for which all values produced by transformation(s) <code>args</code> for a given row is <code>true</code>. All transformations must produce vectors containing <code>true</code> or <code>false</code>. When the first argument is a <code>GroupedDataFrame</code>, transformations are also allowed to return a single <code>true</code> or <code>false</code> value, which results in including or excluding a whole group.</p><p>If <code>skipmissing=false</code> (the default) <code>args</code> are required to produce results containing only <code>Bool</code> values. If <code>skipmissing=true</code>, additionally <code>missing</code> is allowed and it is treated as <code>false</code> (i.e. rows for which one of the conditions returns <code>missing</code> are skipped).</p><p>Each argument passed in <code>args</code> can be any specifier following the rules described for <a href="#DataFrames.select"><code>select</code></a> with the restriction that:</p><ul><li>specifying target column name is not allowed as <code>subset!</code> does not create new columns;</li><li>every passed transformation must return a scalar or a vector (returning <code>AbstractDataFrame</code>, <code>NamedTuple</code>, <code>DataFrameRow</code> or <code>AbstractMatrix</code> is not supported).</li></ul><p>If <code>ungroup=false</code> the passed <code>GroupedDataFrame</code> <code>gdf</code> is updated (preserving the order of its groups) and returned.</p><p>If <code>threads=true</code> (the default) transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to <code>false</code> if some transformations require serial execution or are not thread-safe.</p><p>If <code>GroupedDataFrame</code> is subsetted then it must include all groups present in the <code>parent</code> data frame, like in <a href="#DataFrames.select!"><code>select!</code></a>. In this case the passed <code>GroupedDataFrame</code> is updated to have correct groups after its parent is updated.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Note that as the <code>subset!</code> function works in exactly the same way as other transformation functions defined in DataFrames.jl this is the preferred way to subset rows of a data frame or grouped data frame. In particular it uses a different set of rules for specifying transformations than <a href="#Base.filter!"><code>filter!</code></a> which is implemented in DataFrames.jl to ensure support for the standard Julia API for collections.</p></div></div><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#DataFrames.subset"><code>subset</code></a>, <a href="#Base.filter!"><code>filter!</code></a>, <a href="#DataFrames.select!"><code>select!</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false])
@@ -3460,7 +3460,7 @@
      │ Int64  Bool   Bool   Bool?    Int64
 ─────┼─────────────────────────────────────
    1 │     3   true  false  missing     11
-   2 │     4  false  false  missing     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/subset.jl#L336-L471">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.unique" href="#Base.unique"><code>Base.unique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unique(df::AbstractDataFrame; view::Bool=false, keep::Symbol=:first)
+   2 │     4  false  false  missing     12</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/subset.jl#L336-L471">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.unique" href="#Base.unique"><code>Base.unique</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unique(df::AbstractDataFrame; view::Bool=false, keep::Symbol=:first)
 unique(df::AbstractDataFrame, cols; view::Bool=false, keep::Symbol=:first)</code></pre><p>Return a data frame containing only unique rows in <code>df</code>.</p><p>Non-unique (duplicate) rows are those for which at least another row contains equal values (according to <code>isequal</code>) for all columns in <code>cols</code> (by default, all columns). If <code>keep=:first</code> (the default), only the first occurrence of a set of duplicate rows is kept. If <code>keep=:last</code>, only the last occurrence of a set of duplicate rows is kept. If <code>keep=:noduplicates</code>, only rows without any duplicates are kept.</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned, and if <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the AbstractDataFrame</li><li><code>cols</code> : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a> that returns at least one column if <code>df</code> has at least one column.</li></ul><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#Base.unique!"><code>unique!</code></a>, <a href="#DataFrames.nonunique"><code>nonunique</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:4, x=[1, 2, 1, 2])
 4×2 DataFrame
  Row │ i      x
@@ -3507,7 +3507,7 @@
 0×2 DataFrame
  Row │ i      x
      │ Int64  Int64
-─────┴──────────────</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/unique.jl#L215-L294">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.unique!" href="#Base.unique!"><code>Base.unique!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unique!(df::AbstractDataFrame; keep::Symbol=:first)
+─────┴──────────────</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/unique.jl#L215-L294">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.unique!" href="#Base.unique!"><code>Base.unique!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">unique!(df::AbstractDataFrame; keep::Symbol=:first)
 unique!(df::AbstractDataFrame, cols; keep::Symbol=:first)</code></pre><p>Update <code>df</code> in-place to contain only unique rows.</p><p>Non-unique (duplicate) rows are those for which at least another row contains equal values (according to <code>isequal</code>) for all columns in <code>cols</code> (by default, all columns). If <code>keep=:first</code> (the default), only the first occurrence of a set of duplicate rows is kept. If <code>keep=:last</code>, only the last occurrence of a set of duplicate rows is kept. If <code>keep=:noduplicates</code>, only rows without any duplicates are kept.</p><p><strong>Arguments</strong></p><ul><li><code>df</code> : the AbstractDataFrame</li><li><code>cols</code> :  column indicator (<code>Symbol</code>, <code>Int</code>, <code>Vector{Symbol}</code>, <code>Regex</code>, etc.) specifying the column(s) to compare. Can be any column selector or transformation accepted by <a href="#DataFrames.select"><code>select</code></a> that returns at least one column if <code>df</code> has at least one column.</li></ul><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#Base.unique!"><code>unique!</code></a>, <a href="#DataFrames.nonunique"><code>nonunique</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:4, x=[1, 2, 1, 2])
 4×2 DataFrame
  Row │ i      x
@@ -3546,7 +3546,7 @@
 0×2 DataFrame
  Row │ i      x
      │ Int64  Int64
-─────┴──────────────</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/unique.jl#L307-L375">source</a></section></article><h2 id="Working-with-missing-values"><a class="docs-heading-anchor" href="#Working-with-missing-values">Working with missing values</a><a id="Working-with-missing-values-1"></a><a class="docs-heading-anchor-permalink" href="#Working-with-missing-values" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Missings.allowmissing" href="#Missings.allowmissing"><code>Missings.allowmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allowmissing(df::AbstractDataFrame, cols=:)</code></pre><p>Return a copy of data frame <code>df</code> with columns <code>cols</code> converted to element type <code>Union{T, Missing}</code> from <code>T</code> to allow support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=[1, 2])
+─────┴──────────────</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/unique.jl#L307-L375">source</a></section></article><h2 id="Working-with-missing-values"><a class="docs-heading-anchor" href="#Working-with-missing-values">Working with missing values</a><a id="Working-with-missing-values-1"></a><a class="docs-heading-anchor-permalink" href="#Working-with-missing-values" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Missings.allowmissing" href="#Missings.allowmissing"><code>Missings.allowmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allowmissing(df::AbstractDataFrame, cols=:)</code></pre><p>Return a copy of data frame <code>df</code> with columns <code>cols</code> converted to element type <code>Union{T, Missing}</code> from <code>T</code> to allow support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=[1, 2])
 2×1 DataFrame
  Row │ a
      │ Int64
@@ -3560,7 +3560,7 @@
      │ Int64?
 ─────┼────────
    1 │      1
-   2 │      2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2003-L2034">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.allowmissing!" href="#DataFrames.allowmissing!"><code>DataFrames.allowmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allowmissing!(df::DataFrame, cols=:)</code></pre><p>Convert columns <code>cols</code> of data frame <code>df</code> from element type <code>T</code> to <code>Union{T, Missing}</code> to support missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1251-L1262">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.completecases" href="#DataFrames.completecases"><code>DataFrames.completecases</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 
docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">completecases(df::AbstractDataFrame, cols=:)</code></pre><p>Return a Boolean vector with <code>true</code> entries indicating rows without missing values (complete cases) in data frame <code>df</code>.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers) that returns at least one column if <code>df</code> has at least one column.</p><p>See also: <a href="#DataFrames.dropmissing"><code>dropmissing</code></a> and <a href="#DataFrames.dropmissing!"><code>dropmissing!</code></a>. Use <code>findall(completecases(df))</code> to get the indices of the rows.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
+   2 │      2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2003-L2034">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.allowmissing!" href="#DataFrames.allowmissing!"><code>DataFrames.allowmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">allowmissing!(df::DataFrame, cols=:)</code></pre><p>Convert columns <code>cols</code> of data frame <code>df</code> from element type <code>T</code> to <code>Union{T, Missing}</code> to support missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1251-L1262">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.completecases" href="#DataFrames.completecases"><code>DataFrames.completecases</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 
docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">completecases(df::AbstractDataFrame, cols=:)</code></pre><p>Return a Boolean vector with <code>true</code> entries indicating rows without missing values (complete cases) in data frame <code>df</code>.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers) that returns at least one column if <code>df</code> has at least one column.</p><p>See also: <a href="#DataFrames.dropmissing"><code>dropmissing</code></a> and <a href="#DataFrames.dropmissing!"><code>dropmissing!</code></a>. Use <code>findall(completecases(df))</code> to get the indices of the rows.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
                       x=[missing, 4, missing, 2, 1],
                       y=[missing, missing, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;])
 5×3 DataFrame
@@ -3595,7 +3595,7 @@
  0
  0
  1
- 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L851-L904">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Missings.disallowmissing" href="#Missings.disallowmissing"><code>Missings.disallowmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">disallowmissing(df::AbstractDataFrame, cols=:; error::Bool=true)</code></pre><p>Return a copy of data frame <code>df</code> with columns <code>cols</code> converted from element type <code>Union{T, Missing}</code> to <code>T</code> to drop support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>If <code>error=false</code> then columns containing a <code>missing</code> value will be skipped instead of throwing an error.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=Union{Int, Missing}[1, 2])
+ 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L851-L904">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Missings.disallowmissing" href="#Missings.disallowmissing"><code>Missings.disallowmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">disallowmissing(df::AbstractDataFrame, cols=:; error::Bool=true)</code></pre><p>Return a copy of data frame <code>df</code> with columns <code>cols</code> converted from element type <code>Union{T, Missing}</code> to <code>T</code> to drop support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>If <code>error=false</code> then columns containing a <code>missing</code> value will be skipped instead of throwing an error.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=Union{Int, Missing}[1, 2])
 2×1 DataFrame
  Row │ a
      │ Int64?
@@ -3625,7 +3625,7 @@
      │ Int64?
 ─────┼─────────
    1 │       1
-   2 │ missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1916-L1966">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.disallowmissing!" href="#DataFrames.disallowmissing!"><code>DataFrames.disallowmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">disallowmissing!(df::DataFrame, cols=:; error::Bool=true)</code></pre><p>Convert columns <code>cols</code> of data frame <code>df</code> from element type <code>Union{T, Missing}</code> to <code>T</code> to drop support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>If <code>error=false</code> then columns containing a <code>missing</code> value will be skipped instead of throwing an error.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1293-L1307">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" 
id="DataFrames.dropmissing" href="#DataFrames.dropmissing"><code>DataFrames.dropmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">dropmissing(df::AbstractDataFrame, cols=:; view::Bool=false, disallowmissing::Bool=!view)</code></pre><p>Return a data frame excluding rows with missing values in <code>df</code>.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned. In this case <code>disallowmissing</code> must be <code>false</code>.</p><p>If <code>disallowmissing</code> is <code>true</code> (the default when <code>view</code> is <code>false</code>) then columns specified in <code>cols</code> will be converted so as not to allow for missing values using <a href="#DataFrames.disallowmissing!"><code>disallowmissing!</code></a>.</p><p>See also: <a href="#DataFrames.completecases"><code>completecases</code></a> and <a href="#DataFrames.dropmissing!"><code>dropmissing!</code></a>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
+   2 │ missing</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1916-L1966">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.disallowmissing!" href="#DataFrames.disallowmissing!"><code>DataFrames.disallowmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">disallowmissing!(df::DataFrame, cols=:; error::Bool=true)</code></pre><p>Convert columns <code>cols</code> of data frame <code>df</code> from element type <code>Union{T, Missing}</code> to <code>T</code> to drop support for missing values.</p><p><code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>cols</code> is omitted all columns in the data frame are converted.</p><p>If <code>error=false</code> then columns containing a <code>missing</code> value will be skipped instead of throwing an error.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1293-L1307">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" 
id="DataFrames.dropmissing" href="#DataFrames.dropmissing"><code>DataFrames.dropmissing</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">dropmissing(df::AbstractDataFrame, cols=:; view::Bool=false, disallowmissing::Bool=!view)</code></pre><p>Return a data frame excluding rows with missing values in <code>df</code>.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>view=false</code> a freshly allocated <code>DataFrame</code> is returned. If <code>view=true</code> then a <code>SubDataFrame</code> view into <code>df</code> is returned. In this case <code>disallowmissing</code> must be <code>false</code>.</p><p>If <code>disallowmissing</code> is <code>true</code> (the default when <code>view</code> is <code>false</code>) then columns specified in <code>cols</code> will be converted so as not to allow for missing values using <a href="#DataFrames.disallowmissing!"><code>disallowmissing!</code></a>.</p><p>See also: <a href="#DataFrames.completecases"><code>completecases</code></a> and <a href="#DataFrames.dropmissing!"><code>dropmissing!</code></a>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
                       x=[missing, 4, missing, 2, 1],
                       y=[missing, missing, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;])
 5×3 DataFrame
@@ -3669,7 +3669,7 @@
      │ Int64  Int64  String
 ─────┼──────────────────────
    1 │     4      2  d
-   2 │     5      1  e</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L939-L1008">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.dropmissing!" href="#DataFrames.dropmissing!"><code>DataFrames.dropmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">dropmissing!(df::AbstractDataFrame, cols=:; disallowmissing::Bool=true)</code></pre><p>Remove rows with missing values from data frame <code>df</code> and return it.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>disallowmissing</code> is <code>true</code> (the default) then the <code>cols</code> columns will get converted using <a href="#DataFrames.disallowmissing!"><code>disallowmissing!</code></a>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#DataFrames.dropmissing"><code>dropmissing</code></a> and <a href="#DataFrames.completecases"><code>completecases</code></a>.</p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
+   2 │     5      1  e</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L939-L1008">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.dropmissing!" href="#DataFrames.dropmissing!"><code>DataFrames.dropmissing!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">dropmissing!(df::AbstractDataFrame, cols=:; disallowmissing::Bool=true)</code></pre><p>Remove rows with missing values from data frame <code>df</code> and return it.</p><p>If <code>cols</code> is provided, only missing values in the corresponding columns are considered. <code>cols</code> can be any column selector (<code>Symbol</code>, string or integer; <code>:</code>, <code>Cols</code>, <code>All</code>, <code>Between</code>, <code>Not</code>, a regular expression, or a vector of <code>Symbol</code>s, strings or integers).</p><p>If <code>disallowmissing</code> is <code>true</code> (the default) then the <code>cols</code> columns will get converted using <a href="#DataFrames.disallowmissing!"><code>disallowmissing!</code></a>.</p><p>Metadata: this function preserves table-level and column-level <code>:note</code>-style metadata.</p><p>See also: <a href="#DataFrames.dropmissing"><code>dropmissing</code></a> and <a href="#DataFrames.completecases"><code>completecases</code></a>.</p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(i=1:5,
                       x=[missing, 4, missing, 2, 1],
                       y=[missing, missing, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;])
 5×3 DataFrame
@@ -3713,7 +3713,7 @@
      │ Int64  Int64  String
 ─────┼──────────────────────
    1 │     4      2  d
-   2 │     5      1  e</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1052-L1114">source</a></section></article><h2 id="Iteration"><a class="docs-heading-anchor" href="#Iteration">Iteration</a><a id="Iteration-1"></a><a class="docs-heading-anchor-permalink" href="#Iteration" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.eachcol" href="#Base.eachcol"><code>Base.eachcol</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">eachcol(df::AbstractDataFrame)</code></pre><p>Return a <code>DataFrameColumns</code> object that is a vector-like that allows iterating an <code>AbstractDataFrame</code> column by column.</p><p>Indexing into <code>DataFrameColumns</code> objects using integer, <code>Symbol</code> or string returns the corresponding column (without copying). Indexing into <code>DataFrameColumns</code> objects using a multiple column selector returns a subsetted <code>DataFrameColumns</code> object with a new parent containing only the selected columns (without copying).</p><p><code>DataFrameColumns</code> supports most of the <code>AbstractVector</code> API. 
The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>In particular <code>findnext</code>, <code>findprev</code>, <code>findfirst</code>, <code>findlast</code>, and <code>findall</code> functions are supported, and in <code>findnext</code> and <code>findprev</code> functions it is allowed to pass an integer, string, or <code>Symbol</code> as a reference index.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
+   2 │     5      1  e</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1052-L1114">source</a></section></article><h2 id="Iteration"><a class="docs-heading-anchor" href="#Iteration">Iteration</a><a id="Iteration-1"></a><a class="docs-heading-anchor-permalink" href="#Iteration" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.eachcol" href="#Base.eachcol"><code>Base.eachcol</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">eachcol(df::AbstractDataFrame)</code></pre><p>Return a <code>DataFrameColumns</code> object that is a vector-like that allows iterating an <code>AbstractDataFrame</code> column by column.</p><p>Indexing into <code>DataFrameColumns</code> objects using integer, <code>Symbol</code> or string returns the corresponding column (without copying). Indexing into <code>DataFrameColumns</code> objects using a multiple column selector returns a subsetted <code>DataFrameColumns</code> object with a new parent containing only the selected columns (without copying).</p><p><code>DataFrameColumns</code> supports most of the <code>AbstractVector</code> API. 
The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>In particular <code>findnext</code>, <code>findprev</code>, <code>findfirst</code>, <code>findlast</code>, and <code>findall</code> functions are supported, and in <code>findnext</code> and <code>findprev</code> functions it is allowed to pass an integer, string, or <code>Symbol</code> as a reference index.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
 4×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -3748,7 +3748,7 @@
 julia&gt; sum.(eachcol(df))
 2-element Vector{Int64}:
  10
- 50</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L191-L238">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.eachrow" href="#Base.eachrow"><code>Base.eachrow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">eachrow(df::AbstractDataFrame)</code></pre><p>Return a <code>DataFrameRows</code> that iterates a data frame row by row, with each row represented as a <code>DataFrameRow</code>.</p><p>Because <code>DataFrameRow</code>s have an <code>eltype</code> of <code>Any</code>, use <code>copy(dfr::DataFrameRow)</code> to obtain a named tuple, which supports iteration and property access like a <code>DataFrameRow</code>, but also passes information on the <code>eltypes</code> of the columns of <code>df</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
+ 50</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L191-L238">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.eachrow" href="#Base.eachrow"><code>Base.eachrow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">eachrow(df::AbstractDataFrame)</code></pre><p>Return a <code>DataFrameRows</code> that iterates a data frame row by row, with each row represented as a <code>DataFrameRow</code>.</p><p>Because <code>DataFrameRow</code>s have an <code>eltype</code> of <code>Any</code>, use <code>copy(dfr::DataFrameRow)</code> to obtain a named tuple, which supports iteration and property access like a <code>DataFrameRow</code>, but also passes information on the <code>eltypes</code> of the columns of <code>df</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(x=1:4, y=11:14)
 4×2 DataFrame
  Row │ x      y
      │ Int64  Int64
@@ -3781,7 +3781,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │    14      4
-   2 │    13      3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L27-L74">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.values" href="#Base.values"><code>Base.values</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">values(dfc::DataFrameColumns)</code></pre><p>Get a vector of columns from <code>dfc</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L297-L301">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pairs" href="#Base.pairs"><code>Base.pairs</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pairs(dfc::DataFrameColumns)</code></pre><p>Return an iterator of pairs associating the name of each column of <code>dfc</code> with the corresponding column vector, i.e. 
<code>name =&gt; col</code> where <code>name</code> is the column name of the column <code>col</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L304-L310">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.Iterators.partition" href="#Base.Iterators.partition"><code>Base.Iterators.partition</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">Iterators.partition(df::AbstractDataFrame, n::Integer)</code></pre><p>Iterate over <code>df</code> data frame <code>n</code> rows at a time, returning each block as a <code>SubDataFrame</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; collect(Iterators.partition(DataFrame(x=1:5), 2))
+   2 │    13      3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L27-L74">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.values" href="#Base.values"><code>Base.values</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">values(dfc::DataFrameColumns)</code></pre><p>Get a vector of columns from <code>dfc</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L297-L301">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.pairs" href="#Base.pairs"><code>Base.pairs</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">pairs(dfc::DataFrameColumns)</code></pre><p>Return an iterator of pairs associating the name of each column of <code>dfc</code> with the corresponding column vector, i.e. 
<code>name =&gt; col</code> where <code>name</code> is the column name of the column <code>col</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L304-L310">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.Iterators.partition" href="#Base.Iterators.partition"><code>Base.Iterators.partition</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">Iterators.partition(df::AbstractDataFrame, n::Integer)</code></pre><p>Iterate over <code>df</code> data frame <code>n</code> rows at a time, returning each block as a <code>SubDataFrame</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; collect(Iterators.partition(DataFrame(x=1:5), 2))
 3-element Vector{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}:
  2×1 SubDataFrame
  Row │ x
@@ -3799,7 +3799,7 @@
  Row │ x
      │ Int64
 ─────┼───────
-   1 │     5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L2936-L2965">source</a></section><section><div><pre><code class="language-julia hljs">Iterators.partition(dfr::DataFrameRows, n::Integer)</code></pre><p>Iterate over <code>DataFrameRows</code> <code>dfr</code> <code>n</code> rows at a time, returning each block as a <code>DataFrameRows</code> over a view of rows of parent of <code>dfr</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))
+   1 │     5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L2936-L2965">source</a></section><section><div><pre><code class="language-julia hljs">Iterators.partition(dfr::DataFrameRows, n::Integer)</code></pre><p>Iterate over <code>DataFrameRows</code> <code>dfr</code> <code>n</code> rows at a time, returning each block as a <code>DataFrameRows</code> over a view of rows of parent of <code>dfr</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))
 3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:
  2×1 DataFrameRows
  Row │ x
@@ -3817,9 +3817,9 @@
  Row │ x
      │ Int64
 ─────┼───────
-   1 │     5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L99-L128">source</a></section></article><h2 id="Equality"><a class="docs-heading-anchor" href="#Equality">Equality</a><a id="Equality-1"></a><a class="docs-heading-anchor-permalink" href="#Equality" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.isapprox" href="#Base.isapprox"><code>Base.isapprox</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isapprox(df1::AbstractDataFrame, df2::AbstractDataFrame;
+   1 │     5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L99-L128">source</a></section></article><h2 id="Equality"><a class="docs-heading-anchor" href="#Equality">Equality</a><a id="Equality-1"></a><a class="docs-heading-anchor-permalink" href="#Equality" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="Base.isapprox" href="#Base.isapprox"><code>Base.isapprox</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isapprox(df1::AbstractDataFrame, df2::AbstractDataFrame;
          rtol::Real=atol&gt;0 ? 0 : √eps, atol::Real=0,
-         nans::Bool=false, norm::Function=norm)</code></pre><p>Inexact equality comparison. <code>df1</code> and <code>df2</code> must have the same size and column names. Return  <code>true</code> if <code>isapprox</code> with given keyword arguments applied to all pairs of columns stored in <code>df1</code> and <code>df2</code> returns <code>true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L514-L522">source</a></section></article><h2 id="Metadata"><a class="docs-heading-anchor" href="#Metadata">Metadata</a><a id="Metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Metadata" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadata" href="#DataAPI.metadata"><code>DataAPI.metadata</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadata(df::AbstractDataFrame, key::AbstractString, [default]; style::Bool=false)
+         nans::Bool=false, norm::Function=norm)</code></pre><p>Inexact equality comparison. <code>df1</code> and <code>df2</code> must have the same size and column names. Return  <code>true</code> if <code>isapprox</code> with given keyword arguments applied to all pairs of columns stored in <code>df1</code> and <code>df2</code> returns <code>true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L514-L522">source</a></section></article><h2 id="Metadata"><a class="docs-heading-anchor" href="#Metadata">Metadata</a><a id="Metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Metadata" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadata" href="#DataAPI.metadata"><code>DataAPI.metadata</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadata(df::AbstractDataFrame, key::AbstractString, [default]; style::Bool=false)
 metadata(dfr::DataFrameRow, key::AbstractString, [default]; style::Bool=false)
 metadata(dfc::DataFrameColumns, key::AbstractString, [default]; style::Bool=false)
 metadata(dfr::DataFrameRows, key::AbstractString, [default]; style::Bool=false)</code></pre><p>Return table-level metadata value associated with <code>df</code> for key <code>key</code>. If <code>style=true</code> return a tuple of metadata value and metadata style.</p><p><code>SubDataFrame</code> and <code>DataFrameRow</code> expose only <code>:note</code>-style metadata of their parent.</p><p>If <code>default</code> is passed then return it if <code>key</code> does not exist; if <code>style=true</code> return <code>(default, :default)</code>.</p><p>See also: <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3842,7 +3842,7 @@
 julia&gt; deletemetadata!(df, &quot;name&quot;);
 
 julia&gt; metadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L79-L101">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadatakeys" href="#DataAPI.metadatakeys"><code>DataAPI.metadatakeys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadatakeys(df::AbstractDataFrame)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L79-L101">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadatakeys" href="#DataAPI.metadatakeys"><code>DataAPI.metadatakeys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadatakeys(df::AbstractDataFrame)
 metadatakeys(dfr::DataFrameRow)
 metadatakeys(dfc::DataFrameColumns)
 metadatakeys(dfr::DataFrameRows)</code></pre><p>Return an iterator of table-level metadata keys which are set in the object.</p><p>Values can be accessed using <a href="#DataAPI.metadata"><code>metadata(df, key)</code></a>.</p><p><code>SubDataFrame</code> and <code>DataFrameRow</code> expose only <code>:note</code>-style metadata keys of their parent.</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3865,7 +3865,7 @@
 julia&gt; deletemetadata!(df, &quot;name&quot;);
 
 julia&gt; metadatakeys(df)
-()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L131-L150">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadata!" href="#DataAPI.metadata!"><code>DataAPI.metadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadata!(df::AbstractDataFrame, key::AbstractString, value; style::Symbol=:default)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L131-L150">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.metadata!" href="#DataAPI.metadata!"><code>DataAPI.metadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">metadata!(df::AbstractDataFrame, key::AbstractString, value; style::Symbol=:default)
 metadata!(dfr::DataFrameRow, key::AbstractString, value; style::Symbol=:default)
 metadata!(dfc::DataFrameColumns, key::AbstractString, value; style::Symbol=:default)
 metadata!(dfr::DataFrameRows, key::AbstractString, value; style::Symbol=:default)</code></pre><p>Set table-level metadata for object <code>df</code> for key <code>key</code> to have value <code>value</code> and style <code>style</code> (<code>:default</code> by default) and return <code>df</code>.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code>-style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3888,7 +3888,7 @@
 julia&gt; deletemetadata!(df, &quot;name&quot;);
 
 julia&gt; metadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L170-L190">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.deletemetadata!" href="#DataAPI.deletemetadata!"><code>DataAPI.deletemetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deletemetadata!(df::AbstractDataFrame, key::AbstractString)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L170-L190">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.deletemetadata!" href="#DataAPI.deletemetadata!"><code>DataAPI.deletemetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deletemetadata!(df::AbstractDataFrame, key::AbstractString)
 deletemetadata!(dfr::DataFrameRow, key::AbstractString)
 deletemetadata!(dfc::DataFrameColumns, key::AbstractString)
 deletemetadata!(dfr::DataFrameRows, key::AbstractString)</code></pre><p>Delete table-level metadata from object <code>df</code> for key <code>key</code> and return <code>df</code>. If key does not exist, return <code>df</code> without modification.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code>-style metadata from their parent can be deleted (as other styles are not propagated to views).</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3911,7 +3911,7 @@
 julia&gt; deletemetadata!(df, &quot;name&quot;);
 
 julia&gt; metadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L230-L249">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.emptymetadata!" href="#DataAPI.emptymetadata!"><code>DataAPI.emptymetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">emptymetadata!(df::AbstractDataFrame)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L230-L249">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.emptymetadata!" href="#DataAPI.emptymetadata!"><code>DataAPI.emptymetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">emptymetadata!(df::AbstractDataFrame)
 emptymetadata!(dfr::DataFrameRow)
 emptymetadata!(dfc::DataFrameColumns)
 emptymetadata!(dfr::DataFrameRows)</code></pre><p>Delete all table-level metadata from object <code>df</code>.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code>-style metadata from their parent can be deleted (as other styles are not propagated to views).</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3934,7 +3934,7 @@
 julia&gt; emptymetadata!(df);
 
 julia&gt; metadatakeys(df)
-()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L277-L318">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadata" href="#DataAPI.colmetadata"><code>DataAPI.colmetadata</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadata(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L277-L318">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadata" href="#DataAPI.colmetadata"><code>DataAPI.colmetadata</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadata(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)
 colmetadata(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)
 colmetadata(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)
 colmetadata(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)</code></pre><p>Return column-level metadata value associated with <code>df</code> for column <code>col</code> and key <code>key</code>.</p><p><code>SubDataFrame</code> and <code>DataFrameRow</code> expose only <code>:note</code>-style metadata of their parent.</p><p>If <code>default</code> is passed then return it if <code>key</code> does not exist for column <code>col</code>; if <code>style=true</code> return <code>(default, :default)</code>. If <code>col</code> does not exist in <code>df</code> always throw an error.</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3961,7 +3961,7 @@
 julia&gt; deletecolmetadata!(df, :a, &quot;name&quot;);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L338-L359">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadatakeys" href="#DataAPI.colmetadatakeys"><code>DataAPI.colmetadatakeys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadatakeys(df::AbstractDataFrame, [col::ColumnIndex])
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L338-L359">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadatakeys" href="#DataAPI.colmetadatakeys"><code>DataAPI.colmetadatakeys</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadatakeys(df::AbstractDataFrame, [col::ColumnIndex])
 colmetadatakeys(dfr::DataFrameRow, [col::ColumnIndex])
 colmetadatakeys(dfc::DataFrameColumns, [col::ColumnIndex])
 colmetadatakeys(dfr::DataFrameRows, [col::ColumnIndex])</code></pre><p>If <code>col</code> is passed return an iterator of column-level metadata keys which are set for column <code>col</code>. If <code>col</code> is not passed return an iterator of <code>col =&gt; colmetadatakeys(x, col)</code> pairs for all columns that have metadata, where <code>col</code> are <code>Symbol</code>.</p><p>Values can be accessed using <a href="#DataAPI.colmetadata"><code>colmetadata(df, col, key)</code></a>.</p><p><code>SubDataFrame</code> and <code>DataFrameRow</code> expose only <code>:note</code>-style metadata of their parent.</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -3988,7 +3988,7 @@
 julia&gt; deletecolmetadata!(df, :a, &quot;name&quot;);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L403-L425">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadata!" href="#DataAPI.colmetadata!"><code>DataAPI.colmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L403-L425">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.colmetadata!" href="#DataAPI.colmetadata!"><code>DataAPI.colmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">colmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)
 colmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)
 colmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)
 colmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)</code></pre><p>Set column-level metadata in <code>df</code> for column <code>col</code> and key <code>key</code> to have value <code>value</code> and style <code>style</code> (<code>:default</code> by default) and return <code>df</code>.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code> style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -4015,7 +4015,7 @@
 julia&gt; deletecolmetadata!(df, :a, &quot;name&quot;);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre><p>```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L467-L487">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.deletecolmetadata!" href="#DataAPI.deletecolmetadata!"><code>DataAPI.deletecolmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deletecolmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString)
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L467-L487">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.deletecolmetadata!" href="#DataAPI.deletecolmetadata!"><code>DataAPI.deletecolmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">deletecolmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString)
 deletecolmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString)
 deletecolmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString)
 deletecolmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString)</code></pre><p>Delete column-level metadata set in <code>df</code> for column <code>col</code> and key <code>key</code> and return <code>df</code>.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code>-style metadata from their parent can be deleted (as other styles are not propagated to views).</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.emptycolmetadata!"><code>emptycolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -4042,7 +4042,7 @@
 julia&gt; deletecolmetadata!(df, :a, &quot;name&quot;);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L534-L551">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.emptycolmetadata!" href="#DataAPI.emptycolmetadata!"><code>DataAPI.emptycolmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">emptycolmetadata!(df::AbstractDataFrame, [col::ColumnIndex])
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L534-L551">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataAPI.emptycolmetadata!" href="#DataAPI.emptycolmetadata!"><code>DataAPI.emptycolmetadata!</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">emptycolmetadata!(df::AbstractDataFrame, [col::ColumnIndex])
 emptycolmetadata!(dfr::DataFrameRow, [col::ColumnIndex])
 emptycolmetadata!(dfc::DataFrameColumns, [col::ColumnIndex])
 emptycolmetadata!(dfr::DataFrameRows, [col::ColumnIndex])</code></pre><p>Delete column-level metadata set in <code>df</code> for column <code>col</code> and key <code>key</code> and return <code>df</code>.</p><p>For <code>SubDataFrame</code> and <code>DataFrameRow</code> only <code>:note</code>-style metadata from their parent can be deleted (as other styles are not propagated to views).</p><p>See also: <a href="#DataAPI.metadata"><code>metadata</code></a>, <a href="#DataAPI.metadatakeys"><code>metadatakeys</code></a>, <a href="#DataAPI.metadata!"><code>metadata!</code></a>, <a href="#DataAPI.deletemetadata!"><code>deletemetadata!</code></a>, <a href="#DataAPI.emptymetadata!"><code>emptymetadata!</code></a>, <a href="#DataAPI.colmetadata"><code>colmetadata</code></a>, <a href="#DataAPI.colmetadatakeys"><code>colmetadatakeys</code></a>, <a href="#DataAPI.colmetadata!"><code>colmetadata!</code></a>, <a href="#DataAPI.deletecolmetadata!"><code>deletecolmetadata!</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=1, b=2);
@@ -4066,4 +4066,4 @@
 julia&gt; emptycolmetadata!(df, :a);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/metadata.jl#L586-L628">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../types/">« Types</a><a class="docs-footer-nextpage" href="../indexing/">Indexing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+()</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/metadata.jl#L586-L628">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../types/">« Types</a><a class="docs-footer-nextpage" href="../indexing/">Indexing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/lib/indexing/index.html b/dev/lib/indexing/index.html
index 6fce6fbe2..cfdc2a265 100644
--- a/dev/lib/indexing/index.html
+++ b/dev/lib/indexing/index.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Indexing · DataFrames.jl</title><meta name="title" content="Indexing · DataFrames.jl"/><meta property="og:title" content="Indexing · DataFrames.jl"/><meta property="twitter:title" content="Indexing · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" 
rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" 
href="../../man/joins/">Joins</a></li><li><a class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../types/">Types</a></li><li><a class="tocitem" href="../functions/">Functions</a></li><li class="is-active"><a class="tocitem" href>Indexing</a><ul class="internal"><li><a class="tocitem" href="#General-rules"><span>General rules</span></a></li><li><a class="tocitem" href="#getindex-and-view"><span><code>getindex</code> and <code>view</code></span></a></li><li><a class="tocitem" href="#setindex!"><span><code>setindex!</code></span></a></li><li><a class="tocitem" href="#Broadcasting"><span>Broadcasting</span></a></li><li><a class="tocitem" href="#Indexing-GroupedDataFrames"><span>Indexing <code>GroupedDataFrame</code>s</span></a></li><li class="toplevel"><a class="tocitem" href="#Common-API-for-types-defined-in-DataFrames.jl"><span>Common API for types defined in DataFrames.jl</span></a></li></ul></li><li><a class="tocitem" href="../metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid 
fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Indexing</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Indexing</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/indexing.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Indexing"><a class="docs-heading-anchor" href="#Indexing">Indexing</a><a id="Indexing-1"></a><a class="docs-heading-anchor-permalink" href="#Indexing" title="Permalink"></a></h1><ul></ul><h2 id="General-rules"><a class="docs-heading-anchor" href="#General-rules">General rules</a><a id="General-rules-1"></a><a class="docs-heading-anchor-permalink" href="#General-rules" title="Permalink"></a></h2><p>The following rules explain target functionality of how <code>getindex</code>, <code>setindex!</code>, <code>view</code>, and broadcasting are intended to work with <code>DataFrame</code>, <code>SubDataFrame</code> and <code>DataFrameRow</code> objects.</p><p>The following values are a valid column index:</p><ul><li>a scalar, later denoted as <code>col</code>:<ul><li>a <code>Symbol</code>;</li><li>an <code>AbstractString</code>;</li><li>an <code>Integer</code> that is not <code>Bool</code>;</li></ul></li><li>a 
vector, later denoted as <code>cols</code>:<ul><li>a vector of <code>Symbol</code> (does not have to be a subtype of <code>AbstractVector{Symbol}</code>);</li><li>a vector of <code>AbstractString</code> (does not have to be a subtype of <code>AbstractVector{&lt;:AbstractString}</code>);</li><li>a vector of <code>Integer</code> that are not <code>Bool</code> (does not have to be a subtype of <code>AbstractVector{&lt;:Integer}</code>);</li><li>a vector of <code>Bool</code> (must be a subtype of <code>AbstractVector{Bool}</code>);</li><li>a <a href="https://docs.julialang.org/en/v1/manual/strings/#Regular-Expressions">regular expression</a> (will be expanded to a vector of matching column names);</li><li>a <code>Not</code> expression (see <a href="https://github.com/JuliaData/InvertedIndices.jl">InvertedIndices.jl</a>); <code>Not(idx)</code> selects all indices not in the passed <code>idx</code>; when passed as column selector <code>Not(idx...)</code> is equivalent to <code>Not(Cols(idx...))</code>.</li><li>a <code>Cols</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>Cols(idxs...)</code> selects the union of the selections in <code>idxs</code>; in particular <code>Cols()</code> selects no columns and <code>Cols(:)</code> selects all columns; a special rule is <code>Cols(predicate)</code>, where <code>predicate</code> is a predicate function; in this case the columns whose names passed to <code>predicate</code> as strings return <code>true</code> are selected.</li><li>a <code>Between</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>Between(first, last)</code> selects the columns between <code>first</code> and <code>last</code> inclusively;</li><li>an <code>All</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>All()</code> selects all columns, equivalent to <code>:</code>;</li><li>a literal colon <code>:</code> (selects all 
columns).</li></ul></li></ul><p>The following values are a valid row index:</p><ul><li>a scalar, later denoted as <code>row</code>:<ul><li>an <code>Integer</code> that is not <code>Bool</code>;</li></ul></li><li>a vector, later denoted as <code>rows</code>:<ul><li>a vector of <code>Integer</code> that are not <code>Bool</code> (does not have to be a subtype of <code>AbstractVector{&lt;:Integer}</code>);</li><li>a vector of <code>Bool</code> (must be a subtype of <code>AbstractVector{Bool}</code>);</li><li>a <code>Not</code> expression (see <a href="https://github.com/JuliaData/InvertedIndices.jl">InvertedIndices.jl</a>);</li><li>a literal colon <code>:</code> (selects all rows with copying);</li><li>a literal exclamation mark <code>!</code> (selects all rows without copying).</li></ul></li></ul><p>Additionally it is allowed to index into an <code>AbstractDataFrame</code> using a two-dimensional <code>CartesianIndex</code>.</p><p>In the descriptions below <code>df</code> represents a <code>DataFrame</code>, <code>sdf</code> is a <code>SubDataFrame</code> and <code>dfr</code> is a <code>DataFrameRow</code>.</p><p><code>:</code> always expands to <code>axes(df, 1)</code> or <code>axes(sdf, 1)</code>.</p><p><code>df.col</code> works like <code>df[!, col]</code> and <code>sdf.col</code> works like <code>sdf[!, col]</code> in all cases. 
An exception is that under Julia 1.6 or earlier <code>df.col .= v</code> and <code>sdf.col .= v</code> performs in-place broadcasting if <code>col</code> is present in <code>df</code>/<code>sdf</code> and is a valid identifier (this inconsistency is not present under Julia 1.7 and later).</p><h2 id="getindex-and-view"><a class="docs-heading-anchor" href="#getindex-and-view"><code>getindex</code> and <code>view</code></a><a id="getindex-and-view-1"></a><a class="docs-heading-anchor-permalink" href="#getindex-and-view" title="Permalink"></a></h2><p>The following list specifies the behavior of <code>getindex</code> and <code>view</code> operations depending on argument types.</p><p>In particular a description explicitly mentions that the data is <em>copied</em> or <em>reused without copying</em>.</p><p>For performance reasons, accessing, via <code>getindex</code> or <code>view</code>, a single <code>row</code> and multiple <code>cols</code> of a <code>DataFrame</code>, a <code>SubDataFrame</code> or a <code>DataFrameRow</code> always returns a <code>DataFrameRow</code> (which is a view type).</p><p><code>getindex</code> on <code>DataFrame</code>:</p><ul><li><code>df[row, col]</code> -&gt; the value contained in row <code>row</code> of column <code>col</code>, the same as <code>df[!, col][row]</code>;</li><li><code>df[CartesianIndex(row, col)]</code> -&gt; the same as <code>df[row, col]</code>;</li><li><code>df[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>df</code>;</li><li><code>df[rows, col]</code> -&gt; a copy of the vector <code>df[!, col]</code> with only the entries                    corresponding to <code>rows</code> selected, the same as <code>df[!, col][rows]</code>;</li><li><code>df[rows, cols]</code> -&gt; a <code>DataFrame</code> containing copies of columns <code>cols</code> with                     only the entries corresponding to <code>rows</code> selected;</li><li><code>df[!, col]</code> -&gt; the vector contained in column 
<code>col</code> returned without copying;                 the same as <code>df.col</code> if <code>col</code> is a valid identifier.</li><li><code>df[!, cols]</code> -&gt; create a new <code>DataFrame</code> with columns <code>cols</code> without copying                  of columns; the same as <code>select(df, cols, copycols=false)</code>.</li></ul><p><code>view</code> on <code>DataFrame</code>:</p><ul><li><code>@view df[row, col]</code> -&gt; a <code>0</code>-dimensional view into <code>df[!, col]</code> in row <code>row</code>,                         the same as <code>view(df[!, col], row)</code>;</li><li><code>@view df[CartesianIndex(row, col)]</code> -&gt; the same as <code>@view df[row, col]</code>;</li><li><code>@view df[row, cols]</code> -&gt; the same as <code>df[row, cols]</code>;</li><li><code>@view df[rows, col]</code> -&gt; a view into <code>df[!, col]</code> with <code>rows</code> selected, the                          same as <code>view(df[!, col], rows)</code>;</li><li><code>@view df[rows, cols]</code> -&gt; a <code>SubDataFrame</code> with <code>rows</code> selected with parent <code>df</code>;</li><li><code>@view df[!, col]</code> -&gt; a view into <code>df[!, col]</code>  with all rows.</li><li><code>@view df[!, cols]</code> -&gt; the same as <code>@view df[:, cols]</code>.</li></ul><p><code>getindex</code> on <code>SubDataFrame</code>:</p><ul><li><code>sdf[row, col]</code> -&gt; a value contained in row <code>row</code> of column <code>col</code>;</li><li><code>sdf[CartesianIndex(row, col)]</code> -&gt; the same as <code>sdf[row, col]</code>;</li><li><code>sdf[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(sdf)</code>;</li><li><code>sdf[rows, col]</code> -&gt; a copy of <code>sdf[!, col]</code> with only rows <code>rows</code> selected,                     the same as <code>sdf[!, col][rows]</code>;</li><li><code>sdf[rows, cols]</code> -&gt; a <code>DataFrame</code> containing columns <code>cols</code> and 
<code>sdf[rows, col]</code> as a vector for each <code>col</code> in <code>cols</code>;</li><li><code>sdf[!, col]</code> -&gt; a view of entries corresponding to <code>sdf</code> in the vector                  <code>parent(sdf)[!, col]</code>; the same as <code>sdf.col</code> if <code>col</code> is a                  valid identifier.</li><li><code>sdf[!, cols]</code> -&gt; create a new <code>SubDataFrame</code> with columns <code>cols</code>, the same                   parent as <code>sdf</code>, and the same rows selected; the same as                   <code>select(sdf, cols, copycols=false)</code>.</li></ul><p><code>view</code> on <code>SubDataFrame</code>:</p><ul><li><code>@view sdf[row, col]</code> -&gt; a <code>0</code>-dimensional view into <code>df[!, col]</code> at row                          <code>row</code>, the same as <code>view(sdf[!, col], row)</code>;</li><li><code>@view sdf[CartesianIndex(row, col)]</code> -&gt; the same as <code>@view sdf[row, col]</code>;</li><li><code>@view sdf[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(sdf)</code>;</li><li><code>@view sdf[rows, col]</code> -&gt; a view into <code>sdf[!, col]</code> vector with <code>rows</code>                           selected, the same as <code>view(sdf[!, col], rows)</code>;</li><li><code>@view sdf[rows, cols]</code> -&gt; a <code>SubDataFrame</code> with parent <code>parent(sdf)</code>;</li><li><code>@view sdf[!, col]</code> -&gt; a view into <code>sdf[!, col]</code> vector with all rows.</li><li><code>@view sdf[!, cols]</code> -&gt; the same as <code>@view sdf[:, cols]</code>.</li></ul><p><code>getindex</code> on <code>DataFrameRow</code>:</p><ul><li><code>dfr[col]</code> -&gt; the value contained in column <code>col</code> of <code>dfr</code>; the same as               <code>dfr.col</code> if <code>col</code> is a valid identifier;</li><li><code>dfr[cols]</code> -&gt; a <code>DataFrameRow</code> with parent 
<code>parent(dfr)</code>;</li></ul><p><code>view</code> on <code>DataFrameRow</code>:</p><ul><li><code>@view dfr[col]</code> -&gt; a <code>0</code>-dimensional view into                     <code>parent(dfr)[DataFrames.row(dfr), col]</code>;</li><li><code>@view dfr[cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(dfr)</code>;</li></ul><p>Note that views created with columns selector set to <code>:</code> change their columns&#39; count if columns are added/removed/renamed in the parent; if column selector is other than <code>:</code> then view points to selected columns by their number at the moment of creation of the view.</p><h2 id="setindex!"><a class="docs-heading-anchor" href="#setindex!"><code>setindex!</code></a><a id="setindex!-1"></a><a class="docs-heading-anchor-permalink" href="#setindex!" title="Permalink"></a></h2><p>The following list specifies the behavior of <code>setindex!</code> operations depending on argument types.</p><p>In particular a description explicitly mentions if the assignment is <em>in-place</em>.</p><p>Note that if a <code>setindex!</code> operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).</p><p><code>setindex!</code> on <code>DataFrame</code>:</p><ul><li><code>df[row, col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;</li><li><code>df[CartesianIndex(row, col)] = v</code> -&gt; the same as <code>df[row, col] = v</code>;</li><li><code>df[row, cols] = v</code> -&gt; set row <code>row</code> of columns <code>cols</code> in-place; the same as                        <code>dfr = df[row, cols]; dfr[:] = v</code>;</li><li><code>df[rows, col] = v</code> -&gt; set rows <code>rows</code> of column <code>col</code> in-place; <code>v</code> must be                        an <code>AbstractVector</code>; if <code>rows</code> is <code>:</code> and <code>col</code> is 
a                        <code>Symbol</code> or <code>AbstractString</code> that is not present in                        <code>df</code> then a new column in <code>df</code> is created and holds a                        <code>copy</code> of <code>v</code>; equivalent to <code>df.col = copy(v)</code> if                        <code>col</code> is a valid identifier;</li><li><code>df[rows, cols] = v</code> -&gt; set rows <code>rows</code> of columns <code>cols</code> in-place; <code>v</code> must                         be an <code>AbstractMatrix</code> or an <code>AbstractDataFrame</code> (in                         this case column names must match);</li><li><code>df[!, col] = v</code> -&gt; replaces <code>col</code> with <code>v</code> without copying (with the                     exception that if <code>v</code> is an <code>AbstractRange</code> it gets                     converted to a <code>Vector</code>); also if <code>col</code> is a <code>Symbol</code> or                     <code>AbstractString</code> that is not present in <code>df</code> then a new                     column in <code>df</code> is created and holds <code>v</code>; equivalent to                     <code>df.col = v</code> if <code>col</code> is a valid identifier; this is                     allowed if <code>ncol(df) == 0 || length(v) == nrow(df)</code>;</li><li><code>df[!, cols] = v</code> -&gt; replaces existing columns <code>cols</code> in data frame <code>df</code> with                      copying; <code>v</code> must be an <code>AbstractMatrix</code> or an                      <code>AbstractDataFrame</code> (in the latter case column names must                      match);</li></ul><p><code>setindex!</code> on <code>SubDataFrame</code>:</p><ul><li><code>sdf[row, col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;</li><li><code>sdf[CartesianIndex(row, col)] = v</code> -&gt; the same as <code>sdf[row, col] = 
v</code>;</li><li><code>sdf[row, cols] = v</code> -&gt; the same as <code>dfr = df[row, cols]; dfr[:] = v</code> in-place;</li><li><code>sdf[rows, col] = v</code> -&gt; set rows <code>rows</code> of column <code>col</code>, in-place; <code>v</code> must be                         an abstract vector;</li><li><code>sdf[rows, cols] = v</code> -&gt; set rows <code>rows</code> of columns <code>cols</code> in-place; <code>v</code> can                          be an <code>AbstractMatrix</code> or <code>v</code> can be                          <code>AbstractDataFrame</code> in which case column names must                          match;</li><li><code>sdf[!, col] = v</code> -&gt; replaces <code>col</code> with <code>v</code> with copying; if <code>col</code> is present                      in <code>sdf</code> then filtered-out rows in newly created vector                      are filled with values already present in that column and                      <code>promote_type</code> is used to determine the <code>eltype</code> of the                      new column; if <code>col</code> is not present in <code>sdf</code> then the                      operation is only allowed if <code>sdf</code> was created with <code>:</code>                      as column selector, in which case filtered-out rows are                      filled with <code>missing</code>; equivalent to <code>sdf.col = v</code> if                      <code>col</code> is a valid identifier; operation is allowed if                      <code>length(v) == nrow(sdf)</code>;</li><li><code>sdf[!, cols] = v</code> -&gt; replaces existing columns <code>cols</code> in data frame <code>sdf</code>                       with copying; <code>v</code> must be an <code>AbstractMatrix</code> or an                       <code>AbstractDataFrame</code> (in the latter case column names                       must match); filtered-out rows in newly created vectors                       are filled with values already present in 
respective                       columns and <code>promote_type</code> is used to determine the                       <code>eltype</code> of the new columns;</li></ul><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rules above mean that <code>sdf[:, col] = v</code> is an in-place operation if <code>col</code> is present in <code>sdf</code>, therefore it will be fast in general. On the other hand using <code>sdf[!, col] = v</code> or <code>sdf.col = v</code> will always allocate a new vector, which is more expensive computationally.</p></div></div><p><code>setindex!</code> on <code>DataFrameRow</code>:</p><ul><li><code>dfr[col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;                   equivalent to <code>dfr.col = v</code> if <code>col</code> is a valid identifier;</li><li><code>dfr[cols] = v</code> -&gt; set values of entries in columns <code>cols</code> in <code>dfr</code> by                    elements of <code>v</code> in place; <code>v</code> can be: 1) a <code>Tuple</code> or an                    <code>AbstractArray</code>, in which cases it must have a number of                    elements equal to <code>length(dfr)</code>, 2) an <code>AbstractDict</code>, in                    which case column names must match, 3) a <code>NamedTuple</code> or                    <code>DataFrameRow</code>, in which case column names and order must                    match;</li></ul><h2 id="Broadcasting"><a class="docs-heading-anchor" href="#Broadcasting">Broadcasting</a><a id="Broadcasting-1"></a><a class="docs-heading-anchor-permalink" href="#Broadcasting" title="Permalink"></a></h2><p>The following broadcasting rules apply to <code>AbstractDataFrame</code> objects:</p><ul><li><code>AbstractDataFrame</code> behaves in broadcasting like a two-dimensional collection compatible with matrices.</li><li>If an <code>AbstractDataFrame</code> takes part in 
broadcasting then a <code>DataFrame</code> is always produced as a result. In this case the requested broadcasting operation produces an object with exactly two dimensions. An exception is when an <code>AbstractDataFrame</code> is used only as a source of broadcast assignment into an object of dimensionality higher than two.</li><li>If multiple <code>AbstractDataFrame</code> objects take part in broadcasting then they have to have identical column names.</li></ul><p>Note that if broadcasting assignment operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).</p><p>Broadcasting <code>DataFrameRow</code> is currently not allowed (which is consistent with <code>NamedTuple</code>).</p><p>It is possible to assign a value to <code>AbstractDataFrame</code> and <code>DataFrameRow</code> objects using the <code>.=</code> operator. In such an operation <code>AbstractDataFrame</code> is considered as two-dimensional and <code>DataFrameRow</code> as single-dimensional.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rule above means that, similar to single-dimensional objects in Base (e.g. 
vectors), <code>DataFrameRow</code> is considered to be column-oriented.</p></div></div><p>Additional rules:</p><ul><li>in the <code>df[CartesianIndex(row, col)] .= v</code>, <code>df[row, col] .= v</code> syntaxes <code>v</code> is broadcasted into the contents of <code>df[row, col]</code> (this is consistent with Julia Base);</li><li>in the <code>df[row, cols] .= v</code> syntaxes the assignment to <code>df</code> is performed in-place;</li><li>in the <code>df[rows, col] .= v</code> and <code>df[rows, cols] .= v</code> syntaxes the assignment to <code>df</code> is performed in-place; if <code>rows</code> is <code>:</code> and <code>col</code> is <code>Symbol</code> or <code>AbstractString</code> and it is missing from <code>df</code> then a new column is allocated and added; the length of the column is always the value of <code>nrow(df)</code> before the assignment takes place;</li><li>in the <code>df[!, col] .= v</code> syntax column <code>col</code> is replaced by a freshly allocated vector; if <code>col</code> is <code>Symbol</code> or <code>AbstractString</code> and it is missing from <code>df</code> then a new column is allocated and added; the length of the column is always the value of <code>nrow(df)</code> before the assignment takes place;</li><li>the <code>df[!, cols] .= v</code> syntax replaces existing columns <code>cols</code> in data frame <code>df</code> with freshly allocated vectors;</li><li><code>df.col .= v</code> syntax currently performs in-place assignment to an existing vector <code>df.col</code>; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if <code>:col</code> is not present in <code>df</code> then a new column will be created in <code>df</code>.</li><li>in the <code>sdf[CartesianIndex(row, col)] .= v</code>, <code>sdf[row, col] .= v</code> and <code>sdf[row, cols] .= v</code> syntaxes the assignment to <code>sdf</code> is performed in-place;</li><li>in the <code>sdf[rows, col] .= v</code> and <code>sdf[rows, cols] .= v</code> syntaxes the assignment to <code>sdf</code> is performed in-place; if <code>rows</code> is <code>:</code> and <code>col</code> is a <code>Symbol</code> or <code>AbstractString</code> referring to a column missing from <code>sdf</code> and <code>sdf</code> was created with <code>:</code> as column selector then a new column is allocated and added; the filtered-out rows are filled with <code>missing</code>;</li><li>in the <code>sdf[!, col] .= v</code> syntax column <code>col</code> is replaced by a freshly allocated vector; the filtered-out rows are filled with values already present in <code>col</code>; if <code>col</code> is a <code>Symbol</code> or <code>AbstractString</code> referring to a column missing from <code>sdf</code> and <code>sdf</code> was created with <code>:</code> as column selector then a new column is allocated and added; in this case the filtered-out rows are filled with <code>missing</code>;</li><li>the <code>sdf[!, cols] .= v</code> syntax replaces existing columns <code>cols</code> in data frame <code>sdf</code> with freshly allocated vectors; the filtered-out rows are filled with values already present in <code>cols</code>;</li><li><code>sdf.col .= v</code> syntax currently performs in-place assignment to an existing vector <code>sdf.col</code>; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if <code>:col</code> is not present in <code>sdf</code> then a new column will be created in <code>sdf</code> if <code>sdf</code> was created with <code>:</code> as a column selector.</li><li><code>dfr.col .= v</code> syntax is allowed and performs in-place assignment to a value extracted by <code>dfr.col</code>.</li></ul><p>Note that <code>sdf[!, col] .= v</code> and <code>sdf[!, cols] .= v</code> syntaxes are not allowed as <code>sdf</code> can be only modified in-place.</p><p>If column indexing using <code>Symbol</code> or <code>AbstractString</code> names in <code>cols</code> is performed, the order of columns in the operation is specified by the order of names.</p><h2 id="Indexing-GroupedDataFrames"><a class="docs-heading-anchor" href="#Indexing-GroupedDataFrames">Indexing <code>GroupedDataFrame</code>s</a><a id="Indexing-GroupedDataFrames-1"></a><a class="docs-heading-anchor-permalink" href="#Indexing-GroupedDataFrames" title="Permalink"></a></h2><p>A <a href="../types/#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a> can behave as either an <code>AbstractVector</code> or <code>AbstractDict</code> depending on the type of index used. Integers (or arrays of them) trigger vector-like indexing while <code>Tuple</code>s and <code>NamedTuple</code>s trigger dictionary-like indexing. An intermediate between the two is the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a> type returned by <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a>, which behaves similarly to a <code>NamedTuple</code> but has performance on par with integer indexing.</p><p>The elements of a <code>GroupedDataFrame</code> are <a href="../types/#DataFrames.SubDataFrame"><code>SubDataFrame</code></a>s of its parent.</p><ul><li><code>gd[i::Integer]</code> -&gt; Get the <code>i</code>th group.</li><li><code>gd[key::NamedTuple]</code> -&gt; Get the group corresponding to the given values of the grouping columns. 
The fields of the <code>NamedTuple</code> must match the grouping columns passed to <a href="../functions/#DataAPI.groupby"><code>groupby</code></a> (including order).</li><li><code>gd[key::Tuple]</code> -&gt; Same as previous, but omitting the names on <code>key</code>.</li><li><code>get(gd, key::Union{Tuple, NamedTuple}, default)</code> -&gt; Get group for key <code>key</code>, returning <code>default</code> if it does not exist.</li><li><code>gd[key::GroupKey]</code> -&gt; Get the group corresponding to the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a> <code>key</code> (one of the elements of the vector returned by <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a>). This should be nearly as fast as integer indexing.</li><li><code>gd[a::AbstractVector]</code> -&gt; Select multiple groups and return them in a new <code>GroupedDataFrame</code> object. Groups may be selected by integer position using an array of <code>Integer</code>s or <code>Bool</code>s, similar to a standard array. Alternatively the array may contain keys of any of the types supported for dictionary-like indexing (<code>GroupKey</code>, <code>Tuple</code>, or <code>NamedTuple</code>). Selected groups must be unique, and different types of indices cannot be mixed.</li><li><code>gd[n::Not]</code> -&gt; Any of the above types wrapped in <code>Not</code>. 
The result will be a  new <code>GroupedDataFrame</code> containing all groups in <code>gd</code> <em>not</em> selected by the  wrapped index.</li></ul><h1 id="Common-API-for-types-defined-in-DataFrames.jl"><a class="docs-heading-anchor" href="#Common-API-for-types-defined-in-DataFrames.jl">Common API for types defined in DataFrames.jl</a><a id="Common-API-for-types-defined-in-DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#Common-API-for-types-defined-in-DataFrames.jl" title="Permalink"></a></h1><p>This table presents return value types of calling <code>names</code>, <code>propertynames</code>, <code>keys</code>, <code>length</code> and <code>ndims</code> on types exposed to the user by DataFrames.jl:</p><table><tr><th style="text-align: right">Type</th><th style="text-align: right"><code>names</code></th><th style="text-align: right"><code>propertynames</code></th><th style="text-align: right"><code>keys</code></th><th style="text-align: right"><code>length</code></th><th style="text-align: right"><code>ndims</code></th></tr><tr><td style="text-align: right"><code>AbstractDataFrame</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right">undefined</td><td style="text-align: right">undefined</td><td style="text-align: right"><code>2</code></td></tr><tr><td style="text-align: right"><code>DataFrameRow</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>DataFrameRows</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right">vector of 
<code>Int</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>DataFrameColumns</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupedDataFrame</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right">tuple of fields</td><td style="text-align: right"><code>GroupKeys</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupKeys</code></td><td style="text-align: right">undefined</td><td style="text-align: right">tuple of fields</td><td style="text-align: right">vector of <code>Int</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupKey</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr></table><p>Additionally the above types <code>T</code> (i.e. 
<code>AbstractDataFrame</code>, <code>DataFrameRow</code>, <code>DataFrameRows</code>, <code>DataFrameColumns</code>, <code>GroupedDataFrame</code>, <code>GroupKeys</code>, <code>GroupKey</code>) have the following methods defined:</p><ul><li><code>size(::T)</code> returning a <code>Tuple</code> of <code>Int</code>.</li><li><code>size(::T, ::Integer)</code> returning an <code>Int</code>.</li><li><code>axes(::T)</code> returning a <code>Tuple</code> of <code>Int</code> vectors.</li><li><code>axes(::T, ::Integer)</code> returning an <code>Int</code> vector for a valid dimension (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>Base.OneTo(1)</code> is also returned  for a dimension higher than a valid one because they are <code>AbstractVector</code>).</li><li><code>firstindex(::T)</code> returning <code>1</code> (except <code>AbstractDataFrame</code> for which it is undefined).</li><li><code>firstindex(::T, ::Integer)</code> returning <code>1</code> for a valid dimension (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>1</code> is also returned for a  dimension higher than a valid one because they are <code>AbstractVector</code>).</li><li><code>lastindex(::T)</code> returning <code>Int</code> (except <code>AbstractDataFrame</code> for which it is undefined).</li><li><code>lastindex(::T, ::Integer)</code> returning <code>Int</code> for a valid dimension  (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>1</code> is also returned for a  dimension higher than a valid one because they are <code>AbstractVector</code>).</li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../functions/">« Functions</a><a class="docs-footer-nextpage" href="../metadata/">Metadata »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia 
Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Indexing · DataFrames.jl</title><meta name="title" content="Indexing · DataFrames.jl"/><meta property="og:title" content="Indexing · DataFrames.jl"/><meta property="twitter:title" content="Indexing · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" 
rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" 
href="../../man/joins/">Joins</a></li><li><a class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../types/">Types</a></li><li><a class="tocitem" href="../functions/">Functions</a></li><li class="is-active"><a class="tocitem" href>Indexing</a><ul class="internal"><li><a class="tocitem" href="#General-rules"><span>General rules</span></a></li><li><a class="tocitem" href="#getindex-and-view"><span><code>getindex</code> and <code>view</code></span></a></li><li><a class="tocitem" href="#setindex!"><span><code>setindex!</code></span></a></li><li><a class="tocitem" href="#Broadcasting"><span>Broadcasting</span></a></li><li><a class="tocitem" href="#Indexing-GroupedDataFrames"><span>Indexing <code>GroupedDataFrame</code>s</span></a></li><li class="toplevel"><a class="tocitem" href="#Common-API-for-types-defined-in-DataFrames.jl"><span>Common API for types defined in DataFrames.jl</span></a></li></ul></li><li><a class="tocitem" href="../metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid 
fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Indexing</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Indexing</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/indexing.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Indexing"><a class="docs-heading-anchor" href="#Indexing">Indexing</a><a id="Indexing-1"></a><a class="docs-heading-anchor-permalink" href="#Indexing" title="Permalink"></a></h1><ul></ul><h2 id="General-rules"><a class="docs-heading-anchor" href="#General-rules">General rules</a><a id="General-rules-1"></a><a class="docs-heading-anchor-permalink" href="#General-rules" title="Permalink"></a></h2><p>The following rules explain target functionality of how <code>getindex</code>, <code>setindex!</code>, <code>view</code>, and broadcasting are intended to work with <code>DataFrame</code>, <code>SubDataFrame</code> and <code>DataFrameRow</code> objects.</p><p>The following values are a valid column index:</p><ul><li>a scalar, later denoted as <code>col</code>:<ul><li>a <code>Symbol</code>;</li><li>an <code>AbstractString</code>;</li><li>an <code>Integer</code> that is not <code>Bool</code>;</li></ul></li><li>a 
vector, later denoted as <code>cols</code>:<ul><li>a vector of <code>Symbol</code> (does not have to be a subtype of <code>AbstractVector{Symbol}</code>);</li><li>a vector of <code>AbstractString</code> (does not have to be a subtype of <code>AbstractVector{&lt;:AbstractString}</code>);</li><li>a vector of <code>Integer</code> that are not <code>Bool</code> (does not have to be a subtype of <code>AbstractVector{&lt;:Integer}</code>);</li><li>a vector of <code>Bool</code> (must be a subtype of <code>AbstractVector{Bool}</code>);</li><li>a <a href="https://docs.julialang.org/en/v1/manual/strings/#Regular-Expressions">regular expression</a> (will be expanded to a vector of matching column names);</li><li>a <code>Not</code> expression (see <a href="https://github.com/JuliaData/InvertedIndices.jl">InvertedIndices.jl</a>); <code>Not(idx)</code> selects all indices not in the passed <code>idx</code>; when passed as column selector <code>Not(idx...)</code> is equivalent to <code>Not(Cols(idx...))</code>.</li><li>a <code>Cols</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>Cols(idxs...)</code> selects the union of the selections in <code>idxs</code>; in particular <code>Cols()</code> selects no columns and <code>Cols(:)</code> selects all columns; a special rule is <code>Cols(predicate)</code>, where <code>predicate</code> is a predicate function; in this case the columns whose names passed to <code>predicate</code> as strings return <code>true</code> are selected.</li><li>a <code>Between</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>Between(first, last)</code> selects the columns between <code>first</code> and <code>last</code> inclusively;</li><li>an <code>All</code> expression (see <a href="https://github.com/JuliaData/DataAPI.jl">DataAPI.jl</a>); <code>All()</code> selects all columns, equivalent to <code>:</code>;</li><li>a literal colon <code>:</code> (selects all 
columns).</li></ul></li></ul><p>The following values are a valid row index:</p><ul><li>a scalar, later denoted as <code>row</code>:<ul><li>an <code>Integer</code> that is not <code>Bool</code>;</li></ul></li><li>a vector, later denoted as <code>rows</code>:<ul><li>a vector of <code>Integer</code> that are not <code>Bool</code> (does not have to be a subtype of <code>AbstractVector{&lt;:Integer}</code>);</li><li>a vector of <code>Bool</code> (must be a subtype of <code>AbstractVector{Bool}</code>);</li><li>a <code>Not</code> expression (see <a href="https://github.com/JuliaData/InvertedIndices.jl">InvertedIndices.jl</a>);</li><li>a literal colon <code>:</code> (selects all rows with copying);</li><li>a literal exclamation mark <code>!</code> (selects all rows without copying).</li></ul></li></ul><p>Additionally it is allowed to index into an <code>AbstractDataFrame</code> using a two-dimensional <code>CartesianIndex</code>.</p><p>In the descriptions below <code>df</code> represents a <code>DataFrame</code>, <code>sdf</code> is a <code>SubDataFrame</code> and <code>dfr</code> is a <code>DataFrameRow</code>.</p><p><code>:</code> always expands to <code>axes(df, 1)</code> or <code>axes(sdf, 1)</code>.</p><p><code>df.col</code> works like <code>df[!, col]</code> and <code>sdf.col</code> works like <code>sdf[!, col]</code> in all cases. 
An exception is that under Julia 1.6 or earlier <code>df.col .= v</code> and <code>sdf.col .= v</code> performs in-place broadcasting if <code>col</code> is present in <code>df</code>/<code>sdf</code> and is a valid identifier (this inconsistency is not present under Julia 1.7 and later).</p><h2 id="getindex-and-view"><a class="docs-heading-anchor" href="#getindex-and-view"><code>getindex</code> and <code>view</code></a><a id="getindex-and-view-1"></a><a class="docs-heading-anchor-permalink" href="#getindex-and-view" title="Permalink"></a></h2><p>The following list specifies the behavior of <code>getindex</code> and <code>view</code> operations depending on argument types.</p><p>In particular a description explicitly mentions that the data is <em>copied</em> or <em>reused without copying</em>.</p><p>For performance reasons, accessing, via <code>getindex</code> or <code>view</code>, a single <code>row</code> and multiple <code>cols</code> of a <code>DataFrame</code>, a <code>SubDataFrame</code> or a <code>DataFrameRow</code> always returns a <code>DataFrameRow</code> (which is a view type).</p><p><code>getindex</code> on <code>DataFrame</code>:</p><ul><li><code>df[row, col]</code> -&gt; the value contained in row <code>row</code> of column <code>col</code>, the same as <code>df[!, col][row]</code>;</li><li><code>df[CartesianIndex(row, col)]</code> -&gt; the same as <code>df[row, col]</code>;</li><li><code>df[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>df</code>;</li><li><code>df[rows, col]</code> -&gt; a copy of the vector <code>df[!, col]</code> with only the entries                    corresponding to <code>rows</code> selected, the same as <code>df[!, col][rows]</code>;</li><li><code>df[rows, cols]</code> -&gt; a <code>DataFrame</code> containing copies of columns <code>cols</code> with                     only the entries corresponding to <code>rows</code> selected;</li><li><code>df[!, col]</code> -&gt; the vector contained in column 
<code>col</code> returned without copying;                 the same as <code>df.col</code> if <code>col</code> is a valid identifier.</li><li><code>df[!, cols]</code> -&gt; create a new <code>DataFrame</code> with columns <code>cols</code> without copying                  of columns; the same as <code>select(df, cols, copycols=false)</code>.</li></ul><p><code>view</code> on <code>DataFrame</code>:</p><ul><li><code>@view df[row, col]</code> -&gt; a <code>0</code>-dimensional view into <code>df[!, col]</code> in row <code>row</code>,                         the same as <code>view(df[!, col], row)</code>;</li><li><code>@view df[CartesianIndex(row, col)]</code> -&gt; the same as <code>@view df[row, col]</code>;</li><li><code>@view df[row, cols]</code> -&gt; the same as <code>df[row, cols]</code>;</li><li><code>@view df[rows, col]</code> -&gt; a view into <code>df[!, col]</code> with <code>rows</code> selected, the                          same as <code>view(df[!, col], rows)</code>;</li><li><code>@view df[rows, cols]</code> -&gt; a <code>SubDataFrame</code> with <code>rows</code> selected with parent <code>df</code>;</li><li><code>@view df[!, col]</code> -&gt; a view into <code>df[!, col]</code>  with all rows.</li><li><code>@view df[!, cols]</code> -&gt; the same as <code>@view df[:, cols]</code>.</li></ul><p><code>getindex</code> on <code>SubDataFrame</code>:</p><ul><li><code>sdf[row, col]</code> -&gt; a value contained in row <code>row</code> of column <code>col</code>;</li><li><code>sdf[CartesianIndex(row, col)]</code> -&gt; the same as <code>sdf[row, col]</code>;</li><li><code>sdf[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(sdf)</code>;</li><li><code>sdf[rows, col]</code> -&gt; a copy of <code>sdf[!, col]</code> with only rows <code>rows</code> selected,                     the same as <code>sdf[!, col][rows]</code>;</li><li><code>sdf[rows, cols]</code> -&gt; a <code>DataFrame</code> containing columns <code>cols</code> and 
<code>sdf[rows, col]</code> as a vector for each <code>col</code> in <code>cols</code>;</li><li><code>sdf[!, col]</code> -&gt; a view of entries corresponding to <code>sdf</code> in the vector                  <code>parent(sdf)[!, col]</code>; the same as <code>sdf.col</code> if <code>col</code> is a                  valid identifier.</li><li><code>sdf[!, cols]</code> -&gt; create a new <code>SubDataFrame</code> with columns <code>cols</code>, the same                   parent as <code>sdf</code>, and the same rows selected; the same as                   <code>select(sdf, cols, copycols=false)</code>.</li></ul><p><code>view</code> on <code>SubDataFrame</code>:</p><ul><li><code>@view sdf[row, col]</code> -&gt; a <code>0</code>-dimensional view into <code>df[!, col]</code> at row                          <code>row</code>, the same as <code>view(sdf[!, col], row)</code>;</li><li><code>@view sdf[CartesianIndex(row, col)]</code> -&gt; the same as <code>@view sdf[row, col]</code>;</li><li><code>@view sdf[row, cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(sdf)</code>;</li><li><code>@view sdf[rows, col]</code> -&gt; a view into <code>sdf[!, col]</code> vector with <code>rows</code>                           selected, the same as <code>view(sdf[!, col], rows)</code>;</li><li><code>@view sdf[rows, cols]</code> -&gt; a <code>SubDataFrame</code> with parent <code>parent(sdf)</code>;</li><li><code>@view sdf[!, col]</code> -&gt; a view into <code>sdf[!, col]</code> vector with all rows.</li><li><code>@view sdf[!, cols]</code> -&gt; the same as <code>@view sdf[:, cols]</code>.</li></ul><p><code>getindex</code> on <code>DataFrameRow</code>:</p><ul><li><code>dfr[col]</code> -&gt; the value contained in column <code>col</code> of <code>dfr</code>; the same as               <code>dfr.col</code> if <code>col</code> is a valid identifier;</li><li><code>dfr[cols]</code> -&gt; a <code>DataFrameRow</code> with parent 
<code>parent(dfr)</code>;</li></ul><p><code>view</code> on <code>DataFrameRow</code>:</p><ul><li><code>@view dfr[col]</code> -&gt; a <code>0</code>-dimensional view into                     <code>parent(dfr)[DataFrames.row(dfr), col]</code>;</li><li><code>@view dfr[cols]</code> -&gt; a <code>DataFrameRow</code> with parent <code>parent(dfr)</code>;</li></ul><p>Note that views created with columns selector set to <code>:</code> change their columns&#39; count if columns are added/removed/renamed in the parent; if column selector is other than <code>:</code> then view points to selected columns by their number at the moment of creation of the view.</p><h2 id="setindex!"><a class="docs-heading-anchor" href="#setindex!"><code>setindex!</code></a><a id="setindex!-1"></a><a class="docs-heading-anchor-permalink" href="#setindex!" title="Permalink"></a></h2><p>The following list specifies the behavior of <code>setindex!</code> operations depending on argument types.</p><p>In particular a description explicitly mentions if the assignment is <em>in-place</em>.</p><p>Note that if a <code>setindex!</code> operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).</p><p><code>setindex!</code> on <code>DataFrame</code>:</p><ul><li><code>df[row, col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;</li><li><code>df[CartesianIndex(row, col)] = v</code> -&gt; the same as <code>df[row, col] = v</code>;</li><li><code>df[row, cols] = v</code> -&gt; set row <code>row</code> of columns <code>cols</code> in-place; the same as                        <code>dfr = df[row, cols]; dfr[:] = v</code>;</li><li><code>df[rows, col] = v</code> -&gt; set rows <code>rows</code> of column <code>col</code> in-place; <code>v</code> must be                        an <code>AbstractVector</code>; if <code>rows</code> is <code>:</code> and <code>col</code> is 
a                        <code>Symbol</code> or <code>AbstractString</code> that is not present in                        <code>df</code> then a new column in <code>df</code> is created and holds a                        <code>copy</code> of <code>v</code>; equivalent to <code>df.col = copy(v)</code> if                        <code>col</code> is a valid identifier;</li><li><code>df[rows, cols] = v</code> -&gt; set rows <code>rows</code> of columns <code>cols</code> in-place; <code>v</code> must                         be an <code>AbstractMatrix</code> or an <code>AbstractDataFrame</code> (in                         this case column names must match);</li><li><code>df[!, col] = v</code> -&gt; replaces <code>col</code> with <code>v</code> without copying (with the                     exception that if <code>v</code> is an <code>AbstractRange</code> it gets                     converted to a <code>Vector</code>); also if <code>col</code> is a <code>Symbol</code> or                     <code>AbstractString</code> that is not present in <code>df</code> then a new                     column in <code>df</code> is created and holds <code>v</code>; equivalent to                     <code>df.col = v</code> if <code>col</code> is a valid identifier; this is                     allowed if <code>ncol(df) == 0 || length(v) == nrow(df)</code>;</li><li><code>df[!, cols] = v</code> -&gt; replaces existing columns <code>cols</code> in data frame <code>df</code> with                      copying; <code>v</code> must be an <code>AbstractMatrix</code> or an                      <code>AbstractDataFrame</code> (in the latter case column names must                      match);</li></ul><p><code>setindex!</code> on <code>SubDataFrame</code>:</p><ul><li><code>sdf[row, col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;</li><li><code>sdf[CartesianIndex(row, col)] = v</code> -&gt; the same as <code>sdf[row, col] = 
v</code>;</li><li><code>sdf[row, cols] = v</code> -&gt; the same as <code>dfr = df[row, cols]; dfr[:] = v</code> in-place;</li><li><code>sdf[rows, col] = v</code> -&gt; set rows <code>rows</code> of column <code>col</code>, in-place; <code>v</code> must be                         an abstract vector;</li><li><code>sdf[rows, cols] = v</code> -&gt; set rows <code>rows</code> of columns <code>cols</code> in-place; <code>v</code> can                          be an <code>AbstractMatrix</code> or <code>v</code> can be                          <code>AbstractDataFrame</code> in which case column names must                          match;</li><li><code>sdf[!, col] = v</code> -&gt; replaces <code>col</code> with <code>v</code> with copying; if <code>col</code> is present                      in <code>sdf</code> then filtered-out rows in newly created vector                      are filled with values already present in that column and                      <code>promote_type</code> is used to determine the <code>eltype</code> of the                      new column; if <code>col</code> is not present in <code>sdf</code> then the                      operation is only allowed if <code>sdf</code> was created with <code>:</code>                      as column selector, in which case filtered-out rows are                      filled with <code>missing</code>; equivalent to <code>sdf.col = v</code> if                      <code>col</code> is a valid identifier; operation is allowed if                      <code>length(v) == nrow(sdf)</code>;</li><li><code>sdf[!, cols] = v</code> -&gt; replaces existing columns <code>cols</code> in data frame <code>sdf</code>                       with copying; <code>v</code> must be an <code>AbstractMatrix</code> or an                       <code>AbstractDataFrame</code> (in the latter case column names                       must match); filtered-out rows in newly created vectors                       are filled with values already present in 
respective                       columns and <code>promote_type</code> is used to determine the                       <code>eltype</code> of the new columns;</li></ul><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rules above mean that <code>sdf[:, col] = v</code> is an in-place operation if <code>col</code> is present in <code>sdf</code>, therefore it will be fast in general. On the other hand using <code>sdf[!, col] = v</code> or <code>sdf.col = v</code> will always allocate a new vector, which is more expensive computationally.</p></div></div><p><code>setindex!</code> on <code>DataFrameRow</code>:</p><ul><li><code>dfr[col] = v</code> -&gt; set value of <code>col</code> in row <code>row</code> to <code>v</code> in-place;                   equivalent to <code>dfr.col = v</code> if <code>col</code> is a valid identifier;</li><li><code>dfr[cols] = v</code> -&gt; set values of entries in columns <code>cols</code> in <code>dfr</code> by                    elements of <code>v</code> in place; <code>v</code> can be: 1) a <code>Tuple</code> or an                    <code>AbstractArray</code>, in which cases it must have a number of                    elements equal to <code>length(dfr)</code>, 2) an <code>AbstractDict</code>, in                    which case column names must match, 3) a <code>NamedTuple</code> or                    <code>DataFrameRow</code>, in which case column names and order must                    match;</li></ul><h2 id="Broadcasting"><a class="docs-heading-anchor" href="#Broadcasting">Broadcasting</a><a id="Broadcasting-1"></a><a class="docs-heading-anchor-permalink" href="#Broadcasting" title="Permalink"></a></h2><p>The following broadcasting rules apply to <code>AbstractDataFrame</code> objects:</p><ul><li><code>AbstractDataFrame</code> behaves in broadcasting like a two-dimensional collection compatible with matrices.</li><li>If an <code>AbstractDataFrame</code> takes part in 
broadcasting then a <code>DataFrame</code> is always produced as a result. In this case the requested broadcasting operation produces an object with exactly two dimensions. An exception is when an <code>AbstractDataFrame</code> is used only as a source of broadcast assignment into an object of dimensionality higher than two.</li><li>If multiple <code>AbstractDataFrame</code> objects take part in broadcasting then they have to have identical column names.</li></ul><p>Note that if broadcasting assignment operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).</p><p>Broadcasting <code>DataFrameRow</code> is currently not allowed (which is consistent with <code>NamedTuple</code>).</p><p>It is possible to assign a value to <code>AbstractDataFrame</code> and <code>DataFrameRow</code> objects using the <code>.=</code> operator. In such an operation <code>AbstractDataFrame</code> is considered as two-dimensional and <code>DataFrameRow</code> as single-dimensional.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rule above means that, similar to single-dimensional objects in Base (e.g. 
vectors), <code>DataFrameRow</code> is considered to be column-oriented.</p></div></div><p>Additional rules:</p><ul><li>in the <code>df[CartesianIndex(row, col)] .= v</code>, <code>df[row, col] .= v</code> syntaxes <code>v</code> is broadcasted into the contents of <code>df[row, col]</code> (this is consistent with Julia Base);</li><li>in the <code>df[row, cols] .= v</code> syntax the assignment to <code>df</code> is performed in-place;</li><li>in the <code>df[rows, col] .= v</code> and <code>df[rows, cols] .= v</code> syntaxes the assignment to <code>df</code> is performed in-place; if <code>rows</code> is <code>:</code> and <code>col</code> is <code>Symbol</code> or <code>AbstractString</code> and it is missing from <code>df</code> then a new column is allocated and added; the length of the column is always the value of <code>nrow(df)</code> before the assignment takes place;</li><li>in the <code>df[!, col] .= v</code> syntax column <code>col</code> is replaced by a freshly allocated vector; if <code>col</code> is <code>Symbol</code> or <code>AbstractString</code> and it is missing from <code>df</code> then a new column is allocated and added; the length of the column is always the value of <code>nrow(df)</code> before the assignment takes place;</li><li>the <code>df[!, cols] .= v</code> syntax replaces existing columns <code>cols</code> in data frame <code>df</code> with freshly allocated vectors;</li><li><code>df.col .= v</code> syntax currently performs in-place assignment to an existing vector <code>df.col</code>; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if <code>:col</code> is not present in <code>df</code> then a new column will be created in <code>df</code>.</li><li>in the <code>sdf[CartesianIndex(row, col)] .= v</code>, <code>sdf[row, col] .= v</code> and <code>sdf[row, cols] .= v</code> syntaxes the assignment to <code>sdf</code> is performed in-place;</li><li>in the <code>sdf[rows, col] .= v</code> and <code>sdf[rows, cols] .= v</code> syntaxes the assignment to <code>sdf</code> is performed in-place; if <code>rows</code> is <code>:</code> and <code>col</code> is a <code>Symbol</code> or <code>AbstractString</code> referring to a column missing from <code>sdf</code> and <code>sdf</code> was created with <code>:</code> as column selector then a new column is allocated and added; the filtered-out rows are filled with <code>missing</code>;</li><li>in the <code>sdf[!, col] .= v</code> syntax column <code>col</code> is replaced by a freshly allocated vector; the filtered-out rows are filled with values already present in <code>col</code>; if <code>col</code> is a <code>Symbol</code> or <code>AbstractString</code> referring to a column missing from <code>sdf</code> and <code>sdf</code> was created with <code>:</code> as column selector then a new column is allocated and added; in this case the filtered-out rows are filled with <code>missing</code>;</li><li>the <code>sdf[!, cols] .= v</code> syntax replaces existing columns <code>cols</code> in data frame <code>sdf</code> with freshly allocated vectors; the filtered-out rows are filled with values already present in <code>cols</code>;</li><li><code>sdf.col .= v</code> syntax currently performs in-place assignment to an existing vector <code>sdf.col</code>; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if <code>:col</code> is not present in <code>sdf</code> then a new column will be created in <code>sdf</code> if <code>sdf</code> was created with <code>:</code> as a column selector.</li><li><code>dfr.col .= v</code> syntax is allowed and performs in-place assignment to a value extracted by <code>dfr.col</code>.</li></ul><p>Note that <code>sdf[!, col] .= v</code> and <code>sdf[!, cols] .= v</code> syntaxes are not allowed as <code>sdf</code> can be only modified in-place.</p><p>If column indexing using <code>Symbol</code> or <code>AbstractString</code> names in <code>cols</code> is performed, the order of columns in the operation is specified by the order of names.</p><h2 id="Indexing-GroupedDataFrames"><a class="docs-heading-anchor" href="#Indexing-GroupedDataFrames">Indexing <code>GroupedDataFrame</code>s</a><a id="Indexing-GroupedDataFrames-1"></a><a class="docs-heading-anchor-permalink" href="#Indexing-GroupedDataFrames" title="Permalink"></a></h2><p>A <a href="../types/#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a> can behave as either an <code>AbstractVector</code> or <code>AbstractDict</code> depending on the type of index used. Integers (or arrays of them) trigger vector-like indexing while <code>Tuple</code>s and <code>NamedTuple</code>s trigger dictionary-like indexing. An intermediate between the two is the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a> type returned by <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a>, which behaves similarly to a <code>NamedTuple</code> but has performance on par with integer indexing.</p><p>The elements of a <code>GroupedDataFrame</code> are <a href="../types/#DataFrames.SubDataFrame"><code>SubDataFrame</code></a>s of its parent.</p><ul><li><code>gd[i::Integer]</code> -&gt; Get the <code>i</code>th group.</li><li><code>gd[key::NamedTuple]</code> -&gt; Get the group corresponding to the given values of the grouping columns. 
The fields of the <code>NamedTuple</code> must match the grouping columns passed to <a href="../functions/#DataAPI.groupby"><code>groupby</code></a> (including order).</li><li><code>gd[key::Tuple]</code> -&gt; Same as previous, but omitting the names on <code>key</code>.</li><li><code>get(gd, key::Union{Tuple, NamedTuple}, default)</code> -&gt; Get group for key <code>key</code>, returning <code>default</code> if it does not exist.</li><li><code>gd[key::GroupKey]</code> -&gt; Get the group corresponding to the <a href="../types/#DataFrames.GroupKey"><code>GroupKey</code></a> <code>key</code> (one of the elements of the vector returned by <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a>). This should be nearly as fast as integer indexing.</li><li><code>gd[a::AbstractVector]</code> -&gt; Select multiple groups and return them in a new <code>GroupedDataFrame</code> object. Groups may be selected by integer position using an array of <code>Integer</code>s or <code>Bool</code>s, similar to a standard array. Alternatively the array may contain keys of any of the types supported for dictionary-like indexing (<code>GroupKey</code>, <code>Tuple</code>, or <code>NamedTuple</code>). Selected groups must be unique, and different types of indices cannot be mixed.</li><li><code>gd[n::Not]</code> -&gt; Any of the above types wrapped in <code>Not</code>. 
The result will be a  new <code>GroupedDataFrame</code> containing all groups in <code>gd</code> <em>not</em> selected by the  wrapped index.</li></ul><h1 id="Common-API-for-types-defined-in-DataFrames.jl"><a class="docs-heading-anchor" href="#Common-API-for-types-defined-in-DataFrames.jl">Common API for types defined in DataFrames.jl</a><a id="Common-API-for-types-defined-in-DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#Common-API-for-types-defined-in-DataFrames.jl" title="Permalink"></a></h1><p>This table presents return value types of calling <code>names</code>, <code>propertynames</code>, <code>keys</code>, <code>length</code> and <code>ndims</code> on types exposed to the user by DataFrames.jl:</p><table><tr><th style="text-align: right">Type</th><th style="text-align: right"><code>names</code></th><th style="text-align: right"><code>propertynames</code></th><th style="text-align: right"><code>keys</code></th><th style="text-align: right"><code>length</code></th><th style="text-align: right"><code>ndims</code></th></tr><tr><td style="text-align: right"><code>AbstractDataFrame</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right">undefined</td><td style="text-align: right">undefined</td><td style="text-align: right"><code>2</code></td></tr><tr><td style="text-align: right"><code>DataFrameRow</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>DataFrameRows</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right">vector of 
<code>Int</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>DataFrameColumns</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupedDataFrame</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right">tuple of fields</td><td style="text-align: right"><code>GroupKeys</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupKeys</code></td><td style="text-align: right">undefined</td><td style="text-align: right">tuple of fields</td><td style="text-align: right">vector of <code>Int</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr><tr><td style="text-align: right"><code>GroupKey</code></td><td style="text-align: right"><code>Vector{String}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Vector{Symbol}</code></td><td style="text-align: right"><code>Int</code></td><td style="text-align: right"><code>1</code></td></tr></table><p>Additionally the above types <code>T</code> (i.e. 
<code>AbstractDataFrame</code>, <code>DataFrameRow</code>, <code>DataFrameRows</code>, <code>DataFrameColumns</code>, <code>GroupedDataFrame</code>, <code>GroupKeys</code>, <code>GroupKey</code>) have the following methods defined:</p><ul><li><code>size(::T)</code> returning a <code>Tuple</code> of <code>Int</code>.</li><li><code>size(::T, ::Integer)</code> returning an <code>Int</code>.</li><li><code>axes(::T)</code> returning a <code>Tuple</code> of <code>Int</code> vectors.</li><li><code>axes(::T, ::Integer)</code> returning an <code>Int</code> vector for a valid dimension (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>Base.OneTo(1)</code> is also returned  for a dimension higher than a valid one because they are <code>AbstractVector</code>).</li><li><code>firstindex(::T)</code> returning <code>1</code> (except <code>AbstractDataFrame</code> for which it is undefined).</li><li><code>firstindex(::T, ::Integer)</code> returning <code>1</code> for a valid dimension (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>1</code> is also returned for a  dimension higher than a valid one because they are <code>AbstractVector</code>).</li><li><code>lastindex(::T)</code> returning <code>Int</code> (except <code>AbstractDataFrame</code> for which it is undefined).</li><li><code>lastindex(::T, ::Integer)</code> returning <code>Int</code> for a valid dimension  (except  <code>DataFrameRows</code> and <code>GroupKeys</code> for which <code>1</code> is also returned for a  dimension higher than a valid one because they are <code>AbstractVector</code>).</li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../functions/">« Functions</a><a class="docs-footer-nextpage" href="../metadata/">Metadata »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia 
Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/lib/internals/index.html b/dev/lib/internals/index.html
index ea9ac2e3c..8a233cbae 100644
--- a/dev/lib/internals/index.html
+++ b/dev/lib/internals/index.html
@@ -1,9 +1,9 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Internals · DataFrames.jl</title><meta name="title" content="Internals · DataFrames.jl"/><meta property="og:title" content="Internals · DataFrames.jl"/><meta property="twitter:title" content="Internals · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" 
href="../../man/joins/">Joins</a></li><li><a class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../types/">Types</a></li><li><a class="tocitem" href="../functions/">Functions</a></li><li><a class="tocitem" href="../indexing/">Indexing</a></li><li><a class="tocitem" href="../metadata/">Metadata</a></li><li class="is-active"><a class="tocitem" href>Internals</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Internals</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Internals</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" 
href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/internals.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Internals"><a class="docs-heading-anchor" href="#Internals">Internals</a><a id="Internals-1"></a><a class="docs-heading-anchor-permalink" href="#Internals" title="Permalink"></a></h1><div class="admonition is-warning"><header class="admonition-header">Internal API</header><div class="admonition-body"><p>The functions, methods and types listed on this page are internal to DataFrames and are <strong>not considered to be part of the public API</strong>.</p></div></div><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.compacttype" href="#DataFrames.compacttype"><code>DataFrames.compacttype</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)</code></pre><p>Return compact string representation of type <code>T</code>.</p><p>For displaying data frame we do not want string representation of type to be longer than <code>maxwidth</code>. 
This function implements rules how type names are cropped if they are longer than <code>maxwidth</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/show.jl#L85-L93">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.gennames" href="#DataFrames.gennames"><code>DataFrames.gennames</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">gennames(n::Integer)</code></pre><p>Generate standardized names for columns of a DataFrame. The first name will be <code>:x1</code>, the second <code>:x2</code>, etc.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/utils.jl#L124-L129">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.getmaxwidths" href="#DataFrames.getmaxwidths"><code>DataFrames.getmaxwidths</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.getmaxwidths(df::AbstractDataFrame,
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Internals · DataFrames.jl</title><meta name="title" content="Internals · DataFrames.jl"/><meta property="og:title" content="Internals · DataFrames.jl"/><meta property="twitter:title" content="Internals · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/internals/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" 
href="../../man/joins/">Joins</a></li><li><a class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../types/">Types</a></li><li><a class="tocitem" href="../functions/">Functions</a></li><li><a class="tocitem" href="../indexing/">Indexing</a></li><li><a class="tocitem" href="../metadata/">Metadata</a></li><li class="is-active"><a class="tocitem" href>Internals</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Internals</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Internals</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" 
href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/internals.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Internals"><a class="docs-heading-anchor" href="#Internals">Internals</a><a id="Internals-1"></a><a class="docs-heading-anchor-permalink" href="#Internals" title="Permalink"></a></h1><div class="admonition is-warning"><header class="admonition-header">Internal API</header><div class="admonition-body"><p>The functions, methods and types listed on this page are internal to DataFrames and are <strong>not considered to be part of the public API</strong>.</p></div></div><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.compacttype" href="#DataFrames.compacttype"><code>DataFrames.compacttype</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)</code></pre><p>Return compact string representation of type <code>T</code>.</p><p>For displaying data frame we do not want string representation of type to be longer than <code>maxwidth</code>. 
This function implements rules how type names are cropped if they are longer than <code>maxwidth</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/show.jl#L85-L93">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.gennames" href="#DataFrames.gennames"><code>DataFrames.gennames</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">gennames(n::Integer)</code></pre><p>Generate standardized names for columns of a DataFrame. The first name will be <code>:x1</code>, the second <code>:x2</code>, etc.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/utils.jl#L124-L129">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.getmaxwidths" href="#DataFrames.getmaxwidths"><code>DataFrames.getmaxwidths</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.getmaxwidths(df::AbstractDataFrame,
                         io::IO,
                         rowindices1::AbstractVector{Int},
                         rowindices2::AbstractVector{Int},
                         rowlabel::Symbol,
                         rowid::Union{Integer, Nothing},
                         show_eltype::Bool,
-                        buffer::IOBuffer)</code></pre><p>Calculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the longest entry in that column – among the rows of the data frame will be rendered to IO. The widths for all columns are returned as a vector.</p><p>Return a <code>Vector{Int}</code> giving the maximum string widths required to render each column, including that column&#39;s name and type.</p><p>NOTE: The last entry of the result vector is the string width of the implicit row ID column contained in every <code>AbstractDataFrame</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df::AbstractDataFrame</code>: The data frame whose columns will be printed.</li><li><code>io::IO</code>: The <code>IO</code> to which <code>df</code> is to be printed</li><li>`rowindices1::AbstractVector{Int}: A set of indices of the first chunk of the AbstractDataFrame that would be rendered to IO.</li><li>`rowindices2::AbstractVector{Int}: A set of indices of the second chunk of the AbstractDataFrame that would be rendered to IO. Can be empty if the AbstractDataFrame would be printed without any ellipses.</li><li><code>rowlabel::AbstractString</code>: The label that will be used when rendered the numeric ID&#39;s of each row. 
Typically, this will be set to &quot;Row&quot;.</li><li><code>rowid</code>: Used to handle showing <code>DataFrameRow</code>.</li><li><code>show_eltype</code>: Whether to print the column type  under the column name in the heading.</li><li><code>buffer</code>: buffer passed around to avoid reallocations in <code>ourstrwidth</code></li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/io.jl#L1-L38">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.ourshow" href="#DataFrames.ourshow"><code>DataFrames.ourshow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.ourshow(io::IO, x::Any, truncstring::Int)</code></pre><p>Render a value to an <code>IO</code> object compactly using print. 
<code>truncstring</code> indicates the approximate number of text characters width to truncate the output (if it is a non-positive value then no truncation is applied).</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/show.jl#L28-L34">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.ourstrwidth" href="#DataFrames.ourstrwidth"><code>DataFrames.ourstrwidth</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.ourstrwidth(io::IO, x::Any, buffer::IOBuffer, truncstring::Int)</code></pre><p>Determine the number of characters that would be used to print a value.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/show.jl#L5-L9">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_for_chunks" href="#DataFrames.@spawn_for_chunks"><code>DataFrames.@spawn_for_chunks</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">@spawn_for_chunks basesize for i in range ... 
end</code></pre><p>Parallelize a <code>for</code> loop by spawning separate tasks iterating each over a chunk of at least <code>basesize</code> elements in <code>range</code>.</p><p>A number of tasks higher than <code>Threads.nthreads()</code> may be spawned, since that can allow for a more efficient load balancing in case some threads are busy (nested parallelism).</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/utils.jl#L197-L207">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_or_run_task" href="#DataFrames.@spawn_or_run_task"><code>DataFrames.@spawn_or_run_task</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">@spawn_or_run_task threads expr</code></pre><p>Equivalent to <code>Threads.@spawn</code> if <code>threads === true</code>, otherwise run <code>expr</code> and return a <code>Task</code> that returns its value.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/utils.jl#L218-L223">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_or_run" href="#DataFrames.@spawn_or_run"><code>DataFrames.@spawn_or_run</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia 
hljs">@spawn_or_run threads expr</code></pre><p>Equivalent to <code>Threads.@spawn</code> if <code>threads === true</code>, otherwise run <code>expr</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/utils.jl#L257-L262">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.default_table_transformation" href="#DataFrames.default_table_transformation"><code>DataFrames.default_table_transformation</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">default_table_transformation(df_sel::AbstractDataFrame, fun)</code></pre><p>This is a default implementation called when <code>AsTable(...) =&gt; fun</code> is requested. 
The <code>df_sel</code> argument is a data frame storing columns selected by <code>AsTable(...)</code> selector.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selectionfast.jl#L91-L97">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.isreadonly" href="#DataFrames.isreadonly"><code>DataFrames.isreadonly</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isreadonly(fun)</code></pre><p>Trait returning a <code>Bool</code> indicator if function <code>fun</code> is only reading the passed argument. Such a function guarantees not to modify nor return in any form the passed argument. By default <code>false</code> is returned.</p><p>This function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental. Adding a method to <code>isreadonly</code> for a specific function <code>fun</code> will improve performance of <code>AsTable(...) 
=&gt; ByRow(fun∘collect)</code> operation.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/selectionfast.jl#L57-L68">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../metadata/">« Metadata</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+                        buffer::IOBuffer)</code></pre><p>Calculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the longest entry in that column – among the rows of the data frame will be rendered to IO. The widths for all columns are returned as a vector.</p><p>Return a <code>Vector{Int}</code> giving the maximum string widths required to render each column, including that column&#39;s name and type.</p><p>NOTE: The last entry of the result vector is the string width of the implicit row ID column contained in every <code>AbstractDataFrame</code>.</p><p><strong>Arguments</strong></p><ul><li><code>df::AbstractDataFrame</code>: The data frame whose columns will be printed.</li><li><code>io::IO</code>: The <code>IO</code> to which <code>df</code> is to be printed</li><li>`rowindices1::AbstractVector{Int}: A set of indices of the first chunk of the AbstractDataFrame that would be rendered to IO.</li><li>`rowindices2::AbstractVector{Int}: A set of indices of the second chunk of the AbstractDataFrame that would be rendered to IO. Can be empty if the AbstractDataFrame would be printed without any ellipses.</li><li><code>rowlabel::AbstractString</code>: The label that will be used when rendered the numeric ID&#39;s of each row. 
Typically, this will be set to &quot;Row&quot;.</li><li><code>rowid</code>: Used to handle showing <code>DataFrameRow</code>.</li><li><code>show_eltype</code>: Whether to print the column type  under the column name in the heading.</li><li><code>buffer</code>: buffer passed around to avoid reallocations in <code>ourstrwidth</code></li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/io.jl#L1-L38">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.ourshow" href="#DataFrames.ourshow"><code>DataFrames.ourshow</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.ourshow(io::IO, x::Any, truncstring::Int)</code></pre><p>Render a value to an <code>IO</code> object compactly using print. 
<code>truncstring</code> indicates the approximate number of text characters width to truncate the output (if it is a non-positive value then no truncation is applied).</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/show.jl#L28-L34">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.ourstrwidth" href="#DataFrames.ourstrwidth"><code>DataFrames.ourstrwidth</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrames.ourstrwidth(io::IO, x::Any, buffer::IOBuffer, truncstring::Int)</code></pre><p>Determine the number of characters that would be used to print a value.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/show.jl#L5-L9">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_for_chunks" href="#DataFrames.@spawn_for_chunks"><code>DataFrames.@spawn_for_chunks</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">@spawn_for_chunks basesize for i in range ... 
end</code></pre><p>Parallelize a <code>for</code> loop by spawning separate tasks iterating each over a chunk of at least <code>basesize</code> elements in <code>range</code>.</p><p>A number of tasks higher than <code>Threads.nthreads()</code> may be spawned, since that can allow for a more efficient load balancing in case some threads are busy (nested parallelism).</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/utils.jl#L197-L207">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_or_run_task" href="#DataFrames.@spawn_or_run_task"><code>DataFrames.@spawn_or_run_task</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">@spawn_or_run_task threads expr</code></pre><p>Equivalent to <code>Threads.@spawn</code> if <code>threads === true</code>, otherwise run <code>expr</code> and return a <code>Task</code> that returns its value.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/utils.jl#L218-L223">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.@spawn_or_run" href="#DataFrames.@spawn_or_run"><code>DataFrames.@spawn_or_run</code></a> — <span class="docstring-category">Macro</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia 
hljs">@spawn_or_run threads expr</code></pre><p>Equivalent to <code>Threads.@spawn</code> if <code>threads === true</code>, otherwise run <code>expr</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/utils.jl#L257-L262">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.default_table_transformation" href="#DataFrames.default_table_transformation"><code>DataFrames.default_table_transformation</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">default_table_transformation(df_sel::AbstractDataFrame, fun)</code></pre><p>This is a default implementation called when <code>AsTable(...) =&gt; fun</code> is requested. 
The <code>df_sel</code> argument is a data frame storing columns selected by <code>AsTable(...)</code> selector.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selectionfast.jl#L91-L97">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.isreadonly" href="#DataFrames.isreadonly"><code>DataFrames.isreadonly</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">isreadonly(fun)</code></pre><p>Trait returning a <code>Bool</code> indicator if function <code>fun</code> is only reading the passed argument. Such a function guarantees not to modify nor return in any form the passed argument. By default <code>false</code> is returned.</p><p>This function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental. Adding a method to <code>isreadonly</code> for a specific function <code>fun</code> will improve performance of <code>AsTable(...) 
=&gt; ByRow(fun∘collect)</code> operation.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/selectionfast.jl#L57-L68">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../metadata/">« Metadata</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/lib/metadata/index.html b/dev/lib/metadata/index.html
index 414a8d8c5..f02b4d7db 100644
--- a/dev/lib/metadata/index.html
+++ b/dev/lib/metadata/index.html
@@ -72,4 +72,4 @@
 julia&gt; emptycolmetadata!(df);
 
 julia&gt; colmetadatakeys(df)
-()</code></pre><h2 id="Propagation-of-:note-style-metadata"><a class="docs-heading-anchor" href="#Propagation-of-:note-style-metadata">Propagation of <code>:note</code>-style metadata</a><a id="Propagation-of-:note-style-metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Propagation-of-:note-style-metadata" title="Permalink"></a></h2><p>An important design feature of <code>:note</code>-style metadata is how it is handled when data frames are transformed.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The provided rules might slightly change in the future. Any change to <code>:note</code>-style metadata propagation rules will not be considered as breaking and can be done in any minor release of DataFrames.jl. Such changes might be made based on users&#39; feedback about what metadata propagation rules are most convenient in practice.</p></div></div><p>The general design rules for propagation of <code>:note</code>-style metadata are as follows.</p><p>For operations that take a single data frame as an input:</p><ul><li>Table level metadata is propagated to the returned data frame object.</li><li>For column-level metadata:<ul><li>in all cases when a single column is transformed to a single column and the name of the column does not change (or is automatically changed e.g. 
to de-duplicate column names or via column renaming in joins) column-level metadata is preserved (example operations of this kind are <code>getindex</code>, <code>subset</code>, joins, <code>mapcols</code>).</li><li>in all cases when a single column is transformed with <code>identity</code> or <code>copy</code> to a single column, column-level metadata is preserved even if column name is changed (example operations of this kind are <code>rename</code>, or the <code>:x =&gt; :y</code> or <code>:x =&gt; copy =&gt; :y</code> operation specification in <code>select</code>).</li></ul></li></ul><p>For operations that take multiple data frames as their input two cases are distinguished:</p><ul><li>When there is a natural main table in the operation (<code>append!</code>, <code>prepend!</code>, <code>leftjoin</code>, <code>leftjoin!</code>, <code>rightjoin</code>, <code>semijoin</code>, <code>antijoin</code>, <code>setindex!</code>):<ul><li>table-level metadata is taken from the main table;</li><li>column-level metadata for columns from the main table is taken from main table;</li><li>column-level metadata for columns from the non-main table is taken only for columns not present in the main table.</li></ul></li><li>When all tables are equivalent (<code>hcat</code>, <code>vcat</code>, <code>innerjoin</code>, <code>outerjoin</code>):<ul><li>table-level metadata is preserved only for keys which are defined in all passed tables and have the same value;</li><li>column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value.</li></ul></li></ul><p>In all these operations when metadata is preserved the values in the key-value pairs are not copied (this is relevant in case of mutable values).</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rules for column-level <code>:note</code>-style metadata propagation are designed to make the right 
decision in common cases. In particular, they assume that if source and target column name is the same then the metadata for the column is not changed. While this is valid for many operations, it is not always true in general. For example the <code>:x =&gt; ByRow(log) =&gt; :x</code> transformation might invalidate metadata if it contained unit of measure of the variable. In such cases user must either use a different name for the output column, set metadata style to <code>:default</code> before the operation, or manually drop or update such metadata from the <code>:x</code> column after the transformation.</p></div></div><h3 id="Operations-that-preserve-:note-style-metadata"><a class="docs-heading-anchor" href="#Operations-that-preserve-:note-style-metadata">Operations that preserve <code>:note</code>-style metadata</a><a id="Operations-that-preserve-:note-style-metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Operations-that-preserve-:note-style-metadata" title="Permalink"></a></h3><p>Most of the functions in DataFrames.jl only preserve table and column metadata whose style is <code>:note</code>. Some functions use a more complex logic, even if they follow the general rules described above (in particular under any transformation all non-<code>:note</code>-style metadata is always dropped). 
These are:</p><ul><li><a href="../functions/#DataAPI.describe"><code>describe</code></a> drops all metadata.</li><li><a href="../functions/#Base.hcat"><code>hcat</code></a>: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved.</li><li><a href="../functions/#Base.vcat"><code>vcat</code></a>: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value;</li><li><a href="../functions/#Base.stack"><code>stack</code></a>: propagates table-level metadata and column-level metadata for identifier columns.</li><li><a href="../functions/#DataFrames.unstack"><code>unstack</code></a>: propagates table-level metadata and column-level metadata for row keys columns.</li><li><a href="../functions/#Base.permutedims"><code>permutedims</code></a>: propagates table-level metadata and drops column-level  metadata.</li><li>broadcasted assignment does not change target metadata; under Julia earlier than 1.7 operation of kind <code>df.a .= s</code> does not drop non-<code>:note</code>-style metadata; under Julia 1.7 or later this operation preserves only <code>:note</code>-style metadata</li><li>broadcasting propagates table-level metadata if some key is present in all passed data frames and value associated with it is identical in all passed data frames; column-level metadata is propagated for columns if some key for a given column is present in all passed data frames and value associated with it is identical in all passed data frames.</li><li><code>getindex</code> preserves table-level metadata and column-level metadata for selected columns</li><li><code>setindex!</code> does not affect table-level and column-level metadata</li><li><a href="../functions/#Base.push!"><code>push!</code></a>, <a 
href="../functions/#Base.pushfirst!"><code>pushfirst!</code></a>, <a href="../functions/#Base.insert!"><code>insert!</code></a> do not affect table-level nor column-level metadata (even if they add new columns and pushed row is a <code>DataFrameRow</code> or other value supporting metadata interface)</li><li><a href="../functions/#Base.append!"><code>append!</code></a> and <a href="../functions/#Base.prepend!"><code>prepend!</code></a> do not change table and column-level metadata of the destination data frame, except that if new columns are added and these columns have metadata in the appended/prepended table then this metadata is preserved.</li><li><a href="../functions/#DataFrames.leftjoin!"><code>leftjoin!</code></a>, <a href="../functions/#DataAPI.leftjoin"><code>leftjoin</code></a>: table and column-level metadata is taken from the left table except for non-key columns from right table for which metadata is taken from right table;</li><li><a href="../functions/#DataAPI.rightjoin"><code>rightjoin</code></a>: table and column-level metadata is taken from the right table except for non-key columns from left table for which metadata is taken from left table;</li><li><a href="../functions/#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="../functions/#DataAPI.outerjoin"><code>outerjoin</code></a>: propagates table-level metadata only for keys that are defined in all passed data frames and have the same value; column-level metadata is propagated for all columns except for key columns, for which it is propagated only for keys that are defined in all passed data frames and have the same value.</li><li><a href="../functions/#DataAPI.semijoin"><code>semijoin</code></a>, <a href="../functions/#DataAPI.antijoin"><code>antijoin</code></a>: table and column-level metadata is taken from the left table.</li><li><a href="../functions/#DataAPI.crossjoin"><code>crossjoin</code></a>: propagates table-level metadata only for keys that are defined in both passed data frames 
and have the same value; propagates column-level metadata from both passed data frames.</li><li><a href="../functions/#DataFrames.select"><code>select</code></a>, <a href="../functions/#DataFrames.select!"><code>select!</code></a>, <a href="../functions/#DataFrames.transform"><code>transform</code></a>, <a href="../functions/#DataFrames.transform!"><code>transform!</code></a>, <a href="../functions/#DataFrames.combine"><code>combine</code></a>: propagate table-level metadata; column-level metadata is propagated if: a) a single column is transformed to a single column and the name of the column does not change    (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).</li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../indexing/">« Indexing</a><a class="docs-footer-nextpage" href="../internals/">Internals »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option 
value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+()</code></pre><h2 id="Propagation-of-:note-style-metadata"><a class="docs-heading-anchor" href="#Propagation-of-:note-style-metadata">Propagation of <code>:note</code>-style metadata</a><a id="Propagation-of-:note-style-metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Propagation-of-:note-style-metadata" title="Permalink"></a></h2><p>An important design feature of <code>:note</code>-style metadata is how it is handled when data frames are transformed.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The provided rules might slightly change in the future. Any change to <code>:note</code>-style metadata propagation rules will not be considered as breaking and can be done in any minor release of DataFrames.jl. Such changes might be made based on users&#39; feedback about what metadata propagation rules are most convenient in practice.</p></div></div><p>The general design rules for propagation of <code>:note</code>-style metadata are as follows.</p><p>For operations that take a single data frame as an input:</p><ul><li>Table level metadata is propagated to the returned data frame object.</li><li>For column-level metadata:<ul><li>in all cases when a single column is transformed to a single column and the name of the column does not change (or is automatically changed e.g. 
to de-duplicate column names or via column renaming in joins) column-level metadata is preserved (example operations of this kind are <code>getindex</code>, <code>subset</code>, joins, <code>mapcols</code>).</li><li>in all cases when a single column is transformed with <code>identity</code> or <code>copy</code> to a single column, column-level metadata is preserved even if column name is changed (example operations of this kind are <code>rename</code>, or the <code>:x =&gt; :y</code> or <code>:x =&gt; copy =&gt; :y</code> operation specification in <code>select</code>).</li></ul></li></ul><p>For operations that take multiple data frames as their input two cases are distinguished:</p><ul><li>When there is a natural main table in the operation (<code>append!</code>, <code>prepend!</code>, <code>leftjoin</code>, <code>leftjoin!</code>, <code>rightjoin</code>, <code>semijoin</code>, <code>antijoin</code>, <code>setindex!</code>):<ul><li>table-level metadata is taken from the main table;</li><li>column-level metadata for columns from the main table is taken from main table;</li><li>column-level metadata for columns from the non-main table is taken only for columns not present in the main table.</li></ul></li><li>When all tables are equivalent (<code>hcat</code>, <code>vcat</code>, <code>innerjoin</code>, <code>outerjoin</code>):<ul><li>table-level metadata is preserved only for keys which are defined in all passed tables and have the same value;</li><li>column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value.</li></ul></li></ul><p>In all these operations when metadata is preserved the values in the key-value pairs are not copied (this is relevant in case of mutable values).</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The rules for column-level <code>:note</code>-style metadata propagation are designed to make the right 
decision in common cases. In particular, they assume that if the source and target column names are the same then the metadata for the column is not changed. While this is valid for many operations, it is not always true in general. For example the <code>:x =&gt; ByRow(log) =&gt; :x</code> transformation might invalidate metadata if it contained the unit of measure of the variable. In such cases the user must either use a different name for the output column, set metadata style to <code>:default</code> before the operation, or manually drop or update such metadata from the <code>:x</code> column after the transformation.</p></div></div><h3 id="Operations-that-preserve-:note-style-metadata"><a class="docs-heading-anchor" href="#Operations-that-preserve-:note-style-metadata">Operations that preserve <code>:note</code>-style metadata</a><a id="Operations-that-preserve-:note-style-metadata-1"></a><a class="docs-heading-anchor-permalink" href="#Operations-that-preserve-:note-style-metadata" title="Permalink"></a></h3><p>Most of the functions in DataFrames.jl only preserve table and column metadata whose style is <code>:note</code>. Some functions use a more complex logic, even if they follow the general rules described above (in particular under any transformation all non-<code>:note</code>-style metadata is always dropped). 
These are:</p><ul><li><a href="../functions/#DataAPI.describe"><code>describe</code></a> drops all metadata.</li><li><a href="../functions/#Base.hcat"><code>hcat</code></a>: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved.</li><li><a href="../functions/#Base.vcat"><code>vcat</code></a>: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value;</li><li><a href="../functions/#Base.stack"><code>stack</code></a>: propagates table-level metadata and column-level metadata for identifier columns.</li><li><a href="../functions/#DataFrames.unstack"><code>unstack</code></a>: propagates table-level metadata and column-level metadata for row keys columns.</li><li><a href="../functions/#Base.permutedims"><code>permutedims</code></a>: propagates table-level metadata and drops column-level  metadata.</li><li>broadcasted assignment does not change target metadata; under Julia earlier than 1.7 operation of kind <code>df.a .= s</code> does not drop non-<code>:note</code>-style metadata; under Julia 1.7 or later this operation preserves only <code>:note</code>-style metadata</li><li>broadcasting propagates table-level metadata if some key is present in all passed data frames and value associated with it is identical in all passed data frames; column-level metadata is propagated for columns if some key for a given column is present in all passed data frames and value associated with it is identical in all passed data frames.</li><li><code>getindex</code> preserves table-level metadata and column-level metadata for selected columns</li><li><code>setindex!</code> does not affect table-level and column-level metadata</li><li><a href="../functions/#Base.push!"><code>push!</code></a>, <a 
href="../functions/#Base.pushfirst!"><code>pushfirst!</code></a>, <a href="../functions/#Base.insert!"><code>insert!</code></a> do not affect table-level nor column-level metadata (even if they add new columns and pushed row is a <code>DataFrameRow</code> or other value supporting metadata interface)</li><li><a href="../functions/#Base.append!"><code>append!</code></a> and <a href="../functions/#Base.prepend!"><code>prepend!</code></a> do not change table and column-level metadata of the destination data frame, except that if new columns are added and these columns have metadata in the appended/prepended table then this metadata is preserved.</li><li><a href="../functions/#DataFrames.leftjoin!"><code>leftjoin!</code></a>, <a href="../functions/#DataAPI.leftjoin"><code>leftjoin</code></a>: table and column-level metadata is taken from the left table except for non-key columns from right table for which metadata is taken from right table;</li><li><a href="../functions/#DataAPI.rightjoin"><code>rightjoin</code></a>: table and column-level metadata is taken from the right table except for non-key columns from left table for which metadata is taken from left table;</li><li><a href="../functions/#DataAPI.innerjoin"><code>innerjoin</code></a>, <a href="../functions/#DataAPI.outerjoin"><code>outerjoin</code></a>: propagates table-level metadata only for keys that are defined in all passed data frames and have the same value; column-level metadata is propagated for all columns except for key columns, for which it is propagated only for keys that are defined in all passed data frames and have the same value.</li><li><a href="../functions/#DataAPI.semijoin"><code>semijoin</code></a>, <a href="../functions/#DataAPI.antijoin"><code>antijoin</code></a>: table and column-level metadata is taken from the left table.</li><li><a href="../functions/#DataAPI.crossjoin"><code>crossjoin</code></a>: propagates table-level metadata only for keys that are defined in both passed data frames 
and have the same value; propagates column-level metadata from both passed data frames.</li><li><a href="../functions/#DataFrames.select"><code>select</code></a>, <a href="../functions/#DataFrames.select!"><code>select!</code></a>, <a href="../functions/#DataFrames.transform"><code>transform</code></a>, <a href="../functions/#DataFrames.transform!"><code>transform!</code></a>, <a href="../functions/#DataFrames.combine"><code>combine</code></a>: propagate table-level metadata; column-level metadata is propagated if: a) a single column is transformed to a single column and the name of the column does not change    (this includes all column selection operations), or b) a single column is transformed with <code>identity</code> or <code>copy</code> to a single column    even if column name is changed (this includes column renaming).</li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../indexing/">« Indexing</a><a class="docs-footer-nextpage" href="../internals/">Internals »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option 
value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/lib/types/index.html b/dev/lib/types/index.html
index 89e5da868..af8b450c9 100644
--- a/dev/lib/types/index.html
+++ b/dev/lib/types/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Types · DataFrames.jl</title><meta name="title" content="Types · DataFrames.jl"/><meta property="og:title" content="Types · DataFrames.jl"/><meta property="twitter:title" content="Types · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" 
type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../../man/joins/">Joins</a></li><li><a 
class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li class="is-active"><a class="tocitem" href>Types</a><ul class="internal"><li><a class="tocitem" href="#Type-hierarchy-design"><span>Type hierarchy design</span></a></li><li><a class="tocitem" href="#man-columnhandling"><span>The design of handling of columns of a <code>DataFrame</code></span></a></li><li><a class="tocitem" href="#Types-specification"><span>Types specification</span></a></li></ul></li><li><a class="tocitem" href="../functions/">Functions</a></li><li><a class="tocitem" href="../indexing/">Indexing</a></li><li><a class="tocitem" href="../metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Types</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Types</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" 
href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/types.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Types"><a class="docs-heading-anchor" href="#Types">Types</a><a id="Types-1"></a><a class="docs-heading-anchor-permalink" href="#Types" title="Permalink"></a></h1><ul><li><a href="#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a></li><li><a href="#DataFrames.AsTable"><code>DataFrames.AsTable</code></a></li><li><a href="#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a></li><li><a href="#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a></li><li><a href="#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a></li><li><a href="#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a></li><li><a href="#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a></li><li><a href="#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a></li><li><a href="#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a></li><li><a href="#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a></li><li><a href="#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a></li><li><a href="#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a></li></ul><h2 id="Type-hierarchy-design"><a class="docs-heading-anchor" 
href="#Type-hierarchy-design">Type hierarchy design</a><a id="Type-hierarchy-design-1"></a><a class="docs-heading-anchor-permalink" href="#Type-hierarchy-design" title="Permalink"></a></h2><p><code>AbstractDataFrame</code> is an abstract type that provides an interface for data frame types. It is not intended as a fully generic interface for working with tabular data, which is the role of interfaces defined by <a href="https://github.com/JuliaData/Tables.jl/">Tables.jl</a> instead.</p><p><code>DataFrame</code> is the most fundamental subtype of <code>AbstractDataFrame</code>, which stores a set of columns as <code>AbstractVector</code> objects. Indexing of all stored columns must be 1-based. Also, all functions exposed by DataFrames.jl API make sure to <code>collect</code> passed <code>AbstractRange</code> source columns before storing them in a <code>DataFrame</code>.</p><p><code>SubDataFrame</code> is an <code>AbstractDataFrame</code> subtype representing a view into a <code>DataFrame</code>. It stores only a reference to the parent <code>DataFrame</code> and information about which rows and columns from the parent are selected (both as integer indices referring to the parent). Typically it is created using the <code>view</code> function or is returned by indexing into a <code>GroupedDataFrame</code> object.</p><p><code>GroupedDataFrame</code> is a type that stores the result of a  grouping operation performed on an <code>AbstractDataFrame</code>. It is intended to be created as a result of a call to the <code>groupby</code> function.</p><p><code>DataFrameRow</code> is a view into a single row of an <code>AbstractDataFrame</code>. It stores only a reference to a parent <code>DataFrame</code> and information about which row and columns from the parent are selected (both as integer indices referring to the parent). 
The <code>DataFrameRow</code> type supports iteration over columns of the row and is similar in functionality to the <code>NamedTuple</code> type, but allows for modification of data stored in the parent <code>DataFrame</code> and reflects changes done to the parent after the creation of the view. Typically objects of the <code>DataFrameRow</code> type are encountered when returned by the <code>eachrow</code> function, or when accessing a single row of a <code>DataFrame</code> or <code>SubDataFrame</code> via <code>getindex</code> or <code>view</code>.</p><p>The <code>eachrow</code> function returns a value of the <code>DataFrameRows</code> type, which serves as an iterator over rows of an <code>AbstractDataFrame</code>, returning <code>DataFrameRow</code> objects. The <code>DataFrameRows</code> is a subtype of <code>AbstractVector</code> and supports its interface with the exception that it is read-only.</p><p>Similarly, the <code>eachcol</code> function returns a value of the <code>DataFrameColumns</code> type, which is not an <code>AbstractVector</code>, but supports most of its API. The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>Note that <code>DataFrameRows</code> and <code>DataFrameColumns</code> are not exported and should not be constructed directly, but using the <code>eachrow</code> and <code>eachcol</code> functions.</p><p>The <code>RepeatedVector</code> and <code>StackedVector</code> types are subtypes of <code>AbstractVector</code> and support its interface with the exception that they are read only. 
Note that they are not exported and should not be constructed directly, but they are columns of a <code>DataFrame</code> returned by <code>stack</code> with <code>view=true</code>.</p><p>The <code>ByRow</code> type is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection.</p><p>The <code>AsTable</code> type is a special type used for selection operations to signal that the columns selected by a wrapped selector should be passed as a <code>NamedTuple</code> to the function or to signal that it is requested to expand the return value of a transformation into multiple columns.</p><h2 id="man-columnhandling"><a class="docs-heading-anchor" href="#man-columnhandling">The design of handling of columns of a <code>DataFrame</code></a><a id="man-columnhandling-1"></a><a class="docs-heading-anchor-permalink" href="#man-columnhandling" title="Permalink"></a></h2><p>When a <code>DataFrame</code> is constructed columns are copied by default. You can disable this behavior by setting <code>copycols</code> keyword argument to <code>false</code>. The exception is if an <code>AbstractRange</code> is passed as a column, then it is always collected to a <code>Vector</code>.</p><p>Also functions that transform a <code>DataFrame</code> to produce a new <code>DataFrame</code> perform a copy of the columns, unless they are passed <code>copycols=false</code> (available only for functions that could perform a transformation without copying the columns). 
Examples of such functions are <a href="../functions/#Base.vcat"><code>vcat</code></a>, <a href="../functions/#Base.hcat"><code>hcat</code></a>, <a href="../functions/#Base.filter"><code>filter</code></a>, <a href="../functions/#DataFrames.dropmissing"><code>dropmissing</code></a>, <code>getindex</code>, <a href="../functions/#Base.copy"><code>copy</code></a> or the <a href="#DataFrames.DataFrame"><code>DataFrame</code></a> constructor mentioned above.</p><p>The generic single-argument constructor <code>DataFrame(table)</code> has <code>copycols=nothing</code> by default, meaning that columns are copied unless <code>table</code> signals that a copy of columns doesn&#39;t need to be made (this is done by wrapping the source table in <code>Tables.CopiedColumns</code>). <a href="https://csv.juliadata.org/stable">CSV.jl</a> does this when <code>CSV.read(file, DataFrame)</code> is called, since columns are built only for the purpose of use in a <code>DataFrame</code> constructor. Another example is <a href="https://arrow.juliadata.org/dev/manual/#Arrow.Table"><code>Arrow.Table</code></a>, where arrow data is inherently immutable so columns can&#39;t be accidentally mutated anyway. To be able to mutate arrow data, columns must be materialized, which can be accomplished via <code>DataFrame(arrow_table, copycols=true)</code>.</p><p>On the contrary, functions that create a view of a <code>DataFrame</code> <em>do not</em> by definition make copies of the columns, and therefore require particular caution. 
This includes <code>view</code>, which returns a <code>SubDataFrame</code> or a <code>DataFrameRow</code>, and <code>groupby</code>, which returns a <code>GroupedDataFrame</code>.</p><p>A partial exception to this rule is the <a href="../functions/#Base.stack"><code>stack</code></a> function with <code>view=true</code> which creates a <code>DataFrame</code> that contains views of the columns from the source <code>DataFrame</code>.</p><p>In-place functions whose names end with <code>!</code> (like <code>sort!</code> or <a href="../functions/#DataFrames.dropmissing!"><code>dropmissing!</code></a>, <code>setindex!</code>, <code>push!</code>, <code>append!</code>) may mutate the column vectors of the <code>DataFrame</code> they take as an argument. These functions are safe to call due to the rules described above, <em>except</em> when a view of the <code>DataFrame</code> is in use (via a <code>SubDataFrame</code>, a <code>DataFrameRow</code> or a <code>GroupedDataFrame</code>). In the latter case, calling such a function on the parent might corrupt the view, which may trigger errors, silently return invalid data or even cause Julia to crash. The same caution applies when a <code>DataFrame</code> was created using columns of another <code>DataFrame</code> without copying (for instance when <code>copycols=false</code> in functions such as <code>DataFrame</code> or <code>hcat</code>).</p><p>It is possible to have direct access to a column <code>col</code> of a <code>DataFrame</code> <code>df</code> (e.g. 
this can be useful in performance critical code to avoid copying), using one of the following methods:</p><ul><li>via the <code>getproperty</code> function using the syntax <code>df.col</code>;</li><li>via the <code>getindex</code> function using the syntax <code>df[!, :col]</code> (note this is in contrast to <code>df[:, :col]</code> which copies);</li><li>by creating a <code>DataFrameColumns</code> object using the <a href="../functions/#Base.eachcol"><code>eachcol</code></a> function;</li><li>by calling the <code>parent</code> function on a view of a column of the <code>DataFrame</code>, e.g. <code>parent(@view df[:, :col])</code>;</li><li>by storing the reference to the column before creating a <code>DataFrame</code> with <code>copycols=false</code>;</li></ul><p>A column obtained from a <code>DataFrame</code> using one of the above methods should not be mutated without caution because:</p><ul><li>resizing a column vector will corrupt its parent <code>DataFrame</code> and any associated views as methods only check the length of the column when it is added to the <code>DataFrame</code> and later assume that all columns have the same length;</li><li>reordering values in a column vector (e.g. 
using <code>sort!</code>) will break the consistency of rows with other columns, which will also affect views (if any);</li><li>changing values contained in a column vector is acceptable as long as it is not used as a grouping column in a <code>GroupedDataFrame</code> created based on the <code>DataFrame</code>.</li></ul><h2 id="Types-specification"><a class="docs-heading-anchor" href="#Types-specification">Types specification</a><a id="Types-specification-1"></a><a class="docs-heading-anchor-permalink" href="#Types-specification" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.AbstractDataFrame" href="#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">AbstractDataFrame</code></pre><p>An abstract type for which all concrete types expose an interface for working with tabular data.</p><p>An <code>AbstractDataFrame</code> is a two-dimensional table with <code>Symbol</code>s or strings for column names.</p><p>DataFrames.jl defines two types that are subtypes of <code>AbstractDataFrame</code>: <a href="#DataFrames.DataFrame"><code>DataFrame</code></a> and <a href="#DataFrames.SubDataFrame"><code>SubDataFrame</code></a>.</p><p><strong>Indexing and broadcasting</strong></p><p><code>AbstractDataFrame</code> can be indexed by passing two indices specifying row and column selectors. The allowed indices are a superset of indices that can be used for standard arrays. You can also access a single column of an <code>AbstractDataFrame</code> using <code>getproperty</code> and <code>setproperty!</code> functions. 
Columns can be selected using integers, <code>Symbol</code>s, or strings. In broadcasting <code>AbstractDataFrame</code> behavior is similar to a <code>Matrix</code>.</p><p>A detailed description of <code>getindex</code>, <code>setindex!</code>, <code>getproperty</code>, <code>setproperty!</code>, broadcasting and broadcasting assignment for data frames is given in the <a href="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/">&quot;Indexing&quot; section</a> of the manual.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/abstractdataframe.jl#L1-L26">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.AsTable" href="#DataFrames.AsTable"><code>DataFrames.AsTable</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">AsTable(cols)</code></pre><p>A type having a special meaning in <code>source =&gt; transformation =&gt; destination</code> selection operations supported by <a href="../functions/#DataFrames.combine"><code>combine</code></a>, <a href="../functions/#DataFrames.select"><code>select</code></a>, <a href="../functions/#DataFrames.select!"><code>select!</code></a>, <a href="../functions/#DataFrames.transform"><code>transform</code></a>, <a href="../functions/#DataFrames.transform!"><code>transform!</code></a>, <a href="../functions/#DataFrames.subset"><code>subset</code></a>, and <a href="../functions/#DataFrames.subset!"><code>subset!</code></a>.</p><p>If <code>AsTable(cols)</code> is used in <code>source</code> position it signals that the columns selected by the wrapped selector <code>cols</code> 
should be passed as a <code>NamedTuple</code> to the function.</p><p>If <code>AsTable</code> is used in <code>destination</code> position it means that the result of the <code>transformation</code> operation is a vector of containers (or a single container if <code>ByRow(transformation)</code> is used) that should be expanded  into multiple columns using <code>keys</code> to get column names.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(a=1:3, b=11:13)
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Types · DataFrames.jl</title><meta name="title" content="Types · DataFrames.jl"/><meta property="og:title" content="Types · DataFrames.jl"/><meta property="twitter:title" content="Types · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/lib/types/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" 
type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li><a class="tocitem" href="../../man/basics/">First Steps with DataFrames.jl</a></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../../man/getting_started/">Getting Started</a></li><li><a class="tocitem" href="../../man/working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../../man/importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../../man/joins/">Joins</a></li><li><a 
class="tocitem" href="../../man/split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../../man/reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../../man/sorting/">Sorting</a></li><li><a class="tocitem" href="../../man/categorical/">Categorical Data</a></li><li><a class="tocitem" href="../../man/missing/">Missing Data</a></li><li><a class="tocitem" href="../../man/querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../../man/comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li class="is-active"><a class="tocitem" href>Types</a><ul class="internal"><li><a class="tocitem" href="#Type-hierarchy-design"><span>Type hierarchy design</span></a></li><li><a class="tocitem" href="#man-columnhandling"><span>The design of handling of columns of a <code>DataFrame</code></span></a></li><li><a class="tocitem" href="#Types-specification"><span>Types specification</span></a></li></ul></li><li><a class="tocitem" href="../functions/">Functions</a></li><li><a class="tocitem" href="../indexing/">Indexing</a></li><li><a class="tocitem" href="../metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">API</a></li><li class="is-active"><a href>Types</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Types</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" 
href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/lib/types.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Types"><a class="docs-heading-anchor" href="#Types">Types</a><a id="Types-1"></a><a class="docs-heading-anchor-permalink" href="#Types" title="Permalink"></a></h1><ul><li><a href="#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a></li><li><a href="#DataFrames.AsTable"><code>DataFrames.AsTable</code></a></li><li><a href="#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a></li><li><a href="#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a></li><li><a href="#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a></li><li><a href="#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a></li><li><a href="#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a></li><li><a href="#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a></li><li><a href="#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a></li><li><a href="#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a></li><li><a href="#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a></li><li><a href="#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a></li></ul><h2 id="Type-hierarchy-design"><a class="docs-heading-anchor" 
href="#Type-hierarchy-design">Type hierarchy design</a><a id="Type-hierarchy-design-1"></a><a class="docs-heading-anchor-permalink" href="#Type-hierarchy-design" title="Permalink"></a></h2><p><code>AbstractDataFrame</code> is an abstract type that provides an interface for data frame types. It is not intended as a fully generic interface for working with tabular data, which is the role of interfaces defined by <a href="https://github.com/JuliaData/Tables.jl/">Tables.jl</a> instead.</p><p><code>DataFrame</code> is the most fundamental subtype of <code>AbstractDataFrame</code>, which stores a set of columns as <code>AbstractVector</code> objects. Indexing of all stored columns must be 1-based. Also, all functions exposed by DataFrames.jl API make sure to <code>collect</code> passed <code>AbstractRange</code> source columns before storing them in a <code>DataFrame</code>.</p><p><code>SubDataFrame</code> is an <code>AbstractDataFrame</code> subtype representing a view into a <code>DataFrame</code>. It stores only a reference to the parent <code>DataFrame</code> and information about which rows and columns from the parent are selected (both as integer indices referring to the parent). Typically it is created using the <code>view</code> function or is returned by indexing into a <code>GroupedDataFrame</code> object.</p><p><code>GroupedDataFrame</code> is a type that stores the result of a  grouping operation performed on an <code>AbstractDataFrame</code>. It is intended to be created as a result of a call to the <code>groupby</code> function.</p><p><code>DataFrameRow</code> is a view into a single row of an <code>AbstractDataFrame</code>. It stores only a reference to a parent <code>DataFrame</code> and information about which row and columns from the parent are selected (both as integer indices referring to the parent). 
The <code>DataFrameRow</code> type supports iteration over columns of the row and is similar in functionality to the <code>NamedTuple</code> type, but allows for modification of data stored in the parent <code>DataFrame</code> and reflects changes done to the parent after the creation of the view. Typically objects of the <code>DataFrameRow</code> type are encountered when returned by the <code>eachrow</code> function, or when accessing a single row of a <code>DataFrame</code> or <code>SubDataFrame</code> via <code>getindex</code> or <code>view</code>.</p><p>The <code>eachrow</code> function returns a value of the <code>DataFrameRows</code> type, which serves as an iterator over rows of an <code>AbstractDataFrame</code>, returning <code>DataFrameRow</code> objects. The <code>DataFrameRows</code> is a subtype of <code>AbstractVector</code> and supports its interface with the exception that it is read-only.</p><p>Similarly, the <code>eachcol</code> function returns a value of the <code>DataFrameColumns</code> type, which is not an <code>AbstractVector</code>, but supports most of its API. The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>Note that <code>DataFrameRows</code> and <code>DataFrameColumns</code> are not exported and should not be constructed directly, but using the <code>eachrow</code> and <code>eachcol</code> functions.</p><p>The <code>RepeatedVector</code> and <code>StackedVector</code> types are subtypes of <code>AbstractVector</code> and support its interface with the exception that they are read-only. 
Note that they are not exported and should not be constructed directly, but they are columns of a <code>DataFrame</code> returned by <code>stack</code> with <code>view=true</code>.</p><p>The <code>ByRow</code> type is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection.</p><p>The <code>AsTable</code> type is a special type used for selection operations to signal that the columns selected by a wrapped selector should be passed as a <code>NamedTuple</code> to the function or to signal that it is requested to expand the return value of a transformation into multiple columns.</p><h2 id="man-columnhandling"><a class="docs-heading-anchor" href="#man-columnhandling">The design of handling of columns of a <code>DataFrame</code></a><a id="man-columnhandling-1"></a><a class="docs-heading-anchor-permalink" href="#man-columnhandling" title="Permalink"></a></h2><p>When a <code>DataFrame</code> is constructed columns are copied by default. You can disable this behavior by setting <code>copycols</code> keyword argument to <code>false</code>. The exception is if an <code>AbstractRange</code> is passed as a column, then it is always collected to a <code>Vector</code>.</p><p>Also functions that transform a <code>DataFrame</code> to produce a new <code>DataFrame</code> perform a copy of the columns, unless they are passed <code>copycols=false</code> (available only for functions that could perform a transformation without copying the columns). 
Examples of such functions are <a href="../functions/#Base.vcat"><code>vcat</code></a>, <a href="../functions/#Base.hcat"><code>hcat</code></a>, <a href="../functions/#Base.filter"><code>filter</code></a>, <a href="../functions/#DataFrames.dropmissing"><code>dropmissing</code></a>, <code>getindex</code>, <a href="../functions/#Base.copy"><code>copy</code></a> or the <a href="#DataFrames.DataFrame"><code>DataFrame</code></a> constructor mentioned above.</p><p>The generic single-argument constructor <code>DataFrame(table)</code> has <code>copycols=nothing</code> by default, meaning that columns are copied unless <code>table</code> signals that a copy of columns doesn&#39;t need to be made (this is done by wrapping the source table in <code>Tables.CopiedColumns</code>). <a href="https://csv.juliadata.org/stable">CSV.jl</a> does this when <code>CSV.read(file, DataFrame)</code> is called, since columns are built only for the purpose of use in a <code>DataFrame</code> constructor. Another example is <a href="https://arrow.juliadata.org/dev/manual/#Arrow.Table"><code>Arrow.Table</code></a>, where arrow data is inherently immutable so columns can&#39;t be accidentally mutated anyway. To be able to mutate arrow data, columns must be materialized, which can be accomplished via <code>DataFrame(arrow_table, copycols=true)</code>.</p><p>On the contrary, functions that create a view of a <code>DataFrame</code> <em>do not</em> by definition make copies of the columns, and therefore require particular caution. 
This includes <code>view</code>, which returns a <code>SubDataFrame</code> or a <code>DataFrameRow</code>, and <code>groupby</code>, which returns a <code>GroupedDataFrame</code>.</p><p>A partial exception to this rule is the <a href="../functions/#Base.stack"><code>stack</code></a> function with <code>view=true</code> which creates a <code>DataFrame</code> that contains views of the columns from the source <code>DataFrame</code>.</p><p>In-place functions whose names end with <code>!</code> (like <code>sort!</code> or <a href="../functions/#DataFrames.dropmissing!"><code>dropmissing!</code></a>, <code>setindex!</code>, <code>push!</code>, <code>append!</code>) may mutate the column vectors of the <code>DataFrame</code> they take as an argument. These functions are safe to call due to the rules described above, <em>except</em> when a view of the <code>DataFrame</code> is in use (via a <code>SubDataFrame</code>, a <code>DataFrameRow</code> or a <code>GroupedDataFrame</code>). In the latter case, calling such a function on the parent might corrupt the view, which may trigger errors, silently return invalid data or even cause Julia to crash. The same caution applies when a <code>DataFrame</code> was created using columns of another <code>DataFrame</code> without copying (for instance when <code>copycols=false</code> in functions such as <code>DataFrame</code> or <code>hcat</code>).</p><p>It is possible to have direct access to a column <code>col</code> of a <code>DataFrame</code> <code>df</code> (e.g. 
this can be useful in performance critical code to avoid copying), using one of the following methods:</p><ul><li>via the <code>getproperty</code> function using the syntax <code>df.col</code>;</li><li>via the <code>getindex</code> function using the syntax <code>df[!, :col]</code> (note this is in contrast to <code>df[:, :col]</code> which copies);</li><li>by creating a <code>DataFrameColumns</code> object using the <a href="../functions/#Base.eachcol"><code>eachcol</code></a> function;</li><li>by calling the <code>parent</code> function on a view of a column of the <code>DataFrame</code>, e.g. <code>parent(@view df[:, :col])</code>;</li><li>by storing the reference to the column before creating a <code>DataFrame</code> with <code>copycols=false</code>;</li></ul><p>A column obtained from a <code>DataFrame</code> using one of the above methods should not be mutated without caution because:</p><ul><li>resizing a column vector will corrupt its parent <code>DataFrame</code> and any associated views as methods only check the length of the column when it is added to the <code>DataFrame</code> and later assume that all columns have the same length;</li><li>reordering values in a column vector (e.g. 
using <code>sort!</code>) will break the consistency of rows with other columns, which will also affect views (if any);</li><li>changing values contained in a column vector is acceptable as long as it is not used as a grouping column in a <code>GroupedDataFrame</code> created based on the <code>DataFrame</code>.</li></ul><h2 id="Types-specification"><a class="docs-heading-anchor" href="#Types-specification">Types specification</a><a id="Types-specification-1"></a><a class="docs-heading-anchor-permalink" href="#Types-specification" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.AbstractDataFrame" href="#DataFrames.AbstractDataFrame"><code>DataFrames.AbstractDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">AbstractDataFrame</code></pre><p>An abstract type for which all concrete types expose an interface for working with tabular data.</p><p>An <code>AbstractDataFrame</code> is a two-dimensional table with <code>Symbol</code>s or strings for column names.</p><p>DataFrames.jl defines two types that are subtypes of <code>AbstractDataFrame</code>: <a href="#DataFrames.DataFrame"><code>DataFrame</code></a> and <a href="#DataFrames.SubDataFrame"><code>SubDataFrame</code></a>.</p><p><strong>Indexing and broadcasting</strong></p><p><code>AbstractDataFrame</code> can be indexed by passing two indices specifying row and column selectors. The allowed indices are a superset of indices that can be used for standard arrays. You can also access a single column of an <code>AbstractDataFrame</code> using <code>getproperty</code> and <code>setproperty!</code> functions. 
Columns can be selected using integers, <code>Symbol</code>s, or strings. In broadcasting <code>AbstractDataFrame</code> behavior is similar to a <code>Matrix</code>.</p><p>A detailed description of <code>getindex</code>, <code>setindex!</code>, <code>getproperty</code>, <code>setproperty!</code>, broadcasting and broadcasting assignment for data frames is given in the <a href="https://juliadata.github.io/DataFrames.jl/stable/lib/indexing/">&quot;Indexing&quot; section</a> of the manual.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/abstractdataframe.jl#L1-L26">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.AsTable" href="#DataFrames.AsTable"><code>DataFrames.AsTable</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">AsTable(cols)</code></pre><p>A type having a special meaning in <code>source =&gt; transformation =&gt; destination</code> selection operations supported by <a href="../functions/#DataFrames.combine"><code>combine</code></a>, <a href="../functions/#DataFrames.select"><code>select</code></a>, <a href="../functions/#DataFrames.select!"><code>select!</code></a>, <a href="../functions/#DataFrames.transform"><code>transform</code></a>, <a href="../functions/#DataFrames.transform!"><code>transform!</code></a>, <a href="../functions/#DataFrames.subset"><code>subset</code></a>, and <a href="../functions/#DataFrames.subset!"><code>subset!</code></a>.</p><p>If <code>AsTable(cols)</code> is used in <code>source</code> position it signals that the columns selected by the wrapped selector <code>cols</code> 
should be passed as a <code>NamedTuple</code> to the function.</p><p>If <code>AsTable</code> is used in <code>destination</code> position it means that the result of the <code>transformation</code> operation is a vector of containers (or a single container if <code>ByRow(transformation)</code> is used) that should be expanded  into multiple columns using <code>keys</code> to get column names.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df1 = DataFrame(a=1:3, b=11:13)
 3×2 DataFrame
  Row │ a      b
      │ Int64  Int64
@@ -33,7 +33,7 @@
 ─────┼──────────────
    1 │     1    121
    2 │     4    144
-   3 │     9    169</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/other/utils.jl#L1-L54">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrame" href="#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrame &lt;: AbstractDataFrame</code></pre><p>An <code>AbstractDataFrame</code> that stores a set of named columns.</p><p>The columns are normally <code>AbstractVector</code>s stored in memory, particularly a <code>Vector</code>, <code>PooledVector</code> or <code>CategoricalVector</code>.</p><p><strong>Constructors</strong></p><pre><code class="language-julia hljs">DataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true)
+   3 │     9    169</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/other/utils.jl#L1-L54">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrame" href="#DataFrames.DataFrame"><code>DataFrames.DataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrame &lt;: AbstractDataFrame</code></pre><p>An <code>AbstractDataFrame</code> that stores a set of named columns.</p><p>The columns are normally <code>AbstractVector</code>s stored in memory, particularly a <code>Vector</code>, <code>PooledVector</code> or <code>CategoricalVector</code>.</p><p><strong>Constructors</strong></p><pre><code class="language-julia hljs">DataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true)
 DataFrame(pairs::AbstractVector{&lt;:Pair}; makeunique::Bool=false, copycols::Bool=true)
 DataFrame(ds::AbstractDict; copycols::Bool=true)
 DataFrame(; kwargs..., copycols::Bool=true)
@@ -107,7 +107,7 @@
      │ Int64  Int64
 ─────┼──────────────
    1 │     1      0
-   2 │     2      0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframe/dataframe.jl#L1-L177">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameRow" href="#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameRow{&lt;:AbstractDataFrame, &lt;:AbstractIndex}</code></pre><p>A view of one row of an <code>AbstractDataFrame</code>.</p><p>A <code>DataFrameRow</code> is returned by <code>getindex</code> or <code>view</code> functions when one row and a selection of columns are requested, or when iterating the result of the call to the <a href="../functions/#Base.eachrow"><code>eachrow</code></a> function.</p><p>The <code>DataFrameRow</code> constructor can also be called directly:</p><pre><code class="nohighlight hljs">DataFrameRow(parent::AbstractDataFrame, row::Integer, cols=:)</code></pre><p>A <code>DataFrameRow</code> supports the iteration interface and can therefore be passed to functions that expect a collection as an argument. Its element type is always <code>Any</code>.</p><p>Indexing is one-dimensional like specifying a column of a <code>DataFrame</code>. 
You can also access the data in a <code>DataFrameRow</code> using the <code>getproperty</code> and <code>setproperty!</code> functions and convert it to a <code>Tuple</code>, <code>NamedTuple</code>, or <code>Vector</code> using the corresponding functions.</p><p>If the selection of columns in a parent data frame is passed as <code>:</code> (a colon) then <code>DataFrameRow</code> will always have all columns from the parent, even if they are added or removed after its creation.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([1, 2], outer=[2]),
+   2 │     2      0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframe/dataframe.jl#L1-L177">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameRow" href="#DataFrames.DataFrameRow"><code>DataFrames.DataFrameRow</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameRow{&lt;:AbstractDataFrame, &lt;:AbstractIndex}</code></pre><p>A view of one row of an <code>AbstractDataFrame</code>.</p><p>A <code>DataFrameRow</code> is returned by <code>getindex</code> or <code>view</code> functions when one row and a selection of columns are requested, or when iterating the result of the call to the <a href="../functions/#Base.eachrow"><code>eachrow</code></a> function.</p><p>The <code>DataFrameRow</code> constructor can also be called directly:</p><pre><code class="nohighlight hljs">DataFrameRow(parent::AbstractDataFrame, row::Integer, cols=:)</code></pre><p>A <code>DataFrameRow</code> supports the iteration interface and can therefore be passed to functions that expect a collection as an argument. Its element type is always <code>Any</code>.</p><p>Indexing is one-dimensional like specifying a column of a <code>DataFrame</code>. 
You can also access the data in a <code>DataFrameRow</code> using the <code>getproperty</code> and <code>setproperty!</code> functions and convert it to a <code>Tuple</code>, <code>NamedTuple</code>, or <code>Vector</code> using the corresponding functions.</p><p>If the selection of columns in a parent data frame is passed as <code>:</code> (a colon) then <code>DataFrameRow</code> will always have all columns from the parent, even if they are added or removed after its creation.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([1, 2], outer=[2]),
                       b=repeat([&quot;a&quot;, &quot;b&quot;], inner=[2]),
                       c=1:4)
 4×3 DataFrame
@@ -150,7 +150,7 @@
 3-element Vector{Any}:
  1
   &quot;a&quot;
- 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/dataframerow/dataframerow.jl#L1-L75">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupedDataFrame" href="#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupedDataFrame</code></pre><p>The result of a <a href="../functions/#DataAPI.groupby"><code>groupby</code></a> operation on an <code>AbstractDataFrame</code>; a view into the <code>AbstractDataFrame</code> grouped by rows.</p><p>Not meant to be constructed directly, see <a href="../functions/#DataAPI.groupby"><code>groupby</code></a>.</p><p>One can get the names of columns used to create <code>GroupedDataFrame</code> using the <a href="../functions/#DataFrames.groupcols"><code>groupcols</code></a> function. Similarly the <a href="../functions/#DataFrames.groupindices"><code>groupindices</code></a> function returns a vector of group indices for each row of the parent data frame.</p><p>After its creation, a <code>GroupedDataFrame</code> reflects the grouping of rows that was valid at its creation time. Therefore grouping columns of its parent data frame must not be mutated, and rows must not be added nor removed from it. To safeguard the user against such cases, if the number of rows in the parent data frame changes then trying to use <code>GroupedDataFrame</code> will throw an error. 
However, one can add or remove columns to the parent data frame without invalidating the <code>GroupedDataFrame</code> provided that columns used for grouping are not changed.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L15-L36">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupKey" href="#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupKey{T&lt;:GroupedDataFrame}</code></pre><p>Key for one of the groups of a <a href="#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>. Contains the values of the corresponding grouping columns and behaves similarly to a <code>NamedTuple</code>, but using it to index its <code>GroupedDataFrame</code> is more efficient than using the equivalent <code>Tuple</code> and <code>NamedTuple</code>, and much more efficient than using the equivalent <code>AbstractDict</code>.</p><p>Instances of this type are returned by <code>keys(::GroupedDataFrame)</code> and are not meant to be constructed directly.</p><p>Indexing fields of <code>GroupKey</code> is allowed using an integer, a <code>Symbol</code>, or a string. It is also possible to access the data in a <code>GroupKey</code> using the <code>getproperty</code> function. A <code>GroupKey</code> can be converted to a <code>Tuple</code>, <code>NamedTuple</code>, a <code>Vector</code>, or a <code>Dict</code>. 
When converted to a <code>Dict</code>, the keys of the <code>Dict</code> are <code>Symbol</code>s.</p><p>See <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a> for more information.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L612-L630">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupKeys" href="#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupKeys{T&lt;:GroupedDataFrame} &lt;: AbstractVector{GroupKey{T}}</code></pre><p>A vector containing all <a href="#DataFrames.GroupKey"><code>GroupKey</code></a> objects for a given <a href="#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>.</p><p>See <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a> for more information.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/groupeddataframe/groupeddataframe.jl#L760-L767">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.SubDataFrame" href="#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia 
hljs">SubDataFrame{&lt;:AbstractDataFrame, &lt;:AbstractIndex, &lt;:AbstractVector{Int}} &lt;: AbstractDataFrame</code></pre><p>A view of an <code>AbstractDataFrame</code>. It is returned by a call to the <code>view</code> function on an <code>AbstractDataFrame</code> if a collections of rows and columns are specified.</p><p>A <code>SubDataFrame</code> is an <code>AbstractDataFrame</code>, so expect that most DataFrame functions should work. Such methods include <code>describe</code>, <code>summary</code>, <code>nrow</code>, <code>size</code>, <code>by</code>, <code>stack</code>, and <code>join</code>.</p><p>If the selection of columns in a parent data frame is passed as <code>:</code> (a colon) then <code>SubDataFrame</code> will always have all columns from the parent, even if they are added or removed after its creation.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),
+ 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/dataframerow/dataframerow.jl#L1-L75">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupedDataFrame" href="#DataFrames.GroupedDataFrame"><code>DataFrames.GroupedDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupedDataFrame</code></pre><p>The result of a <a href="../functions/#DataAPI.groupby"><code>groupby</code></a> operation on an <code>AbstractDataFrame</code>; a view into the <code>AbstractDataFrame</code> grouped by rows.</p><p>Not meant to be constructed directly, see <a href="../functions/#DataAPI.groupby"><code>groupby</code></a>.</p><p>One can get the names of columns used to create <code>GroupedDataFrame</code> using the <a href="../functions/#DataFrames.groupcols"><code>groupcols</code></a> function. Similarly the <a href="../functions/#DataFrames.groupindices"><code>groupindices</code></a> function returns a vector of group indices for each row of the parent data frame.</p><p>After its creation, a <code>GroupedDataFrame</code> reflects the grouping of rows that was valid at its creation time. Therefore grouping columns of its parent data frame must not be mutated, and rows must not be added nor removed from it. To safeguard the user against such cases, if the number of rows in the parent data frame changes then trying to use <code>GroupedDataFrame</code> will throw an error. 
However, one can add or remove columns to the parent data frame without invalidating the <code>GroupedDataFrame</code> provided that columns used for grouping are not changed.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L15-L36">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupKey" href="#DataFrames.GroupKey"><code>DataFrames.GroupKey</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupKey{T&lt;:GroupedDataFrame}</code></pre><p>Key for one of the groups of a <a href="#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>. Contains the values of the corresponding grouping columns and behaves similarly to a <code>NamedTuple</code>, but using it to index its <code>GroupedDataFrame</code> is more efficient than using the equivalent <code>Tuple</code> and <code>NamedTuple</code>, and much more efficient than using the equivalent <code>AbstractDict</code>.</p><p>Instances of this type are returned by <code>keys(::GroupedDataFrame)</code> and are not meant to be constructed directly.</p><p>Indexing fields of <code>GroupKey</code> is allowed using an integer, a <code>Symbol</code>, or a string. It is also possible to access the data in a <code>GroupKey</code> using the <code>getproperty</code> function. A <code>GroupKey</code> can be converted to a <code>Tuple</code>, <code>NamedTuple</code>, a <code>Vector</code>, or a <code>Dict</code>. 
When converted to a <code>Dict</code>, the keys of the <code>Dict</code> are <code>Symbol</code>s.</p><p>See <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a> for more information.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L612-L630">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.GroupKeys" href="#DataFrames.GroupKeys"><code>DataFrames.GroupKeys</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">GroupKeys{T&lt;:GroupedDataFrame} &lt;: AbstractVector{GroupKey{T}}</code></pre><p>A vector containing all <a href="#DataFrames.GroupKey"><code>GroupKey</code></a> objects for a given <a href="#DataFrames.GroupedDataFrame"><code>GroupedDataFrame</code></a>.</p><p>See <a href="../functions/#Base.keys"><code>keys(::GroupedDataFrame)</code></a> for more information.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/groupeddataframe/groupeddataframe.jl#L760-L767">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.SubDataFrame" href="#DataFrames.SubDataFrame"><code>DataFrames.SubDataFrame</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia 
hljs">SubDataFrame{&lt;:AbstractDataFrame, &lt;:AbstractIndex, &lt;:AbstractVector{Int}} &lt;: AbstractDataFrame</code></pre><p>A view of an <code>AbstractDataFrame</code>. It is returned by a call to the <code>view</code> function on an <code>AbstractDataFrame</code> if a collections of rows and columns are specified.</p><p>A <code>SubDataFrame</code> is an <code>AbstractDataFrame</code>, so expect that most DataFrame functions should work. Such methods include <code>describe</code>, <code>summary</code>, <code>nrow</code>, <code>size</code>, <code>by</code>, <code>stack</code>, and <code>join</code>.</p><p>If the selection of columns in a parent data frame is passed as <code>:</code> (a colon) then <code>SubDataFrame</code> will always have all columns from the parent, even if they are added or removed after its creation.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl hljs">julia&gt; df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),
                       b=repeat([2, 1], outer=[4]),
                       c=1:8)
 8×3 DataFrame
@@ -200,6 +200,6 @@
      │ Int64  Int64  Int64
 ─────┼─────────────────────
    1 │     1      2      1
-   2 │     1      2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/subdataframe/subdataframe.jl#L1-L69">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameRows" href="#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameRows{D&lt;:AbstractDataFrame} &lt;: AbstractVector{DataFrameRow}</code></pre><p>Iterator over rows of an <code>AbstractDataFrame</code>, with each row represented as a <code>DataFrameRow</code>.</p><p>A value of this type is returned by the <a href="../functions/#Base.eachrow"><code>eachrow</code></a> function.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L8-L15">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameColumns" href="#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameColumns{&lt;:AbstractDataFrame}</code></pre><p>A vector-like object that allows iteration over columns of an <code>AbstractDataFrame</code>.</p><p>Indexing into <code>DataFrameColumns</code> objects 
using integer, <code>Symbol</code> or string returns the corresponding column (without copying). Indexing into <code>DataFrameColumns</code> objects using a multiple column selector returns a subsetted <code>DataFrameColumns</code> object with a new parent containing only the selected columns (without copying).</p><p><code>DataFrameColumns</code> supports most of the <code>AbstractVector</code> API. The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>In particular <code>findnext</code>, <code>findprev</code>, <code>findfirst</code>, <code>findlast</code>, and <code>findall</code> functions are supported, and in <code>findnext</code> and <code>findprev</code> functions it is allowed to pass an integer, string, or <code>Symbol</code> as a reference index.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/iteration.jl#L177-L183">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.RepeatedVector" href="#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">RepeatedVector{T} &lt;: AbstractVector{T}</code></pre><p>An AbstractVector that is a view into another AbstractVector with repeated elements</p><p>NOTE: Not exported.</p><p><strong>Constructor</strong></p><pre><code class="language-julia hljs">RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)</code></pre><p><strong>Arguments</strong></p><ul><li><code>parent</code> : the 
AbstractVector that&#39;s repeated</li><li><code>inner</code> : the number of times each element is repeated</li><li><code>outer</code> : the number of times the whole vector is repeated after expanded by <code>inner</code></li></ul><p><code>inner</code> and <code>outer</code> have the same meaning as similarly named arguments to <code>repeat</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia hljs">RepeatedVector([1, 2], 3, 1)   # [1, 1, 1, 2, 2, 2]
+   2 │     1      2      5</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/subdataframe/subdataframe.jl#L1-L69">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameRows" href="#DataFrames.DataFrameRows"><code>DataFrames.DataFrameRows</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameRows{D&lt;:AbstractDataFrame} &lt;: AbstractVector{DataFrameRow}</code></pre><p>Iterator over rows of an <code>AbstractDataFrame</code>, with each row represented as a <code>DataFrameRow</code>.</p><p>A value of this type is returned by the <a href="../functions/#Base.eachrow"><code>eachrow</code></a> function.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L8-L15">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.DataFrameColumns" href="#DataFrames.DataFrameColumns"><code>DataFrames.DataFrameColumns</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">DataFrameColumns{&lt;:AbstractDataFrame}</code></pre><p>A vector-like object that allows iteration over columns of an <code>AbstractDataFrame</code>.</p><p>Indexing into <code>DataFrameColumns</code> objects 
using integer, <code>Symbol</code> or string returns the corresponding column (without copying). Indexing into <code>DataFrameColumns</code> objects using a multiple column selector returns a subsetted <code>DataFrameColumns</code> object with a new parent containing only the selected columns (without copying).</p><p><code>DataFrameColumns</code> supports most of the <code>AbstractVector</code> API. The key differences are that it is read-only and that the <code>keys</code> function returns a vector of <code>Symbol</code>s (and not integers as for normal vectors).</p><p>In particular <code>findnext</code>, <code>findprev</code>, <code>findfirst</code>, <code>findlast</code>, and <code>findall</code> functions are supported, and in <code>findnext</code> and <code>findprev</code> functions it is allowed to pass an integer, string, or <code>Symbol</code> as a reference index.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/iteration.jl#L177-L183">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.RepeatedVector" href="#DataFrames.RepeatedVector"><code>DataFrames.RepeatedVector</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">RepeatedVector{T} &lt;: AbstractVector{T}</code></pre><p>An AbstractVector that is a view into another AbstractVector with repeated elements</p><p>NOTE: Not exported.</p><p><strong>Constructor</strong></p><pre><code class="language-julia hljs">RepeatedVector(parent::AbstractVector, inner::Int, outer::Int)</code></pre><p><strong>Arguments</strong></p><ul><li><code>parent</code> : the 
AbstractVector that&#39;s repeated</li><li><code>inner</code> : the number of times each element is repeated</li><li><code>outer</code> : the number of times the whole vector is repeated after expanded by <code>inner</code></li></ul><p><code>inner</code> and <code>outer</code> have the same meaning as similarly named arguments to <code>repeat</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia hljs">RepeatedVector([1, 2], 3, 1)   # [1, 1, 1, 2, 2, 2]
 RepeatedVector([1, 2], 1, 3)   # [1, 2, 1, 2, 1, 2]
-RepeatedVector([1, 2], 2, 2)   # [1, 1, 2, 2, 1, 1, 2, 2]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/reshape.jl#L668-L696">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.StackedVector" href="#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">StackedVector &lt;: AbstractVector</code></pre><p>An <code>AbstractVector</code> that is a linear, concatenated view into another set of AbstractVectors</p><p>NOTE: Not exported.</p><p><strong>Constructor</strong></p><pre><code class="language-julia hljs">StackedVector(d::AbstractVector)</code></pre><p><strong>Arguments</strong></p><ul><li><code>d...</code> : one or more AbstractVectors</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia hljs">StackedVector(Any[[1, 2], [9, 10], [11, 12]])  # [1, 2, 9, 10, 11, 12]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/86606d45b6148cd9afbe3087f34e765fc9eaf1a1/src/abstractdataframe/reshape.jl#L619-L639">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../man/comparisons/">« Comparison with Python/R/Stata</a><a class="docs-footer-nextpage" href="../functions/">Functions »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" 
id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+RepeatedVector([1, 2], 2, 2)   # [1, 1, 2, 2, 1, 1, 2, 2]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/reshape.jl#L668-L696">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="DataFrames.StackedVector" href="#DataFrames.StackedVector"><code>DataFrames.StackedVector</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">StackedVector &lt;: AbstractVector</code></pre><p>An <code>AbstractVector</code> that is a linear, concatenated view into another set of AbstractVectors</p><p>NOTE: Not exported.</p><p><strong>Constructor</strong></p><pre><code class="language-julia hljs">StackedVector(d::AbstractVector)</code></pre><p><strong>Arguments</strong></p><ul><li><code>d...</code> : one or more AbstractVectors</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia hljs">StackedVector(Any[[1, 2], [9, 10], [11, 12]])  # [1, 2, 9, 10, 11, 12]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/DataFrames.jl/blob/dc59622fee8a45d3eb106d3f648231d3b7b8ecdf/src/abstractdataframe/reshape.jl#L619-L639">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../man/comparisons/">« Comparison with Python/R/Stata</a><a class="docs-footer-nextpage" href="../functions/">Functions »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" 
id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/basics/index.html b/dev/man/basics/index.html
index 1d52bd410..098bb3b24 100644
--- a/dev/man/basics/index.html
+++ b/dev/man/basics/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>First Steps with DataFrames.jl · DataFrames.jl</title><meta name="title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta property="og:title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta property="twitter:title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li class="is-active"><a class="tocitem" href>First Steps with DataFrames.jl</a><ul class="internal"><li><a class="tocitem" href="#Setting-up-the-Environment"><span>Setting up the Environment</span></a></li><li><a class="tocitem" href="#Constructors-and-Basic-Utility-Functions"><span>Constructors and Basic Utility Functions</span></a></li><li><a class="tocitem" 
href="#Getting-and-Setting-Data-in-a-Data-Frame"><span>Getting and Setting Data in a Data Frame</span></a></li><li><a class="tocitem" href="#Basic-Usage-of-Transformation-Functions"><span>Basic Usage of Transformation Functions</span></a></li></ul></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../getting_started/">Getting Started</a></li><li><a class="tocitem" href="../working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../joins/">Joins</a></li><li><a class="tocitem" href="../split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../sorting/">Sorting</a></li><li><a class="tocitem" href="../categorical/">Categorical Data</a></li><li><a class="tocitem" href="../missing/">Missing Data</a></li><li><a class="tocitem" href="../querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../../lib/types/">Types</a></li><li><a class="tocitem" href="../../lib/functions/">Functions</a></li><li><a class="tocitem" href="../../lib/indexing/">Indexing</a></li><li><a class="tocitem" href="../../lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul 
class="is-hidden-mobile"><li class="is-active"><a href>First Steps with DataFrames.jl</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>First Steps with DataFrames.jl</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/man/basics.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="First-Steps-with-DataFrames.jl"><a class="docs-heading-anchor" href="#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a><a id="First-Steps-with-DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#First-Steps-with-DataFrames.jl" title="Permalink"></a></h1><h2 id="Setting-up-the-Environment"><a class="docs-heading-anchor" href="#Setting-up-the-Environment">Setting up the Environment</a><a id="Setting-up-the-Environment-1"></a><a class="docs-heading-anchor-permalink" href="#Setting-up-the-Environment" title="Permalink"></a></h2><p>If want to use the DataFrames.jl package you need to install it first. You can do it using the following commands:</p><pre><code class="language-julia hljs">julia&gt; using Pkg
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>First Steps with DataFrames.jl · DataFrames.jl</title><meta name="title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta property="og:title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta property="twitter:title" content="First Steps with DataFrames.jl · DataFrames.jl"/><meta name="description" content="Documentation for DataFrames.jl."/><meta property="og:description" content="Documentation for DataFrames.jl."/><meta property="twitter:description" content="Documentation for DataFrames.jl."/><meta property="og:url" content="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><meta property="twitter:url" content="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><link rel="canonical" href="https://juliadata.github.io/DataFrames.jl/stable/man/basics/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script><link href="../../assets/favicon.ico" rel="icon" type="image/x-icon"/></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="DataFrames.jl logo"/></a><div class="docs-package-name"><span class="docs-autofit"><a href="../../">DataFrames.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Introduction</a></li><li class="is-active"><a class="tocitem" href>First Steps with DataFrames.jl</a><ul class="internal"><li><a class="tocitem" href="#Setting-up-the-Environment"><span>Setting up the Environment</span></a></li><li><a class="tocitem" href="#Constructors-and-Basic-Utility-Functions"><span>Constructors and Basic Utility Functions</span></a></li><li><a class="tocitem" 
href="#Getting-and-Setting-Data-in-a-Data-Frame"><span>Getting and Setting Data in a Data Frame</span></a></li><li><a class="tocitem" href="#Manipulation-Functions"><span>Manipulation Functions</span></a></li><li><a class="tocitem" href="#Approach-Comparison"><span>Approach Comparison</span></a></li></ul></li><li><span class="tocitem">User Guide</span><ul><li><a class="tocitem" href="../getting_started/">Getting Started</a></li><li><a class="tocitem" href="../working_with_dataframes/">Working with DataFrames</a></li><li><a class="tocitem" href="../importing_and_exporting/">Importing and Exporting Data (I/O)</a></li><li><a class="tocitem" href="../joins/">Joins</a></li><li><a class="tocitem" href="../split_apply_combine/">Split-apply-combine</a></li><li><a class="tocitem" href="../reshaping_and_pivoting/">Reshaping</a></li><li><a class="tocitem" href="../sorting/">Sorting</a></li><li><a class="tocitem" href="../categorical/">Categorical Data</a></li><li><a class="tocitem" href="../missing/">Missing Data</a></li><li><a class="tocitem" href="../querying_frameworks/">Data manipulation frameworks</a></li><li><a class="tocitem" href="../comparisons/">Comparison with Python/R/Stata</a></li></ul></li><li><span class="tocitem">API</span><ul><li><a class="tocitem" href="../../lib/types/">Types</a></li><li><a class="tocitem" href="../../lib/functions/">Functions</a></li><li><a class="tocitem" href="../../lib/indexing/">Indexing</a></li><li><a class="tocitem" href="../../lib/metadata/">Metadata</a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" 
id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>First Steps with DataFrames.jl</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>First Steps with DataFrames.jl</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaData/DataFrames.jl/blob/main/docs/src/man/basics.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="First-Steps-with-DataFrames.jl"><a class="docs-heading-anchor" href="#First-Steps-with-DataFrames.jl">First Steps with DataFrames.jl</a><a id="First-Steps-with-DataFrames.jl-1"></a><a class="docs-heading-anchor-permalink" href="#First-Steps-with-DataFrames.jl" title="Permalink"></a></h1><h2 id="Setting-up-the-Environment"><a class="docs-heading-anchor" href="#Setting-up-the-Environment">Setting up the Environment</a><a id="Setting-up-the-Environment-1"></a><a class="docs-heading-anchor-permalink" href="#Setting-up-the-Environment" title="Permalink"></a></h2><p>If want to use the DataFrames.jl package you need to install it first. You can do it using the following commands:</p><pre><code class="language-julia hljs">julia&gt; using Pkg
 
 julia&gt; Pkg.add(&quot;DataFrames&quot;)</code></pre><p>or</p><pre><code class="language-julia hljs">julia&gt; ] # &#39;]&#39; should be pressed
 
@@ -580,7 +580,7 @@
    4 │    95  male         2
    5 │    78  male         2
    6 │    89  male         1</code></pre><p>This is a non-copying operation. One can perform it only if <code>val</code> vector has the same length as number of rows of <code>df1</code> or as a special case if <code>df1</code> would not have any columns.</p><pre><code class="language-julia-repl hljs">julia&gt; df1.Age === val # no copy is performed
-true</code></pre><p>If in indexing you select a subset of rows from a data frame the mutation is performed in place, i.e. writing to an existing vector. Below setting values of column <code>:Job</code> in rows <code>1:3</code> to values <code>[2, 4, 6]</code>:</p><pre><code class="language-julia-repl hljs">julia&gt; df1[1:3, :Job] = [2, 3, 2]
+true</code></pre><p>If in indexing you select a subset of rows from a data frame the mutation is performed in place, i.e. writing to an existing vector. Below setting values of column <code>:Job</code> in rows <code>1:3</code> to values <code>[2, 3, 2]</code>:</p><pre><code class="language-julia-repl hljs">julia&gt; df1[1:3, :Job] = [2, 3, 2]
 3-element Vector{Int64}:
  2
  3
@@ -658,7 +658,7 @@
  Row │ Age    Sex     Job
      │ Int64  String  Int64
 ─────┼──────────────────────
-   2 │    98  male        2</code></pre><p>This operations updated the data stored in the <code>df1</code> data frame.</p><p>In a similar fashion views can be used to update data stored in their parent data frame. Here are some examples:</p><pre><code class="language-julia-repl hljs">julia&gt; sdf = view(df1, :, 2:3)
+   2 │    98  male        2</code></pre><p>These operations updated the data stored in the <code>df1</code> data frame.</p><p>In a similar fashion views can be used to update data stored in their parent data frame. Here are some examples:</p><pre><code class="language-julia-repl hljs">julia&gt; sdf = view(df1, :, 2:3)
 6×2 SubDataFrame
  Row │ Sex          Job
      │ String       Int64
@@ -908,420 +908,1046 @@
  997 │ male     little
  998 │ male     little
  999 │ male     moderate
-                984 rows omitted</code></pre><h2 id="Basic-Usage-of-Transformation-Functions"><a class="docs-heading-anchor" href="#Basic-Usage-of-Transformation-Functions">Basic Usage of Transformation Functions</a><a id="Basic-Usage-of-Transformation-Functions-1"></a><a class="docs-heading-anchor-permalink" href="#Basic-Usage-of-Transformation-Functions" title="Permalink"></a></h2><p>In DataFrames.jl we have five functions that we can be used to perform transformations of columns of a data frame:</p><ul><li><code>combine</code>: creates a new data frame populated with columns that are results of transformation applied to the source data frame columns, potentially combining its rows;</li><li><code>select</code>: creates a new data frame that has the same number of rows as the source data frame populated with columns that are results of transformations applied to the source data frame columns;</li><li><code>select!</code>: the same as <code>select</code> but updates the passed data frame in place;</li><li><code>transform</code>: the same as <code>select</code> but keeps the columns that were already present in the data frame (note though that these columns can be potentially modified by the transformation passed to <code>transform</code>);</li><li><code>transform!</code>: the same as <code>transform</code> but updates the passed data frame in place.</li></ul><p>The fundamental ways to specify a transformation are:</p><ul><li><code>source_column =&gt; transformation =&gt; target_column_name</code>; In this scenario the <code>source_column</code> is passed as an argument to <code>transformation</code> function and stored in <code>target_column_name</code> column.</li><li><code>source_column =&gt; transformation</code>; In this scenario we apply the transformation function to <code>source_column</code> and the target column names is automatically generated.</li><li><code>source_column =&gt; target_column_name</code> renames the <code>source_column</code> to 
<code>target_column_name</code>.</li><li><code>source_column</code> just keep the source column as is in the result without any transformation;</li></ul><p>These rules are typically called transformation mini-language.</p><p>Let us move to the examples of application of these rules</p><pre><code class="language-julia-repl hljs">julia&gt; using Statistics
+                984 rows omitted</code></pre><h2 id="Manipulation-Functions"><a class="docs-heading-anchor" href="#Manipulation-Functions">Manipulation Functions</a><a id="Manipulation-Functions-1"></a><a class="docs-heading-anchor-permalink" href="#Manipulation-Functions" title="Permalink"></a></h2><p>The seven functions below can be used to manipulate data frames by applying operations to them.</p><p>The functions without a <code>!</code> in their name will create a new data frame based on the source data frame, so you will probably want to store the new data frame to a new variable name, e.g. <code>new_df = transform(source_df, operation)</code>. The functions with a <code>!</code> at the end of their name will modify an existing data frame in-place, so there is typically no need to assign the result to a variable, e.g. <code>transform!(source_df, operation)</code> instead of <code>source_df = transform(source_df, operation)</code>.</p><p>The number of columns and rows in the resultant data frame varies depending on the manipulation function employed.</p><table><tr><th style="text-align: right">Function</th><th style="text-align: right">Memory Usage</th><th style="text-align: right">Column Retention</th><th style="text-align: right">Row Retention</th></tr><tr><td style="text-align: right"><code>transform</code></td><td style="text-align: right">Creates a new data frame.</td><td style="text-align: right">Retains original and resultant columns.</td><td style="text-align: right">Retains same number of rows as original data frame.</td></tr><tr><td style="text-align: right"><code>transform!</code></td><td style="text-align: right">Modifies an existing data frame.</td><td style="text-align: right">Retains original and resultant columns.</td><td style="text-align: right">Retains same number of rows as original data frame.</td></tr><tr><td style="text-align: right"><code>select</code></td><td style="text-align: right">Creates a new data frame.</td><td 
style="text-align: right">Retains only resultant columns.</td><td style="text-align: right">Retains same number of rows as original data frame.</td></tr><tr><td style="text-align: right"><code>select!</code></td><td style="text-align: right">Modifies an existing data frame.</td><td style="text-align: right">Retains only resultant columns.</td><td style="text-align: right">Retains same number of rows as original data frame.</td></tr><tr><td style="text-align: right"><code>subset</code></td><td style="text-align: right">Creates a new data frame.</td><td style="text-align: right">Retains original columns.</td><td style="text-align: right">Retains only rows where condition is true.</td></tr><tr><td style="text-align: right"><code>subset!</code></td><td style="text-align: right">Modifies an existing data frame.</td><td style="text-align: right">Retains original columns.</td><td style="text-align: right">Retains only rows where condition is true.</td></tr><tr><td style="text-align: right"><code>combine</code></td><td style="text-align: right">Creates a new data frame.</td><td style="text-align: right">Retains only resultant columns.</td><td style="text-align: right">Retains only resultant rows.</td></tr></table><h3 id="Constructing-Operations"><a class="docs-heading-anchor" href="#Constructing-Operations">Constructing Operations</a><a id="Constructing-Operations-1"></a><a class="docs-heading-anchor-permalink" href="#Constructing-Operations" title="Permalink"></a></h3><p>All of the functions above use the same syntax which is commonly <code>manipulation_function(dataframe, operation)</code>. 
The <code>operation</code> argument defines the operation to be applied to the source <code>dataframe</code>, and it can take any of the following common forms explained below:</p><p><code>source_column_selector</code> : selects source column(s) without manipulating or renaming them</p><p>Examples: <code>:a</code>, <code>[:a, :b]</code>, <code>All()</code>, <code>Not(:a)</code></p><p><code>source_column_selector =&gt; operation_function</code> : passes source column(s) as arguments to a function and automatically names the resulting column(s)</p><p>Examples: <code>:a =&gt; sum</code>, <code>[:a, :b] =&gt; +</code>, <code>:a =&gt; ByRow(==(3))</code></p><p><code>source_column_selector =&gt; operation_function =&gt; new_column_names</code> : passes source column(s) as arguments to a function and names the resulting column(s) <code>new_column_names</code></p><p>Examples: <code>:a =&gt; sum =&gt; :sum_of_a</code>, <code>[:a, :b] =&gt; (+) =&gt; :a_plus_b</code></p><p><em>(Not available for <code>subset</code> or <code>subset!</code>)</em></p><p><code>source_column_selector =&gt; new_column_names</code> : renames a source column, or splits a column containing collection elements into multiple new columns</p><p>Examples: <code>:a =&gt; :new_a</code>, <code>:a_b =&gt; [:a, :b]</code>, <code>:nt =&gt; AsTable</code></p><p>(<em>Not available for <code>subset</code> or <code>subset!</code></em>)</p><p>The <code>=&gt;</code> operator constructs a <a href="https://docs.julialang.org/en/v1/base/collections/#Core.Pair">Pair</a>, which is a type to link one object to another. (Pairs are commonly used to create elements of a <a href="https://docs.julialang.org/en/v1/base/collections/#Dictionaries">Dictionary</a>.) In DataFrames.jl manipulation functions, <code>Pair</code> arguments are used to define column <code>operations</code> to be performed. 
The examples shown above will be explained in more detail later.</p><p><em>The manipulation functions also have methods for applying multiple operations. See the later sections <a href="#Applying-Multiple-Operations-per-Manipulation">Applying Multiple Operations per Manipulation</a> and <a href="#Broadcasting-Operation-Pairs">Broadcasting Operation Pairs</a> for more information.</em></p><h4 id="source_column_selector"><a class="docs-heading-anchor" href="#source_column_selector"><code>source_column_selector</code></a><a id="source_column_selector-1"></a><a class="docs-heading-anchor-permalink" href="#source_column_selector" title="Permalink"></a></h4><p>Inside an <code>operation</code>, <code>source_column_selector</code> is usually a column name or column index which identifies a data frame column.</p><p><code>source_column_selector</code> may be used as the entire <code>operation</code> with <code>select</code> or <code>select!</code> to isolate or reorder columns.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a = [1, 2, 3], b = [4, 5, 6], c = [7, 8, 9])
+3×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      7
+   2 │     2      5      8
+   3 │     3      6      9
+
+julia&gt; select(df, :b)
+3×1 DataFrame
+ Row │ b
+     │ Int64
+─────┼───────
+   1 │     4
+   2 │     5
+   3 │     6
+
+julia&gt; select(df, &quot;b&quot;)
+3×1 DataFrame
+ Row │ b
+     │ Int64
+─────┼───────
+   1 │     4
+   2 │     5
+   3 │     6
+
+julia&gt; select(df, 2)
+3×1 DataFrame
+ Row │ b
+     │ Int64
+─────┼───────
+   1 │     4
+   2 │     5
+   3 │     6</code></pre><p><code>source_column_selector</code> may also be used as the entire <code>operation</code> with <code>subset</code> or <code>subset!</code> if the source column contains <code>Bool</code> values.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(
+           name = [&quot;Scott&quot;, &quot;Jill&quot;, &quot;Erica&quot;, &quot;Jimmy&quot;],
+           minor = [false, true, false, true],
+       )
+4×2 DataFrame
+ Row │ name    minor
+     │ String  Bool
+─────┼───────────────
+   1 │ Scott   false
+   2 │ Jill     true
+   3 │ Erica   false
+   4 │ Jimmy    true
+
+julia&gt; subset(df, :minor)
+2×2 DataFrame
+ Row │ name    minor
+     │ String  Bool
+─────┼───────────────
+   1 │ Jill     true
+   2 │ Jimmy    true</code></pre><p><code>source_column_selector</code> may instead be a collection of columns such as a vector, a <a href="https://docs.julialang.org/en/v1/manual/strings/#Regular-Expressions">regular expression</a>, a <code>Not</code>, <code>Between</code>, <code>All</code>, or <code>Cols</code> expression, or a <code>:</code>. See the <a href="../../lib/indexing/#Indexing">Indexing</a> API for the full list of possible values with references.</p><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>The Julia parser sometimes prevents <code>:</code> from being used by itself. If you get <code>ERROR: syntax: whitespace not allowed after &quot;:&quot; used for quoting</code>, try using <code>All()</code>, <code>Cols(:)</code>, or <code>(:)</code> instead to select all columns.</p></div></div><pre><code class="language-julia hljs">julia&gt; df = DataFrame(
+           id = [1, 2, 3],
+           first_name = [&quot;José&quot;, &quot;Emma&quot;, &quot;Nathan&quot;],
+           last_name = [&quot;Garcia&quot;, &quot;Marino&quot;, &quot;Boyer&quot;],
+           age = [61, 24, 33]
+       )
+3×4 DataFrame
+ Row │ id     first_name  last_name  age
+     │ Int64  String      String     Int64
+─────┼─────────────────────────────────────
+   1 │     1  José        Garcia        61
+   2 │     2  Emma        Marino        24
+   3 │     3  Nathan      Boyer         33
+
+julia&gt; select(df, [:last_name, :first_name])
+3×2 DataFrame
+ Row │ last_name  first_name
+     │ String     String
+─────┼───────────────────────
+   1 │ Garcia     José
+   2 │ Marino     Emma
+   3 │ Boyer      Nathan
+
+julia&gt; select(df, r&quot;name&quot;)
+3×2 DataFrame
+ Row │ first_name  last_name
+     │ String      String
+─────┼───────────────────────
+   1 │ José        Garcia
+   2 │ Emma        Marino
+   3 │ Nathan      Boyer
+
+julia&gt; select(df, Not(:id))
+3×3 DataFrame
+ Row │ first_name  last_name  age
+     │ String      String     Int64
+─────┼──────────────────────────────
+   1 │ José        Garcia        61
+   2 │ Emma        Marino        24
+   3 │ Nathan      Boyer         33
+
+julia&gt; select(df, Between(2,4))
+3×3 DataFrame
+ Row │ first_name  last_name  age
+     │ String      String     Int64
+─────┼──────────────────────────────
+   1 │ José        Garcia        61
+   2 │ Emma        Marino        24
+   3 │ Nathan      Boyer         33
+
+julia&gt; df2 = DataFrame(
+           name = [&quot;Scott&quot;, &quot;Jill&quot;, &quot;Erica&quot;, &quot;Jimmy&quot;],
+           minor = [false, true, false, true],
+           male = [true, false, false, true],
+       )
+4×3 DataFrame
+ Row │ name    minor  male
+     │ String  Bool   Bool
+─────┼──────────────────────
+   1 │ Scott   false   true
+   2 │ Jill     true  false
+   3 │ Erica   false  false
+   4 │ Jimmy    true   true
+
+julia&gt; subset(df2, [:minor, :male])
+1×3 DataFrame
+ Row │ name    minor  male
+     │ String  Bool   Bool
+─────┼─────────────────────
+   1 │ Jimmy    true  true</code></pre><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>Using a <code>Symbol</code> in <code>source_column_selector</code> will perform slightly faster than using a string. However, a string is convenient when column names contain spaces.</p><p>All elements of <code>source_column_selector</code> must be the same type (unless wrapped in <code>Cols</code>), e.g. <code>subset(df2, [:minor, &quot;male&quot;])</code> will error since a <code>Symbol</code> and a string are used simultaneously.</p></div></div><h4 id="operation_function"><a class="docs-heading-anchor" href="#operation_function"><code>operation_function</code></a><a id="operation_function-1"></a><a class="docs-heading-anchor-permalink" href="#operation_function" title="Permalink"></a></h4><p>Inside an <code>operation</code> pair, <code>operation_function</code> is a function which operates on data frame columns passed as vectors. When multiple columns are selected by <code>source_column_selector</code>, the <code>operation_function</code> will receive the columns as separate positional arguments in the order they were selected, e.g. <code>f(column1, column2, column3)</code>.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a = [1, 2, 3], b = [4, 5, 4])
+3×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     2      5
+   3 │     3      4
 
-julia&gt; combine(german, :Age =&gt; mean =&gt; :mean_age)
+julia&gt; combine(df, :a =&gt; sum)
 1×1 DataFrame
- Row │ mean_age
+ Row │ a_sum
+     │ Int64
+─────┼───────
+   1 │     6
+
+julia&gt; transform(df, :b =&gt; maximum) # `transform` and `select` copy scalar result to all rows
+3×3 DataFrame
+ Row │ a      b      b_maximum
+     │ Int64  Int64  Int64
+─────┼─────────────────────────
+   1 │     1      4          5
+   2 │     2      5          5
+   3 │     3      4          5
+
+julia&gt; transform(df, [:b, :a] =&gt; -) # vector subtraction is okay
+3×3 DataFrame
+ Row │ a      b      b_a_-
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      3
+   2 │     2      5      3
+   3 │     3      4      1
+
+julia&gt; transform(df, [:a, :b] =&gt; *) # vector multiplication is not defined
+ERROR: MethodError: no method matching *(::Vector{Int64}, ::Vector{Int64})</code></pre><p>Don&#39;t worry! There is a quick fix for the previous error. If you want to apply a function to each element in a column instead of to the entire column vector, then you can wrap your element-wise function in <code>ByRow</code> like <code>ByRow(my_elementwise_function)</code>. This will apply <code>my_elementwise_function</code> to every element in the column and then collect the results back into a vector.</p><pre><code class="language-julia hljs">julia&gt; transform(df, [:a, :b] =&gt; ByRow(*))
+3×3 DataFrame
+ Row │ a      b      a_b_*
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      4
+   2 │     2      5     10
+   3 │     3      4     12
+
+julia&gt; transform(df, Cols(:) =&gt; ByRow(max))
+3×3 DataFrame
+ Row │ a      b      a_b_max
+     │ Int64  Int64  Int64
+─────┼───────────────────────
+   1 │     1      4        4
+   2 │     2      5        5
+   3 │     3      4        4
+
+julia&gt; f(x) = x + 1
+f (generic function with 1 method)
+
+julia&gt; transform(df, :a =&gt; ByRow(f))
+3×3 DataFrame
+ Row │ a      b      a_f
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      2
+   2 │     2      5      3
+   3 │     3      4      4</code></pre><p>Alternatively, you may just want to define the function itself so it <a href="https://docs.julialang.org/en/v1/manual/arrays/#Broadcasting">broadcasts</a> over vectors.</p><pre><code class="language-julia hljs">julia&gt; g(x) = x .+ 1
+g (generic function with 1 method)
+
+julia&gt; transform(df, :a =&gt; g)
+3×3 DataFrame
+ Row │ a      b      a_g
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      2
+   2 │     2      5      3
+   3 │     3      4      4
+
+julia&gt; h(x, y) = x .+ y .+ 1
+h (generic function with 1 method)
+
+julia&gt; transform(df, [:a, :b] =&gt; h)
+3×3 DataFrame
+ Row │ a      b      a_b_h
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      6
+   2 │     2      5      8
+   3 │     3      4      8</code></pre><p><a href="https://docs.julialang.org/en/v1/manual/functions/#man-anonymous-functions">Anonymous functions</a> are a convenient way to define and use an <code>operation_function</code> all within the manipulation function call.</p><pre><code class="language-julia hljs">julia&gt; select(df, :a =&gt; ByRow(x -&gt; x + 1))
+3×1 DataFrame
+ Row │ a_function
+     │ Int64
+─────┼────────────
+   1 │          2
+   2 │          3
+   3 │          4
+
+julia&gt; transform(df, [:a, :b] =&gt; ByRow((x, y) -&gt; 2x + y))
+3×3 DataFrame
+ Row │ a      b      a_b_function
+     │ Int64  Int64  Int64
+─────┼────────────────────────────
+   1 │     1      4             6
+   2 │     2      5             9
+   3 │     3      4            10
+
+julia&gt; subset(df, :b =&gt; ByRow(x -&gt; x &lt; 5))
+2×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     3      4
+
+julia&gt; subset(df, :b =&gt; ByRow(&lt;(5))) # shorter version of the previous
+2×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     3      4</code></pre><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p><code>operation_functions</code> within <code>subset</code> or <code>subset!</code> function calls must return a Boolean vector. <code>true</code> elements in the Boolean vector will determine which rows are retained in the resulting data frame.</p></div></div><p>As demonstrated above, <code>DataFrame</code> columns are usually passed from <code>source_column_selector</code> to <code>operation_function</code> as one or more vector arguments. However, when <code>AsTable(source_column_selector)</code> is used, the selected columns are collected and passed as a single <code>NamedTuple</code> to <code>operation_function</code>.</p><p>This is often useful when your <code>operation_function</code> is defined to operate on a single collection argument rather than on multiple positional arguments. The distinction is somewhat similar to the difference between the built-in <code>min</code> and <code>minimum</code> functions. <code>min</code> is defined to find the minimum value among multiple positional arguments, while <code>minimum</code> is defined to find the minimum value among the elements of a single collection argument.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a = 1:2, b = 3:4, c = 5:6, d = 2:-1:1)
+2×4 DataFrame
+ Row │ a      b      c      d
+     │ Int64  Int64  Int64  Int64
+─────┼────────────────────────────
+   1 │     1      3      5      2
+   2 │     2      4      6      1
+
+julia&gt; select(df, Cols(:) =&gt; ByRow(min)) # min operates on multiple arguments
+2×1 DataFrame
+ Row │ a_b_etc_min
+     │ Int64
+─────┼─────────────
+   1 │           1
+   2 │           1
+
+julia&gt; select(df, AsTable(:) =&gt; ByRow(minimum)) # minimum operates on a collection
+2×1 DataFrame
+ Row │ a_b_etc_minimum
+     │ Int64
+─────┼─────────────────
+   1 │               1
+   2 │               1
+
+julia&gt; select(df, [:a,:b] =&gt; ByRow(+)) # `+` operates on multiple arguments
+2×1 DataFrame
+ Row │ a_b_+
+     │ Int64
+─────┼───────
+   1 │     4
+   2 │     6
+
+julia&gt; select(df, AsTable([:a,:b]) =&gt; ByRow(sum)) # `sum` operates on a collection
+2×1 DataFrame
+ Row │ a_b_sum
+     │ Int64
+─────┼─────────
+   1 │       4
+   2 │       6
+
+julia&gt; using Statistics # contains the `mean` function
+
+julia&gt; select(df, AsTable(Between(:b, :d)) =&gt; ByRow(mean)) # `mean` operates on a collection
+2×1 DataFrame
+ Row │ b_c_d_mean
      │ Float64
-─────┼──────────
-   1 │   35.546
+─────┼────────────
+   1 │    3.33333
+   2 │    3.66667</code></pre><p><code>AsTable</code> can also be used to pass columns to a function which operates on fields of a <code>NamedTuple</code>.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a = 1:2, b = 3:4, c = 5:6, d = 7:8)
+2×4 DataFrame
+ Row │ a      b      c      d
+     │ Int64  Int64  Int64  Int64
+─────┼────────────────────────────
+   1 │     1      3      5      7
+   2 │     2      4      6      8
 
-julia&gt; select(german, :Age =&gt; mean =&gt; :mean_age)
-1000×1 DataFrame
-  Row │ mean_age
-      │ Float64
-──────┼──────────
-    1 │   35.546
-    2 │   35.546
-    3 │   35.546
-    4 │   35.546
-    5 │   35.546
-    6 │   35.546
-    7 │   35.546
-    8 │   35.546
-  ⋮   │    ⋮
-  994 │   35.546
-  995 │   35.546
-  996 │   35.546
-  997 │   35.546
-  998 │   35.546
-  999 │   35.546
- 1000 │   35.546
- 985 rows omitted</code></pre><p>As you can see in both cases the <code>mean</code> function was applied to <code>:Age</code> column and the result was stored in the <code>:mean_age</code> column. The difference between the <code>combine</code> and <code>select</code> functions is that the <code>combine</code> aggregates data and produces as many rows as were returned by the transformation function. On the other hand the <code>select</code> function always keeps the number of rows in a data frame to be the same as in the source data frame. Therefore in this case the result of the <code>mean</code> function got broadcasted.</p><p>As <code>combine</code> potentially allows any number of rows to be produced as a result of the transformation if we have a combination of transformations where some of them produce a vector, and other produce scalars then scalars get broadcasted exactly like in  <code>select</code>. Here is an example:</p><pre><code class="language-julia-repl hljs">julia&gt; combine(german, :Age =&gt; mean =&gt; :mean_age, :Housing =&gt; unique =&gt; :housing)
+julia&gt; f(nt) = nt.a + nt.d
+f (generic function with 1 method)
+
+julia&gt; transform(df, AsTable(:) =&gt; ByRow(f))
+2×5 DataFrame
+ Row │ a      b      c      d      a_b_etc_f
+     │ Int64  Int64  Int64  Int64  Int64
+─────┼───────────────────────────────────────
+   1 │     1      3      5      7          8
+   2 │     2      4      6      8         10</code></pre><p>As demonstrated above, in the <code>source_column_selector =&gt; operation_function</code> operation pair form, the results of an operation will be placed into a new column with an automatically-generated name based on the operation; the new column name will be the <code>operation_function</code> name appended to the source column name(s) with an underscore.</p><p>This automatic column naming behavior can be avoided in two ways. First, the operation result can be placed back into the original column with the original column name by switching the keyword argument <code>renamecols</code> from its default value (<code>true</code>) to <code>renamecols=false</code>. This option prevents the function name from being appended to the column name as it usually would be.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; transform(df, :a =&gt; ByRow(x-&gt;x+10), renamecols=false) # add 10 in-place
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │    11      5
+   2 │    12      6
+   3 │    13      7
+   4 │    14      8</code></pre><p>The second method to avoid the default manipulation column naming is to specify your own <code>new_column_names</code>.</p><h4 id="new_column_names"><a class="docs-heading-anchor" href="#new_column_names"><code>new_column_names</code></a><a id="new_column_names-1"></a><a class="docs-heading-anchor-permalink" href="#new_column_names" title="Permalink"></a></h4><p><code>new_column_names</code> can be included at the end of an <code>operation</code> pair to specify the name of the new column(s). <code>new_column_names</code> may be a symbol, string, function, vector of symbols, vector of strings, or <code>AsTable</code>.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; transform(df, Cols(:) =&gt; ByRow(+) =&gt; :c)
+4×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      6
+   2 │     2      6      8
+   3 │     3      7     10
+   4 │     4      8     12
+
+julia&gt; transform(df, Cols(:) =&gt; ByRow(+) =&gt; &quot;a+b&quot;)
+4×3 DataFrame
+ Row │ a      b      a+b
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      6
+   2 │     2      6      8
+   3 │     3      7     10
+   4 │     4      8     12
+
+julia&gt; transform(df, :a =&gt; ByRow(x-&gt;x+10) =&gt; &quot;a+10&quot;)
+4×3 DataFrame
+ Row │ a      b      a+10
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5     11
+   2 │     2      6     12
+   3 │     3      7     13
+   4 │     4      8     14</code></pre><p>The <code>source_column_selector =&gt; new_column_names</code> operation form can be used to rename columns without an intermediate function. However, there are <code>rename</code> and <code>rename!</code> functions, which accept similar syntax, that tend to be more useful for this operation.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; transform(df, :a =&gt; :apple) # adds column `apple`
+4×3 DataFrame
+ Row │ a      b      apple
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      1
+   2 │     2      6      2
+   3 │     3      7      3
+   4 │     4      8      4
+
+julia&gt; select(df, :a =&gt; :apple) # retains only column `apple`
+4×1 DataFrame
+ Row │ apple
+     │ Int64
+─────┼───────
+   1 │     1
+   2 │     2
+   3 │     3
+   4 │     4
+
+julia&gt; rename(df, :a =&gt; :apple) # renames column `a` to `apple` in-place
+4×2 DataFrame
+ Row │ apple  b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8</code></pre><p>If <code>new_column_names</code> already exist in the source data frame, those columns will be replaced in the existing column location rather than being added to the end. This can be done by manually specifying an existing column name or by using the <code>renamecols=false</code> keyword argument.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; transform(df, :b =&gt; (x -&gt; x .+ 10))  # automatic new column and column name
+4×3 DataFrame
+ Row │ a      b      b_function
+     │ Int64  Int64  Int64
+─────┼──────────────────────────
+   1 │     1      5          15
+   2 │     2      6          16
+   3 │     3      7          17
+   4 │     4      8          18
+
+julia&gt; transform(df, :b =&gt; (x -&gt; x .+ 10), renamecols=false)  # transform column in-place
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1     15
+   2 │     2     16
+   3 │     3     17
+   4 │     4     18
+
+julia&gt; transform(df, :b =&gt; (x -&gt; x .+ 10) =&gt; :a)  # replace column :a
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │    15      5
+   2 │    16      6
+   3 │    17      7
+   4 │    18      8</code></pre><p>Actually, <code>renamecols=false</code> just prevents the function name from being appended to the final column name such that the operation is <em>usually</em> returned to the same column.</p><pre><code class="language-julia hljs">julia&gt; transform(df, [:a, :b] =&gt; +)  # new column name is all source columns and function name
+4×3 DataFrame
+ Row │ a      b      a_b_+
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      6
+   2 │     2      6      8
+   3 │     3      7     10
+   4 │     4      8     12
+
+julia&gt; transform(df, [:a, :b] =&gt; +, renamecols=false)  # same as above but with no function name
+4×3 DataFrame
+ Row │ a      b      a_b
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      6
+   2 │     2      6      8
+   3 │     3      7     10
+   4 │     4      8     12
+
+julia&gt; transform(df, [:a, :b] =&gt; (+) =&gt; :a)  # manually overwrite column :a (see Note below about parentheses)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     6      5
+   2 │     8      6
+   3 │    10      7
+   4 │    12      8</code></pre><p>In the <code>source_column_selector =&gt; operation_function =&gt; new_column_names</code> operation form, <code>new_column_names</code> may also be a renaming function which operates on a string to create the destination column names programmatically.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; add_prefix(s) = &quot;new_&quot; * s
+add_prefix (generic function with 1 method)
+
+julia&gt; transform(df, :a =&gt; (x -&gt; 10 .* x) =&gt; add_prefix) # with named renaming function
+4×3 DataFrame
+ Row │ a      b      new_a
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5     10
+   2 │     2      6     20
+   3 │     3      7     30
+   4 │     4      8     40
+
+julia&gt; transform(df, :a =&gt; (x -&gt; 10 .* x) =&gt; (s -&gt; &quot;new_&quot; * s)) # with anonymous renaming function
+4×3 DataFrame
+ Row │ a      b      new_a
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5     10
+   2 │     2      6     20
+   3 │     3      7     30
+   4 │     4      8     40</code></pre><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>It is a good idea to wrap anonymous functions in parentheses to avoid the <code>=&gt;</code> operator accidentally becoming part of the anonymous function. The examples above do not work correctly without the parentheses!</p><pre><code class="language-julia hljs">julia&gt; transform(df, :a =&gt; x -&gt; 10 .* x =&gt; add_prefix)  # Not what we wanted!
+4×3 DataFrame
+ Row │ a      b      a_function
+     │ Int64  Int64  Pair…
+─────┼────────────────────────────────────────────
+   1 │     1      5  [10, 20, 30, 40]=&gt;add_prefix
+   2 │     2      6  [10, 20, 30, 40]=&gt;add_prefix
+   3 │     3      7  [10, 20, 30, 40]=&gt;add_prefix
+   4 │     4      8  [10, 20, 30, 40]=&gt;add_prefix
+julia&gt; transform(df, :a =&gt; x -&gt; 10 .* x =&gt; s -&gt; &quot;new_&quot; * s)  # Not what we wanted!
+4×3 DataFrame
+ Row │ a      b      a_function
+     │ Int64  Int64  Pair…
+─────┼─────────────────────────────────────
+   1 │     1      5  [10, 20, 30, 40]=&gt;#18
+   2 │     2      6  [10, 20, 30, 40]=&gt;#18
+   3 │     3      7  [10, 20, 30, 40]=&gt;#18
+   4 │     4      8  [10, 20, 30, 40]=&gt;#18</code></pre></div></div><p>A renaming function will not work in the <code>source_column_selector =&gt; new_column_names</code> operation form because a function in the second element of the operation pair is assumed to take the <code>source_column_selector =&gt; operation_function</code> operation form. To work around this limitation, use the <code>source_column_selector =&gt; operation_function =&gt; new_column_names</code> operation form with <code>identity</code> as the <code>operation_function</code>.</p><pre><code class="language-julia hljs">julia&gt; transform(df, :a =&gt; add_prefix)
+ERROR: MethodError: no method matching *(::String, ::Vector{Int64})
+
+julia&gt; transform(df, :a =&gt; identity =&gt; add_prefix)
+4×3 DataFrame
+ Row │ a      b      new_a
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      5      1
+   2 │     2      6      2
+   3 │     3      7      3
+   4 │     4      8      4</code></pre><p>In this case though, it is probably again more useful to use the <code>rename</code> or <code>rename!</code> function rather than one of the manipulation functions in order to rename in-place and avoid the intermediate <code>operation_function</code>.</p><pre><code class="language-julia hljs">julia&gt; rename(add_prefix, df)  # rename all columns with a function
+4×2 DataFrame
+ Row │ new_a  new_b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; rename(add_prefix, df; cols=:a)  # rename some columns with a function
+4×2 DataFrame
+ Row │ new_a  b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8</code></pre><p>In the <code>source_column_selector =&gt; new_column_names</code> operation form, only a single source column may be selected per operation, so why is <code>new_column_names</code> plural? It is possible to split the data contained inside a single column into multiple new columns by supplying a vector of strings or symbols as <code>new_column_names</code>.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(data = [(1,2), (3,4)]) # vector of tuples
+2×1 DataFrame
+ Row │ data
+     │ Tuple…
+─────┼────────
+   1 │ (1, 2)
+   2 │ (3, 4)
+
+julia&gt; transform(df, :data =&gt; [:first, :second]) # manual naming
+2×3 DataFrame
+ Row │ data    first  second
+     │ Tuple…  Int64  Int64
+─────┼───────────────────────
+   1 │ (1, 2)      1       2
+   2 │ (3, 4)      3       4</code></pre><p>This kind of data splitting can even be done automatically with <code>AsTable</code>.</p><pre><code class="language-julia hljs">julia&gt; transform(df, :data =&gt; AsTable) # default automatic naming with tuples
+2×3 DataFrame
+ Row │ data    x1     x2
+     │ Tuple…  Int64  Int64
+─────┼──────────────────────
+   1 │ (1, 2)      1      2
+   2 │ (3, 4)      3      4</code></pre><p>If a data frame column contains <code>NamedTuple</code>s, then <code>AsTable</code> will preserve the field names.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(data = [(a=1,b=2), (a=3,b=4)]) # vector of named tuples
+2×1 DataFrame
+ Row │ data
+     │ NamedTup…
+─────┼────────────────
+   1 │ (a = 1, b = 2)
+   2 │ (a = 3, b = 4)
+
+julia&gt; transform(df, :data =&gt; AsTable) # keeps names from named tuples
+2×3 DataFrame
+ Row │ data            a      b
+     │ NamedTup…       Int64  Int64
+─────┼──────────────────────────────
+   1 │ (a = 1, b = 2)      1      2
+   2 │ (a = 3, b = 4)      3      4</code></pre><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>To pack multiple columns into a single column of <code>NamedTuple</code>s (reverse of the above operation), apply the <code>identity</code> function wrapped in <code>ByRow</code>, e.g. <code>transform(df, AsTable([:a, :b]) =&gt; ByRow(identity) =&gt; :data)</code>.</p></div></div><p>Renaming functions also work for multi-column transformations, but they must operate on a vector of strings.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(data = [(1,2), (3,4)])
+2×1 DataFrame
+ Row │ data
+     │ Tuple…
+─────┼────────
+   1 │ (1, 2)
+   2 │ (3, 4)
+
+julia&gt; new_names(v) = [&quot;primary &quot;, &quot;secondary &quot;] .* v
+new_names (generic function with 1 method)
+
+julia&gt; transform(df, :data =&gt; identity =&gt; new_names)
+2×3 DataFrame
+ Row │ data    primary data  secondary data
+     │ Tuple…  Int64         Int64
+─────┼──────────────────────────────────────
+   1 │ (1, 2)             1               2
+   2 │ (3, 4)             3               4</code></pre><h3 id="Applying-Multiple-Operations-per-Manipulation"><a class="docs-heading-anchor" href="#Applying-Multiple-Operations-per-Manipulation">Applying Multiple Operations per Manipulation</a><a id="Applying-Multiple-Operations-per-Manipulation-1"></a><a class="docs-heading-anchor-permalink" href="#Applying-Multiple-Operations-per-Manipulation" title="Permalink"></a></h3><p>All data frame manipulation functions can accept multiple <code>operation</code> pairs at once using any of the following methods:</p><ul><li><code>manipulation_function(dataframe, operation1, operation2)</code>   : multiple arguments</li><li><code>manipulation_function(dataframe, [operation1, operation2])</code> : vector argument</li><li><code>manipulation_function(dataframe, [operation1 operation2])</code>  : matrix argument</li></ul><p>Passing multiple operations is especially useful for the <code>select</code>, <code>select!</code>, and <code>combine</code> manipulation functions, since they only retain columns which are a result of the passed operations.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a = 1:4, b = [50,50,60,60], c = [&quot;hat&quot;,&quot;bat&quot;,&quot;cat&quot;,&quot;dog&quot;])
+4×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  String
+─────┼──────────────────────
+   1 │     1     50  hat
+   2 │     2     50  bat
+   3 │     3     60  cat
+   4 │     4     60  dog
+
+julia&gt; combine(df, :a =&gt; maximum, :b =&gt; sum, :c =&gt; join) # 3 combine operations
+1×3 DataFrame
+ Row │ a_maximum  b_sum  c_join
+     │ Int64      Int64  String
+─────┼────────────────────────────────
+   1 │         4    220  hatbatcatdog
+
+julia&gt; select(df, :c, :b, :a) # re-order columns
+4×3 DataFrame
+ Row │ c       b      a
+     │ String  Int64  Int64
+─────┼──────────────────────
+   1 │ hat        50      1
+   2 │ bat        50      2
+   3 │ cat        60      3
+   4 │ dog        60      4
+
+julia&gt; select(df, :b, :) # `:` here means all other columns
+4×3 DataFrame
+ Row │ b      a      c
+     │ Int64  Int64  String
+─────┼──────────────────────
+   1 │    50      1  hat
+   2 │    50      2  bat
+   3 │    60      3  cat
+   4 │    60      4  dog
+
+julia&gt; select(
+           df,
+           :c =&gt; (x -&gt; &quot;a &quot; .* x) =&gt; :one_c,
+           :a =&gt; (x -&gt; 100x),
+           :b,
+           renamecols=false
+       ) # can mix operation forms
+4×3 DataFrame
+ Row │ one_c   a      b
+     │ String  Int64  Int64
+─────┼──────────────────────
+   1 │ a hat     100     50
+   2 │ a bat     200     50
+   3 │ a cat     300     60
+   4 │ a dog     400     60
+
+julia&gt; select(
+           df,
+           :c =&gt; ByRow(reverse),
+           :c =&gt; ByRow(uppercase)
+       ) # multiple operations on same column
+4×2 DataFrame
+ Row │ c_reverse  c_uppercase
+     │ String     String
+─────┼────────────────────────
+   1 │ tah        HAT
+   2 │ tab        BAT
+   3 │ tac        CAT
+   4 │ god        DOG</code></pre><p>In the last two examples, the manipulation function arguments were split across multiple lines. This is a good way to make manipulations with many operations more readable.</p><p>Passing multiple operations to <code>subset</code> or <code>subset!</code> is an easy way to narrow in on a particular row of data.</p><pre><code class="language-julia hljs">julia&gt; subset(
+           df,
+           :b =&gt; ByRow(==(60)),
+           :c =&gt; ByRow(contains(&quot;at&quot;))
+       ) # rows with 60 and &quot;at&quot;
+1×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  String
+─────┼──────────────────────
+   1 │     3     60  cat</code></pre><p>Note that all operations within a single manipulation must use the data as it existed before the function call i.e. you cannot use newly created columns for subsequent operations within the same manipulation.</p><pre><code class="language-julia hljs">julia&gt; transform(
+           df,
+           [:a, :b] =&gt; ByRow(+) =&gt; :d,
+           :d =&gt; (x -&gt; x ./ 2),
+       ) # requires two separate transformations
+ERROR: ArgumentError: column name :d not found in the data frame; existing most similar names are: :a, :b and :c
+
+julia&gt; new_df = transform(df, [:a, :b] =&gt; ByRow(+) =&gt; :d)
+4×4 DataFrame
+ Row │ a      b      c       d
+     │ Int64  Int64  String  Int64
+─────┼─────────────────────────────
+   1 │     1     50  hat        51
+   2 │     2     50  bat        52
+   3 │     3     60  cat        63
+   4 │     4     60  dog        64
+
+julia&gt; transform!(new_df, :d =&gt; (x -&gt; x ./ 2) =&gt; :d_2)
+4×5 DataFrame
+ Row │ a      b      c       d      d_2
+     │ Int64  Int64  String  Int64  Float64
+─────┼──────────────────────────────────────
+   1 │     1     50  hat        51     25.5
+   2 │     2     50  bat        52     26.0
+   3 │     3     60  cat        63     31.5
+   4 │     4     60  dog        64     32.0</code></pre><h3 id="Broadcasting-Operation-Pairs"><a class="docs-heading-anchor" href="#Broadcasting-Operation-Pairs">Broadcasting Operation Pairs</a><a id="Broadcasting-Operation-Pairs-1"></a><a class="docs-heading-anchor-permalink" href="#Broadcasting-Operation-Pairs" title="Permalink"></a></h3><p><a href="https://docs.julialang.org/en/v1/manual/arrays/#Broadcasting">Broadcasting</a> pairs with <code>.=&gt;</code> is often a convenient way to generate multiple similar <code>operation</code>s to be applied within a single manipulation. Broadcasting within the <code>Pair</code> of an <code>operation</code> is no different than broadcasting in base Julia. The broadcasting <code>.=&gt;</code> will be expanded into a vector of pairs (<code>[operation1, operation2, ...]</code>), and this expansion will occur before the manipulation function is invoked. Then the manipulation function will use the <code>manipulation_function(dataframe, [operation1, operation2, ...])</code> method. This process will be explained in more detail below.</p><p>To illustrate these concepts, let us first examine the <code>Type</code> of a basic <code>Pair</code>. In DataFrames.jl, a symbol, string, or integer may be used to select a single column. Some <code>Pair</code>s with these types are below.</p><pre><code class="language-julia hljs">julia&gt; typeof(:x =&gt; :a)
+Pair{Symbol, Symbol}
+
+julia&gt; typeof(&quot;x&quot; =&gt; &quot;a&quot;)
+Pair{String, String}
+
+julia&gt; typeof(1 =&gt; &quot;a&quot;)
+Pair{Int64, String}</code></pre><p>Any of the <code>Pair</code>s above could be used to rename the first column of the data frame below to <code>a</code>.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(x = 1:3, y = 4:6)
 3×2 DataFrame
- Row │ mean_age  housing
-     │ Float64   String7
-─────┼───────────────────
-   1 │   35.546  own
-   2 │   35.546  free
-   3 │   35.546  rent</code></pre><p>Note, however, that it is not allowed to return vectors of different lengths in different transformations:</p><pre><code class="language-julia-repl hljs">julia&gt; combine(german, :Age, :Housing =&gt; unique =&gt; :Housing)
-ERROR: ArgumentError: New columns must have the same length as old columns</code></pre><p>Let us discuss some other examples using <code>select</code>. Often we want to apply some function not to the whole column of a data frame, but rather to its individual elements. Normally we can achieve this using broadcasting like this:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Sex =&gt; (x -&gt; uppercase.(x)) =&gt; :Sex)
-1000×1 DataFrame
-  Row │ Sex
-      │ String
-──────┼────────
-    1 │ MALE
-    2 │ FEMALE
-    3 │ MALE
-    4 │ MALE
-    5 │ MALE
-    6 │ MALE
-    7 │ MALE
-    8 │ MALE
-  ⋮   │   ⋮
-  994 │ MALE
-  995 │ MALE
-  996 │ FEMALE
-  997 │ MALE
-  998 │ MALE
-  999 │ MALE
- 1000 │ MALE
-985 rows omitted</code></pre><p>This pattern is encountered very often in practice, therefore there is a <code>ByRow</code> convenience wrapper for a function that creates its broadcasted variant. In these examples <code>ByRow</code> is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection. Here we are passing <code>ByRow</code> wrapper to target column name <code>:Sex</code> using <code>uppercase</code> function:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Sex =&gt; ByRow(uppercase) =&gt; :SEX)
-1000×1 DataFrame
-  Row │ SEX
-      │ String
-──────┼────────
-    1 │ MALE
-    2 │ FEMALE
-    3 │ MALE
-    4 │ MALE
-    5 │ MALE
-    6 │ MALE
-    7 │ MALE
-    8 │ MALE
-  ⋮   │   ⋮
-  994 │ MALE
-  995 │ MALE
-  996 │ FEMALE
-  997 │ MALE
-  998 │ MALE
-  999 │ MALE
- 1000 │ MALE
-985 rows omitted</code></pre><p>In this case we transform our source column <code>:Age</code> using <code>ByRow</code> wrapper and automatically generate the target column name:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Age, :Age =&gt; ByRow(sqrt))
-1000×2 DataFrame
-  Row │ Age    Age_sqrt
-      │ Int64  Float64
-──────┼─────────────────
-    1 │    67   8.18535
-    2 │    22   4.69042
-    3 │    49   7.0
-    4 │    45   6.7082
-    5 │    53   7.28011
-    6 │    35   5.91608
-    7 │    53   7.28011
-    8 │    35   5.91608
-  ⋮   │   ⋮       ⋮
-  994 │    30   5.47723
-  995 │    50   7.07107
-  996 │    31   5.56776
-  997 │    40   6.32456
-  998 │    38   6.16441
-  999 │    23   4.79583
- 1000 │    27   5.19615
-        985 rows omitted</code></pre><p>When we pass just a column (without the <code>=&gt;</code> part) we can use any column selector that is allowed in indexing.</p><p>Here we exclude the column <code>:Age</code> from the resulting data frame:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, Not(:Age))
-1000×9 DataFrame
-  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯
-      │ Int64  String7  Int64  String7  String15         String15          Int ⋯
-──────┼─────────────────────────────────────────────────────────────────────────
-    1 │     0  male         2  own      NA               little                ⋯
-    2 │     1  female       2  own      little           moderate
-    3 │     2  male         1  own      little           NA
-    4 │     3  male         2  free     little           little
-    5 │     4  male         2  free     little           little                ⋯
-    6 │     5  male         1  free     NA               NA
-    7 │     6  male         2  own      quite rich       NA
-    8 │     7  male         3  rent     little           moderate
-  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱
-  994 │   993  male         3  own      little           little                ⋯
-  995 │   994  male         2  own      NA               NA
-  996 │   995  female       1  own      little           NA
-  997 │   996  male         3  own      little           little
-  998 │   997  male         2  own      little           NA                    ⋯
-  999 │   998  male         2  free     little           little
- 1000 │   999  male         2  own      moderate         moderate
-                                                  3 columns and 985 rows omitted</code></pre><p>In the next example we drop columns <code>&quot;Age&quot;</code>, <code>&quot;Saving accounts&quot;</code>, <code>&quot;Checking account&quot;</code>, <code>&quot;Credit amount&quot;</code>, and <code>&quot;Purpose&quot;</code>. Note that this time we use string column selectors because some of the column names have spaces in them:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, Not([&quot;Age&quot;, &quot;Saving accounts&quot;, &quot;Checking account&quot;,
-                           &quot;Credit amount&quot;, &quot;Purpose&quot;]))
-1000×5 DataFrame
-  Row │ id     Sex      Job    Housing  Duration
-      │ Int64  String7  Int64  String7  Int64
-──────┼──────────────────────────────────────────
-    1 │     0  male         2  own             6
-    2 │     1  female       2  own            48
-    3 │     2  male         1  own            12
-    4 │     3  male         2  free           42
-    5 │     4  male         2  free           24
-    6 │     5  male         1  free           36
-    7 │     6  male         2  own            24
-    8 │     7  male         3  rent           36
-  ⋮   │   ⋮       ⋮       ⋮       ⋮        ⋮
-  994 │   993  male         3  own            36
-  995 │   994  male         2  own            12
-  996 │   995  female       1  own            12
-  997 │   996  male         3  own            30
-  998 │   997  male         2  own            12
-  999 │   998  male         2  free           45
- 1000 │   999  male         2  own            45
-                                 985 rows omitted
-</code></pre><p>As another example let us present that the <code>r&quot;S&quot;</code> regular expression we used above also works with <code>select</code>:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, r&quot;S&quot;)
-1000×2 DataFrame
-  Row │ Sex      Saving accounts
-      │ String7  String15
-──────┼──────────────────────────
-    1 │ male     NA
-    2 │ female   little
-    3 │ male     little
-    4 │ male     little
-    5 │ male     little
-    6 │ male     NA
-    7 │ male     quite rich
-    8 │ male     little
-  ⋮   │    ⋮            ⋮
-  994 │ male     little
-  995 │ male     NA
-  996 │ female   little
-  997 │ male     little
-  998 │ male     little
-  999 │ male     little
- 1000 │ male     moderate
-                 985 rows omitted</code></pre><p>The benefit of <code>select</code> or <code>combine</code> over indexing is that it is easier to get the union of several column selectors, e.g.:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, r&quot;S&quot;, &quot;Job&quot;, 1)
-1000×4 DataFrame
-  Row │ Sex      Saving accounts  Job    id
-      │ String7  String15         Int64  Int64
-──────┼────────────────────────────────────────
-    1 │ male     NA                   2      0
-    2 │ female   little               2      1
-    3 │ male     little               1      2
-    4 │ male     little               2      3
-    5 │ male     little               2      4
-    6 │ male     NA                   1      5
-    7 │ male     quite rich           2      6
-    8 │ male     little               3      7
-  ⋮   │    ⋮            ⋮           ⋮      ⋮
-  994 │ male     little               3    993
-  995 │ male     NA                   2    994
-  996 │ female   little               1    995
-  997 │ male     little               3    996
-  998 │ male     little               2    997
-  999 │ male     little               2    998
- 1000 │ male     moderate             2    999
-                               985 rows omitted</code></pre><p>Taking advantage of this flexibility here is an idiomatic pattern to move some column to the front of a data frame:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, &quot;Sex&quot;, :)
-1000×10 DataFrame
-  Row │ Sex      id     Age    Job    Housing  Saving accounts  Checking accou ⋯
-      │ String7  Int64  Int64  Int64  String7  String15         String15       ⋯
-──────┼─────────────────────────────────────────────────────────────────────────
-    1 │ male         0     67      2  own      NA               little         ⋯
-    2 │ female       1     22      2  own      little           moderate
-    3 │ male         2     49      1  own      little           NA
-    4 │ male         3     45      2  free     little           little
-    5 │ male         4     53      2  free     little           little         ⋯
-    6 │ male         5     35      1  free     NA               NA
-    7 │ male         6     53      2  own      quite rich       NA
-    8 │ male         7     35      3  rent     little           moderate
-  ⋮   │    ⋮       ⋮      ⋮      ⋮       ⋮            ⋮                ⋮       ⋱
-  994 │ male       993     30      3  own      little           little         ⋯
-  995 │ male       994     50      2  own      NA               NA
-  996 │ female     995     31      1  own      little           NA
-  997 │ male       996     40      3  own      little           little
-  998 │ male       997     38      2  own      little           NA             ⋯
-  999 │ male       998     23      2  free     little           little
- 1000 │ male       999     27      2  own      moderate         moderate
-                                                  4 columns and 985 rows omitted</code></pre><p>Below, we are simply passing source column and target column name to rename them (without specifying the transformation part):</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Sex =&gt; :x1, :Age =&gt; :x2)
-1000×2 DataFrame
-  Row │ x1       x2
-      │ String7  Int64
-──────┼────────────────
-    1 │ male        67
-    2 │ female      22
-    3 │ male        49
-    4 │ male        45
-    5 │ male        53
-    6 │ male        35
-    7 │ male        53
-    8 │ male        35
-  ⋮   │    ⋮       ⋮
-  994 │ male        30
-  995 │ male        50
-  996 │ female      31
-  997 │ male        40
-  998 │ male        38
-  999 │ male        23
- 1000 │ male        27
-       985 rows omitted</code></pre><p>It is important to note that <code>select</code> always returns a data frame, even if a single column selected as opposed to indexing syntax. Compare the following:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Age)
-1000×1 DataFrame
-  Row │ Age
-      │ Int64
-──────┼───────
-    1 │    67
-    2 │    22
-    3 │    49
-    4 │    45
-    5 │    53
-    6 │    35
-    7 │    53
-    8 │    35
-  ⋮   │   ⋮
-  994 │    30
-  995 │    50
-  996 │    31
-  997 │    40
-  998 │    38
-  999 │    23
- 1000 │    27
-985 rows omitted
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     2      5
+   3 │     3      6
 
-julia&gt; german[:, :Age]
-1000-element Vector{Int64}:
- 67
- 22
- 49
- 45
- 53
- 35
- 53
- 35
- 61
- 28
-  ⋮
- 34
- 23
- 30
- 50
- 31
- 40
- 38
- 23
- 27</code></pre><p>By default <code>select</code> copies columns of a passed source data frame. In order to avoid copying, pass the <code>copycols=false</code> keyword argument:</p><pre><code class="language-julia-repl hljs">julia&gt; df = select(german, :Sex)
-1000×1 DataFrame
-  Row │ Sex
-      │ String7
-──────┼─────────
-    1 │ male
-    2 │ female
-    3 │ male
-    4 │ male
-    5 │ male
-    6 │ male
-    7 │ male
-    8 │ male
-  ⋮   │    ⋮
-  994 │ male
-  995 │ male
-  996 │ female
-  997 │ male
-  998 │ male
-  999 │ male
- 1000 │ male
-985 rows omitted
+julia&gt; select(df, :x =&gt; :a)
+3×1 DataFrame
+ Row │ a
+     │ Int64
+─────┼───────
+   1 │     1
+   2 │     2
+   3 │     3
 
-julia&gt; df.Sex === german.Sex # copy
-false
+julia&gt; select(df, 1 =&gt; &quot;a&quot;)
+3×1 DataFrame
+ Row │ a
+     │ Int64
+─────┼───────
+   1 │     1
+   2 │     2
+   3 │     3</code></pre><p>What should we do if we want to keep and rename both the <code>x</code> and <code>y</code> columns? One option is to supply a <code>Vector</code> of operation <code>Pair</code>s to <code>select</code>. <code>select</code> will process all of these operations in order.</p><pre><code class="language-julia hljs">julia&gt; [&quot;x&quot; =&gt; &quot;a&quot;, &quot;y&quot; =&gt; &quot;b&quot;]
+2-element Vector{Pair{String, String}}:
+ &quot;x&quot; =&gt; &quot;a&quot;
+ &quot;y&quot; =&gt; &quot;b&quot;
 
-julia&gt; df = select(german, :Sex, copycols=false)
-1000×1 DataFrame
-  Row │ Sex
-      │ String7
-──────┼─────────
-    1 │ male
-    2 │ female
-    3 │ male
-    4 │ male
-    5 │ male
-    6 │ male
-    7 │ male
-    8 │ male
-  ⋮   │    ⋮
-  994 │ male
-  995 │ male
-  996 │ female
-  997 │ male
-  998 │ male
-  999 │ male
- 1000 │ male
-985 rows omitted
+julia&gt; select(df, [&quot;x&quot; =&gt; &quot;a&quot;, &quot;y&quot; =&gt; &quot;b&quot;])
+3×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     2      5
+   3 │     3      6</code></pre><p>We can use broadcasting to simplify the syntax above.</p><pre><code class="language-julia hljs">julia&gt; [&quot;x&quot;, &quot;y&quot;] .=&gt; [&quot;a&quot;, &quot;b&quot;]
+2-element Vector{Pair{String, String}}:
+ &quot;x&quot; =&gt; &quot;a&quot;
+ &quot;y&quot; =&gt; &quot;b&quot;
 
-julia&gt; df.Sex === german.Sex # no-copy is performed
-true</code></pre><p>To perform the selection operation in-place use <code>select!</code>:</p><pre><code class="language-julia-repl hljs">julia&gt; select!(german, Not(:Age));
+julia&gt; select(df, [&quot;x&quot;, &quot;y&quot;] .=&gt; [&quot;a&quot;, &quot;b&quot;])
+3×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     2      5
+   3 │     3      6</code></pre><p>Notice that <code>select</code> sees the same <code>Vector{Pair{String, String}}</code> operation argument whether the individual pairs are written out explicitly or constructed with broadcasting. The broadcasting is applied before the call to <code>select</code>.</p><pre><code class="language-julia hljs">julia&gt; [&quot;x&quot; =&gt; &quot;a&quot;, &quot;y&quot; =&gt; &quot;b&quot;] == ([&quot;x&quot;, &quot;y&quot;] .=&gt; [&quot;a&quot;, &quot;b&quot;])
+true</code></pre><div class="admonition is-info"><header class="admonition-header">Note</header><div class="admonition-body"><p>These operation pairs (or vector of pairs) can be given variable names. This is uncommon in practice but could be helpful for intermediate inspection and testing.</p><pre><code class="language-julia hljs">df = DataFrame(x = 1:3, y = 4:6)       # create data frame
+operation = [&quot;x&quot;, &quot;y&quot;] .=&gt; [&quot;a&quot;, &quot;b&quot;]  # save operation to variable
+typeof(operation)                      # check type of operation
+first(operation)                       # check first pair in operation
+last(operation)                        # check last pair in operation
+select(df, operation)                  # manipulate `df` with `operation`</code></pre></div></div><p>In Julia, a non-vector broadcasted with a vector will be repeated in each resultant pair element.</p><pre><code class="language-julia hljs">julia&gt; [&quot;x&quot;, &quot;y&quot;] .=&gt; :a    # :a is repeated
+2-element Vector{Pair{String, Symbol}}:
+ &quot;x&quot; =&gt; :a
+ &quot;y&quot; =&gt; :a
 
-julia&gt; german
-1000×9 DataFrame
-  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯
-      │ Int64  String7  Int64  String7  String15         String15          Int ⋯
-──────┼─────────────────────────────────────────────────────────────────────────
-    1 │     0  male         2  own      NA               little                ⋯
-    2 │     1  female       2  own      little           moderate
-    3 │     2  male         1  own      little           NA
-    4 │     3  male         2  free     little           little
-    5 │     4  male         2  free     little           little                ⋯
-    6 │     5  male         1  free     NA               NA
-    7 │     6  male         2  own      quite rich       NA
-    8 │     7  male         3  rent     little           moderate
-  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱
-  994 │   993  male         3  own      little           little                ⋯
-  995 │   994  male         2  own      NA               NA
-  996 │   995  female       1  own      little           NA
-  997 │   996  male         3  own      little           little
-  998 │   997  male         2  own      little           NA                    ⋯
-  999 │   998  male         2  free     little           little
- 1000 │   999  male         2  own      moderate         moderate
-                                                  3 columns and 985 rows omitted</code></pre><p>As you can see the <code>:Age</code> column was dropped from the <code>german</code> data frame.</p><p>The <code>transform</code> and <code>transform!</code> functions work identically to <code>select</code> and <code>select!</code> with the only difference that they retain all columns that are present in the source data frame. Here are some examples:</p><pre><code class="language-julia-repl hljs">julia&gt; german = copy(german_ref);
+julia&gt; 1 .=&gt; [:a, :b]       # 1 is repeated
+2-element Vector{Pair{Int64, Symbol}}:
+ 1 =&gt; :a
+ 1 =&gt; :b</code></pre><p>We can use this fact to easily broadcast an <code>operation_function</code> to multiple columns.</p><pre><code class="language-julia hljs">julia&gt; f(x) = 2 * x
+f (generic function with 1 method)
 
-julia&gt; df = german_ref[1:8, 1:5]
-8×5 DataFrame
- Row │ id     Age    Sex      Job    Housing
-     │ Int64  Int64  String7  Int64  String7
-─────┼───────────────────────────────────────
-   1 │     0     67  male         2  own
-   2 │     1     22  female       2  own
-   3 │     2     49  male         1  own
-   4 │     3     45  male         2  free
-   5 │     4     53  male         2  free
-   6 │     5     35  male         1  free
-   7 │     6     53  male         2  own
-   8 │     7     35  male         3  rent
-
-julia&gt; transform(df, :Age =&gt; maximum)
-8×6 DataFrame
- Row │ id     Age    Sex      Job    Housing  Age_maximum
-     │ Int64  Int64  String7  Int64  String7  Int64
+julia&gt; [&quot;x&quot;, &quot;y&quot;] .=&gt; f  # f is repeated
+2-element Vector{Pair{String, typeof(f)}}:
+ &quot;x&quot; =&gt; f
+ &quot;y&quot; =&gt; f
+
+julia&gt; select(df, [&quot;x&quot;, &quot;y&quot;] .=&gt; f)  # apply f with automatic column renaming
+3×2 DataFrame
+ Row │ x_f    y_f
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      8
+   2 │     4     10
+   3 │     6     12
+
+julia&gt; [&quot;x&quot;, &quot;y&quot;] .=&gt; f .=&gt; [&quot;a&quot;, &quot;b&quot;]  # f is repeated
+2-element Vector{Pair{String, Pair{typeof(f), String}}}:
+ &quot;x&quot; =&gt; (f =&gt; &quot;a&quot;)
+ &quot;y&quot; =&gt; (f =&gt; &quot;b&quot;)
+
+julia&gt; select(df, [&quot;x&quot;, &quot;y&quot;] .=&gt; f .=&gt; [&quot;a&quot;, &quot;b&quot;])  # apply f with manual column renaming
+3×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      8
+   2 │     4     10
+   3 │     6     12</code></pre><p>A renaming function can be applied to multiple columns in the same way. It will also be repeated in each operation <code>Pair</code>.</p><pre><code class="language-julia hljs">julia&gt; newname(s::String) = s * &quot;_new&quot;
+newname (generic function with 1 method)
+
+julia&gt; [&quot;x&quot;, &quot;y&quot;] .=&gt; f .=&gt; newname  # both f and newname are repeated
+2-element Vector{Pair{String, Pair{typeof(f), typeof(newname)}}}:
+ &quot;x&quot; =&gt; (f =&gt; newname)
+ &quot;y&quot; =&gt; (f =&gt; newname)
+
+julia&gt; select(df, [&quot;x&quot;, &quot;y&quot;] .=&gt; f .=&gt; newname)  # apply f then rename column with newname
+3×2 DataFrame
+ Row │ x_new  y_new
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      8
+   2 │     4     10
+   3 │     6     12</code></pre><p>You can see from the type output above that a three element pair does not actually exist. A <code>Pair</code> (as the name implies) can only contain two elements. Thus, <code>:x =&gt; :y =&gt; :z</code> becomes a nested <code>Pair</code>, where <code>:x</code> is the first element and points to the <code>Pair</code> <code>:y =&gt; :z</code>, which is the second element.</p><pre><code class="language-julia hljs">julia&gt; p = :x =&gt; :y =&gt; :z
+:x =&gt; (:y =&gt; :z)
+
+julia&gt; p[1]
+:x
+
+julia&gt; p[2]
+:y =&gt; :z
+
+julia&gt; p[2][1]
+:y
+
+julia&gt; p[2][2]
+:z
+
+julia&gt; p[3] # there is no index 3 for a pair
+ERROR: BoundsError: attempt to access Pair{Symbol, Pair{Symbol, Symbol}} at index [3]</code></pre><p>In the previous examples, the source columns have been individually selected. When broadcasting multiple columns to the same function, often similarities in the column names or position can be exploited to avoid tedious selection. Consider a data frame with temperature data at three different locations taken over time.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(Time = 1:4,
+                      Temperature1 = [20, 23, 25, 28],
+                      Temperature2 = [33, 37, 41, 44],
+                      Temperature3 = [15, 10, 4, 0])
+4×4 DataFrame
+ Row │ Time   Temperature1  Temperature2  Temperature3
+     │ Int64  Int64         Int64         Int64
+─────┼─────────────────────────────────────────────────
+   1 │     1            20            33            15
+   2 │     2            23            37            10
+   3 │     3            25            41             4
+   4 │     4            28            44             0</code></pre><p>To convert all of the temperature data in one transformation, we just need to define a conversion function and broadcast it to all of the &quot;Temperature&quot; columns.</p><pre><code class="language-julia hljs">julia&gt; celsius_to_kelvin(x) = x + 273
+celsius_to_kelvin (generic function with 1 method)
+
+julia&gt; transform(
+           df,
+           Cols(r&quot;Temp&quot;) .=&gt; ByRow(celsius_to_kelvin),
+           renamecols = false
+       )
+4×4 DataFrame
+ Row │ Time   Temperature1  Temperature2  Temperature3
+     │ Int64  Int64         Int64         Int64
+─────┼─────────────────────────────────────────────────
+   1 │     1           293           306           288
+   2 │     2           296           310           283
+   3 │     3           298           314           277
+   4 │     4           301           317           273</code></pre><p>Or, simultaneously changing the column names:</p><pre><code class="language-julia hljs">julia&gt; rename_function(s) = &quot;Temperature $(last(s)) (K)&quot;
+rename_function (generic function with 1 method)
+
+julia&gt; select(
+           df,
+           &quot;Time&quot;,
+           Cols(r&quot;Temp&quot;) .=&gt; ByRow(celsius_to_kelvin) .=&gt; rename_function
+       )
+4×4 DataFrame
+ Row │ Time   Temperature 1 (K)  Temperature 2 (K)  Temperature 3 (K)
+     │ Int64  Int64              Int64              Int64
+─────┼────────────────────────────────────────────────────────────────
+   1 │     1                293                306                288
+   2 │     2                296                310                283
+   3 │     3                298                314                277
+   4 │     4                301                317                273</code></pre><div class="admonition is-info"><header class="admonition-header">Notes</header><div class="admonition-body"><ul><li><code>Not(&quot;Time&quot;)</code> or <code>2:4</code> would have been equally good choices for <code>source_column_selector</code> in the above operations.</li><li>Don&#39;t forget <code>ByRow</code> if your function is to be applied to elements rather than entire column vectors. Without <code>ByRow</code>, the manipulations above would have thrown <code>ERROR: MethodError: no method matching +(::Vector{Int64}, ::Int64)</code>.</li><li>Regular expression (<code>r&quot;&quot;</code>) and <code>:</code> <code>source_column_selectors</code> must be wrapped in <code>Cols</code> to be properly broadcasted because otherwise the broadcasting occurs before the expression is expanded into a vector of matches.</li></ul></div></div><p>You could also broadcast different columns to different functions by supplying a vector of functions.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(a=1:4, b=5:8)
+4×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      5
+   2 │     2      6
+   3 │     3      7
+   4 │     4      8
+
+julia&gt; f1(x) = x .+ 1
+f1 (generic function with 1 method)
+
+julia&gt; f2(x) = x ./ 10
+f2 (generic function with 1 method)
+
+julia&gt; transform(df, [:a, :b] .=&gt; [f1, f2])
+4×4 DataFrame
+ Row │ a      b      a_f1   b_f2
+     │ Int64  Int64  Int64  Float64
+─────┼──────────────────────────────
+   1 │     1      5      2      0.5
+   2 │     2      6      3      0.6
+   3 │     3      7      4      0.7
+   4 │     4      8      5      0.8</code></pre><p>However, this form is not much more convenient than supplying multiple individual operations.</p><pre><code class="language-julia hljs">julia&gt; transform(df, [:a =&gt; f1, :b =&gt; f2]) # same manipulation as previous
+4×4 DataFrame
+ Row │ a      b      a_f1   b_f2
+     │ Int64  Int64  Int64  Float64
+─────┼──────────────────────────────
+   1 │     1      5      2      0.5
+   2 │     2      6      3      0.6
+   3 │     3      7      4      0.7
+   4 │     4      8      5      0.8</code></pre><p>Perhaps more useful for broadcasting syntax is to apply multiple functions to multiple columns by changing the vector of functions to a 1-by-x matrix of functions. (Recall that a list, a vector, or a matrix of operation pairs are all valid for passing to the manipulation functions.)</p><pre><code class="language-julia hljs">julia&gt; [:a, :b] .=&gt; [f1 f2] # No comma `,` between f1 and f2
+2×2 Matrix{Pair{Symbol}}:
+ :a=&gt;f1  :a=&gt;f2
+ :b=&gt;f1  :b=&gt;f2
+
+julia&gt; transform(df, [:a, :b] .=&gt; [f1 f2]) # No comma `,` between f1 and f2
+4×6 DataFrame
+ Row │ a      b      a_f1   b_f1   a_f2     b_f2
+     │ Int64  Int64  Int64  Int64  Float64  Float64
+─────┼──────────────────────────────────────────────
+   1 │     1      5      2      6      0.1      0.5
+   2 │     2      6      3      7      0.2      0.6
+   3 │     3      7      4      8      0.3      0.7
+   4 │     4      8      5      9      0.4      0.8</code></pre><p>In this way, every combination of selected columns and functions will be applied.</p><p>Pair broadcasting is a simple but powerful tool that can be used in any of the manipulation functions listed under <a href="#Manipulation-Functions">Manipulation Functions</a>. Experiment for yourself to discover other useful operations.</p><h3 id="Additional-Resources"><a class="docs-heading-anchor" href="#Additional-Resources">Additional Resources</a><a id="Additional-Resources-1"></a><a class="docs-heading-anchor-permalink" href="#Additional-Resources" title="Permalink"></a></h3><p>More details and examples of operation pair syntax can be found in <a href="https://bkamins.github.io/julialang/2020/12/24/minilanguage.html">this blog post</a>. (The official wording describing the syntax has changed since the blog post was written, but the examples are still illustrative. The operation pair syntax is sometimes referred to as the DataFrames.jl mini-language or Domain-Specific Language.)</p><p>For additional syntax niceties, many users find the <a href="https://github.com/jkrumbiegel/Chain.jl">Chain.jl</a> and <a href="https://github.com/JuliaData/DataFramesMeta.jl">DataFramesMeta.jl</a> packages useful to help simplify manipulations that may be tedious with operation pairs alone.</p><h2 id="Approach-Comparison"><a class="docs-heading-anchor" href="#Approach-Comparison">Approach Comparison</a><a id="Approach-Comparison-1"></a><a class="docs-heading-anchor-permalink" href="#Approach-Comparison" title="Permalink"></a></h2><p>After that deep dive into <a href="#Manipulation-Functions">Manipulation Functions</a>, it is a good idea to review the alternative approaches covered in <a href="#Getting-and-Setting-Data-in-a-Data-Frame">Getting and Setting Data in a Data Frame</a>. 
Let us compare the approaches with a few examples.</p><p>For simple operations, often getting/setting data with dot syntax is simpler than the equivalent data frame manipulation. Here we will add the two columns of our data frame together and place the result in a new third column.</p><p><strong>Setup:</strong></p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(x = 1:3, y = 4:6)  # define a data frame
+3×2 DataFrame
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      4
+   2 │     2      5
+   3 │     3      6</code></pre><p><strong>Manipulation:</strong></p><pre><code class="language-julia hljs">julia&gt; transform!(df, [:x, :y] =&gt; (+) =&gt; :z)
+3×3 DataFrame
+ Row │ x      y      z
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      5
+   2 │     2      5      7
+   3 │     3      6      9</code></pre><p><strong>Dot Syntax:</strong></p><pre><code class="language-julia hljs">julia&gt; df.z = df.x + df.y
+3-element Vector{Int64}:
+ 5
+ 7
+ 9
+
+julia&gt; df  # see that the previous expression updated the data frame `df`
+3×3 DataFrame
+ Row │ x      y      z
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      5
+   2 │     2      5      7
+   3 │     3      6      9</code></pre><p>Recall that the return type from a data frame manipulation function call is always a data frame. The return type of a data frame column accessed with dot syntax is a <code>Vector</code>. Thus the expression <code>df.x + df.y</code> gets the column data as vectors and returns the result of the vector addition. However, in that same line, we assigned the resultant <code>Vector</code> to a new column <code>z</code> in the data frame <code>df</code>. We could have instead assigned the resultant <code>Vector</code> to some other variable, and then <code>df</code> would not have been altered. The approach with dot syntax is very versatile since the data getting, mathematics, and data setting can be separate steps.</p><pre><code class="language-julia hljs">julia&gt; df.x  # dot syntax returns a vector
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+
+julia&gt; v = df.x + df.y  # assign mathematical result to a vector `v`
+3-element Vector{Int64}:
+ 5
+ 7
+ 9
+
+julia&gt; df.z = v  # place `v` into the data frame `df` with the column name `z`
+3-element Vector{Int64}:
+ 5
+ 7
+ 9</code></pre><p>However, one way in which dot syntax is less versatile is that the column name must be explicitly written in the code. Indexing syntax is a good alternative in these cases, and it is only slightly longer to write than dot syntax. Both indexing syntax and manipulation functions can operate on dynamic column names stored in variables.</p><p><strong>Setup:</strong></p><p>Imagine this setup data was read from a file and/or entered by a user at runtime.</p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(&quot;My First Column&quot; =&gt; 1:3, &quot;My Second Column&quot; =&gt; 4:6)  # define a data frame
+3×2 DataFrame
+ Row │ My First Column  My Second Column
+     │ Int64            Int64
+─────┼───────────────────────────────────
+   1 │               1                 4
+   2 │               2                 5
+   3 │               3                 6
+
+julia&gt; c1 = &quot;My First Column&quot;; c2 = &quot;My Second Column&quot;; c3 = &quot;My Third Column&quot;;  # define column names</code></pre><p><strong>Dot Syntax:</strong></p><pre><code class="language-julia hljs">julia&gt; df.c1  # dot syntax expects an explicit column name and cannot be used to access a column whose name is stored in a variable
+ERROR: ArgumentError: column name :c1 not found in the data frame</code></pre><p><strong>Indexing:</strong></p><pre><code class="language-julia hljs">julia&gt; df[:, c3] = df[:, c1] + df[:, c2]  # access columns with names stored in variables
+3-element Vector{Int64}:
+ 5
+ 7
+ 9
+
+julia&gt; df  # see that the previous expression updated the data frame `df`
+3×3 DataFrame
+ Row │ My First Column  My Second Column  My Third Column
+     │ Int64            Int64             Int64
 ─────┼────────────────────────────────────────────────────
-   1 │     0     67  male         2  own               67
-   2 │     1     22  female       2  own               67
-   3 │     2     49  male         1  own               67
-   4 │     3     45  male         2  free              67
-   5 │     4     53  male         2  free              67
-   6 │     5     35  male         1  free              67
-   7 │     6     53  male         2  own               67
-   8 │     7     35  male         3  rent              67</code></pre><p>In the example below we are swapping values stored in columns <code>:Sex</code> and <code>:Age</code>:</p><pre><code class="language-julia-repl hljs">julia&gt; transform(german, :Age =&gt; :Sex, :Sex =&gt; :Age)
-1000×10 DataFrame
-  Row │ id     Age      Sex    Job    Housing  Saving accounts  Checking accou ⋯
-      │ Int64  String7  Int64  Int64  String7  String15         String15       ⋯
-──────┼─────────────────────────────────────────────────────────────────────────
-    1 │     0  male        67      2  own      NA               little         ⋯
-    2 │     1  female      22      2  own      little           moderate
-    3 │     2  male        49      1  own      little           NA
-    4 │     3  male        45      2  free     little           little
-    5 │     4  male        53      2  free     little           little         ⋯
-    6 │     5  male        35      1  free     NA               NA
-    7 │     6  male        53      2  own      quite rich       NA
-    8 │     7  male        35      3  rent     little           moderate
-  ⋮   │   ⋮       ⋮       ⋮      ⋮       ⋮            ⋮                ⋮       ⋱
-  994 │   993  male        30      3  own      little           little         ⋯
-  995 │   994  male        50      2  own      NA               NA
-  996 │   995  female      31      1  own      little           NA
-  997 │   996  male        40      3  own      little           little
-  998 │   997  male        38      2  own      little           NA             ⋯
-  999 │   998  male        23      2  free     little           little
- 1000 │   999  male        27      2  own      moderate         moderate
-                                                  4 columns and 985 rows omitted</code></pre><p>If we give more than one source column to a transformation they are passed as consecutive positional arguments. So for example the <code>[:Age, :Job] =&gt; (+) =&gt; :res</code> transformation below evaluates <code>+(df1.Age, df1.Job)</code> (which adds two columns) and stores the result in the <code>:res</code> column:</p><pre><code class="language-julia-repl hljs">julia&gt; select(german, :Age, :Job, [:Age, :Job] =&gt; (+) =&gt; :res)
-1000×3 DataFrame
-  Row │ Age    Job    res
-      │ Int64  Int64  Int64
-──────┼─────────────────────
-    1 │    67      2     69
-    2 │    22      2     24
-    3 │    49      1     50
-    4 │    45      2     47
-    5 │    53      2     55
-    6 │    35      1     36
-    7 │    53      2     55
-    8 │    35      3     38
-  ⋮   │   ⋮      ⋮      ⋮
-  994 │    30      3     33
-  995 │    50      2     52
-  996 │    31      1     32
-  997 │    40      3     43
-  998 │    38      2     40
-  999 │    23      2     25
- 1000 │    27      2     29
-            985 rows omitted</code></pre><p>In the examples given in this introductory tutorial we did not cover all options of the transformation mini-language. More advanced examples, in particular showing how to pass or produce multiple columns using the <code>AsTable</code> operation (which you might have seen in some DataFrames.jl demos) are given in the later sections of the manual.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../">« Introduction</a><a class="docs-footer-nextpage" href="../getting_started/">Getting Started »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+   1 │               1                 4                5
+   2 │               2                 5                7
+   3 │               3                 6                9</code></pre><p><strong>Manipulation:</strong></p><pre><code class="language-julia hljs">julia&gt; transform!(df, [c1, c2] =&gt; (+) =&gt; c3)  # access columns with names stored in variables
+3×3 DataFrame
+ Row │ My First Column  My Second Column  My Third Column
+     │ Int64            Int64             Int64
+─────┼────────────────────────────────────────────────────
+   1 │               1                 4                5
+   2 │               2                 5                7
+   3 │               3                 6                9</code></pre><p>Additionally, manipulation functions only require the name of the data frame to be written once. This can be helpful when dealing with long variable and column names.</p><p><strong>Setup:</strong></p><pre><code class="language-julia hljs">julia&gt; my_very_long_data_frame_name = DataFrame(
+           &quot;My First Column&quot; =&gt; 1:3,
+           &quot;My Second Column&quot; =&gt; 4:6
+       )  # define a data frame
+3×2 DataFrame
+ Row │ My First Column  My Second Column
+     │ Int64            Int64
+─────┼───────────────────────────────────
+   1 │               1                 4
+   2 │               2                 5
+   3 │               3                 6
+
+julia&gt; c1 = &quot;My First Column&quot;; c2 = &quot;My Second Column&quot;; c3 = &quot;My Third Column&quot;;  # define column names</code></pre><p><strong>Manipulation:</strong></p><pre><code class="language-julia hljs">
+julia&gt; transform!(my_very_long_data_frame_name, [c1, c2] =&gt; (+) =&gt; c3)
+3×3 DataFrame
+ Row │ My First Column  My Second Column  My Third Column
+     │ Int64            Int64             Int64
+─────┼────────────────────────────────────────────────────
+   1 │               1                 4                5
+   2 │               2                 5                7
+   3 │               3                 6                9</code></pre><p><strong>Indexing:</strong></p><pre><code class="language-julia hljs">julia&gt; my_very_long_data_frame_name[:, c3] = my_very_long_data_frame_name[:, c1] + my_very_long_data_frame_name[:, c2]
+3-element Vector{Int64}:
+ 5
+ 7
+ 9
+
+julia&gt; my_very_long_data_frame_name  # see that the previous expression updated the data frame `my_very_long_data_frame_name`
+3×3 DataFrame
+ Row │ My First Column  My Second Column  My Third Column
+     │ Int64            Int64             Int64
+─────┼────────────────────────────────────────────────────
+   1 │               1                 4                5
+   2 │               2                 5                7
+   3 │               3                 6                9</code></pre><p>Another benefit of manipulation functions and indexing over dot syntax is that it is easier to operate on a subset of columns.</p><p><strong>Setup:</strong></p><pre><code class="language-julia hljs">julia&gt; df = DataFrame(x = 1:3, y = 4:6, z = 7:9)  # define data frame
+3×3 DataFrame
+ Row │ x      y      z
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      4      7
+   2 │     2      5      8
+   3 │     3      6      9</code></pre><p><strong>Dot Syntax:</strong></p><pre><code class="language-julia hljs">julia&gt; df.Not(:x)  # will not work; requires a literal column name
+ERROR: ArgumentError: column name :Not not found in the data frame</code></pre><p><strong>Indexing:</strong></p><pre><code class="language-julia hljs">julia&gt; df[:, :y_z_max] = maximum.(eachrow(df[:, Not(:x)]))  # find the maximum value in each row, excluding column `x`
+3-element Vector{Int64}:
+ 7
+ 8
+ 9
+
+julia&gt; df  # see that the previous expression updated the data frame `df`
+3×4 DataFrame
+ Row │ x      y      z      y_z_max
+     │ Int64  Int64  Int64  Int64
+─────┼──────────────────────────────
+   1 │     1      4      7        7
+   2 │     2      5      8        8
+   3 │     3      6      9        9</code></pre><p><strong>Manipulation:</strong></p><pre><code class="language-julia hljs">julia&gt; transform!(df, Not(:x) =&gt; ByRow(max))  # find the maximum value in each row, excluding column `x`
+3×4 DataFrame
+ Row │ x      y      z      y_z_max
+     │ Int64  Int64  Int64  Int64
+─────┼──────────────────────────────
+   1 │     1      4      7        7
+   2 │     2      5      8        8
+   3 │     3      6      9        9</code></pre><p>Moreover, indexing can operate on a subset of columns <em>and</em> rows.</p><p><strong>Indexing:</strong></p><pre><code class="language-julia hljs">julia&gt; y_z_max_row3 = maximum(df[3, Not(:x)])  # find maximum value across row 3 except for column `x`
+9</code></pre><p>Hopefully this small comparison has illustrated some of the benefits and drawbacks of the various syntaxes available in DataFrames.jl. The best syntax to use depends on the situation.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../">« Introduction</a><a class="docs-footer-nextpage" href="../getting_started/">Getting Started »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/categorical/index.html b/dev/man/categorical/index.html
index 7ec206d51..9937fa9a1 100644
--- a/dev/man/categorical/index.html
+++ b/dev/man/categorical/index.html
@@ -79,4 +79,4 @@
 true
 
 julia&gt; cv1[1] &lt; cv1[2]
-true</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../sorting/">« Sorting</a><a class="docs-footer-nextpage" href="../missing/">Missing Data »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+true</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../sorting/">« Sorting</a><a class="docs-footer-nextpage" href="../missing/">Missing Data »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/comparisons/index.html b/dev/man/comparisons/index.html
index 0e5cf58cd..c8a1f46dc 100644
--- a/dev/man/comparisons/index.html
+++ b/dev/man/comparisons/index.html
@@ -26,4 +26,4 @@
              z = c(3:7, NA), id = letters[1:6])</code></pre><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">dplyr</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>summarize(df, mean(x))</code></td><td style="text-align: left"><code>combine(df, :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>mutate(df, x_mean = mean(x))</code></td><td style="text-align: left"><code>transform(df, :x =&gt; mean =&gt; :x_mean)</code></td></tr><tr><td style="text-align: left">Rename columns</td><td style="text-align: left"><code>rename(df, x_new = x)</code></td><td style="text-align: left"><code>rename(df, :x =&gt; :x_new)</code></td></tr><tr><td style="text-align: left">Pick columns</td><td style="text-align: left"><code>select(df, x, y)</code></td><td style="text-align: left"><code>select(df, :x, :y)</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>transmute(df, mean(x), y)</code></td><td style="text-align: left"><code>select(df, :x =&gt; mean, :y)</code></td></tr><tr><td style="text-align: left">Pick rows</td><td style="text-align: left"><code>filter(df, x &gt;= 1)</code></td><td style="text-align: left"><code>subset(df, :x =&gt; ByRow(x -&gt; x &gt;= 1))</code></td></tr><tr><td style="text-align: left">Sort rows</td><td style="text-align: left"><code>arrange(df, x)</code></td><td style="text-align: left"><code>sort(df, :x)</code></td></tr></table><p>As in dplyr, some of these functions can be applied to grouped data frames, in which case they operate by group:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">dplyr</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: 
left"><code>summarize(group_by(df, grp), mean(x))</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>mutate(group_by(df, grp), mean(x))</code></td><td style="text-align: left"><code>transform(groupby(df, :grp), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>transmute(group_by(df, grp), mean(x), y)</code></td><td style="text-align: left"><code>select(groupby(df, :grp), :x =&gt; mean, :y)</code></td></tr></table><p>The table below compares more advanced commands:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">dplyr</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Complex Function</td><td style="text-align: left"><code>summarize(df, mean(x, na.rm = T))</code></td><td style="text-align: left"><code>combine(df, :x =&gt; x -&gt; mean(skipmissing(x)))</code></td></tr><tr><td style="text-align: left">Transform several columns</td><td style="text-align: left"><code>summarize(df, max(x), min(y))</code></td><td style="text-align: left"><code>combine(df, :x =&gt; maximum,  :y =&gt; minimum)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>summarize(df, across(c(x, y), mean))</code></td><td style="text-align: left"><code>combine(df, [:x, :y] .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>summarize(df, across(starts_with(&quot;x&quot;), mean))</code></td><td style="text-align: left"><code>combine(df, names(df, r&quot;^x&quot;) .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>summarize(df, across(c(x, y), list(max, min)))</code></td><td style="text-align: left"><code>combine(df, ([:x, :y] .=&gt; [maximum 
minimum])...)</code></td></tr><tr><td style="text-align: left">Multivariate function</td><td style="text-align: left"><code>mutate(df, cor(x, y))</code></td><td style="text-align: left"><code>transform(df, [:x, :y] =&gt; cor)</code></td></tr><tr><td style="text-align: left">Row-wise</td><td style="text-align: left"><code>mutate(rowwise(df), min(x, y))</code></td><td style="text-align: left"><code>transform(df, [:x, :y] =&gt; ByRow(min))</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>mutate(rowwise(df), which.max(c_across(matches(&quot;^x&quot;))))</code></td><td style="text-align: left"><code>transform(df, AsTable(r&quot;^x&quot;) =&gt; ByRow(argmax))</code></td></tr><tr><td style="text-align: left">DataFrame as input</td><td style="text-align: left"><code>summarize(df, head(across(), 2))</code></td><td style="text-align: left"><code>combine(d -&gt; first(d, 2), df)</code></td></tr><tr><td style="text-align: left">DataFrame as output</td><td style="text-align: left"><code>summarize(df, tibble(value = c(min(x), max(x))))</code></td><td style="text-align: left"><code>combine(df, :x =&gt; (x -&gt; (value = [minimum(x), maximum(x)],)) =&gt; AsTable)</code></td></tr></table><h2 id="Comparison-with-the-R-package-data.table"><a class="docs-heading-anchor" href="#Comparison-with-the-R-package-data.table">Comparison with the R package data.table</a><a id="Comparison-with-the-R-package-data.table-1"></a><a class="docs-heading-anchor-permalink" href="#Comparison-with-the-R-package-data.table" title="Permalink"></a></h2><p>The following table compares the main functions of DataFrames.jl with the R package data.table (version 1.14.1).</p><pre><code class="language-R hljs">library(data.table)
 df  &lt;- data.table(grp = rep(1:2, 3), x = 6:1, y = 4:9,
                   z = c(3:7, NA), id = letters[1:6])
-df2 &lt;- data.table(grp=c(1,3), w = c(10,11))</code></pre><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>df[, .(mean(x))]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>df[, x_mean:=mean(x) ]</code></td><td style="text-align: left"><code>transform!(df, :x =&gt; mean =&gt; :x_mean)</code></td></tr><tr><td style="text-align: left">Rename column (in place)</td><td style="text-align: left"><code>setnames(df, &quot;x&quot;, &quot;x_new&quot;)</code></td><td style="text-align: left"><code>rename!(df, :x =&gt; :x_new)</code></td></tr><tr><td style="text-align: left">Rename multiple columns (in place)</td><td style="text-align: left"><code>setnames(df, c(&quot;x&quot;, &quot;y&quot;), c(&quot;x_new&quot;, &quot;y_new&quot;))</code></td><td style="text-align: left"><code>rename!(df, [:x, :y] .=&gt; [:x_new, :y_new])</code></td></tr><tr><td style="text-align: left">Pick columns as dataframe</td><td style="text-align: left"><code>df[, .(x, y)]</code></td><td style="text-align: left"><code>select(df, :x, :y)</code></td></tr><tr><td style="text-align: left">Pick column as a vector</td><td style="text-align: left"><code>df[, x]</code></td><td style="text-align: left"><code>df[!, :x]</code></td></tr><tr><td style="text-align: left">Remove columns</td><td style="text-align: left"><code>df[, -&quot;x&quot;]</code></td><td style="text-align: left"><code>select(df, Not(:x))</code></td></tr><tr><td style="text-align: left">Remove columns (in place)</td><td style="text-align: left"><code>df[, x:=NULL]</code></td><td style="text-align: left"><code>select!(df, Not(:x))</code></td></tr><tr><td style="text-align: left">Remove columns (in place)</td><td 
style="text-align: left"><code>df[, c(&quot;x&quot;, &quot;y&quot;):=NULL]</code></td><td style="text-align: left"><code>select!(df, Not([:x, :y]))</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>df[, .(mean(x), y)]</code></td><td style="text-align: left"><code>select(df, :x =&gt; mean, :y)</code></td></tr><tr><td style="text-align: left">Pick rows</td><td style="text-align: left"><code>df[ x &gt;= 1 ]</code></td><td style="text-align: left"><code>filter(:x =&gt; &gt;=(1), df)</code></td></tr><tr><td style="text-align: left">Sort rows (in place)</td><td style="text-align: left"><code>setorder(df, x)</code></td><td style="text-align: left"><code>sort!(df, :x)</code></td></tr><tr><td style="text-align: left">Sort rows</td><td style="text-align: left"><code>df[ order(x) ]</code></td><td style="text-align: left"><code>sort(df, :x)</code></td></tr></table><h3 id="Grouping-data-and-aggregation-2"><a class="docs-heading-anchor" href="#Grouping-data-and-aggregation-2">Grouping data and aggregation</a><a class="docs-heading-anchor-permalink" href="#Grouping-data-and-aggregation-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>df[, mean(x), by=id ]</code></td><td style="text-align: left"><code>combine(groupby(df, :id), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns (in place)</td><td style="text-align: left"><code>df[, x_mean:=mean(x), by=id]</code></td><td style="text-align: left"><code>transform!(groupby(df, :id), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>df[, .(x_mean = mean(x), y), by=id]</code></td><td style="text-align: left"><code>select(groupby(df, :id), :x 
=&gt; mean, :y)</code></td></tr></table><h3 id="More-advanced-commands-2"><a class="docs-heading-anchor" href="#More-advanced-commands-2">More advanced commands</a><a class="docs-heading-anchor-permalink" href="#More-advanced-commands-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Complex Function</td><td style="text-align: left"><code>df[, .(mean(x, na.rm=TRUE)) ]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; x -&gt; mean(skipmissing(x)))</code></td></tr><tr><td style="text-align: left">Transform certain rows (in place)</td><td style="text-align: left"><code>df[x&lt;=0, x:=0]</code></td><td style="text-align: left"><code>df.x[df.x .&lt;= 0] .= 0</code></td></tr><tr><td style="text-align: left">Transform several columns</td><td style="text-align: left"><code>df[, .(max(x), min(y)) ]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; maximum, :y =&gt; minimum)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, lapply(.SD, mean), .SDcols = c(&quot;x&quot;, &quot;y&quot;) ]</code></td><td style="text-align: left"><code>combine(df, [:x, :y] .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, lapply(.SD, mean), .SDcols = patterns(&quot;*x&quot;) ]</code></td><td style="text-align: left"><code>combine(df, names(df, r&quot;^x&quot;) .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>dcast(df, . 
~ ., list(max,min), value.var = c(&quot;x&quot;,&quot;y&quot;))</code></td><td style="text-align: left"><code>combine(df, ([:x, :y] .=&gt; [maximum minimum])...)</code></td></tr><tr><td style="text-align: left">Multivariate function</td><td style="text-align: left"><code>df[, .(cor(x,y)) ]</code></td><td style="text-align: left"><code>transform(df, [:x, :y] =&gt; cor)</code></td></tr><tr><td style="text-align: left">Row-wise</td><td style="text-align: left"><code>df[, min_xy := min(x, y), by = 1:nrow(df)]</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; ByRow(min))</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, argmax_xy := which.max(.SD) , .SDcols = patterns(&quot;*x&quot;), by = 1:nrow(df) ]</code></td><td style="text-align: left"><code>transform!(df, AsTable(r&quot;^x&quot;) =&gt; ByRow(argmax))</code></td></tr><tr><td style="text-align: left">DataFrame as output</td><td style="text-align: left"><code>df[, .SD[1], by=grp]</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), first)</code></td></tr><tr><td style="text-align: left">DataFrame as output</td><td style="text-align: left"><code>df[, .SD[which.max(x)], by=grp]</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), sdf -&gt; sdf[argmax(sdf.x), :])</code></td></tr><tr><td style="text-align: left">Reshape longer</td><td style="text-align: left"><code>longdf = melt(df, measure.vars=c(&quot;x&quot;,&quot;y&quot;), id.vars=&quot;id&quot;)</code></td><td style="text-align: left"><code>longdf = stack(df, [:x, :y], :id)</code></td></tr><tr><td style="text-align: left">Reshape wider</td><td style="text-align: left"><code>dcast(longdf, id ~ variable, value.var=&quot;value&quot;)</code></td><td style="text-align: left"><code>unstack(longdf, :id, :variable, :value)</code></td></tr></table><h3 id="Joining-data-frames-2"><a class="docs-heading-anchor" href="#Joining-data-frames-2">Joining data frames</a><a 
class="docs-heading-anchor-permalink" href="#Joining-data-frames-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Inner join</td><td style="text-align: left"><code>merge(df, df2, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>innerjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Outer join</td><td style="text-align: left"><code>merge(df, df2, all = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>outerjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Left join</td><td style="text-align: left"><code>merge(df, df2, all.x = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>leftjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Right join</td><td style="text-align: left"><code>merge(df, df2, all.y = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>rightjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Anti join (filtering)</td><td style="text-align: left"><code>df[!df2, on = &quot;grp&quot; ]</code></td><td style="text-align: left"><code>antijoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Semi join (filtering)</td><td style="text-align: left"><code>merge(df1, df2[, .(grp)])</code></td><td style="text-align: left"><code>semijoin(df, df2, on = :grp)</code></td></tr></table><h2 id="Comparison-with-Stata-(version-8-and-above)"><a class="docs-heading-anchor" href="#Comparison-with-Stata-(version-8-and-above)">Comparison with Stata (version 8 and above)</a><a id="Comparison-with-Stata-(version-8-and-above)-1"></a><a class="docs-heading-anchor-permalink" href="#Comparison-with-Stata-(version-8-and-above)" title="Permalink"></a></h2><p>The following table compares the main functions of DataFrames.jl 
with Stata:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>collapse (mean) x</code></td><td style="text-align: left"><code>combine(df, :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>egen x_mean = mean(x)</code></td><td style="text-align: left"><code>transform!(df, :x =&gt; mean =&gt; :x_mean)</code></td></tr><tr><td style="text-align: left">Rename columns</td><td style="text-align: left"><code>rename x x_new</code></td><td style="text-align: left"><code>rename!(df, :x =&gt; :x_new)</code></td></tr><tr><td style="text-align: left">Pick columns</td><td style="text-align: left"><code>keep x y</code></td><td style="text-align: left"><code>select!(df, :x, :y)</code></td></tr><tr><td style="text-align: left">Pick rows</td><td style="text-align: left"><code>keep if x &gt;= 1</code></td><td style="text-align: left"><code>subset!(df, :x =&gt; ByRow(x -&gt; x &gt;= 1))</code></td></tr><tr><td style="text-align: left">Sort rows</td><td style="text-align: left"><code>sort x</code></td><td style="text-align: left"><code>sort!(df, :x)</code></td></tr></table><p>Note that the suffix <code>!</code> (i.e. 
<code>transform!</code>, <code>select!</code>, etc) ensures that the operation transforms the dataframe in place, as in Stata</p><p>Some of these functions can be applied to grouped data frames, in which case they operate by group:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>egen x_mean = mean(x), by(grp)</code></td><td style="text-align: left"><code>transform!(groupby(df, :grp), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>collapse (mean) x, by(grp)</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), :x =&gt; mean)</code></td></tr></table><p>The table below compares more advanced commands:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Transform certain rows</td><td style="text-align: left"><code>replace x = 0 if x &lt;= 0</code></td><td style="text-align: left"><code>transform(df, :x =&gt; (x -&gt; ifelse.(x .&lt;= 0, 0, x)) =&gt; :x)</code></td></tr><tr><td style="text-align: left">Transform several columns</td><td style="text-align: left"><code>collapse (max) x (min) y</code></td><td style="text-align: left"><code>combine(df, :x =&gt; maximum,  :y =&gt; minimum)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>collapse (mean) x y</code></td><td style="text-align: left"><code>combine(df, [:x, :y] .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>collapse (mean) x*</code></td><td style="text-align: left"><code>combine(df, names(df, r&quot;^x&quot;) .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: 
left"><code>collapse (max) x y (min) x y</code></td><td style="text-align: left"><code>combine(df, ([:x, :y] .=&gt; [maximum minimum])...)</code></td></tr><tr><td style="text-align: left">Multivariate function</td><td style="text-align: left"><code>egen z = corr(x y)</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; cor =&gt; :z)</code></td></tr><tr><td style="text-align: left">Row-wise</td><td style="text-align: left"><code>egen z = rowmin(x y)</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; ByRow(min) =&gt; :z)</code></td></tr></table></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../querying_frameworks/">« Data manipulation frameworks</a><a class="docs-footer-nextpage" href="../../lib/types/">Types »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 
December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+df2 &lt;- data.table(grp=c(1,3), w = c(10,11))</code></pre><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>df[, .(mean(x))]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>df[, x_mean:=mean(x) ]</code></td><td style="text-align: left"><code>transform!(df, :x =&gt; mean =&gt; :x_mean)</code></td></tr><tr><td style="text-align: left">Rename column (in place)</td><td style="text-align: left"><code>setnames(df, &quot;x&quot;, &quot;x_new&quot;)</code></td><td style="text-align: left"><code>rename!(df, :x =&gt; :x_new)</code></td></tr><tr><td style="text-align: left">Rename multiple columns (in place)</td><td style="text-align: left"><code>setnames(df, c(&quot;x&quot;, &quot;y&quot;), c(&quot;x_new&quot;, &quot;y_new&quot;))</code></td><td style="text-align: left"><code>rename!(df, [:x, :y] .=&gt; [:x_new, :y_new])</code></td></tr><tr><td style="text-align: left">Pick columns as dataframe</td><td style="text-align: left"><code>df[, .(x, y)]</code></td><td style="text-align: left"><code>select(df, :x, :y)</code></td></tr><tr><td style="text-align: left">Pick column as a vector</td><td style="text-align: left"><code>df[, x]</code></td><td style="text-align: left"><code>df[!, :x]</code></td></tr><tr><td style="text-align: left">Remove columns</td><td style="text-align: left"><code>df[, -&quot;x&quot;]</code></td><td style="text-align: left"><code>select(df, Not(:x))</code></td></tr><tr><td style="text-align: left">Remove columns (in place)</td><td style="text-align: left"><code>df[, x:=NULL]</code></td><td style="text-align: left"><code>select!(df, Not(:x))</code></td></tr><tr><td style="text-align: left">Remove columns (in place)</td><td 
style="text-align: left"><code>df[, c(&quot;x&quot;, &quot;y&quot;):=NULL]</code></td><td style="text-align: left"><code>select!(df, Not([:x, :y]))</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>df[, .(mean(x), y)]</code></td><td style="text-align: left"><code>select(df, :x =&gt; mean, :y)</code></td></tr><tr><td style="text-align: left">Pick rows</td><td style="text-align: left"><code>df[ x &gt;= 1 ]</code></td><td style="text-align: left"><code>filter(:x =&gt; &gt;=(1), df)</code></td></tr><tr><td style="text-align: left">Sort rows (in place)</td><td style="text-align: left"><code>setorder(df, x)</code></td><td style="text-align: left"><code>sort!(df, :x)</code></td></tr><tr><td style="text-align: left">Sort rows</td><td style="text-align: left"><code>df[ order(x) ]</code></td><td style="text-align: left"><code>sort(df, :x)</code></td></tr></table><h3 id="Grouping-data-and-aggregation-2"><a class="docs-heading-anchor" href="#Grouping-data-and-aggregation-2">Grouping data and aggregation</a><a class="docs-heading-anchor-permalink" href="#Grouping-data-and-aggregation-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>df[, mean(x), by=id ]</code></td><td style="text-align: left"><code>combine(groupby(df, :id), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns (in place)</td><td style="text-align: left"><code>df[, x_mean:=mean(x), by=id]</code></td><td style="text-align: left"><code>transform!(groupby(df, :id), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Pick &amp; transform columns</td><td style="text-align: left"><code>df[, .(x_mean = mean(x), y), by=id]</code></td><td style="text-align: left"><code>select(groupby(df, :id), :x 
=&gt; mean, :y)</code></td></tr></table><h3 id="More-advanced-commands-2"><a class="docs-heading-anchor" href="#More-advanced-commands-2">More advanced commands</a><a class="docs-heading-anchor-permalink" href="#More-advanced-commands-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Complex Function</td><td style="text-align: left"><code>df[, .(mean(x, na.rm=TRUE)) ]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; x -&gt; mean(skipmissing(x)))</code></td></tr><tr><td style="text-align: left">Transform certain rows (in place)</td><td style="text-align: left"><code>df[x&lt;=0, x:=0]</code></td><td style="text-align: left"><code>df.x[df.x .&lt;= 0] .= 0</code></td></tr><tr><td style="text-align: left">Transform several columns</td><td style="text-align: left"><code>df[, .(max(x), min(y)) ]</code></td><td style="text-align: left"><code>combine(df, :x =&gt; maximum, :y =&gt; minimum)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, lapply(.SD, mean), .SDcols = c(&quot;x&quot;, &quot;y&quot;) ]</code></td><td style="text-align: left"><code>combine(df, [:x, :y] .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, lapply(.SD, mean), .SDcols = patterns(&quot;*x&quot;) ]</code></td><td style="text-align: left"><code>combine(df, names(df, r&quot;^x&quot;) .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>dcast(df, . 
~ ., list(max,min), value.var = c(&quot;x&quot;,&quot;y&quot;))</code></td><td style="text-align: left"><code>combine(df, ([:x, :y] .=&gt; [maximum minimum])...)</code></td></tr><tr><td style="text-align: left">Multivariate function</td><td style="text-align: left"><code>df[, .(cor(x,y)) ]</code></td><td style="text-align: left"><code>transform(df, [:x, :y] =&gt; cor)</code></td></tr><tr><td style="text-align: left">Row-wise</td><td style="text-align: left"><code>df[, min_xy := min(x, y), by = 1:nrow(df)]</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; ByRow(min))</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>df[, argmax_xy := which.max(.SD) , .SDcols = patterns(&quot;*x&quot;), by = 1:nrow(df) ]</code></td><td style="text-align: left"><code>transform!(df, AsTable(r&quot;^x&quot;) =&gt; ByRow(argmax))</code></td></tr><tr><td style="text-align: left">DataFrame as output</td><td style="text-align: left"><code>df[, .SD[1], by=grp]</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), first)</code></td></tr><tr><td style="text-align: left">DataFrame as output</td><td style="text-align: left"><code>df[, .SD[which.max(x)], by=grp]</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), sdf -&gt; sdf[argmax(sdf.x), :])</code></td></tr><tr><td style="text-align: left">Reshape longer</td><td style="text-align: left"><code>longdf = melt(df, measure.vars=c(&quot;x&quot;,&quot;y&quot;), id.vars=&quot;id&quot;)</code></td><td style="text-align: left"><code>longdf = stack(df, [:x, :y], :id)</code></td></tr><tr><td style="text-align: left">Reshape wider</td><td style="text-align: left"><code>dcast(longdf, id ~ variable, value.var=&quot;value&quot;)</code></td><td style="text-align: left"><code>unstack(longdf, :id, :variable, :value)</code></td></tr></table><h3 id="Joining-data-frames-2"><a class="docs-heading-anchor" href="#Joining-data-frames-2">Joining data frames</a><a 
class="docs-heading-anchor-permalink" href="#Joining-data-frames-2" title="Permalink"></a></h3><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">data.table</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Inner join</td><td style="text-align: left"><code>merge(df, df2, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>innerjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Outer join</td><td style="text-align: left"><code>merge(df, df2, all = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>outerjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Left join</td><td style="text-align: left"><code>merge(df, df2, all.x = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>leftjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Right join</td><td style="text-align: left"><code>merge(df, df2, all.y = TRUE, on = &quot;grp&quot;)</code></td><td style="text-align: left"><code>rightjoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Anti join (filtering)</td><td style="text-align: left"><code>df[!df2, on = &quot;grp&quot; ]</code></td><td style="text-align: left"><code>antijoin(df, df2, on = :grp)</code></td></tr><tr><td style="text-align: left">Semi join (filtering)</td><td style="text-align: left"><code>merge(df1, df2[, .(grp)])</code></td><td style="text-align: left"><code>semijoin(df, df2, on = :grp)</code></td></tr></table><h2 id="Comparison-with-Stata-(version-8-and-above)"><a class="docs-heading-anchor" href="#Comparison-with-Stata-(version-8-and-above)">Comparison with Stata (version 8 and above)</a><a id="Comparison-with-Stata-(version-8-and-above)-1"></a><a class="docs-heading-anchor-permalink" href="#Comparison-with-Stata-(version-8-and-above)" title="Permalink"></a></h2><p>The following table compares the main functions of DataFrames.jl 
with Stata:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>collapse (mean) x</code></td><td style="text-align: left"><code>combine(df, :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>egen x_mean = mean(x)</code></td><td style="text-align: left"><code>transform!(df, :x =&gt; mean =&gt; :x_mean)</code></td></tr><tr><td style="text-align: left">Rename columns</td><td style="text-align: left"><code>rename x x_new</code></td><td style="text-align: left"><code>rename!(df, :x =&gt; :x_new)</code></td></tr><tr><td style="text-align: left">Pick columns</td><td style="text-align: left"><code>keep x y</code></td><td style="text-align: left"><code>select!(df, :x, :y)</code></td></tr><tr><td style="text-align: left">Pick rows</td><td style="text-align: left"><code>keep if x &gt;= 1</code></td><td style="text-align: left"><code>subset!(df, :x =&gt; ByRow(x -&gt; x &gt;= 1))</code></td></tr><tr><td style="text-align: left">Sort rows</td><td style="text-align: left"><code>sort x</code></td><td style="text-align: left"><code>sort!(df, :x)</code></td></tr></table><p>Note that the suffix <code>!</code> (i.e. 
<code>transform!</code>, <code>select!</code>, etc) ensures that the operation transforms the dataframe in place, as in Stata</p><p>Some of these functions can be applied to grouped data frames, in which case they operate by group:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Add new columns</td><td style="text-align: left"><code>egen x_mean = mean(x), by(grp)</code></td><td style="text-align: left"><code>transform!(groupby(df, :grp), :x =&gt; mean)</code></td></tr><tr><td style="text-align: left">Reduce multiple values</td><td style="text-align: left"><code>collapse (mean) x, by(grp)</code></td><td style="text-align: left"><code>combine(groupby(df, :grp), :x =&gt; mean)</code></td></tr></table><p>The table below compares more advanced commands:</p><table><tr><th style="text-align: left">Operation</th><th style="text-align: left">Stata</th><th style="text-align: left">DataFrames.jl</th></tr><tr><td style="text-align: left">Transform certain rows</td><td style="text-align: left"><code>replace x = 0 if x &lt;= 0</code></td><td style="text-align: left"><code>transform(df, :x =&gt; (x -&gt; ifelse.(x .&lt;= 0, 0, x)) =&gt; :x)</code></td></tr><tr><td style="text-align: left">Transform several columns</td><td style="text-align: left"><code>collapse (max) x (min) y</code></td><td style="text-align: left"><code>combine(df, :x =&gt; maximum,  :y =&gt; minimum)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>collapse (mean) x y</code></td><td style="text-align: left"><code>combine(df, [:x, :y] .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: left"><code>collapse (mean) x*</code></td><td style="text-align: left"><code>combine(df, names(df, r&quot;^x&quot;) .=&gt; mean)</code></td></tr><tr><td style="text-align: left"></td><td style="text-align: 
left"><code>collapse (max) x y (min) x y</code></td><td style="text-align: left"><code>combine(df, ([:x, :y] .=&gt; [maximum minimum])...)</code></td></tr><tr><td style="text-align: left">Multivariate function</td><td style="text-align: left"><code>egen z = corr(x y)</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; cor =&gt; :z)</code></td></tr><tr><td style="text-align: left">Row-wise</td><td style="text-align: left"><code>egen z = rowmin(x y)</code></td><td style="text-align: left"><code>transform!(df, [:x, :y] =&gt; ByRow(min) =&gt; :z)</code></td></tr></table></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../querying_frameworks/">« Data manipulation frameworks</a><a class="docs-footer-nextpage" href="../../lib/types/">Types »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 
December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/getting_started/index.html b/dev/man/getting_started/index.html
index 37b3527cb..c4cf7350f 100644
--- a/dev/man/getting_started/index.html
+++ b/dev/man/getting_started/index.html
@@ -264,4 +264,4 @@
 julia&gt; Tables.rowtable(df)
 2-element Vector{@NamedTuple{a::Int64, b::Int64}}:
  (a = 1, b = 2)
- (a = 3, b = 4)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../basics/">« First Steps with DataFrames.jl</a><a class="docs-footer-nextpage" href="../working_with_dataframes/">Working with DataFrames »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ (a = 3, b = 4)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../basics/">« First Steps with DataFrames.jl</a><a class="docs-footer-nextpage" href="../working_with_dataframes/">Working with DataFrames »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/importing_and_exporting/index.html b/dev/man/importing_and_exporting/index.html
index 2b925427d..694834181 100644
--- a/dev/man/importing_and_exporting/index.html
+++ b/dev/man/importing_and_exporting/index.html
@@ -51,4 +51,4 @@
  148 │         6.5         3.0          5.2         2.0  Iris-virginica
  149 │         6.2         3.4          5.4         2.3  Iris-virginica
  150 │         5.9         3.0          5.1         1.8  Iris-virginica
-                                                        135 rows omitted</code></pre><p>Observe that in our example:</p><ul><li><code>header</code> is a <code>Matrix</code> therefore we had to pass <code>vec(header)</code> to the <code>DataFrame</code> constructor;</li><li>we broadcasted the <code>identity</code> function over the <code>iris_raw</code> data frame to perform narrowing of <code>eltype</code> of columns of <code>iris_raw</code>; the reason is that read in by the <code>readdlm</code> function is stored into a <code>data</code> <code>Matrix</code> so all columns in <code>iris_raw</code> initially have the same <code>eltype</code> – in this case it had to be <code>Any</code> as some of the columns are numeric and some are string.</li></ul><p>All such operations (and many more) are automatically handled by CSV.jl.</p><p>Similarly, you can use the <code>writedlm</code> function from the <code>DelimitedFiles</code> module to save a data frame like this:</p><pre><code class="language-julia hljs">writedlm(&quot;test.csv&quot;, Iterators.flatten(([names(iris)], eachrow(iris))), &#39;,&#39;)</code></pre><p>As you can see the code required to transform <code>iris</code> into a proper input to the <code>writedlm</code> function so that you can create the CSV file having the expected format is not easy. 
Therefore CSV.jl is the preferred package to write CSV files for data stored in data frames.</p><h2 id="Other-formats"><a class="docs-heading-anchor" href="#Other-formats">Other formats</a><a id="Other-formats-1"></a><a class="docs-heading-anchor-permalink" href="#Other-formats" title="Permalink"></a></h2><p>Other data formats are supported for reading and writing in the following packages (non exhaustive list):</p><ul><li>Apache Arrow (including Feather v2): <a href="https://github.com/JuliaData/Arrow.jl">Arrow.jl</a></li><li>Apache Feather (v1): <a href="https://github.com/JuliaData/Feather.jl">Feather.jl</a></li><li>Apache Avro: <a href="https://github.com/JuliaData/Avro.jl">Avro.jl</a></li><li>JSON: <a href="https://github.com/JuliaData/JSONTables.jl">JSONTables.jl</a></li><li>Parquet: <a href="https://gitlab.com/ExpandingMan/Parquet2.jl">Parquet2.jl</a></li><li>Stata, SAS and SPSS: <a href="https://github.com/junyuan-chen/ReadStatTables.jl">ReadStatTables.jl</a> (alternatively <a href="https://www.queryverse.org/">Queryverse</a>  users can choose <a href="https://github.com/queryverse/StatFiles.jl">StatFiles.jl</a>)</li><li>reading R data files (.rda, .RData): <a href="https://github.com/JuliaData/RData.jl">RData.jl</a></li><li>Microsoft Excel (XLSX): <a href="https://github.com/felipenoris/XLSX.jl">XLSX.jl</a></li><li>Copying/pasting to clipboard, for sending data to and from spreadsheets: <a href="https://github.com/pdeffebach/ClipData.jl">ClipData.jl</a></li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../working_with_dataframes/">« Working with DataFrames</a><a class="docs-footer-nextpage" href="../joins/">Joins »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+                                                        135 rows omitted</code></pre><p>Observe that in our example:</p><ul><li><code>header</code> is a <code>Matrix</code> therefore we had to pass <code>vec(header)</code> to the <code>DataFrame</code> constructor;</li><li>we broadcasted the <code>identity</code> function over the <code>iris_raw</code> data frame to perform narrowing of <code>eltype</code> of columns of <code>iris_raw</code>; the reason is that read in by the <code>readdlm</code> function is stored into a <code>data</code> <code>Matrix</code> so all columns in <code>iris_raw</code> initially have the same <code>eltype</code> – in this case it had to be <code>Any</code> as some of the columns are numeric and some are string.</li></ul><p>All such operations (and many more) are automatically handled by CSV.jl.</p><p>Similarly, you can use the <code>writedlm</code> function from the <code>DelimitedFiles</code> module to save a data frame like this:</p><pre><code class="language-julia hljs">writedlm(&quot;test.csv&quot;, Iterators.flatten(([names(iris)], eachrow(iris))), &#39;,&#39;)</code></pre><p>As you can see the code required to transform <code>iris</code> into a proper input to the <code>writedlm</code> function so that you can create the CSV file having the expected format is not easy. 
Therefore CSV.jl is the preferred package to write CSV files for data stored in data frames.</p><h2 id="Other-formats"><a class="docs-heading-anchor" href="#Other-formats">Other formats</a><a id="Other-formats-1"></a><a class="docs-heading-anchor-permalink" href="#Other-formats" title="Permalink"></a></h2><p>Other data formats are supported for reading and writing in the following packages (non exhaustive list):</p><ul><li>Apache Arrow (including Feather v2): <a href="https://github.com/JuliaData/Arrow.jl">Arrow.jl</a></li><li>Apache Feather (v1): <a href="https://github.com/JuliaData/Feather.jl">Feather.jl</a></li><li>Apache Avro: <a href="https://github.com/JuliaData/Avro.jl">Avro.jl</a></li><li>JSON: <a href="https://github.com/JuliaData/JSONTables.jl">JSONTables.jl</a></li><li>Parquet: <a href="https://gitlab.com/ExpandingMan/Parquet2.jl">Parquet2.jl</a></li><li>Stata, SAS and SPSS: <a href="https://github.com/junyuan-chen/ReadStatTables.jl">ReadStatTables.jl</a> (alternatively <a href="https://www.queryverse.org/">Queryverse</a>  users can choose <a href="https://github.com/queryverse/StatFiles.jl">StatFiles.jl</a>)</li><li>reading R data files (.rda, .RData): <a href="https://github.com/JuliaData/RData.jl">RData.jl</a></li><li>Microsoft Excel (XLSX): <a href="https://github.com/felipenoris/XLSX.jl">XLSX.jl</a></li><li>Copying/pasting to clipboard, for sending data to and from spreadsheets: <a href="https://github.com/pdeffebach/ClipData.jl">ClipData.jl</a></li></ul></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../working_with_dataframes/">« Working with DataFrames</a><a class="docs-footer-nextpage" href="../joins/">Joins »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/joins/index.html b/dev/man/joins/index.html
index 1962c02b5..0000611c5 100644
--- a/dev/man/joins/index.html
+++ b/dev/man/joins/index.html
@@ -283,4 +283,4 @@
    2 │     2      2  a
    3 │     3      3  missing
    4 │     4      4  b</code></pre><p>Note that in this case the order and number of rows in the left table is not changed. Therefore, in particular, it is not allowed to have duplicate keys in the right table:</p><pre><code class="nohighlight hljs">julia&gt; leftjoin!(main, DataFrame(id=[2, 2], info_bad=[&quot;a&quot;, &quot;b&quot;]), on=:id)
-ERROR: ArgumentError: duplicate rows found in right table</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../importing_and_exporting/">« Importing and Exporting Data (I/O)</a><a class="docs-footer-nextpage" href="../split_apply_combine/">Split-apply-combine »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ERROR: ArgumentError: duplicate rows found in right table</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../importing_and_exporting/">« Importing and Exporting Data (I/O)</a><a class="docs-footer-nextpage" href="../split_apply_combine/">Split-apply-combine »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/missing/index.html b/dev/man/missing/index.html
index 658d73cd3..d7fc56801 100644
--- a/dev/man/missing/index.html
+++ b/dev/man/missing/index.html
@@ -137,4 +137,4 @@
 
 julia&gt; missings(Int, 1, 3)
 1×3 Matrix{Union{Missing, Int64}}:
- missing  missing  missing</code></pre><p>See the <a href="https://docs.julialang.org/en/v1/manual/missing/">Julia manual</a> for more information about missing values.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../categorical/">« Categorical Data</a><a class="docs-footer-nextpage" href="../querying_frameworks/">Data manipulation frameworks »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ missing  missing  missing</code></pre><p>See the <a href="https://docs.julialang.org/en/v1/manual/missing/">Julia manual</a> for more information about missing values.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../categorical/">« Categorical Data</a><a class="docs-footer-nextpage" href="../querying_frameworks/">Data manipulation frameworks »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/querying_frameworks/index.html b/dev/man/querying_frameworks/index.html
index 6d080b0cd..08c4be49b 100644
--- a/dev/man/querying_frameworks/index.html
+++ b/dev/man/querying_frameworks/index.html
@@ -242,4 +242,4 @@
        end
 1-element Vector{String}:
  &quot;Roger&quot;
-</code></pre><p>A query that ends with a <code>@collect</code> statement without a specific type will materialize the query results into an array. Note also the difference in the <code>@select</code> statement: The previous queries all used the <code>{}</code> syntax in the <code>@select</code> statement to project results into a tabular format. The last query instead just selects a single value from each row in the <code>@select</code> statement.</p><p>These examples only scratch the surface of what one can do with <a href="https://github.com/queryverse/Query.jl">Query.jl</a>, and the interested reader is referred to the <a href="http://www.queryverse.org/Query.jl/stable/">Query.jl documentation</a> for more information.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../missing/">« Missing Data</a><a class="docs-footer-nextpage" href="../comparisons/">Comparison with Python/R/Stata »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a 
href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</code></pre><p>A query that ends with a <code>@collect</code> statement without a specific type will materialize the query results into an array. Note also the difference in the <code>@select</code> statement: The previous queries all used the <code>{}</code> syntax in the <code>@select</code> statement to project results into a tabular format. The last query instead just selects a single value from each row in the <code>@select</code> statement.</p><p>These examples only scratch the surface of what one can do with <a href="https://github.com/queryverse/Query.jl">Query.jl</a>, and the interested reader is referred to the <a href="http://www.queryverse.org/Query.jl/stable/">Query.jl documentation</a> for more information.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../missing/">« Missing Data</a><a class="docs-footer-nextpage" href="../comparisons/">Comparison with Python/R/Stata »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a 
href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/reshaping_and_pivoting/index.html b/dev/man/reshaping_and_pivoting/index.html
index 944a636d0..f5a6f2bd8 100644
--- a/dev/man/reshaping_and_pivoting/index.html
+++ b/dev/man/reshaping_and_pivoting/index.html
@@ -316,4 +316,4 @@
 ─────┼─────────────────────────────
    1 │ b               1     two
    2 │ c               3     4
-   3 │ d               true  false</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../split_apply_combine/">« Split-apply-combine</a><a class="docs-footer-nextpage" href="../sorting/">Sorting »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+   3 │ d               true  false</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../split_apply_combine/">« Split-apply-combine</a><a class="docs-footer-nextpage" href="../sorting/">Sorting »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/sorting/index.html b/dev/man/sorting/index.html
index ca0eea6a7..0fe2b663d 100644
--- a/dev/man/sorting/index.html
+++ b/dev/man/sorting/index.html
@@ -158,4 +158,4 @@
  148 │         5.1         3.3          1.7         0.5  Iris-setosa
  149 │         5.1         3.8          1.9         0.4  Iris-setosa
  150 │         4.8         3.4          1.9         0.2  Iris-setosa
-                                                        135 rows omitted</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../reshaping_and_pivoting/">« Reshaping</a><a class="docs-footer-nextpage" href="../categorical/">Categorical Data »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+                                                        135 rows omitted</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../reshaping_and_pivoting/">« Reshaping</a><a class="docs-footer-nextpage" href="../categorical/">Categorical Data »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/split_apply_combine/index.html b/dev/man/split_apply_combine/index.html
index f141958d4..0c71b558d 100644
--- a/dev/man/split_apply_combine/index.html
+++ b/dev/man/split_apply_combine/index.html
@@ -814,4 +814,4 @@
 3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
  GroupKey: (customer_id = &quot;c&quot;,)
  GroupKey: (customer_id = &quot;b&quot;,)
- GroupKey: (customer_id = &quot;a&quot;,)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../joins/">« Joins</a><a class="docs-footer-nextpage" href="../reshaping_and_pivoting/">Reshaping »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ GroupKey: (customer_id = &quot;a&quot;,)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../joins/">« Joins</a><a class="docs-footer-nextpage" href="../reshaping_and_pivoting/">Reshaping »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/man/working_with_dataframes/index.html b/dev/man/working_with_dataframes/index.html
index b4345c1be..f07cdf41b 100644
--- a/dev/man/working_with_dataframes/index.html
+++ b/dev/man/working_with_dataframes/index.html
@@ -744,4 +744,4 @@
    1 │ a             1  missing  x
    2 │ missing       2  j        y
    3 │ b             3  k        missing
-   4 │ missing       4  h        z</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../getting_started/">« Getting Started</a><a class="docs-footer-nextpage" href="../importing_and_exporting/">Importing and Exporting Data (I/O) »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Thursday 12 December 2024 15:48">Thursday 12 December 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+   4 │ missing       4  h        z</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../getting_started/">« Getting Started</a><a class="docs-footer-nextpage" href="../importing_and_exporting/">Importing and Exporting Data (I/O) »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Friday 13 December 2024 11:52">Friday 13 December 2024</span>. Using Julia version 1.11.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/objects.inv b/dev/objects.inv
index 52cbf4fb6..8b11cd776 100644
Binary files a/dev/objects.inv and b/dev/objects.inv differ
diff --git a/dev/search_index.js b/dev/search_index.js
index 89915f41d..80bfb4a53 100644
--- a/dev/search_index.js
+++ b/dev/search_index.js
@@ -1,3 +1,3 @@
 var documenterSearchIndex = {"docs":
-[{"location":"lib/internals/","page":"Internals","title":"Internals","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/internals/#Internals","page":"Internals","title":"Internals","text":"","category":"section"},{"location":"lib/internals/","page":"Internals","title":"Internals","text":"warning: Internal API\nThe functions, methods and types listed on this page are internal to DataFrames and are not considered to be part of the public API.","category":"page"},{"location":"lib/internals/","page":"Internals","title":"Internals","text":"compacttype\ngennames\ngetmaxwidths\nourshow\nourstrwidth\n@spawn_for_chunks\n@spawn_or_run_task\n@spawn_or_run\ndefault_table_transformation\nisreadonly","category":"page"},{"location":"lib/internals/#DataFrames.compacttype","page":"Internals","title":"DataFrames.compacttype","text":"compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)\n\nReturn compact string representation of type T.\n\nFor displaying data frame we do not want string representation of type to be longer than maxwidth. This function implements rules how type names are cropped if they are longer than maxwidth.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.gennames","page":"Internals","title":"DataFrames.gennames","text":"gennames(n::Integer)\n\nGenerate standardized names for columns of a DataFrame. 
The first name will be :x1, the second :x2, etc.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.getmaxwidths","page":"Internals","title":"DataFrames.getmaxwidths","text":"DataFrames.getmaxwidths(df::AbstractDataFrame,\n                        io::IO,\n                        rowindices1::AbstractVector{Int},\n                        rowindices2::AbstractVector{Int},\n                        rowlabel::Symbol,\n                        rowid::Union{Integer, Nothing},\n                        show_eltype::Bool,\n                        buffer::IOBuffer)\n\nCalculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the longest entry in that column – among the rows of the data frame will be rendered to IO. The widths for all columns are returned as a vector.\n\nReturn a Vector{Int} giving the maximum string widths required to render each column, including that column's name and type.\n\nNOTE: The last entry of the result vector is the string width of the implicit row ID column contained in every AbstractDataFrame.\n\nArguments\n\ndf::AbstractDataFrame: The data frame whose columns will be printed.\nio::IO: The IO to which df is to be printed\n`rowindices1::AbstractVector{Int}: A set of indices of the first chunk of the AbstractDataFrame that would be rendered to IO.\n`rowindices2::AbstractVector{Int}: A set of indices of the second chunk of the AbstractDataFrame that would be rendered to IO. Can be empty if the AbstractDataFrame would be printed without any ellipses.\nrowlabel::AbstractString: The label that will be used when rendered the numeric ID's of each row. 
Typically, this will be set to \"Row\".\nrowid: Used to handle showing DataFrameRow.\nshow_eltype: Whether to print the column type  under the column name in the heading.\nbuffer: buffer passed around to avoid reallocations in ourstrwidth\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.ourshow","page":"Internals","title":"DataFrames.ourshow","text":"DataFrames.ourshow(io::IO, x::Any, truncstring::Int)\n\nRender a value to an IO object compactly using print. truncstring indicates the approximate number of text characters width to truncate the output (if it is a non-positive value then no truncation is applied).\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.ourstrwidth","page":"Internals","title":"DataFrames.ourstrwidth","text":"DataFrames.ourstrwidth(io::IO, x::Any, buffer::IOBuffer, truncstring::Int)\n\nDetermine the number of characters that would be used to print a value.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.@spawn_for_chunks","page":"Internals","title":"DataFrames.@spawn_for_chunks","text":"@spawn_for_chunks basesize for i in range ... 
end\n\nParallelize a for loop by spawning separate tasks iterating each over a chunk of at least basesize elements in range.\n\nA number of tasks higher than Threads.nthreads() may be spawned, since that can allow for a more efficient load balancing in case some threads are busy (nested parallelism).\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.@spawn_or_run_task","page":"Internals","title":"DataFrames.@spawn_or_run_task","text":"@spawn_or_run_task threads expr\n\nEquivalent to Threads.@spawn if threads === true, otherwise run expr and return a Task that returns its value.\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.@spawn_or_run","page":"Internals","title":"DataFrames.@spawn_or_run","text":"@spawn_or_run threads expr\n\nEquivalent to Threads.@spawn if threads === true, otherwise run expr.\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.default_table_transformation","page":"Internals","title":"DataFrames.default_table_transformation","text":"default_table_transformation(df_sel::AbstractDataFrame, fun)\n\nThis is a default implementation called when AsTable(...) => fun is requested. The df_sel argument is a data frame storing columns selected by AsTable(...) selector.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.isreadonly","page":"Internals","title":"DataFrames.isreadonly","text":"isreadonly(fun)\n\nTrait returning a Bool indicator if function fun is only reading the passed argument. Such a function guarantees not to modify nor return in any form the passed argument. By default false is returned.\n\nThis function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental. Adding a method to isreadonly for a specific function fun will improve performance of AsTable(...) 
=> ByRow(fun∘collect) operation.\n\n\n\n\n\n","category":"function"},{"location":"man/basics/#First-Steps-with-DataFrames.jl","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"","category":"section"},{"location":"man/basics/#Setting-up-the-Environment","page":"First Steps with DataFrames.jl","title":"Setting up the Environment","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If want to use the DataFrames.jl package you need to install it first. You can do it using the following commands:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.add(\"DataFrames\")","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> ] # ']' should be pressed\n\n(@v1.9) pkg> add DataFrames","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to make sure everything works as expected you can run the tests bundled with DataFrames.jl, but be warned that it will take more than 30 minutes:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.test(\"DataFrames\") # Warning! 
This will take more than 30 minutes.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Additionally, it is recommended to check the version of DataFrames.jl that you have installed with the status command.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> ]\n\n(@v1.9) pkg> status DataFrames\n      Status `~\\v1.6\\Project.toml`\n  [a93c6f00] DataFrames v1.5.0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Throughout the rest of the tutorial we will assume that you have installed the DataFrames.jl package and have already typed using DataFrames which loads the package:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using DataFrames","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The most fundamental type provided by DataFrames.jl is DataFrame, where typically each row is interpreted as an observation and each column as a feature.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Advanced installation configuration\nDataFrames.jl puts in extra time and effort when the package is being built (precompiled) to make sure it is more responsive when you are using it. However, in some scenarios users might want to avoid this extra precompilaion effort to reduce the time needed to build the package and later to load it. 
To disable precompilation of DataFrames.jl in your current project follow the instructions given in the PrecompileTools.jl documentation","category":"page"},{"location":"man/basics/#Constructors-and-Basic-Utility-Functions","page":"First Steps with DataFrames.jl","title":"Constructors and Basic Utility Functions","text":"","category":"section"},{"location":"man/basics/#Constructors","page":"First Steps with DataFrames.jl","title":"Constructors","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this section you will see several ways to create a DataFrame using the constructor. You can find a detailed list of supported constructors along with more examples in the documentation of the DataFrame object.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We start by creating an empty DataFrame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame()\n0×0 DataFrame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Now let us initialize a DataFrame with several columns. 
This is a basic way to do it is the following:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(A=1:3, B=5:7, fixed=1)\n3×3 DataFrame\n Row │ A      B      fixed\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      1\n   2 │     2      6      1\n   3 │     3      7      1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Observe that using this constructor scalars, like 1 for the column :fixed get automatically broadcasted to fill all rows of the created DataFrame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes one needs to create a data frame whose column names are not valid Julia identifiers. In such a case the following form, where = is replaced by => is handy:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(\"customer age\" => [15, 20, 25],\n                 \"first name\" => [\"Rohit\", \"Rahul\", \"Akshat\"])\n3×2 DataFrame\n Row │ customer age  first name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Notice that this time we have passed column names as strings.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Often you have your source data stored in a dictionary. 
Provided that the keys of the dictionary are strings or Symbols you can also easily create a DataFrame from it:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> dict = Dict(\"customer age\" => [15, 20, 25],\n                   \"first name\" => [\"Rohit\", \"Rahul\", \"Akshat\"])\nDict{String, Vector} with 2 entries:\n  \"first name\"   => [\"Rohit\", \"Rahul\", \"Akshat\"]\n  \"customer age\" => [15, 20, 25]\n\njulia> DataFrame(dict)\n3×2 DataFrame\n Row │ customer age  first name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat\n\njulia> dict = Dict(:customer_age => [15, 20, 25],\n                   :first_name => [\"Rohit\", \"Rahul\", \"Akshat\"])\nDict{Symbol, Vector} with 2 entries:\n  :customer_age => [15, 20, 25]\n  :first_name   => [\"Rohit\", \"Rahul\", \"Akshat\"]\n\njulia> DataFrame(dict)\n3×2 DataFrame\n Row │ customer_age  first_name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using Symbols, e.g. :customer_age rather than strings, e.g. \"customer age\" to denote column names is preferred as it is faster. However, as you can see in the example above if our column name contains a space it is not very convenient to pass it as a Symbol (you would have to write Symbol(\"customer age\"), which is verbose) so using a string is more convenient.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"It is also quite common to create a DataFrame from a NamedTuple of vectors or a vector of NamedTuples. 
Here are some examples of these operations:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame((a=[1, 2], b=[3, 4]))\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes your source data might have a heterogeneous set of columns for each observation. Here is an example:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> source = [(type=\"circle\", radius=10), (type=\"square\", side=20)]\n2-element Vector{NamedTuple{names, Tuple{String, Int64}} where names}:\n (type = \"circle\", radius = 10)\n (type = \"square\", side = 20)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to create a data frame from such data containing all columns present in at least one of the source observations, with a missing entry if some column is not present then you can use Tables.dictcolumntable function to help you create the desired data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(Tables.dictcolumntable(source))\n2×3 DataFrame\n Row │ type    radius   side\n     │ String  Int64?   
Int64?\n─────┼──────────────────────────\n   1 │ circle       10  missing\n   2 │ square  missing       20","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The role of Tables.dictcolumntable is to make sure that the DataFrame constructor gets information about all columns present in the source data and properly instantiates them. If we did not use this function the DataFrame constructor would assume that the first row of data contains the set of columns present in the source, which would lead to an error in our example:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(source)\nERROR: type NamedTuple has no field radius","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us finish our review of constructors by showing how to create a DataFrame from a matrix. In this case you pass a matrix as a first argument. If the second argument is just :auto then column names x1, x2, ... 
will be auto generated.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame([1 0; 2 0], :auto)\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Alternatively you can pass a vector of column names as a second argument to the DataFrame constructor:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> mat = [1 2 4 5; 15 58 69 41; 23 21 26 69]\n3×4 Matrix{Int64}:\n  1   2   4   5\n 15  58  69  41\n 23  21  26  69\n\njulia> nms = [\"a\", \"b\", \"c\", \"d\"]\n4-element Vector{String}:\n \"a\"\n \"b\"\n \"c\"\n \"d\"\n\njulia> DataFrame(mat, nms)\n3×4 DataFrame\n Row │ a      b      c      d\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      4      5\n   2 │    15     58     69     41\n   3 │    23     21     26     69","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You now know how to create a DataFrame from data that you already have in your Julia session. 
In the next section we show how to load data to a DataFrame from disk.","category":"page"},{"location":"man/basics/#Reading-Data-From-CSV-Files","page":"First Steps with DataFrames.jl","title":"Reading Data From CSV Files","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Here we focus on one of the most common scenarios, where one has data stored on disk in the CSV format.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"First make sure you have CSV.jl installed. You can do it using the following instructions:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.add(\"CSV\")","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In order to read the file in we will use the CSV.read function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"german.csv\");\n\njulia> german_ref = CSV.read(path, DataFrame)\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little         ⋯\n    2 │     1     22  female       2  own      little           moderate\n    3 │     2     49  male         1  own      little           NA\n    4 │     3     45  male         2  free     little           little\n    5 │     4     53  male         2  free     little           
little         ⋯\n    6 │     5     35  male         1  free     NA               NA\n    7 │     6     53  male         2  own      quite rich       NA\n    8 │     7     35  male         3  rent     little           moderate\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │   993     30  male         3  own      little           little         ⋯\n  995 │   994     50  male         2  own      NA               NA\n  996 │   995     31  female       1  own      little           NA\n  997 │   996     40  male         3  own      little           little\n  998 │   997     38  male         2  own      little           NA             ⋯\n  999 │   998     23  male         2  free     little           little\n 1000 │   999     27  male         2  own      moderate         moderate\n                                                  4 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the data frame is wider and taller than the display width, so it got cropped and its 4 rightmost columns and middle 985 rows were not printed. Later in the tutorial we will discuss how to force Julia to show the whole data frame if we wanted so.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Also observe that DataFrames.jl displays the data type of the column below its name. In our case, it is an Int64, or String7 and String15.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us mention here the difference between the standard String type in Julia and e.g. the String7 or String15 types. The types with number suffix denote strings that have a fixed width (similar CHAR(N) type provided by many data bases). 
Such strings are much faster to work with (especially if you have many of them) than the standard String type because their instances are not heap allocated. For this reason CSV.read by default reads in narrow string columns using these fixed-width types.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us now explain in detail the following code block:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"german.csv\");\n\ngerman_ref = CSV.read(path, DataFrame)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"we are storing the german.csv file in the DataFrames.jl repository to make user's life easier and avoid having to download it each time;\npkgdir(DataFrames) gives us the full path to the root of the DataFrames.jl package.\nthen from this directory we need to move to the directory where the german.csv file is stored; we use joinpath as this is a recommended way to compose paths to resources stored on disk in an operating system independent way (remember that Windows and Unix differ as they use either / or \\ as path separator; the joinpath function ensures we are not running into issues with this);\nthen we read the CSV file; the second argument to CSV.read is DataFrame to indicate that we want to read in the file into a DataFrame (as CSV.read allows for many different target formats of data it can read-into).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before proceeding copy the reference data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german = 
copy(german_ref); # we copy the data frame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this way we can always easily restore our data even if we mess up the german data frame by modifying it.","category":"page"},{"location":"man/basics/#Basic-Operations-on-Data-Frames","page":"First Steps with DataFrames.jl","title":"Basic Operations on Data Frames","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To extract the columns of a data frame directly (i.e. without copying) you can use one of the following syntaxes: german.Sex, german.\"Sex\", german[!, :Sex] or german[!, \"Sex\"].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The two latter syntaxes using indexing are more flexible as they allow us passing a variable holding the name of the column, and not only a literal name as in the case of the syntax using a ..","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german.Sex\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n\njulia> colname = \"Sex\"\n\"Sex\"\n\njulia> german[!, colname]\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"","category":"page"},{"location":"man/basics/","page":"First Steps with 
DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Since german.Sex does not make a copy when extracting a column from the data frame, changing the elements of the vector returned by this operation will affect the values stored in the original german data frame. To get a copy of the column you can use german[:, :Sex] or german[:, \"Sex\"]. In this case changing the vector returned by this operation does not affect the data stored in the german data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The === function allows us to check if both expressions produce the same object and confirm the behavior described above:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german.Sex === german[!, :Sex]\ntrue\n\njulia> german.Sex === german[:, :Sex]\nfalse","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can obtain a vector of column names of the data frame as Strings using the names function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> names(german)\n10-element Vector{String}:\n \"id\"\n \"Age\"\n \"Sex\"\n \"Job\"\n \"Housing\"\n \"Saving accounts\"\n \"Checking account\"\n \"Credit amount\"\n \"Duration\"\n \"Purpose\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes you are interested in names of columns that meet a particular condition.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For example you can get column names with a given element type by passing this type as a second argument to the names 
function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> names(german, AbstractString)\n5-element Vector{String}:\n \"Sex\"\n \"Housing\"\n \"Saving accounts\"\n \"Checking account\"\n \"Purpose\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can explore more options of filtering column names in the documentation of the names function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If instead you wanted to get column names of a data frame as Symbols use the propertynames function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> propertynames(german)\n10-element Vector{Symbol}:\n :id\n :Age\n :Sex\n :Job\n :Housing\n Symbol(\"Saving accounts\")\n Symbol(\"Checking account\")\n Symbol(\"Credit amount\")\n :Duration\n :Purpose","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the column names containing spaces are not very convenient to work with as Symbols because they require more typing and introduce some visual noise.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you were interested in element types of the columns instead. You can use the eachcol(german) function to get an iterator over the columns of the data frame. 
Then you can broadcast the eltype function over it to get the desired result:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> eltype.(eachcol(german))\n10-element Vector{DataType}:\n Int64\n Int64\n String7\n Int64\n String7\n String15\n String15\n Int64\n Int64\n String31","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nRemember that DataFrames.jl allows to use Symbols (like :id) and strings (like \"id\") for all column indexing operations for convenience. However, using Symbols is slightly faster, but strings are simpler to work with when non standard characters are present in column names or one wants to manipulate them.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before we wrap up let us discuss the empty and empty! functions that remove all rows from a DataFrame. Understanding the difference between the behavior of these two functions will help you to understand the function naming scheme in DataFrames.jl in general.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us start with the example of using the empty and empty! 
functions:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> empty(german)\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted\n\njulia> german\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little         ⋯\n    2 │     1     22  female       2  own      little           moderate\n    3 │     2     49  male         1  own      little           NA\n    4 │     3     45  male         2  free     little           little\n    5 │     4     53  male         2  free     little           little         ⋯\n    6 │     5     35  male         1  free     NA               NA\n    7 │     6     53  male         2  own      quite rich       NA\n    8 │     7     35  male         3  rent     little           moderate\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │   993     30  male         3  own      little           little         ⋯\n  995 │   994     50  male         2  own      NA               NA\n  996 │   995     31  female       1  own      little           NA\n  997 │   996     40  male         3  own      little           little\n  998 │   997     38  male         2  own      little           NA             ⋯\n  999 │   998     23  male         2  free     little           little\n 1000 │   999     27  male         2  own      moderate         moderate\n                  
                                4 columns and 985 rows omitted\n\njulia> empty!(german)\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted\n\njulia> german\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the above example empty function created a new DataFrame with the same column names and column element types as german but with zero rows. On the other hand empty! function removed all rows from german in-place and made each of its columns empty.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The difference between the behavior of the empty and empty! functions is an application of the stylistic convention employed in the Julia language. 
This convention is followed in all functions provided by the DataFrames.jl package.","category":"page"},{"location":"man/basics/#Getting-Basic-Information-about-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Getting Basic Information about a Data Frame","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this section we will learn about how to get basic information on our german DataFrame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The size function returns the dimensions of the data frame. First we restore the german data frame, as we have just emptied it above.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german = copy(german_ref);\n\njulia> size(german)\n(1000, 10)\n\njulia> size(german, 1)\n1000\n\njulia> size(german, 2)\n10","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Additionally the nrow and ncol functions can be used to get the number of rows and columns in a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> nrow(german)\n1000\n\njulia> ncol(german)\n10","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To get basic statistics of data in your data frame use the describe function (check out the help of describe for information on how to customize the shown statistics).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> describe(german)\n10×7 DataFrame\n Row │ variable          mean     min       
median  max              nmissing  ⋯\n     │ Symbol            Union…   Any       Union…  Any              Int64     ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ id                499.5    0         499.5   999                     0  ⋯\n   2 │ Age               35.546   19        33.0    75                      0\n   3 │ Sex                        female            male                    0\n   4 │ Job               1.904    0         2.0     3                       0\n   5 │ Housing                    free              rent                    0  ⋯\n   6 │ Saving accounts            NA                rich                    0\n   7 │ Checking account           NA                rich                    0\n   8 │ Credit amount     3271.26  250       2319.5  18424                   0\n   9 │ Duration          20.903   4         18.0    72                      0  ⋯\n  10 │ Purpose                    business          vacation/others         0\n                                                                1 column omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To limit the columns processed by describe use cols keyword argument, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> describe(german, cols=1:3)\n3×7 DataFrame\n Row │ variable  mean    min     median  max   nmissing  eltype\n     │ Symbol    Union…  Any     Union…  Any   Int64     DataType\n─────┼────────────────────────────────────────────────────────────\n   1 │ id        499.5   0       499.5   999          0  Int64\n   2 │ Age       35.546  19      33.0    75           0  Int64\n   3 │ Sex               female          male         0  String7","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with 
DataFrames.jl","text":"The default statistics reported are mean, min, median, max, number of missing values, and element type of the column. missing values are skipped when computing the summary statistics.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can adjust how data frame is displayed by calling the show function manually: show(german, allrows=true) prints all rows even if they do not fit on screen and show(german, allcols=true) does the same for columns, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> show(german, allcols=true)\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking account  Credit amount  Duration  Purpose\n      │ Int64  Int64  String7  Int64  String7  String15         String15          Int64          Int64     String31\n──────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little                     1169         6  radio/TV\n    2 │     1     22  female       2  own      little           moderate                   5951        48  radio/TV\n    3 │     2     49  male         1  own      little           NA                         2096        12  education\n    4 │     3     45  male         2  free     little           little                     7882        42  furniture/equipment\n    5 │     4     53  male         2  free     little           little                     4870        24  car\n    6 │     5     35  male         1  free     NA               NA                         9055        36  education\n    7 │     6     53  male         2  own      quite rich       NA                         2835        24  furniture/equipment\n    8 │     7     35  male         3  rent    
 little           moderate                   6948        36  car\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮                ⋮           ⋮               ⋮\n  994 │   993     30  male         3  own      little           little                     3959        36  furniture/equipment\n  995 │   994     50  male         2  own      NA               NA                         2390        12  car\n  996 │   995     31  female       1  own      little           NA                         1736        12  furniture/equipment\n  997 │   996     40  male         3  own      little           little                     3857        30  car\n  998 │   997     38  male         2  own      little           NA                          804        12  radio/TV\n  999 │   998     23  male         2  free     little           little                     1845        45  radio/TV\n 1000 │   999     27  male         2  own      moderate         moderate                   4576        45  car\n                                                                                                               985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"It is easy to compute descriptive statistics directly on individual columns using the functions defined in the Statistics module:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Statistics\n\njulia> mean(german.Age)\n35.546","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If instead we want to apply some function to all columns of a data frame we can use the mapcols function. It returns a DataFrame where each column of the source data frame is transformed using a function passed as a first argument. 
Note that mapcols guarantees not to reuse the columns from german in the returned DataFrame. If the transformation returns its argument then it gets copied before being stored.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> mapcols(id -> id .^ 2, german)\n1000×10 DataFrame\n  Row │ id      Age    Sex           Job    Housing   Saving accounts       Ch ⋯\n      │ Int64   Int64  String        Int64  String    String                St ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │      0   4489  malemale          4  ownown    NANA                  li ⋯\n    2 │      1    484  femalefemale      4  ownown    littlelittle          mo\n    3 │      4   2401  malemale          1  ownown    littlelittle          NA\n    4 │      9   2025  malemale          4  freefree  littlelittle          li\n    5 │     16   2809  malemale          4  freefree  littlelittle          li ⋯\n    6 │     25   1225  malemale          1  freefree  NANA                  NA\n    7 │     36   2809  malemale          4  ownown    quite richquite rich  NA\n    8 │     49   1225  malemale          9  rentrent  littlelittle          mo\n  ⋮   │   ⋮       ⋮         ⋮          ⋮       ⋮               ⋮               ⋱\n  994 │ 986049    900  malemale          9  ownown    littlelittle          li ⋯\n  995 │ 988036   2500  malemale          4  ownown    NANA                  NA\n  996 │ 990025    961  femalefemale      1  ownown    littlelittle          NA\n  997 │ 992016   1600  malemale          9  ownown    littlelittle          li\n  998 │ 994009   1444  malemale          4  ownown    littlelittle          NA ⋯\n  999 │ 996004    529  malemale          4  freefree  littlelittle          li\n 1000 │ 998001    729  malemale          4  ownown    moderatemoderate      mo\n                                                  4 columns and 985 rows 
omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to look at first and last rows of a data frame then you can do this using the first and last functions respectively:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> first(german, 6)\n6×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     1     22  female       2  own      little           moderate\n   3 │     2     49  male         1  own      little           NA\n   4 │     3     45  male         2  free     little           little\n   5 │     4     53  male         2  free     little           little          ⋯\n   6 │     5     35  male         1  free     NA               NA\n                                                               4 columns omitted\n\njulia> last(german, 5)\n5×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │   995     31  female       1  own      little           NA              ⋯\n   2 │   996     40  male         3  own      little           little\n   3 │   997     38  male         2  own      little           NA\n   4 │   998     23  male         2  free     little           little\n   5 │   999     27  male         2  own      moderate         moderate        ⋯\n                                                               4 columns 
omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using first and last without passing the number of rows will return a first/last DataFrameRow in the data frame. DataFrameRow is a view into a single row of an AbstractDataFrame. It stores a reference to a parent DataFrame and information about which row and columns from the parent are selected. You can think of DataFrameRow as a NamedTuple that is mutable, i.e. allows to update the source data frame, which is often useful.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> first(german)\nDataFrameRow\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n                                                               4 columns omitted\n\njulia> last(german)\nDataFrameRow\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n 1000 │   999     27  male         2  own      moderate         moderate       ⋯\n                                                               4 columns omitted","category":"page"},{"location":"man/basics/#Getting-and-Setting-Data-in-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Getting and Setting Data in a Data Frame","text":"","category":"section"},{"location":"man/basics/#Indexing-Syntax","page":"First Steps with DataFrames.jl","title":"Indexing Syntax","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with 
DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Data frame can be indexed in a similar way to matrices. In the Indexing section of the manual you can find all details about all the available options. Here we highlight the basic ones.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The general syntax for indexing is data_frame[selected_rows, selected_columns]. Observe that, as opposed to matrices in Julia Base, it is required to always pass both row and column selector. The colon : indicates that all items (rows or columns depending on its position) should be retained. Here are a few examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[1:5, [:Sex, :Age]]\n5×2 DataFrame\n Row │ Sex      Age\n     │ String7  Int64\n─────┼────────────────\n   1 │ male        67\n   2 │ female      22\n   3 │ male        49\n   4 │ male        45\n   5 │ male        53\n\njulia> german[1:5, :]\n5×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     1     22  female       2  own      little           moderate\n   3 │     2     49  male         1  own      little           NA\n   4 │     3     45  male         2  free     little           little\n   5 │     4     53  male         2  free     little           little          ⋯\n                                                               4 columns omitted\n\njulia> german[[1, 6, 15], :]\n3×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15  
       String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     5     35  male         1  free     NA               NA\n   3 │    14     28  female       2  rent     little           little\n                                                               4 columns omitted\n\njulia> german[:, [:Age, :Sex]]\n1000×2 DataFrame\n  Row │ Age    Sex\n      │ Int64  String7\n──────┼────────────────\n    1 │    67  male\n    2 │    22  female\n    3 │    49  male\n    4 │    45  male\n    5 │    53  male\n    6 │    35  male\n    7 │    53  male\n    8 │    35  male\n  ⋮   │   ⋮       ⋮\n  994 │    30  male\n  995 │    50  male\n  996 │    31  female\n  997 │    40  male\n  998 │    38  male\n  999 │    23  male\n 1000 │    27  male\n       985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Pay attention that german[!, [:Sex]] and german[:, [:Sex]] returns a data frame object, while german[!, :Sex] and german[:, :Sex] returns a vector. In the first case, [:Sex] is a vector, indicating that the resulting object should be a data frame. On the other hand, :Sex is a single Symbol, indicating that a single column vector should be extracted. Note that in the first case a vector is required to be passed (not just any iterable), so e.g. german[:, (:Age, :Sex)] is not allowed, but german[:, [:Age, :Sex]] is valid. 
Below we show both operations to highlight this difference:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[!, [:Sex]]\n1000×1 DataFrame\n  Row │ Sex\n      │ String7\n──────┼─────────\n    1 │ male\n    2 │ female\n    3 │ male\n    4 │ male\n    5 │ male\n    6 │ male\n    7 │ male\n    8 │ male\n  ⋮   │    ⋮\n  994 │ male\n  995 │ male\n  996 │ female\n  997 │ male\n  998 │ male\n  999 │ male\n 1000 │ male\n985 rows omitted\n\njulia> german[!, :Sex]\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As it was explained earlier in this tutorial the difference between using ! and : when passing a row index is that ! does not perform a copy of columns, while : does when reading data from a data frame. Therefore german[!, [:Sex]] data frame stores the same vector as the source german data frame, while german[:, [:Sex]] stores its copy.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The ! selector normally should be avoided as using it can lead to hard to catch bugs. 
However, when working with very large data frames it can be useful to save memory and improve performance of operations.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Recapping what we have already learned, To get the column :Age from the german data frame you can do the following:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"to copy the vector: german[:, :Age], german[:, \"Age\"] or german[:, 2];\nto get a vector without copying: german.Age, german.\"Age\", german[!, :Age], german[!, \"Age\"] or german[!, 2].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To get the first two columns as a DataFrame, we can index as follows:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"to get the copied columns: german[:, 1:2], german[:, [:id, :Age]], or german[:, [\"id\", \"Age\"]];\nto reuse the columns without copying: german[!, 1:2], german[!, [:id, :Age]], or german[!, [\"id\", \"Age\"]].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to can get a single cell of a data frame use the same syntax as the one that gets a cell of a matrix:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[4, 4]\n2","category":"page"},{"location":"man/basics/#Views","page":"First Steps with DataFrames.jl","title":"Views","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We can also create a view of a data frame. 
It is often useful as it is more memory efficient than creating a materialized selection. You can create it using a view function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> view(german, :, 2:5)\n1000×4 SubDataFrame\n  Row │ Age    Sex      Job    Housing\n      │ Int64  String7  Int64  String7\n──────┼────────────────────────────────\n    1 │    67  male         2  own\n    2 │    22  female       2  own\n    3 │    49  male         1  own\n    4 │    45  male         2  free\n    5 │    53  male         2  free\n    6 │    35  male         1  free\n    7 │    53  male         2  own\n    8 │    35  male         3  rent\n  ⋮   │   ⋮       ⋮       ⋮       ⋮\n  994 │    30  male         3  own\n  995 │    50  male         2  own\n  996 │    31  female       1  own\n  997 │    40  male         3  own\n  998 │    38  male         2  own\n  999 │    23  male         2  free\n 1000 │    27  male         2  own\n                       985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or using a @view macro:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[end:-1:1, [1, 4]]\n1000×2 SubDataFrame\n  Row │ id     Job\n      │ Int64  Int64\n──────┼──────────────\n    1 │   999      2\n    2 │   998      2\n    3 │   997      2\n    4 │   996      3\n    5 │   995      1\n    6 │   994      2\n    7 │   993      3\n    8 │   992      1\n  ⋮   │   ⋮      ⋮\n  994 │     6      2\n  995 │     5      1\n  996 │     4      2\n  997 │     3      2\n  998 │     2      1\n  999 │     1      2\n 1000 │     0      2\n     985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similarly we 
can get a view of one column of a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[1:5, 1]\n5-element view(::Vector{Int64}, 1:5) with eltype Int64:\n 0\n 1\n 2\n 3\n 4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"its single cell:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[2, 2]\n0-dimensional view(::Vector{Int64}, 2) with eltype Int64:\n22","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or a single row:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[3, 2:5]\nDataFrameRow\n Row │ Age    Sex      Job    Housing\n     │ Int64  String7  Int64  String7\n─────┼────────────────────────────────\n   3 │    49  male         1  own","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the row and column indexing syntax is exactly the same as for indexing. 
The only difference is that we do not create a new object, but a view into an existing one.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In order to compare the performance of indexing vs creation of a view let us run the following benchmark using the BenchmarkTools.jl package (please install it if you want to re-run this comparison):","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using BenchmarkTools\n\njulia> @btime $german[1:end-1, 1:end-1];\n  9.900 μs (44 allocations: 57.56 KiB)\n\njulia> @btime @view $german[1:end-1, 1:end-1];\n  67.332 ns (2 allocations: 32 bytes)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see creation of a view is:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"an order of magnitude faster;\nallocates much less memory.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The downside of the view is that:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"it points to the same memory as its parent (so changing a view changes the parent, which is sometimes undesirable);\nsome operations might be a bit slower (as DataFrames.jl needs to perform a mapping of indices of a view to indices of the parent).","category":"page"},{"location":"man/basics/#Changing-the-Data-Stored-in-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Changing the Data Stored in a Data Frame","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with 
DataFrames.jl","text":"In order to show how to perform mutating operations on a data frame we make a subset of a german data frame first:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1 = german[1:6, 2:4]\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    67  male         2\n   2 │    22  female       2\n   3 │    49  male         1\n   4 │    45  male         2\n   5 │    53  male         2\n   6 │    35  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the following example we replace the column :Age in our df1 data frame with a new vector:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> val = [80, 85, 98, 95, 78, 89]\n6-element Vector{Int64}:\n 80\n 85\n 98\n 95\n 78\n 89\n\njulia> df1.Age = val\n6-element Vector{Int64}:\n 80\n 85\n 98\n 95\n 78\n 89\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    80  male         2\n   2 │    85  female       2\n   3 │    98  male         1\n   4 │    95  male         2\n   5 │    78  male         2\n   6 │    89  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This is a non-copying operation. 
One can perform it only if val vector has the same length as number of rows of df1 or as a special case if df1 would not have any columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1.Age === val # no copy is performed\ntrue","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If in indexing you select a subset of rows from a data frame the mutation is performed in place, i.e. writing to an existing vector. Below setting values of column :Job in rows 1:3 to values [2, 4, 6]:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[1:3, :Job] = [2, 3, 2]\n3-element Vector{Int64}:\n 2\n 3\n 2\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    80  male         2\n   2 │    85  female       3\n   3 │    98  male         2\n   4 │    95  male         2\n   5 │    78  male         2\n   6 │    89  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As a special rule using ! as row selector replaces column without copying (just like in the df1.Age = val example above). 
For example below we replace the :Sex column:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Sex] = [\"male\", \"female\", \"female\", \"transgender\", \"female\", \"male\"]\n6-element Vector{String}:\n \"male\"\n \"female\"\n \"female\"\n \"transgender\"\n \"female\"\n \"male\"\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    80  male             2\n   2 │    85  female           3\n   3 │    98  female           2\n   4 │    95  transgender      2\n   5 │    78  female           2\n   6 │    89  male             1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similarly to setting selected rows of a single column we can also set selected columns of a given row of a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[3, 1:3] = [78, \"male\", 4]\n3-element Vector{Any}:\n 78\n   \"male\"\n  4\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    80  male             2\n   2 │    85  female           3\n   3 │    78  male             4\n   4 │    95  transgender      2\n   5 │    78  female           2\n   6 │    89  male             1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We have already mentioned that DataFrameRow can be used to mutate its parent data frame. 
Here are a few examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> dfr = df1[2, :] # DataFrameRow with the second row and all columns of df1\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    85  female      3\n\njulia> dfr.Age = 98 # set value of col `:Age` in row `2` to `98` in-place\n98\n\njulia> dfr\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    98  female      3\n\njulia> dfr[2:3] = [\"male\", 2] # set values of entries in columns `:Sex` and `:Job`\n2-element Vector{Any}:\n  \"male\"\n 2\n\njulia> dfr\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    98  male        2","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"These operations updated the data stored in the df1 data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In a similar fashion views can be used to update data stored in their parent data frame. 
Here are some examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> sdf = view(df1, :, 2:3)\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ male             2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ male             1\n\njulia> sdf[2, :Sex] = \"female\" # set value of col `:Sex` in second row to `female` in-place\n\"female\"\n\njulia> sdf\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ female           2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ male             1\n\njulia> sdf[6, 1:2] = [\"female\", 3]\n2-element Vector{Any}:\n  \"female\"\n 3\n\njulia> sdf\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ female           2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ female           3","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In all these cases the parent of sdf view was also updated.","category":"page"},{"location":"man/basics/#Broadcasting-Assignment","page":"First Steps with DataFrames.jl","title":"Broadcasting Assignment","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Apart from normal assignment one can perform broadcasting assignment using the .= operation.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before we move forward let us explain how broadcasting works in Julia. 
The standard syntax to perform broadcasting is to use .. For example, as opposed to R this operation fails:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> s = [25, 26, 35, 56]\n4-element Vector{Int64}:\n 25\n 26\n 35\n 56\n\njulia> s[2:3] = 0\nERROR: ArgumentError: indexed assignment with a single value to possibly many locations is not supported; perhaps use broadcasting `.=` instead?","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Instead we have to write:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> s[2:3] .= 0\n2-element view(::Vector{Int64}, 2:3) with eltype Int64:\n 0\n 0\n\njulia> s\n4-element Vector{Int64}:\n 25\n  0\n  0\n 56","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similar syntax is fully supported in DataFrames.jl. Here, column :Age is replaced with a freshly allocated vector because of broadcasting assignment:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Age] .= [85, 89, 78, 58, 96, 68] # col `:Age` is replaced freshly allocated vector\n6-element Vector{Int64}:\n 85\n 89\n 78\n 58\n 96\n 68\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    85  male             2\n   2 │    89  female           2\n   3 │    78  male             4\n   4 │    58  transgender      2\n   5 │    96  female           2\n   6 │    68  female           3","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using the : instead of ! 
above would perform a broadcasting assignment in-place into an existing column. The major difference between in-place and replace operations is that replacing columns is needed if new values have a different type than the old ones.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the examples below we operate on columns :Customers and :City that are not present in df1. In this case using ! and : are equivalent and a new column is allocated:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Customers] .= [\"Rohit\", \"Akshat\", \"Rahul\", \"Aayush\", \"Prateek\", \"Anam\"]\n6-element Vector{String}:\n \"Rohit\"\n \"Akshat\"\n \"Rahul\"\n \"Aayush\"\n \"Prateek\"\n \"Anam\"\n\njulia> df1[:, :City] .= [\"Kanpur\", \"Lucknow\", \"Bhuvneshwar\", \"Jaipur\", \"Ranchi\", \"Dehradoon\"]\n6-element Vector{String}:\n \"Kanpur\"\n \"Lucknow\"\n \"Bhuvneshwar\"\n \"Jaipur\"\n \"Ranchi\"\n \"Dehradoon\"\n\njulia> df1\n6×5 DataFrame\n Row │ Age    Sex          Job    Customers  City\n     │ Int64  String       Int64  String     String\n─────┼───────────────────────────────────────────────────\n   1 │    85  male             2  Rohit      Kanpur\n   2 │    89  female           2  Akshat     Lucknow\n   3 │    78  male             4  Rahul      Bhuvneshwar\n   4 │    58  transgender      2  Aayush     Jaipur\n   5 │    96  female           2  Prateek    Ranchi\n   6 │    68  female           3  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"A most common broadcasting assignment operation is when a scalar is used on the right hand side, e.g:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[:, 
3] .= 4 # an in-place replacement of values stored in column number 3 by 4\n6-element view(::Vector{Int64}, :) with eltype Int64:\n 4\n 4\n 4\n 4\n 4\n 4\n\njulia> df1\n6×5 DataFrame\n Row │ Age    Sex          Job    Customers  City\n     │ Int64  String       Int64  String     String\n─────┼───────────────────────────────────────────────────\n   1 │    85  male             4  Rohit      Kanpur\n   2 │    89  female           4  Akshat     Lucknow\n   3 │    78  male             4  Rahul      Bhuvneshwar\n   4 │    58  transgender      4  Aayush     Jaipur\n   5 │    96  female           4  Prateek    Ranchi\n   6 │    68  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For : row selector the broadcasting assignment operation works in-place, so the following operation throws an error:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[:, :Age] .= \"Economics\"\nERROR: MethodError: Cannot `convert` an object of type String to an object of type Int64","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We need to use ! 
instead as it replaces the old vector with a freshly allocated one:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Age] .= \"Economics\"\n6-element Vector{String}:\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n\njulia> df1\n6×5 DataFrame\n Row │ Age        Sex          Job    Customers  City\n     │ String     String       Int64  String     String\n─────┼───────────────────────────────────────────────────────\n   1 │ Economics  male             4  Rohit      Kanpur\n   2 │ Economics  female           4  Akshat     Lucknow\n   3 │ Economics  male             4  Rahul      Bhuvneshwar\n   4 │ Economics  transgender      4  Aayush     Jaipur\n   5 │ Economics  female           4  Prateek    Ranchi\n   6 │ Economics  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"There are some scenarios in DataFrames.jl, when we naturally want a broadcasting-like behaviour, but do not allow for the use of . operation. In such cases a so-called pseudo-broadcasting is performed for user convenience. We have already seen it in examples of DataFrame constructor. Below we show pseudo-broadcasting at work in the insertcols! function, that inserts a column into a data frame in an arbitrary position.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the example below we are creating a column :Country with the insertcols! function. 
Since we pass a scalar \"India\" value of the column it is broadcasted to all rows in the output data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> insertcols!(df1, 1, :Country => \"India\")\n6×6 DataFrame\n Row │ Country  Age        Sex          Job    Customers  City\n     │ String   String     String       Int64  String     String\n─────┼────────────────────────────────────────────────────────────────\n   1 │ India    Economics  male             4  Rohit      Kanpur\n   2 │ India    Economics  female           4  Akshat     Lucknow\n   3 │ India    Economics  male             4  Rahul      Bhuvneshwar\n   4 │ India    Economics  transgender      4  Aayush     Jaipur\n   5 │ India    Economics  female           4  Prateek    Ranchi\n   6 │ India    Economics  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can pass a column location where you want to put the inserted column as a second argument to the insertcols! 
function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> insertcols!(df1, 4, :b => exp(4))\n6×7 DataFrame\n Row │ Country  Age        Sex          b        Job    Customers  City        ⋯\n     │ String   String     String       Float64  Int64  String     String      ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ India    Economics  male         54.5982      4  Rohit      Kanpur      ⋯\n   2 │ India    Economics  female       54.5982      4  Akshat     Lucknow\n   3 │ India    Economics  male         54.5982      4  Rahul      Bhuvneshwar\n   4 │ India    Economics  transgender  54.5982      4  Aayush     Jaipur\n   5 │ India    Economics  female       54.5982      4  Prateek    Ranchi      ⋯\n   6 │ India    Economics  female       54.5982      4  Anam       Dehradoon","category":"page"},{"location":"man/basics/#Not,-Between,-Cols,-and-All-Column-Selectors","page":"First Steps with DataFrames.jl","title":"Not, Between, Cols, and All Column Selectors","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can use Not, Between, Cols, and All selectors in more complex column selection scenarios:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Not selector (from the InvertedIndices.jl package) allows us to specify the columns we want to exclude from the resulting data frame. We can put any valid other column selector inside Not;\nBetween selector allows us to specify a range of columns (we can pass the start and stop column using any of the single column selector syntaxes);\nCols(...) 
selector picks a union of other selectors passed as its arguments;\nAll() allows us to select all columns of DataFrame; this is the same as passing :;\nregular expression to select columns whose names match it.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us give some examples of these selectors.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Drop :Age column:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Not(:Age)]\n1000×9 DataFrame\n  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯\n      │ Int64  String7  Int64  String7  String15         String15          Int ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0  male         2  own      NA               little                ⋯\n    2 │     1  female       2  own      little           moderate\n    3 │     2  male         1  own      little           NA\n    4 │     3  male         2  free     little           little\n    5 │     4  male         2  free     little           little                ⋯\n    6 │     5  male         1  free     NA               NA\n    7 │     6  male         2  own      quite rich       NA\n    8 │     7  male         3  rent     little           moderate\n  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱\n  994 │   993  male         3  own      little           little                ⋯\n  995 │   994  male         2  own      NA               NA\n  996 │   995  female       1  own      little           NA\n  997 │   996  male         3  own      little           little\n  998 │   997  male         2  own      little           NA                    ⋯\n  999 │   998  male         2  free     little  
         little\n 1000 │   999  male         2  own      moderate         moderate\n                                                  3 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Select columns starting from :Sex and ending at :Housing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Between(:Sex, :Housing)]\n1000×3 DataFrame\n  Row │ Sex     Job    Housing\n      │ String  Int64  String\n──────┼────────────────────────\n    1 │ male        2  own\n    2 │ female      2  own\n    3 │ male        1  own\n    4 │ male        2  free\n    5 │ male        2  free\n    6 │ male        1  free\n    7 │ male        2  own\n    8 │ male        3  rent\n  ⋮   │   ⋮       ⋮       ⋮\n  994 │ male        3  own\n  995 │ male        2  own\n  996 │ female      1  own\n  997 │ male        3  own\n  998 │ male        2  own\n  999 │ male        2  free\n 1000 │ male        2  own\n               985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the example below Cols selector is picking a union of \"Age\" and Between(\"Sex\", \"Job\") selectors passed as its arguments:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Cols(\"Age\", Between(\"Sex\", \"Job\"))]\n1000×3 DataFrame\n  Row │ Age    Sex      Job\n      │ Int64  String7  Int64\n──────┼───────────────────────\n    1 │    67  male         2\n    2 │    22  female       2\n    3 │    49  male         1\n    4 │    45  male         2\n    5 │    53  male         2\n    6 │    35  male         1\n    7 │    53  male         2\n    8 │    35  male         3\n  ⋮   │   ⋮       ⋮       ⋮\n  994 │    30  
male         3\n  995 │    50  male         2\n  996 │    31  female       1\n  997 │    40  male         3\n  998 │    38  male         2\n  999 │    23  male         2\n 1000 │    27  male         2\n              985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can also use Regex (regular expressions) to select columns. In the example below we select columns that have \"S\" in their name and also we use Not to drop row number 5:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[Not(5), r\"S\"]\n999×2 DataFrame\n Row │ Sex      Saving accounts\n     │ String7  String15\n─────┼──────────────────────────\n   1 │ male     NA\n   2 │ female   little\n   3 │ male     little\n   4 │ male     little\n   5 │ male     NA\n   6 │ male     quite rich\n   7 │ male     little\n   8 │ male     rich\n  ⋮  │    ⋮            ⋮\n 993 │ male     little\n 994 │ male     NA\n 995 │ female   little\n 996 │ male     little\n 997 │ male     little\n 998 │ male     little\n 999 │ male     moderate\n                984 rows omitted","category":"page"},{"location":"man/basics/#Basic-Usage-of-Transformation-Functions","page":"First Steps with DataFrames.jl","title":"Basic Usage of Transformation Functions","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In DataFrames.jl we have five functions that can be used to perform transformations of columns of a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"combine: creates a new data frame populated with columns that are results of transformation applied to the source data frame columns, potentially combining its rows;\nselect: creates a new data 
frame that has the same number of rows as the source data frame populated with columns that are results of transformations applied to the source data frame columns;\nselect!: the same as select but updates the passed data frame in place;\ntransform: the same as select but keeps the columns that were already present in the data frame (note though that these columns can be potentially modified by the transformation passed to transform);\ntransform!: the same as transform but updates the passed data frame in place.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The fundamental ways to specify a transformation are:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column => transformation => target_column_name; In this scenario the source_column is passed as an argument to transformation function and stored in target_column_name column.\nsource_column => transformation; In this scenario we apply the transformation function to source_column and the target column name is automatically generated.\nsource_column => target_column_name renames the source_column to target_column_name.\nsource_column just keeps the source column as is in the result without any transformation;","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"These rules are typically called transformation mini-language.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us move to the examples of application of these rules:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Statistics\n\njulia> combine(german, :Age => mean => :mean_age)\n1×1 DataFrame\n Row │ mean_age\n 
    │ Float64\n─────┼──────────\n   1 │   35.546\n\njulia> select(german, :Age => mean => :mean_age)\n1000×1 DataFrame\n  Row │ mean_age\n      │ Float64\n──────┼──────────\n    1 │   35.546\n    2 │   35.546\n    3 │   35.546\n    4 │   35.546\n    5 │   35.546\n    6 │   35.546\n    7 │   35.546\n    8 │   35.546\n  ⋮   │    ⋮\n  994 │   35.546\n  995 │   35.546\n  996 │   35.546\n  997 │   35.546\n  998 │   35.546\n  999 │   35.546\n 1000 │   35.546\n 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see in both cases the mean function was applied to :Age column and the result was stored in the :mean_age column. The difference between the combine and select functions is that the combine aggregates data and produces as many rows as were returned by the transformation function. On the other hand the select function always keeps the number of rows in a data frame to be the same as in the source data frame. Therefore in this case the result of the mean function got broadcasted.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As combine potentially allows any number of rows to be produced as a result of the transformation if we have a combination of transformations where some of them produce a vector, and other produce scalars then scalars get broadcasted exactly like in  select. 
Here is an example:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> combine(german, :Age => mean => :mean_age, :Housing => unique => :housing)\n3×2 DataFrame\n Row │ mean_age  housing\n     │ Float64   String7\n─────┼───────────────────\n   1 │   35.546  own\n   2 │   35.546  free\n   3 │   35.546  rent","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Note, however, that it is not allowed to return vectors of different lengths in different transformations:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> combine(german, :Age, :Housing => unique => :Housing)\nERROR: ArgumentError: New columns must have the same length as old columns","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us discuss some other examples using select. Often we want to apply some function not to the whole column of a data frame, but rather to its individual elements. 
Normally we can achieve this using broadcasting like this:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Sex => (x -> uppercase.(x)) => :Sex)\n1000×1 DataFrame\n  Row │ Sex\n      │ String\n──────┼────────\n    1 │ MALE\n    2 │ FEMALE\n    3 │ MALE\n    4 │ MALE\n    5 │ MALE\n    6 │ MALE\n    7 │ MALE\n    8 │ MALE\n  ⋮   │   ⋮\n  994 │ MALE\n  995 │ MALE\n  996 │ FEMALE\n  997 │ MALE\n  998 │ MALE\n  999 │ MALE\n 1000 │ MALE\n985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This pattern is encountered very often in practice, therefore there is a ByRow convenience wrapper for a function that creates its broadcasted variant. In these examples ByRow is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection. 
Here we are passing ByRow wrapper to target column name :Sex using uppercase function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Sex => ByRow(uppercase) => :SEX)\n1000×1 DataFrame\n  Row │ SEX\n      │ String\n──────┼────────\n    1 │ MALE\n    2 │ FEMALE\n    3 │ MALE\n    4 │ MALE\n    5 │ MALE\n    6 │ MALE\n    7 │ MALE\n    8 │ MALE\n  ⋮   │   ⋮\n  994 │ MALE\n  995 │ MALE\n  996 │ FEMALE\n  997 │ MALE\n  998 │ MALE\n  999 │ MALE\n 1000 │ MALE\n985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this case we transform our source column :Age using ByRow wrapper and automatically generate the target column name:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Age, :Age => ByRow(sqrt))\n1000×2 DataFrame\n  Row │ Age    Age_sqrt\n      │ Int64  Float64\n──────┼─────────────────\n    1 │    67   8.18535\n    2 │    22   4.69042\n    3 │    49   7.0\n    4 │    45   6.7082\n    5 │    53   7.28011\n    6 │    35   5.91608\n    7 │    53   7.28011\n    8 │    35   5.91608\n  ⋮   │   ⋮       ⋮\n  994 │    30   5.47723\n  995 │    50   7.07107\n  996 │    31   5.56776\n  997 │    40   6.32456\n  998 │    38   6.16441\n  999 │    23   4.79583\n 1000 │    27   5.19615\n        985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"When we pass just a column (without the => part) we can use any column selector that is allowed in indexing.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Here we exclude the column :Age from the resulting data 
frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, Not(:Age))\n1000×9 DataFrame\n  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯\n      │ Int64  String7  Int64  String7  String15         String15          Int ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0  male         2  own      NA               little                ⋯\n    2 │     1  female       2  own      little           moderate\n    3 │     2  male         1  own      little           NA\n    4 │     3  male         2  free     little           little\n    5 │     4  male         2  free     little           little                ⋯\n    6 │     5  male         1  free     NA               NA\n    7 │     6  male         2  own      quite rich       NA\n    8 │     7  male         3  rent     little           moderate\n  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱\n  994 │   993  male         3  own      little           little                ⋯\n  995 │   994  male         2  own      NA               NA\n  996 │   995  female       1  own      little           NA\n  997 │   996  male         3  own      little           little\n  998 │   997  male         2  own      little           NA                    ⋯\n  999 │   998  male         2  free     little           little\n 1000 │   999  male         2  own      moderate         moderate\n                                                  3 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the next example we drop columns \"Age\", \"Saving accounts\", \"Checking account\", \"Credit amount\", and \"Purpose\". 
Note that this time we use string column selectors because some of the column names have spaces in them:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, Not([\"Age\", \"Saving accounts\", \"Checking account\",\n                           \"Credit amount\", \"Purpose\"]))\n1000×5 DataFrame\n  Row │ id     Sex      Job    Housing  Duration\n      │ Int64  String7  Int64  String7  Int64\n──────┼──────────────────────────────────────────\n    1 │     0  male         2  own             6\n    2 │     1  female       2  own            48\n    3 │     2  male         1  own            12\n    4 │     3  male         2  free           42\n    5 │     4  male         2  free           24\n    6 │     5  male         1  free           36\n    7 │     6  male         2  own            24\n    8 │     7  male         3  rent           36\n  ⋮   │   ⋮       ⋮       ⋮       ⋮        ⋮\n  994 │   993  male         3  own            36\n  995 │   994  male         2  own            12\n  996 │   995  female       1  own            12\n  997 │   996  male         3  own            30\n  998 │   997  male         2  own            12\n  999 │   998  male         2  free           45\n 1000 │   999  male         2  own            45\n                                 985 rows omitted\n","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As another example let us present that the r\"S\" regular expression we used above also works with select:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, r\"S\")\n1000×2 DataFrame\n  Row │ Sex      Saving accounts\n      │ String7  String15\n──────┼──────────────────────────\n    1 │ male     NA\n    2 │ female   little\n    3 │ male     little\n    4 │ 
male     little\n    5 │ male     little\n    6 │ male     NA\n    7 │ male     quite rich\n    8 │ male     little\n  ⋮   │    ⋮            ⋮\n  994 │ male     little\n  995 │ male     NA\n  996 │ female   little\n  997 │ male     little\n  998 │ male     little\n  999 │ male     little\n 1000 │ male     moderate\n                 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The benefit of select or combine over indexing is that it is easier to get the union of several column selectors, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, r\"S\", \"Job\", 1)\n1000×4 DataFrame\n  Row │ Sex      Saving accounts  Job    id\n      │ String7  String15         Int64  Int64\n──────┼────────────────────────────────────────\n    1 │ male     NA                   2      0\n    2 │ female   little               2      1\n    3 │ male     little               1      2\n    4 │ male     little               2      3\n    5 │ male     little               2      4\n    6 │ male     NA                   1      5\n    7 │ male     quite rich           2      6\n    8 │ male     little               3      7\n  ⋮   │    ⋮            ⋮           ⋮      ⋮\n  994 │ male     little               3    993\n  995 │ male     NA                   2    994\n  996 │ female   little               1    995\n  997 │ male     little               3    996\n  998 │ male     little               2    997\n  999 │ male     little               2    998\n 1000 │ male     moderate             2    999\n                               985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Taking advantage of this flexibility here is an idiomatic pattern to move some column to the front of a 
data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, \"Sex\", :)\n1000×10 DataFrame\n  Row │ Sex      id     Age    Job    Housing  Saving accounts  Checking accou ⋯\n      │ String7  Int64  Int64  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │ male         0     67      2  own      NA               little         ⋯\n    2 │ female       1     22      2  own      little           moderate\n    3 │ male         2     49      1  own      little           NA\n    4 │ male         3     45      2  free     little           little\n    5 │ male         4     53      2  free     little           little         ⋯\n    6 │ male         5     35      1  free     NA               NA\n    7 │ male         6     53      2  own      quite rich       NA\n    8 │ male         7     35      3  rent     little           moderate\n  ⋮   │    ⋮       ⋮      ⋮      ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │ male       993     30      3  own      little           little         ⋯\n  995 │ male       994     50      2  own      NA               NA\n  996 │ female     995     31      1  own      little           NA\n  997 │ male       996     40      3  own      little           little\n  998 │ male       997     38      2  own      little           NA             ⋯\n  999 │ male       998     23      2  free     little           little\n 1000 │ male       999     27      2  own      moderate         moderate\n                                                  4 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Below, we are simply passing source column and target column name to rename them (without specifying the transformation 
part):","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Sex => :x1, :Age => :x2)\n1000×2 DataFrame\n  Row │ x1       x2\n      │ String7  Int64\n──────┼────────────────\n    1 │ male        67\n    2 │ female      22\n    3 │ male        49\n    4 │ male        45\n    5 │ male        53\n    6 │ male        35\n    7 │ male        53\n    8 │ male        35\n  ⋮   │    ⋮       ⋮\n  994 │ male        30\n  995 │ male        50\n  996 │ female      31\n  997 │ male        40\n  998 │ male        38\n  999 │ male        23\n 1000 │ male        27\n       985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"It is important to note that select always returns a data frame, even if a single column is selected, as opposed to indexing syntax. Compare the following:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Age)\n1000×1 DataFrame\n  Row │ Age\n      │ Int64\n──────┼───────\n    1 │    67\n    2 │    22\n    3 │    49\n    4 │    45\n    5 │    53\n    6 │    35\n    7 │    53\n    8 │    35\n  ⋮   │   ⋮\n  994 │    30\n  995 │    50\n  996 │    31\n  997 │    40\n  998 │    38\n  999 │    23\n 1000 │    27\n985 rows omitted\n\njulia> german[:, :Age]\n1000-element Vector{Int64}:\n 67\n 22\n 49\n 45\n 53\n 35\n 53\n 35\n 61\n 28\n  ⋮\n 34\n 23\n 30\n 50\n 31\n 40\n 38\n 23\n 27","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"By default select copies columns of a passed source data frame. 
In order to avoid copying, pass the copycols=false keyword argument:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = select(german, :Sex)\n1000×1 DataFrame\n  Row │ Sex\n      │ String7\n──────┼─────────\n    1 │ male\n    2 │ female\n    3 │ male\n    4 │ male\n    5 │ male\n    6 │ male\n    7 │ male\n    8 │ male\n  ⋮   │    ⋮\n  994 │ male\n  995 │ male\n  996 │ female\n  997 │ male\n  998 │ male\n  999 │ male\n 1000 │ male\n985 rows omitted\n\njulia> df.Sex === german.Sex # copy\nfalse\n\njulia> df = select(german, :Sex, copycols=false)\n1000×1 DataFrame\n  Row │ Sex\n      │ String7\n──────┼─────────\n    1 │ male\n    2 │ female\n    3 │ male\n    4 │ male\n    5 │ male\n    6 │ male\n    7 │ male\n    8 │ male\n  ⋮   │    ⋮\n  994 │ male\n  995 │ male\n  996 │ female\n  997 │ male\n  998 │ male\n  999 │ male\n 1000 │ male\n985 rows omitted\n\njulia> df.Sex === german.Sex # no-copy is performed\ntrue","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To perform the selection operation in-place use select!:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select!(german, Not(:Age));\n\njulia> german\n1000×9 DataFrame\n  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯\n      │ Int64  String7  Int64  String7  String15         String15          Int ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0  male         2  own      NA               little                ⋯\n    2 │     1  female       2  own      little           moderate\n    3 │     2  male         1  own      little           NA\n    4 │     3  male         2  free     little           little\n    5 │     4  male         2  free     little           
little                ⋯\n    6 │     5  male         1  free     NA               NA\n    7 │     6  male         2  own      quite rich       NA\n    8 │     7  male         3  rent     little           moderate\n  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱\n  994 │   993  male         3  own      little           little                ⋯\n  995 │   994  male         2  own      NA               NA\n  996 │   995  female       1  own      little           NA\n  997 │   996  male         3  own      little           little\n  998 │   997  male         2  own      little           NA                    ⋯\n  999 │   998  male         2  free     little           little\n 1000 │   999  male         2  own      moderate         moderate\n                                                  3 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the :Age column was dropped from the german data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The transform and transform! functions work identically to select and select! with the only difference that they retain all columns that are present in the source data frame. 
Here are some examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german = copy(german_ref);\n\njulia> df = german_ref[1:8, 1:5]\n8×5 DataFrame\n Row │ id     Age    Sex      Job    Housing\n     │ Int64  Int64  String7  Int64  String7\n─────┼───────────────────────────────────────\n   1 │     0     67  male         2  own\n   2 │     1     22  female       2  own\n   3 │     2     49  male         1  own\n   4 │     3     45  male         2  free\n   5 │     4     53  male         2  free\n   6 │     5     35  male         1  free\n   7 │     6     53  male         2  own\n   8 │     7     35  male         3  rent\n\njulia> transform(df, :Age => maximum)\n8×6 DataFrame\n Row │ id     Age    Sex      Job    Housing  Age_maximum\n     │ Int64  Int64  String7  Int64  String7  Int64\n─────┼────────────────────────────────────────────────────\n   1 │     0     67  male         2  own               67\n   2 │     1     22  female       2  own               67\n   3 │     2     49  male         1  own               67\n   4 │     3     45  male         2  free              67\n   5 │     4     53  male         2  free              67\n   6 │     5     35  male         1  free              67\n   7 │     6     53  male         2  own               67\n   8 │     7     35  male         3  rent              67","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the example below we are swapping values stored in columns :Sex and :Age:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(german, :Age => :Sex, :Sex => :Age)\n1000×10 DataFrame\n  Row │ id     Age      Sex    Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  String7  Int64  Int64  String7  String15         String15   
    ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0  male        67      2  own      NA               little         ⋯\n    2 │     1  female      22      2  own      little           moderate\n    3 │     2  male        49      1  own      little           NA\n    4 │     3  male        45      2  free     little           little\n    5 │     4  male        53      2  free     little           little         ⋯\n    6 │     5  male        35      1  free     NA               NA\n    7 │     6  male        53      2  own      quite rich       NA\n    8 │     7  male        35      3  rent     little           moderate\n  ⋮   │   ⋮       ⋮       ⋮      ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │   993  male        30      3  own      little           little         ⋯\n  995 │   994  male        50      2  own      NA               NA\n  996 │   995  female      31      1  own      little           NA\n  997 │   996  male        40      3  own      little           little\n  998 │   997  male        38      2  own      little           NA             ⋯\n  999 │   998  male        23      2  free     little           little\n 1000 │   999  male        27      2  own      moderate         moderate\n                                                  4 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If we give more than one source column to a transformation they are passed as consecutive positional arguments. 
So for example the [:Age, :Job] => (+) => :res transformation below evaluates +(german.Age, german.Job) (which adds two columns) and stores the result in the :res column:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(german, :Age, :Job, [:Age, :Job] => (+) => :res)\n1000×3 DataFrame\n  Row │ Age    Job    res\n      │ Int64  Int64  Int64\n──────┼─────────────────────\n    1 │    67      2     69\n    2 │    22      2     24\n    3 │    49      1     50\n    4 │    45      2     47\n    5 │    53      2     55\n    6 │    35      1     36\n    7 │    53      2     55\n    8 │    35      3     38\n  ⋮   │   ⋮      ⋮      ⋮\n  994 │    30      3     33\n  995 │    50      2     52\n  996 │    31      1     32\n  997 │    40      3     43\n  998 │    38      2     40\n  999 │    23      2     25\n 1000 │    27      2     29\n            985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the examples given in this introductory tutorial we did not cover all options of the transformation mini-language. 
More advanced examples, in particular showing how to pass or produce multiple columns using the AsTable operation (which you might have seen in some DataFrames.jl demos) are given in the later sections of the manual.","category":"page"},{"location":"man/importing_and_exporting/#Importing-and-Exporting-Data-(I/O)","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"","category":"section"},{"location":"man/importing_and_exporting/#CSV-Files","page":"Importing and Exporting Data (I/O)","title":"CSV Files","text":"","category":"section"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"For reading and writing tabular data from CSV and other delimited text files, use the CSV.jl package.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"If you have not used the CSV.jl package before then you may need to install it first:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"using Pkg\nPkg.add(\"CSV\")","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"The CSV.jl functions are not loaded automatically and must be imported into the session.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"using CSV","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"A dataset can now be read from a CSV file at path input using","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting 
Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"DataFrame(CSV.File(input))","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"A DataFrame can be written to a CSV file at path output using","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"df = DataFrame(x=1, y=2)\nCSV.write(output, df)","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"The behavior of CSV functions can be adapted via keyword arguments. For more information, see ?CSV.File, ?CSV.read and ?CSV.write, or check out the online CSV.jl documentation.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"In simple cases, when compilation latency of CSV.jl might be an issue, using the DelimitedFiles module from the Julia standard library can be considered. 
Here is an example showing how to read in the data and perform its post-processing:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"julia> using DelimitedFiles, DataFrames\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> data, header = readdlm(path, ',', header=true);\n\njulia> iris_raw = DataFrame(data, vec(header))\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Any          Any         Any          Any         Any\n─────┼──────────────────────────────────────────────────────────────────\n   1 │ 5.1          3.5         1.4          0.2         Iris-setosa\n   2 │ 4.9          3.0         1.4          0.2         Iris-setosa\n   3 │ 4.7          3.2         1.3          0.2         Iris-setosa\n   4 │ 4.6          3.1         1.5          0.2         Iris-setosa\n   5 │ 5.0          3.6         1.4          0.2         Iris-setosa\n   6 │ 5.4          3.9         1.7          0.4         Iris-setosa\n   7 │ 4.6          3.4         1.4          0.3         Iris-setosa\n   8 │ 5.0          3.4         1.5          0.2         Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │ 6.8          3.2         5.9          2.3         Iris-virginica\n 145 │ 6.7          3.3         5.7          2.5         Iris-virginica\n 146 │ 6.7          3.0         5.2          2.3         Iris-virginica\n 147 │ 6.3          2.5         5.0          1.9         Iris-virginica\n 148 │ 6.5          3.0         5.2          2.0         Iris-virginica\n 149 │ 6.2          3.4         5.4          2.3         Iris-virginica\n 150 │ 5.9          3.0         5.1          1.8         Iris-virginica\n                                                        135 rows omitted\n\njulia> iris = identity.(iris_raw)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth 
 PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     SubStrin…\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Observe that in our example:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"header is a Matrix therefore we had to pass vec(header) to the DataFrame constructor;\nwe broadcasted the identity function over the iris_raw data frame to perform narrowing of eltype of columns of iris_raw; the reason is that read in by the readdlm function is 
stored into a data Matrix so all columns in iris_raw initially have the same eltype – in this case it had to be Any as some of the columns are numeric and some are string.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"All such operations (and many more) are automatically handled by CSV.jl.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Similarly, you can use the writedlm function from the DelimitedFiles module to save a data frame like this:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"writedlm(\"test.csv\", Iterators.flatten(([names(iris)], eachrow(iris))), ',')","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"As you can see the code required to transform iris into a proper input to the writedlm function so that you can create the CSV file having the expected format is not easy. 
Therefore CSV.jl is the preferred package to write CSV files for data stored in data frames.","category":"page"},{"location":"man/importing_and_exporting/#Other-formats","page":"Importing and Exporting Data (I/O)","title":"Other formats","text":"","category":"section"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Other data formats are supported for reading and writing in the following packages (non-exhaustive list):","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Apache Arrow (including Feather v2): Arrow.jl\nApache Feather (v1): Feather.jl\nApache Avro: Avro.jl\nJSON: JSONTables.jl\nParquet: Parquet2.jl\nStata, SAS and SPSS: ReadStatTables.jl (alternatively Queryverse users can choose StatFiles.jl)\nreading R data files (.rda, .RData): RData.jl\nMicrosoft Excel (XLSX): XLSX.jl\nCopying/pasting to clipboard, for sending data to and from spreadsheets: ClipData.jl","category":"page"},{"location":"man/querying_frameworks/#Data-manipulation-frameworks","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Four frameworks provide convenience methods to manipulate DataFrames: TidierData.jl, DataFramesMeta.jl, DataFrameMacros.jl and Query.jl. 
They implement a functionality similar to dplyr or LINQ.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"These frameworks are designed both to make it easier for new users to start working with data frames in Julia and to allow advanced users to write more compact code.","category":"page"},{"location":"man/querying_frameworks/#TidierData.jl","page":"Data manipulation frameworks","title":"TidierData.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl, part of  the Tidier ecosystem, is a macro-based  data analysis interface that wraps DataFrames.jl.  The instructions below are for version  0.16.0 of TidierData.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First, install the TidierData.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"TidierData\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl enables clean, readable, and fast code for all major data transformation  functions including  aggregating,  pivoting,  nesting,  and joining  data frames. TidierData re-exports DataFrame from DataFrames.jl, @chain from Chain.jl, and  Statistics.jl to streamline data operations. ","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl is heavily inspired by the dplyr and tidyr R packages (part of the R  tidyverse), which it aims to implement using pure Julia by wrapping DataFrames.jl. 
While TidierData.jl borrows conventions from the tidyverse, it is important to note that the tidyverse itself is often not considered idiomatic R code. TidierData.jl brings data analysis conventions from tidyverse into Julia to have the best of both worlds: tidy syntax and the speed and flexibility of the Julia language.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl has two major differences from other macro-based packages. First, TidierData.jl uses tidy expressions. An example of a tidy expression is a = mean(b), where b refers to an existing column in the data frame, and a refers to either a new or existing column. Referring to variables outside of the data frame requires prefixing variables with !!. For example, a = mean(!!b) refers to a variable b outside the data frame. Second, TidierData.jl aims to make broadcasting mostly invisible through auto-vectorization. TidierData.jl currently uses a lookup table to decide which functions not to vectorize; all other functions are automatically vectorized. This allows for writing of concise expressions: @mutate(df, a = a - mean(a)) transforms the a column by subtracting the mean of the column from each value. Behind the scenes, the right-hand expression is converted to a .- mean(a) because mean() is in the lookup table as a function that should not be vectorized. 
Take a look at the auto-vectorization documentation for details.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"One major benefit of combining tidy expressions with auto-vectorization is that TidierData.jl code (which uses DataFrames.jl as its backend) can work directly on databases using TidierDB.jl, which converts tidy expressions into SQL, supporting DuckDB and several other backends.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using TidierData\n\njulia> df = DataFrame(\n                name = [\"John\", \"Sally\", \"Roger\"],\n                age = [54.0, 34.0, 79.0],\n                children = [0, 2, 4]\n            )\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @filter(children != 2)\n           @select(name, num_children = children)\n       end\n2×2 DataFrame\n Row │ name    num_children \n     │ String  Int64        \n─────┼──────────────────────\n   1 │ John               0\n   2 │ Roger              4","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Below are examples showcasing @group_by with @summarize or @mutate - analogous to the split, apply, combine pattern.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(\n                groups = repeat('a':'e', inner = 2), \n                b_col = 1:10, \n                c_col = 11:20, \n                d_col = 111:120\n            )\n10×4 DataFrame\n Row │ groups  b_col  c_col  d_col \n     
│ Char    Int64  Int64  Int64 \n─────┼─────────────────────────────\n   1 │ a           1     11    111\n   2 │ a           2     12    112\n   3 │ b           3     13    113\n   4 │ b           4     14    114\n   5 │ c           5     15    115\n   6 │ c           6     16    116\n   7 │ d           7     17    117\n   8 │ d           8     18    118\n   9 │ e           9     19    119\n  10 │ e          10     20    120\n\njulia> @chain df begin\n           @filter(b_col > 2)\n           @group_by(groups)\n           @summarise(median_b = median(b_col), \n                      across((b_col:d_col), mean))   \n       end\n4×5 DataFrame\n Row │ groups  median_b  b_col_mean  c_col_mean  d_col_mean \n     │ Char    Float64   Float64     Float64     Float64    \n─────┼──────────────────────────────────────────────────────\n   1 │ b            3.5         3.5        13.5       113.5\n   2 │ c            5.5         5.5        15.5       115.5\n   3 │ d            7.5         7.5        17.5       117.5\n   4 │ e            9.5         9.5        19.5       119.5\n\njulia> @chain df begin\n           @filter(b_col > 4 && c_col <= 18)\n           @group_by(groups)\n           @mutate(\n               new_col = b_col + maximum(d_col),\n               new_col2 = c_col - maximum(d_col),\n               new_col3 = case_when(c_col >= 18  => \"high\",\n                                    c_col > 15   => \"medium\",\n                                    true         => \"low\"))\n           @select(starts_with(\"new\"))\n           @ungroup # required because `@mutate` does not ungroup\n       end\n4×4 DataFrame\n Row │ groups  new_col  new_col2  new_col3 \n     │ Char    Int64    Int64     String   \n─────┼─────────────────────────────────────\n   1 │ c           121      -101  low\n   2 │ c           122      -100  medium\n   3 │ d           125      -101  medium\n   4 │ d           126      -100  high","category":"page"},{"location":"man/querying_frameworks/","page":"Data 
manipulation frameworks","title":"Data manipulation frameworks","text":"For more examples, please visit the TidierData.jl documentation.","category":"page"},{"location":"man/querying_frameworks/#DataFramesMeta.jl","page":"Data manipulation frameworks","title":"DataFramesMeta.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The DataFramesMeta.jl package provides a convenient yet fast macro-based interface to work with DataFrames. The instructions below are for version 0.10.0 of DataFramesMeta.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First install the DataFramesMeta.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"DataFramesMeta\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The major benefit of the package is it provides a more convenient syntax for the transformation functions transform, select, and combine  via the macros @transform, @select, @combine, and more.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"DataFramesMeta.jl also reexports the @chain macro from  Chain.jl, allowing users to pipe the output of one transformation as an input to another, as with  |> and %>% in R. 
","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Below we present several selected examples of usage of the package.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First we subset rows of the source data frame using a logical condition and select two of its columns, renaming one of them:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFramesMeta\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @rsubset :age > 40 \n           @select(:number_of_children = :children, :name)\n       end\n2×2 DataFrame\n Row │ number_of_children  name\n     │ Int64               String\n─────┼────────────────────────────\n   1 │                  0  John\n   2 │                  4  Roger","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In the following examples we show that DataFramesMeta.jl also supports the split-apply-combine pattern:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(key=repeat(1:3, 4), value=1:12)\n12×2 DataFrame\n Row │ key    value\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     1      4\n   5 │     2      5\n   6 │     3      6\n   7 
│     1      7\n   8 │     2      8\n   9 │     3      9\n  10 │     1     10\n  11 │     2     11\n  12 │     3     12\n\njulia> @chain df begin\n           @rsubset :value > 3 \n           @by(:key, :min = minimum(:value), :max = maximum(:value))\n           @select(:key, :range = :max - :min)\n        end\n3×2 DataFrame\n Row │ key    range\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      6\n   2 │     2      6\n   3 │     3      6\n\njulia> @chain df begin\n           groupby(:key)\n           @transform :value0 = :value .- minimum(:value)\n       end\n12×3 DataFrame\n Row │ key    value  value0\n     │ Int64  Int64  Int64\n─────┼──────────────────────\n   1 │     1      1       0\n   2 │     2      2       0\n   3 │     3      3       0\n   4 │     1      4       3\n   5 │     2      5       3\n   6 │     3      6       3\n   7 │     1      7       6\n   8 │     2      8       6\n   9 │     3      9       6\n  10 │     1     10       9\n  11 │     2     11       9\n  12 │     3     12       9","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"You can find more details about how this package can be used on the DataFramesMeta.jl GitHub page.","category":"page"},{"location":"man/querying_frameworks/#DataFrameMacros.jl","page":"Data manipulation frameworks","title":"DataFrameMacros.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"DataFrameMacros.jl is an alternative to DataFramesMeta.jl with an additional focus on convenient solutions for the transformation of multiple columns at once. 
The instructions below are for version 0.3 of DataFrameMacros.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First, install the DataFrameMacros.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"DataFrameMacros\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In DataFrameMacros.jl, all but the @combine macro are row-wise by default. There is also a @groupby which allows creating grouping columns on the fly using the same syntax as @transform, for grouping by new columns without writing them out twice.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In the example below, you can also see some of DataFrameMacros.jl's multi-column features, where mean is applied to both age columns at once by selecting them with the r\"age\" regex. 
The new column names are then derived using the \"{}\" shortcut which splices the transformed column names into a string.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFrames, DataFrameMacros, Chain, Statistics\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children \n     │ String  Float64  Int64    \n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @transform :age_months = :age * 12\n           @groupby :has_child = :children > 0\n           @combine \"mean_{}\" = mean({r\"age\"})\n       end\n2×3 DataFrame\n Row │ has_child  mean_age  mean_age_months \n     │ Bool       Float64   Float64         \n─────┼──────────────────────────────────────\n   1 │     false      54.0            648.0\n   2 │      true      56.5            678.0","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"There's also the capability to reference a group of multiple columns as a single unit, for example to run aggregations over them, with the {{ }} syntax. 
In the following example, the first quarter is compared to the maximum of the other three:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(q1 = [12.0, 0.4, 42.7],\n                      q2 = [6.4, 2.3, 40.9],\n                      q3 = [9.5, 0.2, 13.6],\n                      q4 = [6.3, 5.4, 39.3])\n3×4 DataFrame\n Row │ q1       q2       q3       q4      \n     │ Float64  Float64  Float64  Float64 \n─────┼────────────────────────────────────\n   1 │    12.0      6.4      9.5      6.3\n   2 │     0.4      2.3      0.2      5.4\n   3 │    42.7     40.9     13.6     39.3\n\njulia> @transform df :q1_best = :q1 > maximum({{Not(:q1)}})\n3×5 DataFrame\n Row │ q1       q2       q3       q4       q1_best \n     │ Float64  Float64  Float64  Float64  Bool    \n─────┼─────────────────────────────────────────────\n   1 │    12.0      6.4      9.5      6.3     true\n   2 │     0.4      2.3      0.2      5.4    false\n   3 │    42.7     40.9     13.6     39.3     true","category":"page"},{"location":"man/querying_frameworks/#Query.jl","page":"Data manipulation frameworks","title":"Query.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The Query.jl package provides advanced data manipulation capabilities for DataFrames (and many other data structures). This section provides a short introduction to the package, the Query.jl documentation has a more comprehensive documentation of the package. 
The instructions here are for version 1.0.0 of Query.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"To get started, install the Query.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"Query\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query is started with the @from macro and consists of a series of query commands. Query.jl provides commands that can filter, project, join, flatten and group data from a DataFrame. A query can return an iterator, or one can materialize the results of a query into a variety of data structures, including a new DataFrame.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A simple example of a query looks like this:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFrames, Query\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> q1 = @from i in df begin\n            @where i.age > 40\n            @select {number_of_children=i.children, i.name}\n            @collect DataFrame\n       end\n2×2 DataFrame\n Row │ number_of_children  name\n     │ Int64               String\n─────┼────────────────────────────\n   1 │                  0  John\n   2 │                  4  
Roger","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The query starts with the @from macro. The first argument i is the name of the range variable that will be used to refer to an individual row in later query commands. The next argument df is the data source that one wants to query. The @where command in this query will filter the source data by applying the filter condition i.age > 40. This filters out any rows in which the age column is not larger than 40. The @select command then projects the columns of the source data onto a new column structure. The example here applies three specific modifications: 1) it only keeps a subset of the columns in the source DataFrame, i.e. the age column will not be part of the transformed data; 2) it changes the order of the two columns that are selected; and 3) it renames one of the columns that is selected from children to number_of_children. The example query uses the {} syntax to achieve this. A {} in a Query.jl expression instantiates a new NamedTuple, i.e. it is a shortcut for writing @NT(number_of_children=>i.children, name=>i.name). The @collect statement determines the data structure that the query returns. In this example the results are returned as a DataFrame.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query without a @collect statement returns a standard julia iterator that can be used with any normal julia language construct that can deal with iterators. 
The following code returns a julia iterator for the query results:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> q2 = @from i in df begin\n                   @where i.age > 40\n                   @select {number_of_children=i.children, i.name}\n              end; # suppress printing the iterator type\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"One can loop over the results using a standard julia for statement:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> total_children = 0\n0\n\njulia> for i in q2\n           global total_children += i.number_of_children\n       end\n\njulia> total_children\n4\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Or one can use a comprehension to extract the name of a subset of rows:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> y = [i.name for i in q2 if i.number_of_children > 0]\n1-element Vector{String}:\n \"Roger\"\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The last example (extracting only the name and applying a second filter) could of course be completely expressed as a query expression:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> q3 = @from i in df begin\n            @where i.age > 40 && i.children > 0\n            @select i.name\n            @collect\n       end\n1-element Vector{String}:\n 
\"Roger\"\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query that ends with a @collect statement without a specific type will materialize the query results into an array. Note also the difference in the @select statement: The previous queries all used the {} syntax in the @select statement to project results into a tabular format. The last query instead just selects a single value from each row in the @select statement.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"These examples only scratch the surface of what one can do with Query.jl, and the interested reader is referred to the Query.jl documentation for more information.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/indexing/#Indexing","page":"Indexing","title":"Indexing","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Pages = [\"indexing.md\"]","category":"page"},{"location":"lib/indexing/#General-rules","page":"Indexing","title":"General rules","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following rules explain target functionality of how getindex, setindex!, view, and broadcasting are intended to work with DataFrame, SubDataFrame and DataFrameRow objects.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following values are a valid column index:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"a scalar, later denoted as col:\na Symbol;\nan AbstractString;\nan Integer that is not Bool;\na vector, later denoted as cols:\na vector of Symbol (does not have to be a subtype of 
AbstractVector{Symbol});\na vector of AbstractString (does not have to be a subtype of AbstractVector{<:AbstractString});\na vector of Integer that are not Bool (does not have to be a subtype of AbstractVector{<:Integer});\na vector of Bool (must be a subtype of AbstractVector{Bool});\na regular expression (will be expanded to a vector of matching column names);\na Not expression (see InvertedIndices.jl); Not(idx) selects all indices not in the passed idx; when passed as column selector Not(idx...) is equivalent to Not(Cols(idx...)).\na Cols expression (see DataAPI.jl); Cols(idxs...) selects the union of the selections in idxs; in particular Cols() selects no columns and Cols(:) selects all columns; a special rule is Cols(predicate), where predicate is a predicate function; in this case the columns whose names passed to predicate as strings return true are selected.\na Between expression (see DataAPI.jl); Between(first, last) selects the columns between first and last inclusively;\nan All expression (see DataAPI.jl); All() selects all columns, equivalent to :;\na literal colon : (selects all columns).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following values are a valid row index:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"a scalar, later denoted as row:\nan Integer that is not Bool;\na vector, later denoted as rows:\na vector of Integer that are not Bool (does not have to be a subtype of AbstractVector{<:Integer});\na vector of Bool (must be a subtype of AbstractVector{Bool});\na Not expression (see InvertedIndices.jl);\na literal colon : (selects all rows with copying);\na literal exclamation mark ! 
(selects all rows without copying).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additionally it is allowed to index into an AbstractDataFrame using a two-dimensional CartesianIndex.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In the descriptions below df represents a DataFrame, sdf is a SubDataFrame and dfr is a DataFrameRow.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":": always expands to axes(df, 1) or axes(sdf, 1).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df.col works like df[!, col] and sdf.col works like sdf[!, col] in all cases. An exception is that under Julia 1.6 or earlier df.col .= v and sdf.col .= v performs in-place broadcasting if col is present in df/sdf and is a valid identifier (this inconsistency is not present under Julia 1.7 and later).","category":"page"},{"location":"lib/indexing/#getindex-and-view","page":"Indexing","title":"getindex and view","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following list specifies the behavior of getindex and view operations depending on argument types.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In particular a description explicitly mentions that the data is copied or reused without copying.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"For performance reasons, accessing, via getindex or view, a single row and multiple cols of a DataFrame, a SubDataFrame or a DataFrameRow always returns a DataFrameRow (which is a view type).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df[row, col] -> the value 
contained in row row of column col, the same as df[!, col][row];\ndf[CartesianIndex(row, col)] -> the same as df[row, col];\ndf[row, cols] -> a DataFrameRow with parent df;\ndf[rows, col] -> a copy of the vector df[!, col] with only the entries                    corresponding to rows selected, the same as df[!, col][rows];\ndf[rows, cols] -> a DataFrame containing copies of columns cols with                     only the entries corresponding to rows selected;\ndf[!, col] -> the vector contained in column col returned without copying;                 the same as df.col if col is a valid identifier.\ndf[!, cols] -> create a new DataFrame with columns cols without copying                  of columns; the same as select(df, cols, copycols=false).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view df[row, col] -> a 0-dimensional view into df[!, col] in row row,                         the same as view(df[!, col], row);\n@view df[CartesianIndex(row, col)] -> the same as @view df[row, col];\n@view df[row, cols] -> the same as df[row, cols];\n@view df[rows, col] -> a view into df[!, col] with rows selected, the                          same as view(df[!, col], rows);\n@view df[rows, cols] -> a SubDataFrame with rows selected with parent df;\n@view df[!, col] -> a view into df[!, col]  with all rows.\n@view df[!, cols] -> the same as @view df[:, cols].","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"sdf[row, col] -> a value contained in row row of column col;\nsdf[CartesianIndex(row, col)] -> the same as sdf[row, col];\nsdf[row, cols] -> a DataFrameRow with parent parent(sdf);\nsdf[rows, col] -> a copy of sdf[!, col] with only rows rows selected,          
           the same as sdf[!, col][rows];\nsdf[rows, cols] -> a DataFrame containing columns cols and sdf[rows, col] as a vector for each col in cols;\nsdf[!, col] -> a view of entries corresponding to sdf in the vector                  parent(sdf)[!, col]; the same as sdf.col if col is a                  valid identifier.\nsdf[!, cols] -> create a new SubDataFrame with columns cols, the same                   parent as sdf, and the same rows selected; the same as                   select(sdf, cols, copycols=false).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view sdf[row, col] -> a 0-dimensional view into df[!, col] at row                          row, the same as view(sdf[!, col], row);\n@view sdf[CartesianIndex(row, col)] -> the same as @view sdf[row, col];\n@view sdf[row, cols] -> a DataFrameRow with parent parent(sdf);\n@view sdf[rows, col] -> a view into sdf[!, col] vector with rows                           selected, the same as view(sdf[!, col], rows);\n@view sdf[rows, cols] -> a SubDataFrame with parent parent(sdf);\n@view sdf[!, col] -> a view into sdf[!, col] vector with all rows.\n@view sdf[!, cols] -> the same as @view sdf[:, cols].","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"dfr[col] -> the value contained in column col of dfr; the same as               dfr.col if col is a valid identifier;\ndfr[cols] -> a DataFrameRow with parent parent(dfr);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view dfr[col] -> a 0-dimensional view into                     
parent(dfr)[DataFrames.row(dfr), col];\n@view dfr[cols] -> a DataFrameRow with parent parent(dfr);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that views created with columns selector set to : change their columns' count if columns are added/removed/renamed in the parent; if column selector is other than : then view points to selected columns by their number at the moment of creation of the view.","category":"page"},{"location":"lib/indexing/#setindex!","page":"Indexing","title":"setindex!","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following list specifies the behavior of setindex! operations depending on argument types.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In particular a description explicitly mentions if the assignment is in-place.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that if a setindex! operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! 
on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df[row, col] = v -> set value of col in row row to v in-place;\ndf[CartesianIndex(row, col)] = v -> the same as df[row, col] = v;\ndf[row, cols] = v -> set row row of columns cols in-place; the same as                        dfr = df[row, cols]; dfr[:] = v;\ndf[rows, col] = v -> set rows rows of column col in-place; v must be                        an AbstractVector; if rows is : and col is a                        Symbol or AbstractString that is not present in                        df then a new column in df is created and holds a                        copy of v; equivalent to df.col = copy(v) if                        col is a valid identifier;\ndf[rows, cols] = v -> set rows rows of columns cols in-place; v must                         be an AbstractMatrix or an AbstractDataFrame (in                         this case column names must match);\ndf[!, col] = v -> replaces col with v without copying (with the                     exception that if v is an AbstractRange it gets                     converted to a Vector); also if col is a Symbol or                     AbstractString that is not present in df then a new                     column in df is created and holds v; equivalent to                     df.col = v if col is a valid identifier; this is                     allowed if ncol(df) == 0 || length(v) == nrow(df);\ndf[!, cols] = v -> replaces existing columns cols in data frame df with                      copying; v must be an AbstractMatrix or an                      AbstractDataFrame (in the latter case column names must                      match);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! 
on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"sdf[row, col] = v -> set value of col in row row to v in-place;\nsdf[CartesianIndex(row, col)] = v -> the same as sdf[row, col] = v;\nsdf[row, cols] = v -> the same as dfr = df[row, cols]; dfr[:] = v in-place;\nsdf[rows, col] = v -> set rows rows of column col, in-place; v must be                         an abstract vector;\nsdf[rows, cols] = v -> set rows rows of columns cols in-place; v can                          be an AbstractMatrix or v can be                          AbstractDataFrame in which case column names must                          match;\nsdf[!, col] = v -> replaces col with v with copying; if col is present                      in sdf then filtered-out rows in newly created vector                      are filled with values already present in that column and                      promote_type is used to determine the eltype of the                      new column; if col is not present in sdf then the                      operation is only allowed if sdf was created with :                      as column selector, in which case filtered-out rows are                      filled with missing; equivalent to sdf.col = v if                      col is a valid identifier; operation is allowed if                      length(v) == nrow(sdf);\nsdf[!, cols] = v -> replaces existing columns cols in data frame sdf                       with copying; v must be an AbstractMatrix or an                       AbstractDataFrame (in the latter case column names                       must match); filtered-out rows in newly created vectors                       are filled with values already present in respective                       columns and promote_type is used to determine the                       eltype of the new columns;","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"note: Note\nThe rules above mean that 
sdf[:, col] = v is an in-place operation if col is present in sdf, therefore it will be fast in general. On the other hand using sdf[!, col] = v or sdf.col = v will always allocate a new vector, which is more expensive computationally.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"dfr[col] = v -> set value of col in row row to v in-place;                   equivalent to dfr.col = v if col is a valid identifier;\ndfr[cols] = v -> set values of entries in columns cols in dfr by                    elements of v in place; v can be: 1) a Tuple or an                    AbstractArray, in which cases it must have a number of                    elements equal to length(dfr), 2) an AbstractDict, in                    which case column names must match, 3) a NamedTuple or                    DataFrameRow, in which case column names and order must                    match;","category":"page"},{"location":"lib/indexing/#Broadcasting","page":"Indexing","title":"Broadcasting","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following broadcasting rules apply to AbstractDataFrame objects:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"AbstractDataFrame behaves in broadcasting like a two-dimensional collection compatible with matrices.\nIf an AbstractDataFrame takes part in broadcasting then a DataFrame is always produced as a result. In this case the requested broadcasting operation produces an object with exactly two dimensions. 
An exception is when an AbstractDataFrame is used only as a source of broadcast assignment into an object of dimensionality higher than two.\nIf multiple AbstractDataFrame objects take part in broadcasting then they have to have identical column names.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that if broadcasting assignment operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Broadcasting DataFrameRow is currently not allowed (which is consistent with NamedTuple).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"It is possible to assign a value to AbstractDataFrame and DataFrameRow objects using the .= operator. In such an operation AbstractDataFrame is considered as two-dimensional and DataFrameRow as single-dimensional.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"note: Note\nThe rule above means that, similar to single-dimensional objects in Base (e.g. 
vectors), DataFrameRow is considered to be column-oriented.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additional rules:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"in the df[CartesianIndex(row, col)] .= v, df[row, col] .= v syntaxes v is broadcasted into the contents of df[row, col] (this is consistent with Julia Base);\nin the df[row, cols] .= v syntaxes the assignment to df is performed in-place;\nin the df[rows, col] .= v and df[rows, cols] .= v syntaxes the assignment to df is performed in-place; if rows is : and col is Symbol or AbstractString and it is missing from df then a new column is allocated and added; the length of the column is always the value of nrow(df) before the assignment takes place;\nin the df[!, col] .= v syntax column col is replaced by a freshly allocated vector; if col is Symbol or AbstractString and it is missing from df then a new column is allocated and added; the length of the column is always the value of nrow(df) before the assignment takes place;\nthe df[!, cols] .= v syntax replaces existing columns cols in data frame df with freshly allocated vectors;\ndf.col .= v syntax currently performs in-place assignment to an existing vector df.col; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if :col is not present in df then a new column will be created in df.\nin the sdf[CartesianIndex(row, col)] .= v, sdf[row, col] .= v and sdf[row, cols] .= v syntaxes the assignment to sdf is performed in-place;\nin the sdf[rows, col] .= v and sdf[rows, cols] .= v syntaxes the assignment to sdf is performed in-place; if rows is : and col is a Symbol or AbstractString referring to a column missing from sdf and sdf was created with : as column selector then a new column is allocated and added; the filtered-out rows are filled with missing;\nin the sdf[!, col] .= v syntax column col is replaced by a freshly allocated vector; the filtered-out rows are filled with values already present in col; if col is a Symbol or AbstractString referring to a column missing from sdf and sdf was created with : as column selector then a new column is allocated and added; in this case the filtered-out rows are filled with missing;\nthe sdf[!, cols] .= v syntax replaces existing columns cols in data frame sdf with freshly allocated vectors; the filtered-out rows are filled with values already present in cols;\nsdf.col .= v syntax currently performs in-place assignment to an existing vector sdf.col; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if :col is not present in sdf then a new column will be created in sdf if sdf was created with : as a column selector.\ndfr.col .= v syntax is allowed and performs in-place assignment to a value extracted by dfr.col.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that sdf[!, col] .= v and sdf[!, cols] .= v syntaxes are not allowed as sdf can be only modified in-place.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"If column indexing using Symbol or AbstractString names in cols is performed, the order of columns in the operation is specified by the order of names.","category":"page"},{"location":"lib/indexing/#Indexing-GroupedDataFrames","page":"Indexing","title":"Indexing GroupedDataFrames","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"A GroupedDataFrame can behave as either an AbstractVector or AbstractDict depending on the type of index used. Integers (or arrays of them) trigger vector-like indexing while Tuples and NamedTuples trigger dictionary-like indexing. An intermediate between the two is the GroupKey type returned by keys(::GroupedDataFrame), which behaves similarly to a NamedTuple but has performance on par with integer indexing.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The elements of a GroupedDataFrame are SubDataFrames of its parent.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"gd[i::Integer] -> Get the ith group.\ngd[key::NamedTuple] -> Get the group corresponding to the given values of the grouping columns. 
The fields of the NamedTuple must match the grouping columns passed to groupby (including order).\ngd[key::Tuple] -> Same as previous, but omitting the names on key.\nget(gd, key::Union{Tuple, NamedTuple}, default) -> Get group for key key, returning default if it does not exist.\ngd[key::GroupKey] -> Get the group corresponding to the GroupKey key (one of the elements of the vector returned by keys(::GroupedDataFrame)). This should be nearly as fast as integer indexing.\ngd[a::AbstractVector] -> Select multiple groups and return them in a new GroupedDataFrame object. Groups may be selected by integer position using an array of Integers or Bools, similar to a standard array. Alternatively the array may contain keys of any of the types supported for dictionary-like indexing (GroupKey, Tuple, or NamedTuple). Selected groups must be unique, and different types of indices cannot be mixed.\ngd[n::Not] -> Any of the above types wrapped in Not. The result will be a  new GroupedDataFrame containing all groups in gd not selected by the  wrapped index.","category":"page"},{"location":"lib/indexing/#Common-API-for-types-defined-in-DataFrames.jl","page":"Indexing","title":"Common API for types defined in DataFrames.jl","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"This table presents return value types of calling names, propertynames, keys, length and ndims on types exposed to the user by DataFrames.jl:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Type names propertynames keys length ndims\nAbstractDataFrame Vector{String} Vector{Symbol} undefined undefined 2\nDataFrameRow Vector{String} Vector{Symbol} Vector{Symbol} Int 1\nDataFrameRows Vector{String} Vector{Symbol} vector of Int Int 1\nDataFrameColumns Vector{String} Vector{Symbol} Vector{Symbol} Int 1\nGroupedDataFrame Vector{String} tuple of fields GroupKeys Int 1\nGroupKeys undefined tuple of fields vector of 
Int Int 1\nGroupKey Vector{String} Vector{Symbol} Vector{Symbol} Int 1","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additionally, for the above types T (i.e. AbstractDataFrame, DataFrameRow, DataFrameRows, DataFrameColumns, GroupedDataFrame, GroupKeys, GroupKey) the following methods are defined:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"size(::T) returning a Tuple of Int.\nsize(::T, ::Integer) returning an Int.\naxes(::T) returning a Tuple of Int vectors.\naxes(::T, ::Integer) returning an Int vector for a valid dimension (except  DataFrameRows and GroupKeys for which Base.OneTo(1) is also returned  for a dimension higher than a valid one because they are AbstractVector).\nfirstindex(::T) returning 1 (except AbstractDataFrame for which it is undefined).\nfirstindex(::T, ::Integer) returning 1 for a valid dimension (except  DataFrameRows and GroupKeys for which 1 is also returned for a  dimension higher than a valid one because they are AbstractVector).\nlastindex(::T) returning Int (except AbstractDataFrame for which it is undefined).\nlastindex(::T, ::Integer) returning Int for a valid dimension  (except  DataFrameRows and GroupKeys for which 1 is also returned for a  dimension higher than a valid one because they are AbstractVector).","category":"page"},{"location":"man/reshaping_and_pivoting/#Reshaping-and-Pivoting-Data","page":"Reshaping","title":"Reshaping and Pivoting Data","text":"","category":"section"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Reshape data from wide to long format using the stack function:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> using DataFrames, CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  
SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> stack(iris, 1:4)\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica 
 PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"The second optional argument to stack indicates the columns to be stacked. These are normally referred to as the measured variables. Column names can also be given:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth])\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica  PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Note that all columns can be of different types. 
Type promotion follows the rules of vcat.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"The stacked DataFrame that results includes all of the columns not specified to be stacked. These are repeated for each stacked column. These are normally referred to as identifier (id) columns. In addition to the id columns, two additional columns labeled :variable and :value contain the column identifier and the stacked columns.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"A third optional argument to stack represents the id columns that are repeated. This makes it easier to specify which variables you want included in the long format:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, [:SepalLength, :SepalWidth], :Species)\n300×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 294 │ Iris-virginica  SepalWidth       3.2\n 295 │ Iris-virginica  SepalWidth       3.3\n 296 │ Iris-virginica  SepalWidth       3.0\n 297 │ Iris-virginica  SepalWidth       2.5\n 298 │ Iris-virginica  SepalWidth       3.0\n 299 │ Iris-virginica  SepalWidth       3.4\n 300 │ Iris-virginica  SepalWidth       3.0\n                            285 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"If you prefer to specify the id columns then use 
Not with stack like this:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, Not(:Species))\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica  PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"unstack converts from a long format to a wide format. 
By default it requires specifying which columns are an id variable, column variable names, and column values:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> iris.id = 1:size(iris, 1)\n1:150\n\njulia> longdf = stack(iris, Not([:Species, :id]))\n600×4 DataFrame\n Row │ Species         id     variable     value\n     │ String15        Int64  String       Float64\n─────┼─────────────────────────────────────────────\n   1 │ Iris-setosa         1  SepalLength      5.1\n   2 │ Iris-setosa         2  SepalLength      4.9\n   3 │ Iris-setosa         3  SepalLength      4.7\n   4 │ Iris-setosa         4  SepalLength      4.6\n   5 │ Iris-setosa         5  SepalLength      5.0\n   6 │ Iris-setosa         6  SepalLength      5.4\n   7 │ Iris-setosa         7  SepalLength      4.6\n   8 │ Iris-setosa         8  SepalLength      5.0\n  ⋮  │       ⋮           ⋮         ⋮          ⋮\n 594 │ Iris-virginica    144  PetalWidth       2.3\n 595 │ Iris-virginica    145  PetalWidth       2.5\n 596 │ Iris-virginica    146  PetalWidth       2.3\n 597 │ Iris-virginica    147  PetalWidth       1.9\n 598 │ Iris-virginica    148  PetalWidth       2.0\n 599 │ Iris-virginica    149  PetalWidth       2.3\n 600 │ Iris-virginica    150  PetalWidth       1.8\n                                   585 rows omitted\n\njulia> unstack(longdf, :id, :variable, :value)\n150×5 DataFrame\n Row │ id     SepalLength  SepalWidth  PetalLength  PetalWidth\n     │ Int64  Float64?     Float64?    Float64?     
Float64?\n─────┼─────────────────────────────────────────────────────────\n   1 │     1          5.1         3.5          1.4         0.2\n   2 │     2          4.9         3.0          1.4         0.2\n   3 │     3          4.7         3.2          1.3         0.2\n   4 │     4          4.6         3.1          1.5         0.2\n   5 │     5          5.0         3.6          1.4         0.2\n   6 │     6          5.4         3.9          1.7         0.4\n   7 │     7          4.6         3.4          1.4         0.3\n   8 │     8          5.0         3.4          1.5         0.2\n  ⋮  │   ⋮         ⋮           ⋮            ⋮           ⋮\n 144 │   144          6.8         3.2          5.9         2.3\n 145 │   145          6.7         3.3          5.7         2.5\n 146 │   146          6.7         3.0          5.2         2.3\n 147 │   147          6.3         2.5          5.0         1.9\n 148 │   148          6.5         3.0          5.2         2.0\n 149 │   149          6.2         3.4          5.4         2.3\n 150 │   150          5.9         3.0          5.1         1.8\n                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"If the remaining columns are unique, you can skip the id variable and use:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> unstack(longdf, :variable, :value)\n150×6 DataFrame\n Row │ Species         id     SepalLength  SepalWidth  PetalLength  PetalWidth ⋯\n     │ String15        Int64  Float64?     Float64?    Float64?     Float64?   
⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa         1          5.1         3.5          1.4         0.2 ⋯\n   2 │ Iris-setosa         2          4.9         3.0          1.4         0.2\n   3 │ Iris-setosa         3          4.7         3.2          1.3         0.2\n   4 │ Iris-setosa         4          4.6         3.1          1.5         0.2\n   5 │ Iris-setosa         5          5.0         3.6          1.4         0.2 ⋯\n   6 │ Iris-setosa         6          5.4         3.9          1.7         0.4\n   7 │ Iris-setosa         7          4.6         3.4          1.4         0.3\n   8 │ Iris-setosa         8          5.0         3.4          1.5         0.2\n  ⋮  │       ⋮           ⋮         ⋮           ⋮            ⋮           ⋮      ⋱\n 144 │ Iris-virginica    144          6.8         3.2          5.9         2.3 ⋯\n 145 │ Iris-virginica    145          6.7         3.3          5.7         2.5\n 146 │ Iris-virginica    146          6.7         3.0          5.2         2.3\n 147 │ Iris-virginica    147          6.3         2.5          5.0         1.9\n 148 │ Iris-virginica    148          6.5         3.0          5.2         2.0 ⋯\n 149 │ Iris-virginica    149          6.2         3.4          5.4         2.3\n 150 │ Iris-virginica    150          5.9         3.0          5.1         1.8\n                                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"You can even skip passing the :variable and :value values as positional arguments, as they will be used by default, and write:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> unstack(longdf)\n150×6 DataFrame\n Row │ Species         id     SepalLength  SepalWidth  PetalLength  PetalWidth ⋯\n     │ String15        Int64  Float64?     Float64?    Float64?     Float64? 
  ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa         1          5.1         3.5          1.4         0.2 ⋯\n   2 │ Iris-setosa         2          4.9         3.0          1.4         0.2\n   3 │ Iris-setosa         3          4.7         3.2          1.3         0.2\n   4 │ Iris-setosa         4          4.6         3.1          1.5         0.2\n   5 │ Iris-setosa         5          5.0         3.6          1.4         0.2 ⋯\n   6 │ Iris-setosa         6          5.4         3.9          1.7         0.4\n   7 │ Iris-setosa         7          4.6         3.4          1.4         0.3\n   8 │ Iris-setosa         8          5.0         3.4          1.5         0.2\n  ⋮  │       ⋮           ⋮         ⋮           ⋮            ⋮           ⋮      ⋱\n 144 │ Iris-virginica    144          6.8         3.2          5.9         2.3 ⋯\n 145 │ Iris-virginica    145          6.7         3.3          5.7         2.5\n 146 │ Iris-virginica    146          6.7         3.0          5.2         2.3\n 147 │ Iris-virginica    147          6.3         2.5          5.0         1.9\n 148 │ Iris-virginica    148          6.5         3.0          5.2         2.0 ⋯\n 149 │ Iris-virginica    149          6.2         3.4          5.4         2.3\n 150 │ Iris-virginica    150          5.9         3.0          5.1         1.8\n                                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Passing view=true to stack returns a data frame whose columns are views into the original wide data frame. 
Here is an example:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, view=true)\n600×4 DataFrame\n Row │ Species         id     variable     value\n     │ String15        Int64  String       Float64\n─────┼─────────────────────────────────────────────\n   1 │ Iris-setosa         1  SepalLength      5.1\n   2 │ Iris-setosa         2  SepalLength      4.9\n   3 │ Iris-setosa         3  SepalLength      4.7\n   4 │ Iris-setosa         4  SepalLength      4.6\n   5 │ Iris-setosa         5  SepalLength      5.0\n   6 │ Iris-setosa         6  SepalLength      5.4\n   7 │ Iris-setosa         7  SepalLength      4.6\n   8 │ Iris-setosa         8  SepalLength      5.0\n  ⋮  │       ⋮           ⋮         ⋮          ⋮\n 594 │ Iris-virginica    144  PetalWidth       2.3\n 595 │ Iris-virginica    145  PetalWidth       2.5\n 596 │ Iris-virginica    146  PetalWidth       2.3\n 597 │ Iris-virginica    147  PetalWidth       1.9\n 598 │ Iris-virginica    148  PetalWidth       2.0\n 599 │ Iris-virginica    149  PetalWidth       2.3\n 600 │ Iris-virginica    150  PetalWidth       1.8\n                                   585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"This saves memory. 
To create the view, several AbstractVectors are defined:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":":variable column – EachRepeatedVector This repeats the variables N times where N is the number of rows of the original AbstractDataFrame.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":":value column – StackedVector This provides a view of the original columns stacked together.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Id columns – RepeatedVector This repeats the original columns N times where N is the number of columns stacked.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"To do aggregation, use the split-apply-combine functions in combination with unstack or use the combine keyword argument in unstack. Here is an example:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> using Statistics\n\njulia> d = stack(iris, Not(:Species))\n750×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 744 │ Iris-virginica  id             144.0\n 745 │ Iris-virginica  id             145.0\n 746 │ Iris-virginica  id             146.0\n 747 │ Iris-virginica  id             147.0\n 748 │ Iris-virginica  id             148.0\n 749 │ Iris-virginica  id             149.0\n 750 │ 
Iris-virginica  id             150.0\n                            735 rows omitted\n\njulia> agg = combine(groupby(d, [:variable, :Species]), :value => mean => :vmean)\n15×3 DataFrame\n Row │ variable     Species          vmean\n     │ String       String15         Float64\n─────┼───────────────────────────────────────\n   1 │ SepalLength  Iris-setosa        5.006\n   2 │ SepalLength  Iris-versicolor    5.936\n   3 │ SepalLength  Iris-virginica     6.588\n   4 │ SepalWidth   Iris-setosa        3.418\n   5 │ SepalWidth   Iris-versicolor    2.77\n   6 │ SepalWidth   Iris-virginica     2.974\n   7 │ PetalLength  Iris-setosa        1.464\n   8 │ PetalLength  Iris-versicolor    4.26\n   9 │ PetalLength  Iris-virginica     5.552\n  10 │ PetalWidth   Iris-setosa        0.244\n  11 │ PetalWidth   Iris-versicolor    1.326\n  12 │ PetalWidth   Iris-virginica     2.026\n  13 │ id           Iris-setosa       25.5\n  14 │ id           Iris-versicolor   75.5\n  15 │ id           Iris-virginica   125.5\n\njulia> unstack(agg, :variable, :Species, :vmean)\n5×4 DataFrame\n Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica\n     │ String       Float64?     Float64?         Float64?\n─────┼───────────────────────────────────────────────────────────\n   1 │ SepalLength        5.006            5.936           6.588\n   2 │ SepalWidth         3.418            2.77            2.974\n   3 │ PetalLength        1.464            4.26            5.552\n   4 │ PetalWidth         0.244            1.326           2.026\n   5 │ id                25.5             75.5           125.5\n\njulia> unstack(d, :variable, :Species, :value, combine=mean)\n5×4 DataFrame\n Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica\n     │ String       Float64?     Float64?         
Float64?\n─────┼───────────────────────────────────────────────────────────\n   1 │ SepalLength        5.006            5.936           6.588\n   2 │ SepalWidth         3.418            2.77            2.974\n   3 │ PetalLength        1.464            4.26            5.552\n   4 │ PetalWidth         0.244            1.326           2.026\n   5 │ id                25.5             75.5           125.5","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"To turn an AbstractDataFrame on its side, use permutedims.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> df1 = DataFrame(a=[\"x\", \"y\"], b=[1.0, 2.0], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b        c      d\n     │ String  Float64  Int64  Bool\n─────┼───────────────────────────────\n   1 │ x           1.0      3   true\n   2 │ y           2.0      4  false\n\njulia> permutedims(df1, 1)\n3×3 DataFrame\n Row │ a       x        y\n     │ String  Float64  Float64\n─────┼──────────────────────────\n   1 │ b           1.0      2.0\n   2 │ c           3.0      4.0\n   3 │ d           1.0      0.0","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Note that the column indexed by src_namescol in the original df becomes the column names in the permuted result, and the column names of the original become a new column. Typically, this would be used on columns with homogeneous element types, since the element types of the other columns are the result of promote_type on all the permuted columns. Note also that, by default, the new column created from the column names of the original df has the same name as src_namescol. 
An optional positional argument dest_namescol can alter this:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> df2 = DataFrame(a=[\"x\", \"y\"], b=[1, \"two\"], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b    c      d\n     │ String  Any  Int64  Bool\n─────┼───────────────────────────\n   1 │ x       1        3   true\n   2 │ y       two      4  false\n\njulia> permutedims(df2, 1, \"different_name\")\n3×3 DataFrame\n Row │ different_name  x     y\n     │ String          Any   Any\n─────┼─────────────────────────────\n   1 │ b               1     two\n   2 │ c               3     4\n   3 │ d               true  false","category":"page"},{"location":"man/categorical/#man-categorical","page":"Categorical Data","title":"Categorical Data","text":"","category":"section"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"Often, we have to deal with columns in a data frame that take on a small number of levels:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> v = [\"Group A\", \"Group A\", \"Group A\", \"Group B\", \"Group B\", \"Group B\"]\n6-element Vector{String}:\n \"Group A\"\n \"Group A\"\n \"Group A\"\n \"Group B\"\n \"Group B\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The naive encoding used in a Vector represents every entry of this vector as a full string. In contrast, we can represent the data more efficiently by replacing the strings with indices into a small pool of levels. There are two benefits of doing this. The first is that such vectors will tend to use less memory. 
The second is that they can be efficiently grouped using the groupby function.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"There are two common types that allow to perform level pooling:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"PooledVector from PooledArrays.jl;\nCategoricalVector from CategoricalArrays.jl.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The difference between PooledVector and CategoricalVector is the following:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"PooledVector is intended for cases where data compression is the only objective;\nCategoricalVector is designed to additionally provide full support  for working with categorical variables, both with unordered (nominal variables) and ordered categories (ordinal variables) at the expense of allowing only AbstractString, AbstractChar, or Number element types (optionally in a union with Missing).","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"CategoricalVector is useful in particular when unique values in the array (levels) should respect a meaningful ordering, like when printing tables, drawing plots or fitting regression models. CategoricalArrays.jl provides functions to set and retrieve this order and compare values according to it. On the contrary, the PooledVector type is essentially a drop-in replacement for Vector with almost no user-visible differences except for lower memory use and higher performance. ","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"Below we show selected examples of working with CategoricalArrays.jl. 
See the CategoricalArrays.jl package documentation for more information regarding categorical arrays. Also note that in this section we discuss only vectors because we are considering a data frame context. However, in general both packages allow working with arrays of any dimensionality.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"In order to follow the examples below you need to install the CategoricalArrays.jl package first.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> using CategoricalArrays\n\njulia> cv = categorical(v)\n6-element CategoricalArray{String,1,UInt32}:\n \"Group A\"\n \"Group A\"\n \"Group A\"\n \"Group B\"\n \"Group B\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"CategoricalVectors support missing values.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv = categorical([\"Group A\", missing, \"Group A\",\n                         \"Group B\", \"Group B\", missing])\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"In addition to representing repeated data efficiently, the CategoricalArray type allows us to determine efficiently the allowed levels of the variable at any time using the levels function (note that levels may or may not be actually used in the data):","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> levels(cv)\n2-element Vector{String}:\n \"Group A\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The 
levels! function also allows changing the order of appearance of the levels, which can be useful for display purposes or when working with ordered variables.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> levels!(cv, [\"Group B\", \"Group A\"])\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing\n\njulia> levels(cv)\n2-element Vector{String}:\n \"Group B\"\n \"Group A\"\n\njulia> sort(cv)\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group B\"\n \"Group B\"\n \"Group A\"\n \"Group A\"\n missing\n missing","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"By default, a CategoricalVector is able to represent 2^32 different levels. You can use less memory by calling the compress function:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv = compress(cv)\n6-element CategoricalArray{Union{Missing, String},1,UInt8}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing\n","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The categorical function additionally accepts a keyword argument compress which when set to true is equivalent to calling compress on the new vector:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv1 = categorical([\"A\", \"B\"], compress=true)\n2-element CategoricalArray{String,1,UInt8}:\n \"A\"\n \"B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"If the ordered keyword argument is set to true, the resulting CategoricalVector will be ordered, which means that its levels can be tested for order (rather than throwing an 
error):","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv2 = categorical([\"A\", \"B\"], ordered=true)\n2-element CategoricalArray{String,1,UInt32}:\n \"A\"\n \"B\"\n\njulia> cv1[1] < cv1[2]\nERROR: ArgumentError: Unordered CategoricalValue objects cannot be tested for order using <. Use isless instead, or call the ordered! function on the parent array to change this\n\njulia> cv2[1] < cv2[2]\ntrue","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"You can check if a CategoricalVector is ordered using the isordered function and change between ordered and unordered using ordered! function.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> isordered(cv1)\nfalse\n\njulia> ordered!(cv1, true)\n2-element CategoricalArray{String,1,UInt8}:\n \"A\"\n \"B\"\n\njulia> isordered(cv1)\ntrue\n\njulia> cv1[1] < cv1[2]\ntrue","category":"page"},{"location":"man/sorting/#Sorting","page":"Sorting","title":"Sorting","text":"","category":"section"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Sorting is a fundamental component of data analysis. Basic sorting is trivial: just calling sort! 
will sort all columns, in place:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> using DataFrames, CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> sort!(iris)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.3         3.0 
         1.1         0.1  Iris-setosa\n   2 │         4.4         2.9          1.4         0.2  Iris-setosa\n   3 │         4.4         3.0          1.3         0.2  Iris-setosa\n   4 │         4.4         3.2          1.3         0.2  Iris-setosa\n   5 │         4.5         2.3          1.3         0.3  Iris-setosa\n   6 │         4.6         3.1          1.5         0.2  Iris-setosa\n   7 │         4.6         3.2          1.4         0.2  Iris-setosa\n   8 │         4.6         3.4          1.4         0.3  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         7.4         2.8          6.1         1.9  Iris-virginica\n 145 │         7.6         3.0          6.6         2.1  Iris-virginica\n 146 │         7.7         2.6          6.9         2.3  Iris-virginica\n 147 │         7.7         2.8          6.7         2.0  Iris-virginica\n 148 │         7.7         3.0          6.1         2.3  Iris-virginica\n 149 │         7.7         3.8          6.7         2.2  Iris-virginica\n 150 │         7.9         3.8          6.4         2.0  Iris-virginica\n                                                        135 rows omitted","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Observe that all columns are taken into account lexicographically when sorting the DataFrame.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"You can also call the sort function to create a new DataFrame with freshly allocated sorted vectors.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"In sorting DataFrames, you may want to sort different columns with different options. 
Here are some examples showing most of the possible options:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> sort!(iris, rev = true)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         7.9         3.8          6.4         2.0  Iris-virginica\n   2 │         7.7         3.8          6.7         2.2  Iris-virginica\n   3 │         7.7         3.0          6.1         2.3  Iris-virginica\n   4 │         7.7         2.8          6.7         2.0  Iris-virginica\n   5 │         7.7         2.6          6.9         2.3  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         7.4         2.8          6.1         1.9  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         4.6         3.2          1.4         0.2  Iris-setosa\n 145 │         4.6         3.1          1.5         0.2  Iris-setosa\n 146 │         4.5         2.3          1.3         0.3  Iris-setosa\n 147 │         4.4         3.2          1.3         0.2  Iris-setosa\n 148 │         4.4         3.0          1.3         0.2  Iris-setosa\n 149 │         4.4         2.9          1.4         0.2  Iris-setosa\n 150 │         4.3         3.0          1.1         0.1  Iris-setosa\n                                                        135 rows omitted\n\njulia> sort!(iris, [:Species, :SepalWidth])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.5         2.3          1.3         0.3  Iris-setosa\n   2 │         4.4         2.9        
  1.4         0.2  Iris-setosa\n   3 │         5.0         3.0          1.6         0.2  Iris-setosa\n   4 │         4.9         3.0          1.4         0.2  Iris-setosa\n   5 │         4.8         3.0          1.4         0.3  Iris-setosa\n   6 │         4.8         3.0          1.4         0.1  Iris-setosa\n   7 │         4.4         3.0          1.3         0.2  Iris-setosa\n   8 │         4.3         3.0          1.1         0.1  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.7         3.3          5.7         2.1  Iris-virginica\n 145 │         6.3         3.3          6.0         2.5  Iris-virginica\n 146 │         6.3         3.4          5.6         2.4  Iris-virginica\n 147 │         6.2         3.4          5.4         2.3  Iris-virginica\n 148 │         7.2         3.6          6.1         2.5  Iris-virginica\n 149 │         7.9         3.8          6.4         2.0  Iris-virginica\n 150 │         7.7         3.8          6.7         2.2  Iris-virginica\n                                                        135 rows omitted\n\njulia> sort!(iris, [order(:Species, by=length), order(:SepalLength, rev=true)])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────────\n   1 │         5.8         4.0          1.2         0.2  Iris-setosa\n   2 │         5.7         3.8          1.7         0.3  Iris-setosa\n   3 │         5.7         4.4          1.5         0.4  Iris-setosa\n   4 │         5.5         3.5          1.3         0.2  Iris-setosa\n   5 │         5.5         4.2          1.4         0.2  Iris-setosa\n   6 │         5.4         3.4          1.7         0.2  Iris-setosa\n   7 │         5.4         3.4          1.5         0.4  Iris-setosa\n   8 │         5.4         3.7          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮    
        ⋮           ⋮              ⋮\n 144 │         5.5         2.6          4.4         1.2  Iris-versicolor\n 145 │         5.4         3.0          4.5         1.5  Iris-versicolor\n 146 │         5.2         2.7          3.9         1.4  Iris-versicolor\n 147 │         5.1         2.5          3.0         1.1  Iris-versicolor\n 148 │         5.0         2.0          3.5         1.0  Iris-versicolor\n 149 │         5.0         2.3          3.3         1.0  Iris-versicolor\n 150 │         4.9         2.4          3.3         1.0  Iris-versicolor\n                                                         135 rows omitted","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Keywords used above include rev (to sort in reverse), and by (to apply a function to values before comparing them). Each keyword can either be a single value, a vector with values corresponding to individual columns, or a selector: :, Cols, All, Not, Between, or Regex.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"As an alternative to using a vector values you can use order to specify an ordering for a particular column within a set of columns.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"The following two examples show two ways to sort the iris dataset with the same result: :Species will be ordered in reverse order, and within groups, rows will be sorted by increasing :PetalLength:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> sort!(iris, [:Species, :PetalLength], rev=[true, false])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.9         2.5          4.5         1.7  Iris-virginica\n   2 │         6.2         2.8          4.8    
     1.8  Iris-virginica\n   3 │         6.0         3.0          4.8         1.8  Iris-virginica\n   4 │         6.3         2.7          4.9         1.8  Iris-virginica\n   5 │         6.1         3.0          4.9         1.8  Iris-virginica\n   6 │         5.6         2.8          4.9         2.0  Iris-virginica\n   7 │         6.3         2.5          5.0         1.9  Iris-virginica\n   8 │         6.0         2.2          5.0         1.5  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         4.7         3.2          1.6         0.2  Iris-setosa\n 145 │         5.7         3.8          1.7         0.3  Iris-setosa\n 146 │         5.4         3.4          1.7         0.2  Iris-setosa\n 147 │         5.4         3.9          1.7         0.4  Iris-setosa\n 148 │         5.1         3.3          1.7         0.5  Iris-setosa\n 149 │         5.1         3.8          1.9         0.4  Iris-setosa\n 150 │         4.8         3.4          1.9         0.2  Iris-setosa\n                                                        135 rows omitted\n\njulia> sort!(iris, [order(:Species, rev=true), :PetalLength])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.9         2.5          4.5         1.7  Iris-virginica\n   2 │         6.2         2.8          4.8         1.8  Iris-virginica\n   3 │         6.0         3.0          4.8         1.8  Iris-virginica\n   4 │         6.3         2.7          4.9         1.8  Iris-virginica\n   5 │         6.1         3.0          4.9         1.8  Iris-virginica\n   6 │         5.6         2.8          4.9         2.0  Iris-virginica\n   7 │         6.3         2.5          5.0         1.9  Iris-virginica\n   8 │         6.0         2.2          5.0         1.5  Iris-virginica\n  ⋮  │      ⋮           ⋮        
    ⋮           ⋮             ⋮\n 144 │         4.7         3.2          1.6         0.2  Iris-setosa\n 145 │         5.7         3.8          1.7         0.3  Iris-setosa\n 146 │         5.4         3.4          1.7         0.2  Iris-setosa\n 147 │         5.4         3.9          1.7         0.4  Iris-setosa\n 148 │         5.1         3.3          1.7         0.5  Iris-setosa\n 149 │         5.1         3.8          1.9         0.4  Iris-setosa\n 150 │         4.8         3.4          1.9         0.2  Iris-setosa\n                                                        135 rows omitted","category":"page"},{"location":"man/working_with_dataframes/#Working-with-Data-Frames","page":"Working with DataFrames","title":"Working with Data Frames","text":"","category":"section"},{"location":"man/working_with_dataframes/#Examining-the-Data","page":"Working with DataFrames","title":"Examining the Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The default printing of DataFrame objects only includes a sample of rows and columns that fits on screen:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using DataFrames\n\njulia> df = DataFrame(A=1:2:1000, B=repeat(1:10, inner=50), C=1:500)\n500×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n   7 │    13      1      7\n   8 │    15      1      8\n  ⋮  │   ⋮      ⋮      ⋮\n 494 │   987     10    494\n 495 │   989     10    495\n 496 │   991     10    496\n 497 │   993     10    497\n 498 │   995     10    498\n 499 │   997     10    499\n 500 │   999     10    500\n           485 rows 
omitted","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Printing options can be adjusted by calling the show function manually: show(df, allrows=true) prints all rows even if they do not fit on screen and show(df, allcols=true) does the same for columns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The first and last functions can be used to look at the first and last rows of a data frame (respectively):","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> first(df, 6)\n6×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n\njulia> last(df, 6)\n6×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   989     10    495\n   2 │   991     10    496\n   3 │   993     10    497\n   4 │   995     10    498\n   5 │   997     10    499\n   6 │   999     10    500","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Also notice that when DataFrame is printed to the console or rendered in HTML (e.g. in Jupyter Notebook) you get an information about type of elements held in its columns. For example in this case:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using CategoricalArrays\n\njulia> DataFrame(a=1:2, b=[1.0, missing],\n                 c=categorical('a':'b'), d=[1//2, missing])\n2×4 DataFrame\n Row │ a      b          c     d\n     │ Int64  Float64?   
Cat…  Rational…?\n─────┼────────────────────────────────────\n   1 │     1        1.0  a           1//2\n   2 │     2  missing    b        missing\n","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"we can observe that:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"the first column :a can hold elements of type Int64;\nthe second column :b can hold Float64 or Missing, which is indicated by ? printed after the name of type;\nthe third column :c can hold categorical data; here we notice …, which indicates that the actual name of the type was long and got truncated;\nthe type information in fourth column :d presents a situation where the name is both truncated and the type allows Missing.","category":"page"},{"location":"man/working_with_dataframes/#Taking-a-Subset","page":"Working with DataFrames","title":"Taking a Subset","text":"","category":"section"},{"location":"man/working_with_dataframes/#Indexing-syntax","page":"Working with DataFrames","title":"Indexing syntax","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Specific subsets of a data frame can be extracted using the indexing syntax, similar to matrices. In the Indexing section of the manual you can find all the details about the available options. 
Here we highlight the basic options.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The colon : indicates that all items (rows or columns depending on its position) should be retained:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[1:3, :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n\njulia> df[[1, 5, 10], :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     9      1      5\n   3 │    19      1     10\n\njulia> df[:, [:A, :B]]\n500×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     3      1\n   3 │     5      1\n   4 │     7      1\n   5 │     9      1\n   6 │    11      1\n   7 │    13      1\n   8 │    15      1\n  ⋮  │   ⋮      ⋮\n 494 │   987     10\n 495 │   989     10\n 496 │   991     10\n 497 │   993     10\n 498 │   995     10\n 499 │   997     10\n 500 │   999     10\n    485 rows omitted\n\njulia> df[1:3, [:B, :A]]\n3×2 DataFrame\n Row │ B      A\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     1      3\n   3 │     1      5\n\njulia> df[[3, 1], [:C]]\n2×1 DataFrame\n Row │ C\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     1","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that df[!, [:A]] and df[:, [:A]] return a DataFrame object, while df[!, :A] and df[:, :A] return a vector:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[!, [:A]]\n500×1 DataFrame\n Row │ A\n     │ 
Int64\n─────┼───────\n   1 │     1\n   2 │     3\n   3 │     5\n   4 │     7\n   5 │     9\n   6 │    11\n   7 │    13\n   8 │    15\n  ⋮  │   ⋮\n 494 │   987\n 495 │   989\n 496 │   991\n 497 │   993\n 498 │   995\n 499 │   997\n 500 │   999\n485 rows omitted\n\njulia> df[!, [:A]] == df[:, [:A]]\ntrue\n\njulia> df[!, :A]\n500-element Vector{Int64}:\n   1\n   3\n   5\n   7\n   9\n  11\n  13\n  15\n  17\n  19\n   ⋮\n 983\n 985\n 987\n 989\n 991\n 993\n 995\n 997\n 999\n\njulia> df[!, :A] == df[:, :A]\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the first case, [:A] is a vector, indicating that the resulting object should be a DataFrame. On the other hand, :A is a single symbol, indicating that a single column vector should be extracted. Note that in the first case a vector is required to be passed (not just any iterable), so e.g. df[:, (:x1, :x2)] is not allowed, but df[:, [:x1, :x2]] is valid.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is also possible to use a regular expression as a selector of columns matching it:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=1, x2=2, y=3)\n1×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> df[!, r\"x\"]\n1×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"A Not selector (from the InvertedIndices package) can be used to select all columns excluding a specific subset:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with 
DataFrames","title":"Working with DataFrames","text":"julia> df[!, Not(:x1)]\n1×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Finally, you can use Not, Between, Cols and All selectors in more complex column selection scenarios (note that Cols() selects no columns while All() selects all columns therefore Cols is a preferred selector if you write generic code). Here are examples of using each of these selectors:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(r=1, x1=2, x2=3, y=4)\n1×4 DataFrame\n Row │ r      x1     x2     y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      3      4\n\njulia> df[:, Not(:r)] # drop :r column\n1×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      3      4\n\njulia> df[:, Between(:r, :x2)] # keep columns between :r and :x2\n1×3 DataFrame\n Row │ r      x1     x2\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> df[:, All()] # keep all columns\n1×4 DataFrame\n Row │ r      x1     x2     y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      3      4\n\njulia> df[:, Cols(x -> startswith(x, \"x\"))] # keep columns whose name starts with \"x\"\n1×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The following examples show a more complex use of the Cols selector, which moves all columns whose names match r\"x\" regular expression respectively to the front and to the end of the 
data frame:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[:, Cols(r\"x\", :)]\n1×4 DataFrame\n Row │ x1     x2     r      y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     2      3      1      4\n\njulia> df[:, Cols(Not(r\"x\"), :)]\n1×4 DataFrame\n Row │ r      y      x1     x2\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The indexing syntax can also be used to select rows based on conditions on variables:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:2:1000, B=repeat(1:10, inner=50), C=1:500)\n500×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n   7 │    13      1      7\n   8 │    15      1      8\n  ⋮  │   ⋮      ⋮      ⋮\n 494 │   987     10    494\n 495 │   989     10    495\n 496 │   991     10    496\n 497 │   993     10    497\n 498 │   995     10    498\n 499 │   997     10    499\n 500 │   999     10    500\n           485 rows omitted\n\njulia> df[df.A .> 500, :]\n250×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   501      6    251\n   2 │   503      6    252\n   3 │   505      6    253\n   4 │   507      6    254\n   5 │   509      6    255\n   6 │   511      6    256\n   7 │   513      6    257\n   8 │   515      6    258\n  ⋮  │   ⋮      ⋮      ⋮\n 244 │   987     10    494\n 245 │   989     10    495\n 246 │   991     10    496\n 247 │   
993     10    497\n 248 │   995     10    498\n 249 │   997     10    499\n 250 │   999     10    500\n           235 rows omitted\n\njulia> df[(df.A .> 500) .& (300 .< df.C .< 400), :]\n99×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   601      7    301\n   2 │   603      7    302\n   3 │   605      7    303\n   4 │   607      7    304\n   5 │   609      7    305\n   6 │   611      7    306\n   7 │   613      7    307\n   8 │   615      7    308\n  ⋮  │   ⋮      ⋮      ⋮\n  93 │   785      8    393\n  94 │   787      8    394\n  95 │   789      8    395\n  96 │   791      8    396\n  97 │   793      8    397\n  98 │   795      8    398\n  99 │   797      8    399\n            84 rows omitted","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Where a specific subset of values needs to be matched, the in() function can be applied:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[in.(df.A, Ref([1, 5, 601])), :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     5      1      3\n   3 │   601      7    301","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The Ref wrapper to [1, 5, 601] is needed to protect the vector against being broadcasted over (the vector will be treated as a scalar when wrapped in Ref). 
You could write this operation using a comprehension like this (note that it would be slower so it is not recommended): [a in [1, 5, 601] for a in df.A].","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Equivalently, the in function can be called with a single argument to create a function object that tests whether each value belongs to the subset (partial application of in): df[in([1, 5, 601]).(df.A), :].","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"note: Note\nAs with matrices, subsetting from a data frame will usually return a copy of columns, not a view or direct reference. The only indexing situations where data frames will not return a copy are: when a ! is placed in the first indexing position (df[!, :A], or df[!, [:A, :B]]),\nwhen using . (getproperty) notation (df.A),\nwhen a single row is selected using an integer (df[1, [:A, :B]])\nwhen view or @view is used (e.g. @view df[1:3, :A]). More details on copies, views, and references can be found in the getindex and view section.","category":"page"},{"location":"man/working_with_dataframes/#Subsetting-functions","page":"Working with DataFrames","title":"Subsetting functions","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"An alternative approach to row subsetting in a data frame is to use the subset function, or the subset! function, which is its in-place variant.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"These functions take a data frame as their first argument. The following positional arguments (one or more) are filtering condition specifications that must be jointly met. 
Each condition should be passed as a Pair consisting of source column(s) and a function specifying the filtering condition taking this or these column(s) as arguments:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> subset(df, :A => a -> a .< 10, :C => c -> isodd.(c))\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     5      1      3\n   3 │     9      1      5","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is a frequent situation that missing values might be present in the filtering columns, which could then lead the filtering condition to return missing instead of the expected true or false. In order to handle this situation one can either use the coalesce function or pass the skipmissing=true keyword argument to subset. Here is an example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x=[1, 2, missing, 4])\n4×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │       2\n   3 │ missing\n   4 │       4\n\njulia> subset(df, :x => x -> coalesce.(iseven.(x), false))\n2×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼────────\n   1 │      2\n   2 │      4\n\njulia> subset(df, :x => x -> iseven.(x), skipmissing=true)\n2×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼────────\n   1 │      2\n   2 │      4","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The subset function has been designed in a way that is consistent with how column transformations are specified in functions like combine, select, and transform. 
Examples of column transformations accepted by these functions are provided in the following section.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Additionally DataFrames.jl extends the filter and filter! functions provided in Julia Base, which also allow subsetting a data frame. These methods are defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset and subset! functions instead, as they are consistent with other DataFrames.jl functions (as opposed to filter and filter!).","category":"page"},{"location":"man/working_with_dataframes/#Selecting-and-transforming-columns","page":"Working with DataFrames","title":"Selecting and transforming columns","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You can also use the select/select! and transform/transform! 
functions to select, rename and transform columns in a data frame.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The select function creates a new data frame:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=[1, 2], x2=[3, 4], y=[5, 6])\n2×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      3      5\n   2 │     2      4      6\n\njulia> select(df, Not(:x1)) # drop column :x1 in a new data frame\n2×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      5\n   2 │     4      6\n\njulia> select(df, r\"x\") # select columns containing 'x' character\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> select(df, :x1 => :a1, :x2 => :a2) # rename columns\n2×2 DataFrame\n Row │ a1     a2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> select(df, :x1, :x2 => (x -> x .- minimum(x)) => :x2) # transform columns\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      1\n\njulia> select(df, :x2, :x2 => ByRow(sqrt)) # transform columns by row\n2×2 DataFrame\n Row │ x2     x2_sqrt\n     │ Int64  Float64\n─────┼────────────────\n   1 │     3  1.73205\n   2 │     4  2.0\n\njulia> select(df, :x1, :x2, [:x1, :x2] => ((x1, x2) -> x1 ./ x2) => :z) # transform multiple columns\n2×3 DataFrame\n Row │ x1     x2     z\n     │ Int64  Int64  Float64\n─────┼────────────────────────\n   1 │     1      3  0.333333\n   2 │     2      4  0.5\n\njulia> select(df, :x1, :x2, [:x1, :x2] => ByRow((x1, x2) -> x1 / x2) => :z)  # transform multiple columns by row\n2×3 DataFrame\n Row │ x1     x2     z\n     │ Int64  Int64  
Float64\n─────┼────────────────────────\n   1 │     1      3  0.333333\n   2 │     2      4  0.5\n\njulia> select(df, AsTable(:) => ByRow(extrema) => [:lo, :hi]) # return multiple columns\n2×2 DataFrame\n Row │ lo     hi\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is important to note that select always returns a data frame, even if a single column is selected (as opposed to indexing syntax).","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> select(df, :x1)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df[:, :x1]\n2-element Vector{Int64}:\n 1\n 2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"By default select copies columns of a passed source data frame. 
In order to avoid copying, pass copycols=false:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = select(df, :x1)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df2.x1 === df.x1\nfalse\n\njulia> df2 = select(df, :x1, copycols=false)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df2.x1 === df.x1\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"To perform the selection operation in-place use select!:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> select!(df, Not(:x1));\n\njulia> df\n2×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      5\n   2 │     4      6","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"transform and transform! functions work identically to select and select!, with the only difference that they retain all columns that are present in the source data frame. 
Here are some more advanced examples.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"First we show how to generate a column that is a sum of all other columns in the data frame using the All() selector:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=[1, 2], x2=[3, 4], y=[5, 6])\n2×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      3      5\n   2 │     2      4      6\n\njulia> transform(df, All() => +)\n2×4 DataFrame\n Row │ x1     x2     y      x1_x2_y_+\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────────\n   1 │     1      3      5          9\n   2 │     2      4      6         12","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Using the ByRow wrapper, we can easily compute for each row the name of column with the highest score:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Random\n\njulia> Random.seed!(1);\n\njulia> df = DataFrame(rand(10, 3), [:a, :b, :c])\n10×3 DataFrame\n Row │ a           b          c\n     │ Float64     Float64    Float64\n─────┼──────────────────────────────────\n   1 │ 0.236033    0.555751   0.0769509\n   2 │ 0.346517    0.437108   0.640396\n   3 │ 0.312707    0.424718   0.873544\n   4 │ 0.00790928  0.773223   0.278582\n   5 │ 0.488613    0.28119    0.751313\n   6 │ 0.210968    0.209472   0.644883\n   7 │ 0.951916    0.251379   0.0778264\n   8 │ 0.999905    0.0203749  0.848185\n   9 │ 0.251662    0.287702   0.0856352\n  10 │ 0.986666    0.859512   0.553206\n\njulia> transform(df, AsTable(:) => ByRow(argmax) => :prediction)\n10×4 DataFrame\n Row │ a           b   
       c          prediction\n     │ Float64     Float64    Float64    Symbol\n─────┼──────────────────────────────────────────────\n   1 │ 0.236033    0.555751   0.0769509  b\n   2 │ 0.346517    0.437108   0.640396   c\n   3 │ 0.312707    0.424718   0.873544   c\n   4 │ 0.00790928  0.773223   0.278582   b\n   5 │ 0.488613    0.28119    0.751313   c\n   6 │ 0.210968    0.209472   0.644883   c\n   7 │ 0.951916    0.251379   0.0778264  a\n   8 │ 0.999905    0.0203749  0.848185   a\n   9 │ 0.251662    0.287702   0.0856352  b\n  10 │ 0.986666    0.859512   0.553206   a","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the most complex example below we compute row-wise sum, number of elements, and mean, while ignoring missing values.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Statistics\n\njulia> df = DataFrame(x=[1, 2, missing], y=[1, missing, missing])\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64?\n─────┼──────────────────\n   1 │       1        1\n   2 │       2  missing\n   3 │ missing  missing\n\njulia> transform(df, AsTable(:) .=>\n                     ByRow.([sum∘skipmissing,\n                             x -> count(!ismissing, x),\n                             mean∘skipmissing]) .=>\n                     [:sum, :n, :mean])\n3×5 DataFrame\n Row │ x        y        sum    n      mean\n     │ Int64?   Int64?   
Int64  Int64  Float64\n─────┼─────────────────────────────────────────\n   1 │       1        1      2      2      1.0\n   2 │       2  missing      2      1      2.0\n   3 │ missing  missing      0      0    NaN","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"While the DataFrames.jl package provides basic data manipulation capabilities, users are encouraged to use querying frameworks for more convenient and powerful operations:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"the Query.jl package provides a LINQ-like interface to a large number of data sources\nthe DataFramesMeta.jl package provides interfaces similar to LINQ and dplyr\nthe DataFrameMacros.jl package provides macros for most standard functions from DataFrames.jl, with convenient syntax for the manipulation of multiple columns at once.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"See the Data manipulation frameworks section for more information.","category":"page"},{"location":"man/working_with_dataframes/#Summarizing-Data","page":"Working with DataFrames","title":"Summarizing Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The describe function returns a data frame summarizing the elementary statistics and information about each column:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=[\"M\", \"F\", \"F\", \"M\"])\n4×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M\n\njulia> describe(df)\n2×7 DataFrame\n Row │ 
variable  mean    min  median  max  nmissing  eltype\n     │ Symbol    Union…  Any  Union…  Any  Int64     DataType\n─────┼────────────────────────────────────────────────────────\n   1 │ A         2.5     1    2.5     4           0  Int64\n   2 │ B                 F            M           0  String","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"If you are interested in describing only a subset of columns, then the easiest way to do it is to pass a subset of an original data frame to describe like this:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> describe(df[!, [:A]])\n1×7 DataFrame\n Row │ variable  mean     min    median   max    nmissing  eltype\n     │ Symbol    Float64  Int64  Float64  Int64  Int64     DataType\n─────┼──────────────────────────────────────────────────────────────\n   1 │ A             2.5      1      2.5      4         0  Int64","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Of course, one can also compute descriptive statistics directly on individual columns:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Statistics\n\njulia> mean(df.A)\n2.5","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"We can also apply a function to each column of a DataFrame using combine. 
For example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=4.0:-1.0:1.0)\n4×2 DataFrame\n Row │ A      B\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0\n   3 │     3      2.0\n   4 │     4      1.0\n\njulia> combine(df, All() .=> sum)\n1×2 DataFrame\n Row │ A_sum  B_sum\n     │ Int64  Float64\n─────┼────────────────\n   1 │    10     10.0\n\njulia> combine(df, All() .=> sum, All() .=> prod)\n1×4 DataFrame\n Row │ A_sum  B_sum    A_prod  B_prod\n     │ Int64  Float64  Int64   Float64\n─────┼─────────────────────────────────\n   1 │    10     10.0      24     24.0\n\njulia> combine(df, All() .=> [sum prod]) # the same using 2-dimensional broadcasting\n1×4 DataFrame\n Row │ A_sum  B_sum    A_prod  B_prod\n     │ Int64  Float64  Int64   Float64\n─────┼─────────────────────────────────\n   1 │    10     10.0      24     24.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"If you would prefer the result to have the same number of rows as the source data frame, use select instead of combine.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the remainder of this section we will discuss more advanced topics related to the operation specification syntax, so you may decide to skip them if you want to focus on the most common usage patterns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"A DataFrame can store values of any type as its columns, for example below we show how one can store a Tuple:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = 
combine(df, All() .=> extrema)\n1×2 DataFrame\n Row │ A_extrema  B_extrema\n     │ Tuple…     Tuple…\n─────┼───────────────────────\n   1 │ (1, 4)     (1.0, 4.0)","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Later you might want to expand the tuples into separate columns storing the computed minima and maxima. This can be achieved by passing multiple columns for the output. Here is an example of how this can be done by writing the column names by-hand for a single input column:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, \"A_extrema\" => identity => [\"A_min\", \"A_max\"])\n1×2 DataFrame\n Row │ A_min  A_max\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You can extend it to handling all columns in df2 using broadcasting:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, All() .=> identity .=> [[\"A_min\", \"A_max\"], [\"B_min\", \"B_max\"]])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"This approach works, but can be improved. 
Instead of writing all the column names manually we can instead use a function as a way to specify target column names based on source column names:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, All() .=> identity .=> c -> first(c) .* [\"_min\", \"_max\"])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in this example we needed to pass identity explicitly since with All() => (c -> first(c) .* [\"_min\", \"_max\"]) the right-hand side part would be treated as a transformation and not as a rule for target column names generation.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You might want to perform the transformation of the source data frame into the result we have just shown in one step. This can be achieved with the following expression:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df, All() .=> Ref∘extrema .=> c -> c .* [\"_min\", \"_max\"])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in this case we needed to add a Ref call in the Ref∘extrema operation specification. 
Without Ref, combine iterates the contents of the value returned by the operation specification function, which in our case is a tuple of numbers, and tries to expand it assuming that each produced value represents one row, so one gets an error:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df, All() .=> extrema .=> [c -> c .* [\"_min\", \"_max\"]])\nERROR: ArgumentError: 'Tuple{Int64, Int64}' iterates 'Int64' values,\nwhich doesn't satisfy the Tables.jl `AbstractRow` interface","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that we used Ref as it is a container that is typically used in DataFrames.jl when one wants to store one row, however, in general it could be another iterator (e.g. a tuple).","category":"page"},{"location":"man/working_with_dataframes/#Handling-of-Columns-Stored-in-a-DataFrame","page":"Working with DataFrames","title":"Handling of Columns Stored in a DataFrame","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Functions that transform a DataFrame to produce a new DataFrame always perform a copy of the columns by default, for example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=4.0:-1.0:1.0)\n4×2 DataFrame\n Row │ A      B\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0\n   3 │     3      2.0\n   4 │     4      1.0\n\njulia> df2 = copy(df);\n\njulia> df2.A === df.A\nfalse","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"On the other hand, in-place functions, whose names end with !, may mutate 
the column vectors of the DataFrame they take as an argument. For example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> x = [3, 1, 2];\n\njulia> df = DataFrame(x=x)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     1\n   3 │     2\n\njulia> sort!(df)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> x\n3-element Vector{Int64}:\n 3\n 1\n 2\n\njulia> df.x[1] = 100\n100\n\njulia> df\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │   100\n   2 │     2\n   3 │     3\n\njulia> x\n3-element Vector{Int64}:\n 3\n 1\n 2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in the above example the original x vector is not mutated in the process, as the DataFrame(x=x) constructor makes a copy by default.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In-place functions are safe to call, except when a view of the DataFrame (created via view, @view or groupby) or a DataFrame created with copycols=false is in use.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is possible to have direct access to a column col of a DataFrame df using the syntaxes df.col, df[!, :col], via the eachcol function, by accessing a parent of a view of a column of a DataFrame, or simply by storing the reference to the column vector before the DataFrame was created with copycols=false.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> x = [3, 1, 2];\n\njulia> df = DataFrame(x=x)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n  
 1 │     3\n   2 │     1\n   3 │     2\n\njulia> df.x == x\ntrue\n\njulia> df[!, 1] !== x\ntrue\n\njulia> eachcol(df)[1] === df.x\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that a column obtained from a DataFrame using one of these methods should not be mutated without caution.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The exact rules of handling columns of a DataFrame are explained in The design of handling of columns of a DataFrame section of the manual.","category":"page"},{"location":"man/working_with_dataframes/#Replacing-Data","page":"Working with DataFrames","title":"Replacing Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Several approaches can be used to replace some values with others in a data frame. Some apply the replacement to all values in a data frame, and others to individual columns or subset of columns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that in-place replacement requires that the replacement value can be converted to the column's element type. In particular, this implies that replacing a value with missing requires a call to allowmissing! 
if the column did not allow for missing values.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Replacement operations affecting a single column can be performed using replace!:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using DataFrames\n\njulia> df = DataFrame(a=[\"a\", \"None\", \"b\", \"None\"], b=1:4,\n                      c=[\"None\", \"j\", \"k\", \"h\"], d=[\"x\", \"y\", \"None\", \"z\"])\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ None        2  j       y\n   3 │ b           3  k       None\n   4 │ None        4  h       z\n\njulia> replace!(df.a, \"None\" => \"c\")\n4-element Vector{String}:\n \"a\"\n \"c\"\n \"b\"\n \"c\"\n\njulia> df\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ c           2  j       y\n   3 │ b           3  k       None\n   4 │ c           4  h       z","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"This is equivalent to df.a = replace(df.a, \"None\" => \"c\"), but operates in-place, without allocating a new column vector.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Replacement operations on multiple columns or on the whole data frame can be performed in-place using the broadcasting syntax:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"# replacement on a subset of columns [:c, :d]\njulia> df[:, [:c, :d]] .= ifelse.(df[!, [:c, :d]] .== 
\"None\", \"c\", df[!, [:c, :d]])\n4×2 SubDataFrame\n Row │ c       d\n     │ String  String\n─────┼────────────────\n   1 │ c       x\n   2 │ j       y\n   3 │ k       c\n   4 │ h       z\n\njulia> df\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  c       x\n   2 │ c           2  j       y\n   3 │ b           3  k       c\n   4 │ c           4  h       z\n\njulia> df .= ifelse.(df .== \"c\", \"None\", df) # replacement on entire data frame\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ None        2  j       y\n   3 │ b           3  k       None\n   4 │ None        4  h       z","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that in the above examples, changing .= to just = will allocate new column vectors instead of applying the operation in-place.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"When replacing values with missing, if the columns do not already allow for missing values, one has to either avoid in-place operation and use = instead of .=, or call allowmissing! beforehand:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = ifelse.(df .== \"None\", missing, df) # do not operate in-place (`df = ` would also work)\n4×4 DataFrame\n Row │ a        b      c        d\n     │ String?  Int64  String?  
String?\n─────┼──────────────────────────────────\n   1 │ a            1  missing  x\n   2 │ missing      2  j        y\n   3 │ b            3  k        missing\n   4 │ missing      4  h        z\n\njulia> allowmissing!(df) # operate in-place after allowing for missing\n4×4 DataFrame\n Row │ a        b       c        d\n     │ String?  Int64?  String?  String?\n─────┼───────────────────────────────────\n   1 │ a             1  None     x\n   2 │ None          2  j        y\n   3 │ b             3  k        None\n   4 │ None          4  h        z\n\njulia> df .= ifelse.(df .== \"None\", missing, df)\n4×4 DataFrame\n Row │ a        b       c        d\n     │ String?  Int64?  String?  String?\n─────┼───────────────────────────────────\n   1 │ a             1  missing  x\n   2 │ missing       2  j        y\n   3 │ b             3  k        missing\n   4 │ missing       4  h        z","category":"page"},{"location":"man/comparisons/#Comparisons","page":"Comparison with Python/R/Stata","title":"Comparisons","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"This section compares DataFrames.jl with other data manipulation frameworks in Python, R, and Stata.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"A sample data set can be created using the following code:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"using DataFrames\nusing Statistics\n\ndf = DataFrame(grp=repeat(1:2, 3), x=6:-1:1, y=4:9, z=[3:7; missing], id='a':'f')\ndf2 = DataFrame(grp=[1, 3], w=[10, 11])","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"note: Note\nSome of the operations mutate the tables so every operation assumes that it 
is done on the original data frame.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that in the comparisons presented below predicates like x -> x >= 1 can be more compactly written as >=(1). The latter form has an additional benefit that it is compiled only once per Julia session (as opposed to x -> x >= 1 which defines a new anonymous function every time it is introduced).","category":"page"},{"location":"man/comparisons/#Comparison-with-the-Python-package-pandas","page":"Comparison with Python/R/Stata","title":"Comparison with the Python package pandas","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the Python package pandas (version 1.1.0):","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"import pandas as pd\nimport numpy as np\n\ndf = pd.DataFrame({'grp': [1, 2, 1, 2, 1, 2],\n                   'x': range(6, 0, -1),\n                   'y': range(4, 10),\n                   'z': [3, 4, 5, 6, 7, None]},\n                   index = list('abcdef'))\ndf2 = pd.DataFrame({'grp': [1, 3], 'w': [10, 11]})","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Because pandas supports multi-index, this example data frame is set up with a to f as row indices rather than a separate id column.","category":"page"},{"location":"man/comparisons/#Accessing-data","page":"Comparison with Python/R/Stata","title":"Accessing data","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nCell indexing by location 
df.iloc[1, 1] df[2, 2]\nRow slicing by location df.iloc[1:3] df[2:3, :]\nColumn slicing by location df.iloc[:, 1:] df[:, 2:end]\nRow indexing by label df.loc['c'] df[findfirst(==('c'), df.id), :]\nColumn indexing by label df.loc[:, 'x'] df[:, :x]\nColumn slicing by label df.loc[:, ['x', 'z']] df[:, [:x, :z]]\n df.loc[:, 'x':'z'] df[:, Between(:x, :z)]\nMixed indexing df.loc['c'][1] df[findfirst(==('c'), df.id), 2]","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that Julia uses 1-based indexing, inclusive on both ends. A special keyword end can be used to indicate the last index. Likewise, the begin keyword can be used to indicate the first index.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In addition, when indexing a data frame with the findfirst function, a single DataFrameRow object is returned. In the case that id is not unique, you can use the findall function or boolean indexing instead. It would then return a DataFrame object containing all matched rows. The following two lines of code are functionally equivalent:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"df[findall(==('c'), df.id), :]\ndf[df.id .== 'c', :]","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl's indexing always produces a consistent and predictable return type. 
By contrast, pandas' loc function returns a Series object when there is exactly one 'c' value in the index, and it returns a DataFrame object when there are multiple rows having the index value of 'c'.","category":"page"},{"location":"man/comparisons/#Common-operations","page":"Comparison with Python/R/Stata","title":"Common operations","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nReduce multiple values df['z'].mean(skipna = False) mean(df.z)\n df['z'].mean() mean(skipmissing(df.z))\n df[['z']].agg(['mean']) combine(df, :z => mean ∘ skipmissing)\nAdd new columns df.assign(z1 = df['z'] + 1) transform(df, :z => (v -> v .+ 1) => :z1)\nRename columns df.rename(columns = {'x': 'x_new'}) rename(df, :x => :x_new)\nPick & transform columns df.assign(x_mean = df['x'].mean())[['x_mean', 'y']] select(df, :x => mean, :y)\nSort rows df.sort_values(by = 'x') sort(df, :x)\n df.sort_values(by = ['grp', 'x'], ascending = [True, False]) sort(df, [:grp, order(:x, rev = true)])\nDrop missing rows df.dropna() dropmissing(df)\nSelect unique rows df.drop_duplicates() unique(df)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas skips NaN values in its analytic functions by default. By contrast, Julia functions do not skip NaN's. If necessary, you can filter out the NaN's before processing, for example, mean(Iterators.filter(!isnan, x)).","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Pandas uses NaN for representing both missing data and the floating point \"not a number\" value. Julia defines a special value missing for representing missing data. DataFrames.jl respects general rules in Julia in propagating missing values by default. 
If necessary, the skipmissing function can be used to remove missing data. See the Missing Data section for more information.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In addition, pandas keeps the original column name after applying a function. DataFrames.jl appends a suffix to the column name by default. To keep it simple, the examples above do not synchronize the column names between pandas and DataFrames.jl (you can pass renamecols=false keyword argument to select, transform and combine functions to retain old column names).","category":"page"},{"location":"man/comparisons/#Mutating-operations","page":"Comparison with Python/R/Stata","title":"Mutating operations","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nAdd new columns df['z1'] = df['z'] + 1 df.z1 = df.z .+ 1\n  transform!(df, :z => (x -> x .+ 1) => :z1)\n df.insert(1, 'const', 10) insertcols!(df, 2, :const => 10)\nRename columns df.rename(columns = {'x': 'x_new'}, inplace = True) rename!(df, :x => :x_new)\nSort rows df.sort_values(by = 'x', inplace = True) sort!(df, :x)\nDrop missing rows df.dropna(inplace = True) dropmissing!(df)\nSelect unique rows df.drop_duplicates(inplace = True) unique!(df)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Generally speaking, DataFrames.jl follows the Julia convention of using ! 
in the function name to indicate mutation behavior.","category":"page"},{"location":"man/comparisons/#Grouping-data-and-aggregation","page":"Comparison with Python/R/Stata","title":"Grouping data and aggregation","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl provides a groupby function to apply operations over each group independently. The result of groupby is a GroupedDataFrame object which may be processed using the combine, transform, or select functions. The following table illustrates some common grouping and aggregation usages.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nAggregate by groups df.groupby('grp')['x'].mean() combine(groupby(df, :grp), :x => mean)\nRename column after aggregation df.groupby('grp')['x'].mean().rename(\"my_mean\") combine(groupby(df, :grp), :x => mean => :my_mean)\nAdd aggregated data as column df.join(df.groupby('grp')['x'].mean(), on='grp', rsuffix='_mean') transform(groupby(df, :grp), :x => mean)\n...and select output columns df.join(df.groupby('grp')['x'].mean(), on='grp', rsuffix='_mean')[['grp', 'x_mean']] select(groupby(df, :grp), :id, :x => mean)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas returns a Series object for 1-dimensional result unless reset_index is called afterwards. The corresponding DataFrames.jl examples return an equivalent DataFrame object. 
Consider the first example:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":">>> df.groupby('grp')['x'].mean()\ngrp\n1    4\n2    3\nName: x, dtype: int64","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"For DataFrames.jl, it looks like this:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"julia> combine(groupby(df, :grp), :x => mean)\n2×2 DataFrame\n Row │ grp    x_mean\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In DataFrames.jl, the GroupedDataFrame object supports an efficient key lookup. Hence, it performs well when you need to perform lookups repeatedly.","category":"page"},{"location":"man/comparisons/#More-advanced-commands","page":"Comparison with Python/R/Stata","title":"More advanced commands","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"This section includes more complex examples.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nComplex Function df[['z']].agg(lambda v: np.mean(np.cos(v))) combine(df, :z => v -> mean(cos, skipmissing(v)))\nAggregate multiple columns df.agg({'x': max, 'y': min}) combine(df, :x => maximum, :y => minimum)\n df[['x', 'y']].mean() combine(df, [:x, :y] .=> mean)\n df.filter(regex=(\"^x\")).mean() combine(df, names(df, r\"^x\") .=> mean)\nApply function over multiple variables df.assign(x_y_cor = np.corrcoef(df.x, df.y)[0, 1]) transform(df, [:x, 
:y] => cor)\nRow-wise operation df.assign(x_y_min = df.apply(lambda v: min(v.x, v.y), axis=1)) transform(df, [:x, :y] => ByRow(min))\n df.assign(x_y_argmax = df.apply(lambda v: df.columns[v.argmax()], axis=1)) transform(df, AsTable([:x, :y]) => ByRow(argmax))\nDataFrame as input df.groupby('grp').head(2) combine(d -> first(d, 2), groupby(df, :grp))\nDataFrame as output df[['x']].agg(lambda x: [min(x), max(x)]) combine(df, :x => (x -> (x=[minimum(x), maximum(x)],)) => AsTable)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas preserves the same row order after groupby whereas DataFrames.jl shows them grouped by the provided keys after the combine operation, but select and transform retain an original row ordering.","category":"page"},{"location":"man/comparisons/#Joining-data-frames","page":"Comparison with Python/R/Stata","title":"Joining data frames","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl supports join operations similar to a relational database.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nInner join pd.merge(df, df2, how = 'inner', on = 'grp') innerjoin(df, df2, on = :grp)\nOuter join pd.merge(df, df2, how = 'outer', on = 'grp') outerjoin(df, df2, on = :grp)\nLeft join pd.merge(df, df2, how = 'left', on = 'grp') leftjoin(df, df2, on = :grp)\nRight join pd.merge(df, df2, how = 'right', on = 'grp') rightjoin(df, df2, on = :grp)\nSemi join (filtering) df[df.grp.isin(df2.grp)] semijoin(df, df2, on = :grp)\nAnti join (filtering) df[~df.grp.isin(df2.grp)] antijoin(df, df2, on = :grp)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with 
Python/R/Stata","text":"For multi-column joins, both pandas and DataFrames.jl accept an array for the on keyword argument.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In the cases of semi joins and anti joins, the isin function in pandas can still be used as long as the join keys are combined in a tuple. In DataFrames.jl, it just works normally with an array of join keys specified in the on keyword argument.","category":"page"},{"location":"man/comparisons/#Comparison-with-the-R-package-dplyr","page":"Comparison with Python/R/Stata","title":"Comparison with the R package dplyr","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the R package dplyr (version 1):","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"df <- tibble(grp = rep(1:2, 3), x = 6:1, y = 4:9,\n             z = c(3:7, NA), id = letters[1:6])","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nReduce multiple values summarize(df, mean(x)) combine(df, :x => mean)\nAdd new columns mutate(df, x_mean = mean(x)) transform(df, :x => mean => :x_mean)\nRename columns rename(df, x_new = x) rename(df, :x => :x_new)\nPick columns select(df, x, y) select(df, :x, :y)\nPick & transform columns transmute(df, mean(x), y) select(df, :x => mean, :y)\nPick rows filter(df, x >= 1) subset(df, :x => ByRow(x -> x >= 1))\nSort rows arrange(df, x) sort(df, :x)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"As in dplyr, some of these functions can be applied to grouped 
data frames, in which case they operate by group:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nReduce multiple values summarize(group_by(df, grp), mean(x)) combine(groupby(df, :grp), :x => mean)\nAdd new columns mutate(group_by(df, grp), mean(x)) transform(groupby(df, :grp), :x => mean)\nPick & transform columns transmute(group_by(df, grp), mean(x), y) select(groupby(df, :grp), :x => mean, :y)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The table below compares more advanced commands:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nComplex Function summarize(df, mean(x, na.rm = T)) combine(df, :x => x -> mean(skipmissing(x)))\nTransform several columns summarize(df, max(x), min(y)) combine(df, :x => maximum,  :y => minimum)\n summarize(df, across(c(x, y), mean)) combine(df, [:x, :y] .=> mean)\n summarize(df, across(starts_with(\"x\"), mean)) combine(df, names(df, r\"^x\") .=> mean)\n summarize(df, across(c(x, y), list(max, min))) combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function mutate(df, cor(x, y)) transform(df, [:x, :y] => cor)\nRow-wise mutate(rowwise(df), min(x, y)) transform(df, [:x, :y] => ByRow(min))\n mutate(rowwise(df), which.max(c_across(matches(\"^x\")))) transform(df, AsTable(r\"^x\") => ByRow(argmax))\nDataFrame as input summarize(df, head(across(), 2)) combine(d -> first(d, 2), df)\nDataFrame as output summarize(df, tibble(value = c(min(x), max(x)))) combine(df, :x => (x -> (value = [minimum(x), maximum(x)],)) => AsTable)","category":"page"},{"location":"man/comparisons/#Comparison-with-the-R-package-data.table","page":"Comparison with Python/R/Stata","title":"Comparison with the R package 
data.table","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the R package data.table (version 1.14.1).","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"library(data.table)\ndf  <- data.table(grp = rep(1:2, 3), x = 6:1, y = 4:9,\n                  z = c(3:7, NA), id = letters[1:6])\ndf2 <- data.table(grp=c(1,3), w = c(10,11))","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nReduce multiple values df[, .(mean(x))] combine(df, :x => mean)\nAdd new columns df[, x_mean:=mean(x) ] transform!(df, :x => mean => :x_mean)\nRename column (in place) setnames(df, \"x\", \"x_new\") rename!(df, :x => :x_new)\nRename multiple columns (in place) setnames(df, c(\"x\", \"y\"), c(\"x_new\", \"y_new\")) rename!(df, [:x, :y] .=> [:x_new, :y_new])\nPick columns as dataframe df[, .(x, y)] select(df, :x, :y)\nPick column as a vector df[, x] df[!, :x]\nRemove columns df[, -\"x\"] select(df, Not(:x))\nRemove columns (in place) df[, x:=NULL] select!(df, Not(:x))\nRemove columns (in place) df[, c(\"x\", \"y\"):=NULL] select!(df, Not([:x, :y]))\nPick & transform columns df[, .(mean(x), y)] select(df, :x => mean, :y)\nPick rows df[ x >= 1 ] filter(:x => >=(1), df)\nSort rows (in place) setorder(df, x) sort!(df, :x)\nSort rows df[ order(x) ] sort(df, :x)","category":"page"},{"location":"man/comparisons/#Grouping-data-and-aggregation-2","page":"Comparison with Python/R/Stata","title":"Grouping data and aggregation","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nReduce 
multiple values df[, mean(x), by=id ] combine(groupby(df, :id), :x => mean)\nAdd new columns (in place) df[, x_mean:=mean(x), by=id] transform!(groupby(df, :id), :x => mean)\nPick & transform columns df[, .(x_mean = mean(x), y), by=id] select(groupby(df, :id), :x => mean, :y)","category":"page"},{"location":"man/comparisons/#More-advanced-commands-2","page":"Comparison with Python/R/Stata","title":"More advanced commands","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nComplex Function df[, .(mean(x, na.rm=TRUE)) ] combine(df, :x => x -> mean(skipmissing(x)))\nTransform certain rows (in place) df[x<=0, x:=0] df.x[df.x .<= 0] .= 0\nTransform several columns df[, .(max(x), min(y)) ] combine(df, :x => maximum, :y => minimum)\n df[, lapply(.SD, mean), .SDcols = c(\"x\", \"y\") ] combine(df, [:x, :y] .=> mean)\n df[, lapply(.SD, mean), .SDcols = patterns(\"*x\") ] combine(df, names(df, r\"^x\") .=> mean)\n dcast(df, . 
~ ., list(max,min), value.var = c(\"x\",\"y\")) combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function df[, .(cor(x,y)) ] transform(df, [:x, :y] => cor)\nRow-wise df[, min_xy := min(x, y), by = 1:nrow(df)] transform!(df, [:x, :y] => ByRow(min))\n df[, argmax_xy := which.max(.SD) , .SDcols = patterns(\"*x\"), by = 1:nrow(df) ] transform!(df, AsTable(r\"^x\") => ByRow(argmax))\nDataFrame as output df[, .SD[1], by=grp] combine(groupby(df, :grp), first)\nDataFrame as output df[, .SD[which.max(x)], by=grp] combine(groupby(df, :grp), sdf -> sdf[argmax(sdf.x), :])\nReshape longer longdf = melt(df, measure.vars=c(\"x\",\"y\"), id.vars=\"id\") longdf = stack(df, [:x, :y], :id)\nReshape wider dcast(longdf, id ~ variable, value.var=\"value\") unstack(longdf, :id, :variable, :value)","category":"page"},{"location":"man/comparisons/#Joining-data-frames-2","page":"Comparison with Python/R/Stata","title":"Joining data frames","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nInner join merge(df, df2, on = \"grp\") innerjoin(df, df2, on = :grp)\nOuter join merge(df, df2, all = TRUE, on = \"grp\") outerjoin(df, df2, on = :grp)\nLeft join merge(df, df2, all.x = TRUE, on = \"grp\") leftjoin(df, df2, on = :grp)\nRight join merge(df, df2, all.y = TRUE, on = \"grp\") rightjoin(df, df2, on = :grp)\nAnti join (filtering) df[!df2, on = \"grp\" ] antijoin(df, df2, on = :grp)\nSemi join (filtering) merge(df1, df2[, .(grp)]) semijoin(df, df2, on = :grp)","category":"page"},{"location":"man/comparisons/#Comparison-with-Stata-(version-8-and-above)","page":"Comparison with Python/R/Stata","title":"Comparison with Stata (version 8 and above)","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of 
DataFrames.jl with Stata:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nReduce multiple values collapse (mean) x combine(df, :x => mean)\nAdd new columns egen x_mean = mean(x) transform!(df, :x => mean => :x_mean)\nRename columns rename x x_new rename!(df, :x => :x_new)\nPick columns keep x y select!(df, :x, :y)\nPick rows keep if x >= 1 subset!(df, :x => ByRow(x -> x >= 1))\nSort rows sort x sort!(df, :x)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that the suffix ! (i.e. transform!, select!, etc.) ensures that the operation transforms the data frame in place, as in Stata.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Some of these functions can be applied to grouped data frames, in which case they operate by group:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nAdd new columns egen x_mean = mean(x), by(grp) transform!(groupby(df, :grp), :x => mean)\nReduce multiple values collapse (mean) x, by(grp) combine(groupby(df, :grp), :x => mean)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The table below compares more advanced commands:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nTransform certain rows replace x = 0 if x <= 0 transform(df, :x => (x -> ifelse.(x .<= 0, 0, x)) => :x)\nTransform several columns collapse (max) x (min) y combine(df, :x => maximum,  :y => minimum)\n collapse (mean) x y combine(df, [:x, :y] .=> 
mean)\n collapse (mean) x* combine(df, names(df, r\"^x\") .=> mean)\n collapse (max) x y (min) x y combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function egen z = corr(x y) transform!(df, [:x, :y] => cor => :z)\nRow-wise egen z = rowmin(x y) transform!(df, [:x, :y] => ByRow(min) => :z)","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/types/#Types","page":"Types","title":"Types","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"Pages = [\"types.md\"]","category":"page"},{"location":"lib/types/#Type-hierarchy-design","page":"Types","title":"Type hierarchy design","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"AbstractDataFrame is an abstract type that provides an interface for data frame types. It is not intended as a fully generic interface for working with tabular data, which is the role of interfaces defined by Tables.jl instead.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"DataFrame is the most fundamental subtype of AbstractDataFrame, which stores a set of columns as AbstractVector objects. Indexing of all stored columns must be 1-based. Also, all functions exposed by DataFrames.jl API make sure to collect passed AbstractRange source columns before storing them in a DataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"SubDataFrame is an AbstractDataFrame subtype representing a view into a DataFrame. It stores only a reference to the parent DataFrame and information about which rows and columns from the parent are selected (both as integer indices referring to the parent). 
Typically it is created using the view function or is returned by indexing into a GroupedDataFrame object.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"GroupedDataFrame is a type that stores the result of a  grouping operation performed on an AbstractDataFrame. It is intended to be created as a result of a call to the groupby function.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"DataFrameRow is a view into a single row of an AbstractDataFrame. It stores only a reference to a parent DataFrame and information about which row and columns from the parent are selected (both as integer indices referring to the parent). The DataFrameRow type supports iteration over columns of the row and is similar in functionality to the NamedTuple type, but allows for modification of data stored in the parent DataFrame and reflects changes done to the parent after the creation of the view. Typically objects of the DataFrameRow type are encountered when returned by the eachrow function, or when accessing a single row of a DataFrame or SubDataFrame via getindex or view.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The eachrow function returns a value of the DataFrameRows type, which serves as an iterator over rows of an AbstractDataFrame, returning DataFrameRow objects. The DataFrameRows is a subtype of AbstractVector and supports its interface with the exception that it is read-only.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Similarly, the eachcol function returns a value of the DataFrameColumns type, which is not an AbstractVector, but supports most of its API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Note that DataFrameRows and DataFrameColumns are not exported and should not be constructed directly, but using the eachrow and eachcol functions.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The RepeatedVector and StackedVector types are subtypes of AbstractVector and support its interface with the exception that they are read only. Note that they are not exported and should not be constructed directly, but they are columns of a DataFrame returned by stack with view=true.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The ByRow type is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The AsTable type is a special type used for selection operations to signal that the columns selected by a wrapped selector should be passed as a NamedTuple to the function or to signal that it is requested to expand the return value of a transformation into multiple columns.","category":"page"},{"location":"lib/types/#man-columnhandling","page":"Types","title":"The design of handling of columns of a DataFrame","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"When a DataFrame is constructed columns are copied by default. You can disable this behavior by setting copycols keyword argument to false. 
The exception is if an AbstractRange is passed as a column, then it is always collected to a Vector.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Also functions that transform a DataFrame to produce a new DataFrame perform a copy of the columns, unless they are passed copycols=false (available only for functions that could perform a transformation without copying the columns). Examples of such functions are vcat, hcat, filter, dropmissing, getindex, copy or the DataFrame constructor mentioned above.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The generic single-argument constructor DataFrame(table) has copycols=nothing by default, meaning that columns are copied unless table signals that a copy of columns doesn't need to be made (this is done by wrapping the source table in Tables.CopiedColumns). CSV.jl does this when CSV.read(file, DataFrame) is called, since columns are built only for the purpose of use in a DataFrame constructor. Another example is Arrow.Table, where arrow data is inherently immutable so columns can't be accidentally mutated anyway. To be able to mutate arrow data, columns must be materialized, which can be accomplished via DataFrame(arrow_table, copycols=true).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"On the contrary, functions that create a view of a DataFrame do not by definition make copies of the columns, and therefore require particular caution. This includes view, which returns a SubDataFrame or a DataFrameRow, and groupby, which returns a GroupedDataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"A partial exception to this rule is the stack function with view=true which creates a DataFrame that contains views of the columns from the source DataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"In-place functions whose names end with ! 
(like sort! or dropmissing!, setindex!, push!, append!) may mutate the column vectors of the DataFrame they take as an argument. These functions are safe to call due to the rules described above, except when a view of the DataFrame is in use (via a SubDataFrame, a DataFrameRow or a GroupedDataFrame). In the latter case, calling such a function on the parent might corrupt the view, which may trigger errors, silently return invalid data or even cause Julia to crash. The same caution applies when DataFrame was created using columns of another DataFrame without copying (for instance when copycols=false in functions such as DataFrame or hcat).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"It is possible to have direct access to a column col of a DataFrame df (e.g. this can be useful in performance critical code to avoid copying), using one of the following methods:","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"via the getproperty function using the syntax df.col;\nvia the getindex function using the syntax df[!, :col] (note this is in contrast to df[:, :col] which copies);\nby creating a DataFrameColumns object using the eachcol function;\nby calling the parent function on a view of a column of the DataFrame, e.g. parent(@view df[:, :col]);\nby storing the reference to the column before creating a DataFrame with copycols=false;","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"A column obtained from a DataFrame using one of the above methods should not be mutated without caution because:","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"resizing a column vector will corrupt its parent DataFrame and any associated views as methods only check the length of the column when it is added to the DataFrame and later assume that all columns have the same length;\nreordering values in a column vector (e.g. using sort!) 
will break the consistency of rows with other columns, which will also affect views (if any);\nchanging values contained in a column vector is acceptable as long as it is not used as a grouping column in a GroupedDataFrame created based on the DataFrame.","category":"page"},{"location":"lib/types/#Types-specification","page":"Types","title":"Types specification","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"AbstractDataFrame\nAsTable\nDataFrame\nDataFrameRow\nGroupedDataFrame\nGroupKey\nGroupKeys\nSubDataFrame\nDataFrameRows\nDataFrameColumns\nRepeatedVector\nStackedVector","category":"page"},{"location":"lib/types/#DataFrames.AbstractDataFrame","page":"Types","title":"DataFrames.AbstractDataFrame","text":"AbstractDataFrame\n\nAn abstract type for which all concrete types expose an interface for working with tabular data.\n\nAn AbstractDataFrame is a two-dimensional table with Symbols or strings for column names.\n\nDataFrames.jl defines two types that are subtypes of AbstractDataFrame: DataFrame and SubDataFrame.\n\nIndexing and broadcasting\n\nAbstractDataFrame can be indexed by passing two indices specifying row and column selectors. The allowed indices are a superset of indices that can be used for standard arrays. You can also access a single column of an AbstractDataFrame using getproperty and setproperty! functions. Columns can be selected using integers, Symbols, or strings. 
In broadcasting AbstractDataFrame behavior is similar to a Matrix.\n\nA detailed description of getindex, setindex!, getproperty, setproperty!, broadcasting and broadcasting assignment for data frames is given in the \"Indexing\" section of the manual.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.AsTable","page":"Types","title":"DataFrames.AsTable","text":"AsTable(cols)\n\nA type having a special meaning in source => transformation => destination selection operations supported by combine, select, select!, transform, transform!, subset, and subset!.\n\nIf AsTable(cols) is used in source position it signals that the columns selected by the wrapped selector cols should be passed as a NamedTuple to the function.\n\nIf AsTable is used in destination position it means that the result of the transformation operation is a vector of containers (or a single container if ByRow(transformation) is used) that should be expanded  into multiple columns using keys to get column names.\n\nExamples\n\njulia> df1 = DataFrame(a=1:3, b=11:13)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n\njulia> df2 = select(df1, AsTable([:a, :b]) => ByRow(identity))\n3×1 DataFrame\n Row │ a_b_identity\n     │ NamedTuple…\n─────┼─────────────────\n   1 │ (a = 1, b = 11)\n   2 │ (a = 2, b = 12)\n   3 │ (a = 3, b = 13)\n\njulia> select(df2, :a_b_identity => AsTable)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n\njulia> select(df1, AsTable([:a, :b]) => ByRow(nt -> map(x -> x^2, nt)) => AsTable)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrame","page":"Types","title":"DataFrames.DataFrame","text":"DataFrame <: AbstractDataFrame\n\nAn 
AbstractDataFrame that stores a set of named columns.\n\nThe columns are normally AbstractVectors stored in memory, particularly a Vector, PooledVector or CategoricalVector.\n\nConstructors\n\nDataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true)\nDataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, copycols::Bool=true)\nDataFrame(ds::AbstractDict; copycols::Bool=true)\nDataFrame(; kwargs..., copycols::Bool=true)\n\nDataFrame(table; copycols::Union{Bool, Nothing}=nothing)\nDataFrame(table, names::AbstractVector;\n          makeunique::Bool=false, copycols::Union{Bool, Nothing}=nothing)\nDataFrame(columns::AbstractVecOrMat, names::AbstractVector;\n          makeunique::Bool=false, copycols::Bool=true)\n\nDataFrame(::DataFrameRow; copycols::Bool=true)\nDataFrame(::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true)\n\nKeyword arguments\n\ncopycols : whether vectors passed as columns should be copied; by default set to true and the vectors are copied; if set to false then the constructor will still copy the passed columns if it is not possible to construct a DataFrame without materializing new columns. Note the copycols=nothing default in the Tables.jl compatible constructor; it is provided as certain input table types may have already made a copy of columns or the columns may otherwise be immutable, in which case columns are not copied by default. To force a copy in such cases, or to get mutable columns from an immutable input table (like Arrow.Table), pass copycols=true explicitly.\nmakeunique : if false (the default), an error will be raised if duplicate column names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate)\n\n(note that not all constructors support these keyword arguments)\n\nDetails on behavior of different constructors\n\nIt is allowed to pass a vector of Pairs, a list of Pairs as positional arguments, or a list of keyword arguments. In this case each pair is considered to represent a column name to column value mapping and column name must be a Symbol or string. 
Alternatively a dictionary can be passed to the constructor in which case its entries are considered to define the column name and column value pairs. If the dictionary is a Dict then column names will be sorted in the returned DataFrame.\n\nIn all the constructors described above column value can be a vector which is consumed as is or an object of any other type (except AbstractArray). In the latter case the passed value is automatically repeated to fill a new vector of the appropriate length. As a particular rule values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way.\n\nIt is also allowed to pass a vector of vectors or a matrix as the first argument. In this case the second argument must be a vector of Symbols or strings specifying column names, or the symbol :auto to generate column names x1, x2, ... automatically. Note that in this case if the first argument is a matrix and copycols=false the columns of the created DataFrame will be views of columns of the source matrix.\n\nIf a single positional argument is passed to a DataFrame constructor then it is assumed to be of a type that implements the Tables.jl interface using which the returned DataFrame is materialized.\n\nIf two positional arguments are passed, where the second argument is an AbstractVector, then the first argument is taken to be a table as described in the previous paragraph, and column names of the resulting data frame are taken from the vector passed as the second positional argument.\n\nFinally it is allowed to construct a DataFrame from a DataFrameRow or a GroupedDataFrame. In the latter case the keepkeys keyword argument specifies whether the resulting DataFrame should contain the grouping columns of the passed GroupedDataFrame and the order of rows in the result follows the order of groups in the GroupedDataFrame passed.\n\nNotes\n\nThe DataFrame constructor by default copies all column vectors passed to it. 
Pass the copycols=false keyword argument (where supported) to reuse vectors without copying them.\n\nBy default an error will be raised if duplicates in column names are found. Pass makeunique=true keyword argument (where supported) to accept duplicate names, in which case they will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf an AbstractRange is passed to a DataFrame constructor as a column it is always collected to a Vector (even if copycols=false). As a general rule AbstractRange values are always materialized to a Vector by all functions in DataFrames.jl before being stored in a DataFrame.\n\nDataFrame can store only columns that use 1-based indexing. Attempting to store a vector using non-standard indexing raises an error.\n\nThe DataFrame type is designed to allow column types to vary and to be dynamically changed also after it is constructed. Therefore DataFrames are not type stable. For performance-critical code that requires type-stability either use the functionality provided by select/transform/combine functions, use Tables.columntable and Tables.namedtupleiterator functions, use barrier functions, or provide type assertions to the variables that hold columns extracted from a DataFrame.\n\nMetadata: this function preserves all table and column-level metadata. 
As a special case if a GroupedDataFrame is passed then only :note-style metadata from parent of the GroupedDataFrame is preserved.\n\nExamples\n\njulia> DataFrame((a=[1, 2], b=[3, 4])) # Tables.jl table constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)]) # Tables.jl table constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(\"a\" => 1:2, \"b\" => 0) # Pair constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([:a => 1:2, :b => 0]) # vector of Pairs constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(Dict(:a => 1:2, :b => 0)) # dictionary constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(a=1:2, b=0) # keyword argument constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([[1, 2], [0, 0]], [:a, :b]) # vector of vectors constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([1 0; 2 0], :auto) # matrix constructor\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameRow","page":"Types","title":"DataFrames.DataFrameRow","text":"DataFrameRow{<:AbstractDataFrame, <:AbstractIndex}\n\nA view of one row of an AbstractDataFrame.\n\nA DataFrameRow is returned by getindex or view functions when one row and a selection of columns are requested, or when 
iterating the result of the call to the eachrow function.\n\nThe DataFrameRow constructor can also be called directly:\n\nDataFrameRow(parent::AbstractDataFrame, row::Integer, cols=:)\n\nA DataFrameRow supports the iteration interface and can therefore be passed to functions that expect a collection as an argument. Its element type is always Any.\n\nIndexing is one-dimensional like specifying a column of a DataFrame. You can also access the data in a DataFrameRow using the getproperty and setproperty! functions and convert it to a Tuple, NamedTuple, or Vector using the corresponding functions.\n\nIf the selection of columns in a parent data frame is passed as : (a colon) then DataFrameRow will always have all columns from the parent, even if they are added or removed after its creation.\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2], outer=[2]),\n                      b=repeat([\"a\", \"b\"], inner=[2]),\n                      c=1:4)\n4×3 DataFrame\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n   2 │     2  a           2\n   3 │     1  b           3\n   4 │     2  b           4\n\njulia> df[1, :]\nDataFrameRow\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n\njulia> @view df[end, [:a]]\nDataFrameRow\n Row │ a\n     │ Int64\n─────┼───────\n   4 │     2\n\njulia> eachrow(df)[1]\nDataFrameRow\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n\njulia> Tuple(df[1, :])\n(1, \"a\", 1)\n\njulia> NamedTuple(df[1, :])\n(a = 1, b = \"a\", c = 1)\n\njulia> Vector(df[1, :])\n3-element Vector{Any}:\n 1\n  \"a\"\n 1\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupedDataFrame","page":"Types","title":"DataFrames.GroupedDataFrame","text":"GroupedDataFrame\n\nThe result of a groupby operation on an AbstractDataFrame; a view into the AbstractDataFrame grouped 
by rows.\n\nNot meant to be constructed directly, see groupby.\n\nOne can get the names of columns used to create GroupedDataFrame using the groupcols function. Similarly the groupindices function returns a vector of group indices for each row of the parent data frame.\n\nAfter its creation, a GroupedDataFrame reflects the grouping of rows that was valid at its creation time. Therefore grouping columns of its parent data frame must not be mutated, and rows must not be added nor removed from it. To safeguard the user against such cases, if the number of rows in the parent data frame changes then trying to use GroupedDataFrame will throw an error. However, one can add or remove columns to the parent data frame without invalidating the GroupedDataFrame provided that columns used for grouping are not changed.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupKey","page":"Types","title":"DataFrames.GroupKey","text":"GroupKey{T<:GroupedDataFrame}\n\nKey for one of the groups of a GroupedDataFrame. Contains the values of the corresponding grouping columns and behaves similarly to a NamedTuple, but using it to index its GroupedDataFrame is more efficient than using the equivalent Tuple and NamedTuple, and much more efficient than using the equivalent AbstractDict.\n\nInstances of this type are returned by keys(::GroupedDataFrame) and are not meant to be constructed directly.\n\nIndexing fields of GroupKey is allowed using an integer, a Symbol, or a string. It is also possible to access the data in a GroupKey using the getproperty function. A GroupKey can be converted to a Tuple, NamedTuple, a Vector, or a Dict. 
When converted to a Dict, the keys of the Dict are Symbols.\n\nSee keys(::GroupedDataFrame) for more information.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupKeys","page":"Types","title":"DataFrames.GroupKeys","text":"GroupKeys{T<:GroupedDataFrame} <: AbstractVector{GroupKey{T}}\n\nA vector containing all GroupKey objects for a given GroupedDataFrame.\n\nSee keys(::GroupedDataFrame) for more information.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.SubDataFrame","page":"Types","title":"DataFrames.SubDataFrame","text":"SubDataFrame{<:AbstractDataFrame, <:AbstractIndex, <:AbstractVector{Int}} <: AbstractDataFrame\n\nA view of an AbstractDataFrame. It is returned by a call to the view function on an AbstractDataFrame if collections of rows and columns are specified.\n\nA SubDataFrame is an AbstractDataFrame, so expect that most DataFrame functions should work. Such methods include describe, summary, nrow, size, by, stack, and join.\n\nIf the selection of columns in a parent data frame is passed as : (a colon) then SubDataFrame will always have all columns from the parent, even if they are added or removed after its creation.\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8)\n8×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     2      1      2\n   3 │     3      2      3\n   4 │     4      1      4\n   5 │     1      2      5\n   6 │     2      1      6\n   7 │     3      2      7\n   8 │     4      1      8\n\njulia> sdf1 = view(df, :, 2:3) # column subsetting\n8×2 SubDataFrame\n Row │ b      c\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      1\n   2 │     1      2\n   3 │     2      3\n   4 │     1      4\n   5 │     2      5\n   6 │     1      6\n   7 │     2      7\n   8 │     1      8\n\njulia> sdf2 = @view 
df[end:-1:1, [1, 3]]  # row and column subsetting\n8×2 SubDataFrame\n Row │ a      c\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      8\n   2 │     3      7\n   3 │     2      6\n   4 │     1      5\n   5 │     4      4\n   6 │     3      3\n   7 │     2      2\n   8 │     1      1\n\njulia> sdf3 = groupby(df, :a)[1]  # indexing a GroupedDataFrame returns a SubDataFrame\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameRows","page":"Types","title":"DataFrames.DataFrameRows","text":"DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}\n\nIterator over rows of an AbstractDataFrame, with each row represented as a DataFrameRow.\n\nA value of this type is returned by the eachrow function.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameColumns","page":"Types","title":"DataFrames.DataFrameColumns","text":"DataFrameColumns{<:AbstractDataFrame}\n\nA vector-like object that allows iteration over columns of an AbstractDataFrame.\n\nIndexing into DataFrameColumns objects using integer, Symbol or string returns the corresponding column (without copying). Indexing into DataFrameColumns objects using a multiple column selector returns a subsetted DataFrameColumns object with a new parent containing only the selected columns (without copying).\n\nDataFrameColumns supports most of the AbstractVector API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).\n\nIn particular findnext, findprev, findfirst, findlast, and findall functions are supported, and in findnext and findprev functions it is allowed to pass an integer, string, or Symbol as a reference index.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.RepeatedVector","page":"Types","title":"DataFrames.RepeatedVector","text":"RepeatedVector{T} <: AbstractVector{T}\n\nAn AbstractVector that is a view into another AbstractVector with repeated elements\n\nNOTE: Not exported.\n\nConstructor\n\nRepeatedVector(parent::AbstractVector, inner::Int, outer::Int)\n\nArguments\n\nparent : the AbstractVector that's repeated\ninner : the number of times each element is repeated\nouter : the number of times the whole vector is repeated after expanded by inner\n\ninner and outer have the same meaning as similarly named arguments to repeat.\n\nExamples\n\nRepeatedVector([1, 2], 3, 1)   # [1, 1, 1, 2, 2, 2]\nRepeatedVector([1, 2], 1, 3)   # [1, 2, 1, 2, 1, 2]\nRepeatedVector([1, 2], 2, 2)   # [1, 1, 2, 2, 1, 1, 2, 2]\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.StackedVector","page":"Types","title":"DataFrames.StackedVector","text":"StackedVector <: AbstractVector\n\nAn AbstractVector that is a linear, concatenated view into another set of AbstractVectors\n\nNOTE: Not exported.\n\nConstructor\n\nStackedVector(d::AbstractVector)\n\nArguments\n\nd... 
: one or more AbstractVectors\n\nExamples\n\nStackedVector(Any[[1, 2], [9, 10], [11, 12]])  # [1, 2, 9, 10, 11, 12]\n\n\n\n\n\n","category":"type"},{"location":"man/joins/#Database-Style-Joins","page":"Joins","title":"Database-Style Joins","text":"","category":"section"},{"location":"man/joins/#Introduction-to-joins","page":"Joins","title":"Introduction to joins","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"We often need to combine two or more data sets together to provide a complete picture of the topic we are studying. For example, suppose that we have the following two data sets:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> using DataFrames\n\njulia> people = DataFrame(ID=[20, 40], Name=[\"John Doe\", \"Jane Doe\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n   2 │    40  Jane Doe\n\njulia> jobs = DataFrame(ID=[20, 40], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    40  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"We might want to work with a larger data set that contains both the names and jobs for each ID. We can do this using the innerjoin function:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In relational database theory, this operation is generally referred to as a join. 
The columns used to determine which rows should be combined during a join are called keys.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"The following functions are provided to perform seven kinds of joins:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"innerjoin: the output contains rows for values of the key that exist in all passed data frames.\nleftjoin: the output contains rows for values of the key that exist in the first (left) argument, whether or not that value exists in the second (right) argument.\nrightjoin: the output contains rows for values of the key that exist in the second (right) argument, whether or not that value exists in the first (left) argument.\nouterjoin: the output contains rows for values of the key that exist in any of the passed data frames.\nsemijoin: Like an inner join, but output is restricted to columns from the first (left) argument.\nantijoin: The output contains rows for values of the key that exist in the first (left) but not the second (right) argument. 
As with semijoin, output is restricted to columns from the first (left) argument.\ncrossjoin: The output is the cartesian product of rows from all passed data frames.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"See the Wikipedia page on SQL joins for more information.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Here are examples of different kinds of join:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> jobs = DataFrame(ID=[20, 60], Job=[\"Lawyer\", \"Astronaut\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼──────────────────\n   1 │    20  Lawyer\n   2 │    60  Astronaut\n\njulia> innerjoin(people, jobs, on = :ID)\n1×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n\njulia> leftjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String?\n─────┼──────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  missing\n\njulia> rightjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String?   String\n─────┼────────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    60  missing   Astronaut\n\njulia> outerjoin(people, jobs, on = :ID)\n3×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String?   
String?\n─────┼────────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  missing\n   3 │    60  missing   Astronaut\n\njulia> semijoin(people, jobs, on = :ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n\njulia> antijoin(people, jobs, on = :ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    40  Jane Doe","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Cross joins are the only kind of join that does not use a on key:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> crossjoin(people, jobs, makeunique = true)\n4×4 DataFrame\n Row │ ID     Name      ID_1   Job\n     │ Int64  String    Int64  String\n─────┼───────────────────────────────────\n   1 │    20  John Doe     20  Lawyer\n   2 │    20  John Doe     60  Astronaut\n   3 │    40  Jane Doe     20  Lawyer\n   4 │    40  Jane Doe     60  Astronaut","category":"page"},{"location":"man/joins/#Key-value-comparisons-and-floating-point-values","page":"Joins","title":"Key value comparisons and floating point values","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Key values from the two or more data frames are compared using the isequal function. This is consistent with the Set and Dict types in Julia Base.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"It is not recommended to use floating point numbers as keys: floating point comparisons can be surprising and unpredictable. If you do use floating point keys, note that by default an error is raised when keys include -0.0 (negative zero) or NaN values. 
Here is an example:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(DataFrame(id=[-0.0]), DataFrame(id=[0.0]), on=:id)\nERROR: ArgumentError: Currently for numeric values `NaN` and `-0.0` in their real or imaginary components are not allowed. Such value was found in column :id in left data frame. Use CategoricalArrays.jl to wrap these values in a CategoricalVector to perform the requested join.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"This can be overridden by wrapping the key values in a categorical vector.","category":"page"},{"location":"man/joins/#Joining-on-key-columns-with-different-names","page":"Joins","title":"Joining on key columns with different names","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In order to join data frames on keys which have different names in the left and right tables, you may pass left => right pairs as on argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(ID=[20, 40], Name=[\"John Doe\", \"Jane Doe\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n   2 │    40  Jane Doe\n\njulia> b = DataFrame(IDNew=[20, 40], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ IDNew  Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    40  Doctor\n\njulia> innerjoin(a, b, on = :ID => :IDNew)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Here is another example with multiple columns:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(City=[\"Amsterdam\", \"London\", \"London\", \"New York\", 
\"New York\"],\n                     Job=[\"Lawyer\", \"Lawyer\", \"Lawyer\", \"Doctor\", \"Doctor\"],\n                     Category=[1, 2, 3, 4, 5])\n5×3 DataFrame\n Row │ City       Job     Category\n     │ String     String  Int64\n─────┼─────────────────────────────\n   1 │ Amsterdam  Lawyer         1\n   2 │ London     Lawyer         2\n   3 │ London     Lawyer         3\n   4 │ New York   Doctor         4\n   5 │ New York   Doctor         5\n\njulia> b = DataFrame(Location=[\"Amsterdam\", \"London\", \"London\", \"New York\", \"New York\"],\n                     Work=[\"Lawyer\", \"Lawyer\", \"Lawyer\", \"Doctor\", \"Doctor\"],\n                     Name=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ Location   Work    Name\n     │ String     String  String\n─────┼───────────────────────────\n   1 │ Amsterdam  Lawyer  a\n   2 │ London     Lawyer  b\n   3 │ London     Lawyer  c\n   4 │ New York   Doctor  d\n   5 │ New York   Doctor  e\n\njulia> innerjoin(a, b, on = [:City => :Location, :Job => :Work])\n9×4 DataFrame\n Row │ City       Job     Category  Name\n     │ String     String  Int64     String\n─────┼─────────────────────────────────────\n   1 │ Amsterdam  Lawyer         1  a\n   2 │ London     Lawyer         2  b\n   3 │ London     Lawyer         3  b\n   4 │ London     Lawyer         2  c\n   5 │ London     Lawyer         3  c\n   6 │ New York   Doctor         4  d\n   7 │ New York   Doctor         5  d\n   8 │ New York   Doctor         4  e\n   9 │ New York   Doctor         5  e","category":"page"},{"location":"man/joins/#Handling-of-duplicate-keys-and-tracking-source-data-frame","page":"Joins","title":"Handling of duplicate keys and tracking source data frame","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Additionally, notice that in the last join rows 2 and 3 had the same values on on variables in both joined DataFrames. 
In such a situation innerjoin, outerjoin, leftjoin and rightjoin will produce all combinations of matching rows. In our example rows from 2 to 5 were created as a result. The same behavior can be observed for rows 4 and 5 in both joined DataFrames.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In order to check that columns passed as the on argument define unique keys (according to isequal) in each input data frame you can set the validate keyword argument to a two-element tuple or a pair of Bool values, with each element indicating whether to run check for the corresponding data frame. Here is an example for the join operation described above:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on = [(:City => :Location), (:Job => :Work)], validate=(true, true))\nERROR: ArgumentError: Merge key(s) are not unique in both df1 and df2. df1 contains 2 duplicate keys: (City = \"London\", Job = \"Lawyer\") and (City = \"New York\", Job = \"Doctor\"). df2 contains 2 duplicate keys: (Location = \"London\", Work = \"Lawyer\") and (Location = \"New York\", Work = \"Doctor\").","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Finally, using the source keyword argument you can add a column to the resulting data frame indicating whether the given row appeared only in the left, the right or both data frames. 
Here is an example:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(ID=[20, 40], Name=[\"John\", \"Jane\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼───────────────\n   1 │    20  John\n   2 │    40  Jane\n\njulia> b = DataFrame(ID=[20, 60], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    60  Doctor\n\njulia> outerjoin(a, b, on=:ID, validate=(true, true), source=:source)\n3×4 DataFrame\n Row │ ID     Name     Job      source\n     │ Int64  String?  String?  String\n─────┼─────────────────────────────────────\n   1 │    20  John     Lawyer   both\n   2 │    40  Jane     missing  left_only\n   3 │    60  missing  Doctor   right_only","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that this time we also used the validate keyword argument and it did not produce errors as the keys defined in both source data frames were unique.","category":"page"},{"location":"man/joins/#Renaming-joined-columns","page":"Joins","title":"Renaming joined columns","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Often you want to keep track of the source data frame. 
This feature is supported with the renamecols keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on=:ID, renamecols = \"_left\" => \"_right\")\n1×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String\n─────┼─────────────────────────────\n   1 │    20  John       Lawyer","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In the above example we added the \"_left\" suffix to the non-key columns from the left table and the \"_right\" suffix to the non-key columns from the right table.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Alternatively it is allowed to pass a function transforming column names:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on=:ID, renamecols = lowercase => uppercase)\n1×3 DataFrame\n Row │ ID     name    JOB\n     │ Int64  String  String\n─────┼───────────────────────\n   1 │    20  John    Lawyer\n","category":"page"},{"location":"man/joins/#Matching-missing-values-in-joins","page":"Joins","title":"Matching missing values in joins","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"By default when you try to perform a join on a key that has missing values you get an error:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> df1 = DataFrame(id=[1, missing, 3], a=1:3)\n3×2 DataFrame\n Row │ id       a\n     │ Int64?   Int64\n─────┼────────────────\n   1 │       1      1\n   2 │ missing      2\n   3 │       3      3\n\njulia> df2 = DataFrame(id=[1, 2, missing], b=1:3)\n3×2 DataFrame\n Row │ id       b\n     │ Int64?   
Int64\n─────┼────────────────\n   1 │       1      1\n   2 │       2      2\n   3 │ missing      3\n\njulia> innerjoin(df1, df2, on=:id)\nERROR: ArgumentError: Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :id in left data frame.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"If you would prefer missing values to be treated as equal pass the matchmissing=:equal keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(df1, df2, on=:id, matchmissing=:equal)\n2×3 DataFrame\n Row │ id       a      b\n     │ Int64?   Int64  Int64\n─────┼───────────────────────\n   1 │       1      1      1\n   2 │ missing      2      3","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Alternatively you might want to drop all rows with missing values. In this case pass matchmissing=:notequal:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(df1, df2, on=:id, matchmissing=:notequal)\n1×3 DataFrame\n Row │ id      a      b\n     │ Int64?  
Int64  Int64\n─────┼──────────────────────\n   1 │      1      1      1","category":"page"},{"location":"man/joins/#Specifying-row-order-in-the-join-result","page":"Joins","title":"Specifying row order in the join result","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"By default the order of rows produced by the join operation is undefined:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> df_left = DataFrame(id=[1, 2, 4, 5], left=1:4)\n4×2 DataFrame\n Row │ id     left\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     4      3\n   4 │     5      4\n\njulia> df_right = DataFrame(id=[2, 1, 3, 6, 7], right=1:5)\n5×2 DataFrame\n Row │ id     right\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      1\n   2 │     1      2\n   3 │     3      3\n   4 │     6      4\n   5 │     7      5\n\njulia> outerjoin(df_left, df_right, on=:id)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     2        2        1\n   2 │     1        1        2\n   3 │     4        3  missing\n   4 │     5        4  missing\n   5 │     3  missing        3\n   6 │     6  missing        4\n   7 │     7  missing        5","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"If you would like the result to keep the row order of the left table pass the order=:left keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> outerjoin(df_left, df_right, on=:id, order=:left)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   
Int64?\n─────┼─────────────────────────\n   1 │     1        1        2\n   2 │     2        2        1\n   3 │     4        3  missing\n   4 │     5        4  missing\n   5 │     3  missing        3\n   6 │     6  missing        4\n   7 │     7  missing        5","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that in this case keys missing from the left table are put after the keys present in it.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Similarly order=:right keeps the order of the right table (and puts keys not present in it at the end):","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> outerjoin(df_left, df_right, on=:id, order=:right)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     2        2        1\n   2 │     1        1        2\n   3 │     3  missing        3\n   4 │     6  missing        4\n   5 │     7  missing        5\n   6 │     4        3  missing\n   7 │     5        4  missing","category":"page"},{"location":"man/joins/#In-place-left-join","page":"Joins","title":"In-place left join","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"A common operation is adding data from a reference table to some main table. It is possible to perform such an in-place update using the leftjoin! function. 
In this case the left table is updated in place with matching rows from the right table.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> main = DataFrame(id=1:4, main=1:4)\n4×2 DataFrame\n Row │ id     main\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n\njulia> leftjoin!(main, DataFrame(id=[2, 4], info=[\"a\", \"b\"]), on=:id);\n\njulia> main\n4×3 DataFrame\n Row │ id     main   info\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     1      1  missing\n   2 │     2      2  a\n   3 │     3      3  missing\n   4 │     4      4  b","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that in this case the order and number of rows in the left table are not changed. Therefore, in particular, it is not allowed to have duplicate keys in the right table:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> leftjoin!(main, DataFrame(id=[2, 2], info_bad=[\"a\", \"b\"]), on=:id)\nERROR: ArgumentError: duplicate rows found in right table","category":"page"},{"location":"lib/metadata/#Metadata","page":"Metadata","title":"Metadata","text":"","category":"section"},{"location":"lib/metadata/#Design-of-metadata-support","page":"Metadata","title":"Design of metadata support","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl allows you to store and retrieve metadata on table and column level. 
This is supported using the functions defined by the DataAPI.jl interface:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"for table-level metadata: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!;\nfor column-level metadata: colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Additionally you might find the TableMetadataTools.jl package useful. This package defines several convenience functions for performing typical metadata operations.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Assume that we work with a data frame-like object df that has a column col (referred to either via a Symbol, a string or an integer index).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Table-level metadata are key-value pairs that are attached to df. Column-level metadata are key-value pairs that are attached to a specific column col of df data frame.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"To check whether some key key is present in table-level metadata of data frame df you can write key in metadatakeys(df). Similarly to check whether key key is present in column-level metadata of data frame df for column col write key in colmetadatakeys(df, col).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Additionally each metadata key-value pair has a style information attached to it. In DataFrames.jl the metadata style influences how metadata is propagated when df is transformed. The following metadata styles are supported:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":":default: Metadata having this style is considered to be attached to a concrete state of df. 
This means that any operation on this data frame invalidates such metadata and it is dropped in the result of such operation. Note that this happens even if the operation eventually does not change the data frame: the rule is that calling a function that might alter a data frame drops such metadata; in this way it is possible to statically determine whether metadata of styles other than :note is dropped after a function call. Only two functions are exceptions that keep non-:note-style metadata, as these operations are specifically designed to create an identical copy of the source data frame:\nDataFrame constructor;\ncopy of a data frame;\n:note: Metadata having this style is considered to be an annotation of a table or a column that should be propagated under transformations (exact propagation rules of such metadata are described below).\nAll other metadata styles are allowed but they are currently treated as having :default-style (this might change in the future if other standard metadata styles are defined).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"All DataAPI.jl metadata functions work with DataFrame, SubDataFrame, DataFrameRow objects, and objects returned by eachrow and eachcol functions. In this section collectively these objects will be called data frame-like, and follow the rules:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"objects returned by eachrow and eachcol functions have the same metadata as their parent AbstractDataFrame;\nSubDataFrame and DataFrameRow only expose metadata from their parent DataFrame whose style is :note.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Notably, metadata is not supported for GroupedDataFrame and you can't add, modify, nor view metadata through the GroupedDataFrame itself. 
It is possible only through its parent.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nDataFrames.jl allows users to extract out columns of a data frame and perform operations on them. Such operations will not affect metadata. Therefore, even if some metadata has :default style it might no longer correctly describe the column's contents if the user mutates columns directly.","category":"page"},{"location":"lib/metadata/#DataFrames.jl-specific-design-principles-for-use-of-metadata","page":"Metadata","title":"DataFrames.jl-specific design principles for use of metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl supports storing any object as metadata values. However, it is recommended to use strings as values of the metadata, as some storage formats, like for example Apache Arrow, only support strings.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For all functions that operate on column-level metadata, an ArgumentError is thrown if passed column is not present in a data frame.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"If metadata! or colmetadata! is used to add metadata to a SubDataFrame or a DataFrameRow then:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"using metadata that has style other than :note throws an error;\ntrying to add key-value pair for which a mapping for key already exists with style other than :note in the parent data frame throws an error.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl is designed so that there is no performance overhead due to metadata support when there is no metadata in a data frame. Therefore if you need maximum performance of operations that do not rely on metadata call emptymetadata! 
and emptycolmetadata! before running these operations.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Processing metadata for SubDataFrame and DataFrameRow has more overhead than for other types defined in DataFrames.jl that support metadata, because they have a more complex logic of handling it (they support only :note-style metadata, which means that other metadata needs to be filtered-out).","category":"page"},{"location":"lib/metadata/#Examples","page":"Metadata","title":"Examples","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Here is a simple example how you can work with metadata in DataFrames.jl:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"julia> using DataFrames\n\njulia> df = DataFrame(name=[\"Jan Krzysztof Duda\", \"Jan Krzysztof Duda\",\n                            \"Radosław Wojtaszek\", \"Radosław Wojtaszek\"],\n                      date=[\"2022-Jun\", \"2021-Jun\", \"2022-Jun\", \"2021-Jun\"],\n                      rating=[2750, 2729, 2708, 2687])\n4×3 DataFrame\n Row │ name                date      rating\n     │ String              String    Int64\n─────┼──────────────────────────────────────\n   1 │ Jan Krzysztof Duda  2022-Jun    2750\n   2 │ Jan Krzysztof Duda  2021-Jun    2729\n   3 │ Radosław Wojtaszek  2022-Jun    2708\n   4 │ Radosław Wojtaszek  2021-Jun    2687\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"caption\", \"ELO ratings of chess players\", style=:note);\n\njulia> collect(metadatakeys(df))\n1-element Vector{String}:\n \"caption\"\n\njulia> \"caption\" in metadatakeys(df)\ntrue\n\njulia> metadata(df, \"caption\")\n\"ELO ratings of chess players\"\n\njulia> metadata(df, \"caption\", style=true)\n(\"ELO ratings of chess players\", :note)\n\njulia> emptymetadata!(df);\n\njulia> metadatakeys(df)\n()\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :name, 
\"label\", \"First and last name of a player\", style=:note);\n\njulia> colmetadata!(df, :date, \"label\", \"Rating date in yyyy-u format\", style=:note);\n\njulia> colmetadata!(df, :rating, \"label\", \"ELO rating in classical time control\", style=:note);\n\njulia> \"label\" in colmetadatakeys(df, :rating)\ntrue\n\njulia> colmetadata(df, :rating, \"label\")\n\"ELO rating in classical time control\"\n\njulia> colmetadata(df, :rating, \"label\", style=true)\n(\"ELO rating in classical time control\", :note)\n\njulia> collect(colmetadatakeys(df))\n3-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n   :date => [\"label\"]\n :rating => [\"label\"]\n   :name => [\"label\"]\n\njulia> [only(names(df, col)) =>\n        [key => colmetadata(df, col, key) for key in metakeys] for\n        (col, metakeys) in colmetadatakeys(df)]\n3-element Vector{Pair{String, Vector{Pair{String, String}}}}:\n   \"date\" => [\"label\" => \"Rating date in yyyy-u format\"]\n \"rating\" => [\"label\" => \"ELO rating in classical time control\"]\n   \"name\" => [\"label\" => \"First and last name of a player\"]\n\njulia> emptycolmetadata!(df);\n\njulia> colmetadatakeys(df)\n()","category":"page"},{"location":"lib/metadata/#Propagation-of-:note-style-metadata","page":"Metadata","title":"Propagation of :note-style metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"An important design feature of :note-style metadata is how it is handled when data frames are transformed.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nThe provided rules might slightly change in the future. Any change to :note-style metadata propagation rules will not be considered as breaking and can be done in any minor release of DataFrames.jl. 
Such changes might be made based on users' feedback about what metadata propagation rules are most convenient in practice.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"The general design rules for propagation of :note-style metadata are as follows.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For operations that take a single data frame as an input:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Table level metadata is propagated to the returned data frame object.\nFor column-level metadata:\nin all cases when a single column is transformed to a single column and the name of the column does not change (or is automatically changed e.g. to de-duplicate column names or via column renaming in joins) column-level metadata is preserved (example operations of this kind are getindex, subset, joins, mapcols).\nin all cases when a single column is transformed with identity or copy to a single column, column-level metadata is preserved even if column name is changed (example operations of this kind are rename, or the :x => :y or :x => copy => :y operation specification in select).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For operations that take multiple data frames as their input two cases are distinguished:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"When there is a natural main table in the operation (append!, prepend!, leftjoin, leftjoin!, rightjoin, semijoin, antijoin, setindex!):\ntable-level metadata is taken from the main table;\ncolumn-level metadata for columns from the main table is taken from main table;\ncolumn-level metadata for columns from the non-main table is taken only for columns not present in the main table.\nWhen all tables are equivalent (hcat, vcat, innerjoin, outerjoin):\ntable-level metadata is preserved only 
for keys which are defined in all passed tables and have the same value;\ncolumn-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"In all these operations when metadata is preserved the values in the key-value pairs are not copied (this is relevant in case of mutable values).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nThe rules for column-level :note-style metadata propagation are designed to make the right decision in common cases. In particular, they assume that if source and target column name is the same then the metadata for the column is not changed. While this is valid for many operations, it is not always true in general. For example the :x => ByRow(log) => :x transformation might invalidate metadata if it contained unit of measure of the variable. In such cases user must either use a different name for the output column, set metadata style to :default before the operation, or manually drop or update such metadata from the :x column after the transformation.","category":"page"},{"location":"lib/metadata/#Operations-that-preserve-:note-style-metadata","page":"Metadata","title":"Operations that preserve :note-style metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Most of the functions in DataFrames.jl only preserve table and column metadata whose style is :note. Some functions use a more complex logic, even if they follow the general rules described above (in particular under any transformation all non-:note-style metadata is always dropped). 
These are:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"describe drops all metadata.\nhcat: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved.\nvcat: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value;\nstack: propagates table-level metadata and column-level metadata for identifier columns.\nunstack: propagates table-level metadata and column-level metadata for row keys columns.\npermutedims: propagates table-level metadata and drops column-level  metadata.\nbroadcasted assignment does not change target metadata; under Julia earlier than 1.7 operation of kind df.a .= s does not drop non-:note-style metadata; under Julia 1.7 or later this operation preserves only :note-style metadata\nbroadcasting propagates table-level metadata if some key is present in all passed data frames and value associated with it is identical in all passed data frames; column-level metadata is propagated for columns if some key for a given column is present in all passed data frames and value associated with it is identical in all passed data frames.\ngetindex preserves table-level metadata and column-level metadata for selected columns\nsetindex! does not affect table-level and column-level metadata\npush!, pushfirst!, insert! do not affect table-level nor column-level metadata (even if they add new columns and pushed row is a DataFrameRow or other value supporting metadata interface)\nappend! and prepend! 
do not change table and column-level metadata of the destination data frame, except that if new columns are added and these columns have metadata in the appended/prepended table then this metadata is preserved.\nleftjoin!, leftjoin: table and column-level metadata is taken from the left table except for non-key columns from right table for which metadata is taken from right table;\nrightjoin: table and column-level metadata is taken from the right table except for non-key columns from left table for which metadata is taken from left table;\ninnerjoin, outerjoin: propagates table-level metadata only for keys that are defined in all passed data frames and have the same value; column-level metadata is propagated for all columns except for key columns, for which it is propagated only for keys that are defined in all passed data frames and have the same value.\nsemijoin, antijoin: table and column-level metadata is taken from the left table.\ncrossjoin: propagates table-level metadata only for keys that are defined in both passed data frames and have the same value; propagates column-level metadata from both passed data frames.\nselect, select!, transform, transform!, combine: propagate table-level metadata; column-level metadata is propagated if: a) a single column is transformed to a single column and the name of the column does not change    (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/functions/#Functions","page":"Functions","title":"Functions","text":"","category":"section"},{"location":"lib/functions/#Multithreading-support","page":"Functions","title":"Multithreading 
support","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"By default, selected operations in DataFrames.jl automatically use multiple threads when available. Multi-threading is task-based and implemented using the @spawn macro from Julia Base. Tasks are therefore scheduled on the :default threadpool. Functions that take user-defined functions and may run them in parallel accept a threads keyword argument which allows disabling multithreading when the provided function requires serial execution or is not thread-safe.","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"This is a list of operations that currently make use of multi-threading:","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"DataFrame constructor with copycols=true; also recursively all functions that call this constructor, e.g. copy.\ngetindex when multiple columns are selected.\ngroupby (both when hashing is required and when fast path using DataAPI.refpool is used).\n*join functions for composing output data frame (but currently not for finding matching rows in joined data frames).\ncombine, select[!], and transform[!] on GroupedDataFrame when either of the conditions below is met:\nmultiple transformations are performed (each transformation is spawned in a separate task)\na transformation produces one row per group and the passed transformation is a custom function (i.e. not for standard reductions, which use optimized single-threaded methods).\ndropmissing when the provided data frame has more than 1 column and view=false  (subsetting of individual columns is spawned in separate tasks).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"In general at least Julia 1.4 is required to ensure that multi-threading is used and the Julia process must be started with more than one thread. 
Some operations turn on multi-threading only if enough rows in the processed data frame are present (the exact threshold when multi-threading is enabled is considered to be undefined and might change in the future).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"Except for the list above, where multi-threading is used automatically, all functions provided by DataFrames.jl that update a data frame are not thread safe. This means that while they can be called from any thread, the caller is responsible for ensuring that a given DataFrame object is never modified by one thread while others are using it (either for reading or writing). Using the same DataFrame at the same time from different threads is safe as long as it is not modified.","category":"page"},{"location":"lib/functions/#Index","page":"Functions","title":"Index","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"Pages = [\"functions.md\"]","category":"page"},{"location":"lib/functions/#Constructing-data-frames","page":"Functions","title":"Constructing data frames","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allcombinations\ncopy\nsimilar","category":"page"},{"location":"lib/functions/#DataAPI.allcombinations","page":"Functions","title":"DataAPI.allcombinations","text":"allcombinations(DataFrame, pairs::Pair...)\nallcombinations(DataFrame; kwargs...)\n\nCreate a DataFrame from all combinations of values in passed arguments. The first passed values vary fastest.\n\nArguments associating a column name with values to expand can be specified either as Pairs passed as positional arguments, or as keyword arguments. Column names must be Symbols or strings and must be unique.\n\nColumn value can be a vector which is consumed as is or an object of any other type (except AbstractArray). 
In the latter case the passed value is treated as having length one for expansion. As a particular rule values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated as having length one.\n\nSee also: crossjoin can be used to get the cartesian product of rows from passed data frames.\n\nExamples\n\njulia> allcombinations(DataFrame, a=1:2, b='a':'c')\n6×2 DataFrame\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n   2 │     2  a\n   3 │     1  b\n   4 │     2  b\n   5 │     1  c\n   6 │     2  c\n\njulia> allcombinations(DataFrame, \"a\" => 1:2, \"b\" => 'a':'c', \"c\" => \"const\")\n6×3 DataFrame\n Row │ a      b     c\n     │ Int64  Char  String\n─────┼─────────────────────\n   1 │     1  a     const\n   2 │     2  a     const\n   3 │     1  b     const\n   4 │     2  b     const\n   5 │     1  c     const\n   6 │     2  c     const\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.copy","page":"Functions","title":"Base.copy","text":"copy(df::DataFrame; copycols::Bool=true)\n\nCopy data frame df. If copycols=true (the default), return a new  DataFrame holding copies of column vectors in df. If copycols=false, return a new DataFrame sharing column vectors with df.\n\nMetadata: this function preserves all table-level and column-level metadata.\n\n\n\n\n\ncopy(dfr::DataFrameRow)\n\nConstruct a NamedTuple with the same contents as the DataFrameRow. This method returns a NamedTuple so that the returned object is not affected by changes to the parent data frame of which dfr is a view.\n\n\n\n\n\ncopy(key::GroupKey)\n\nConstruct a NamedTuple with the same contents as the GroupKey.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.similar","page":"Functions","title":"Base.similar","text":"similar(df::AbstractDataFrame, rows::Integer=nrow(df))\n\nCreate a new DataFrame with the same column names and column element types as df. 
An optional second argument can be provided to request a number of rows that is different than the number of rows present in df.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Summary-information","page":"Functions","title":"Summary information","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"describe\nisempty\nlength\nncol\nndims\nnrow\nrownumber\nshow\nsize","category":"page"},{"location":"lib/functions/#DataAPI.describe","page":"Functions","title":"DataAPI.describe","text":"describe(df::AbstractDataFrame; cols=:)\ndescribe(df::AbstractDataFrame, stats::Union{Symbol, Pair}...; cols=:)\n\nReturn descriptive statistics for a data frame as a new DataFrame where each row represents a variable and each column a summary statistic.\n\nArguments\n\ndf : the AbstractDataFrame\nstats::Union{Symbol, Pair}... : the summary statistics to report. Arguments can be:\nA symbol from the list :mean, :std, :min, :q25, :median, :q75, :max, :sum, :eltype, :nunique, :nuniqueall, :first, :last, :nnonmissing, and :nmissing. The default statistics used are :mean, :min, :median, :max, :nmissing, and :eltype.\n:detailed as the only Symbol argument to return all statistics except :first, :last, :sum, :nuniqueall, and :nnonmissing.\n:all as the only Symbol argument to return all statistics.\nA function => name pair where name is a Symbol or string. This will create a column of summary statistics with the provided name.\ncols : a keyword argument allowing to select only a subset or transformation of columns from df to describe. Can be any column selector or transformation accepted by select.\n\nDetails\n\nFor Real columns, compute the mean, standard deviation, minimum, first quantile, median, third quantile, and maximum. 
If a column does not derive from Real, describe will attempt to calculate all statistics, using nothing as a fall-back in the case of an error.\n\nWhen stats contains :nunique, describe will report the number of unique values in a column. If a column's base type derives from Real, :nunique will return nothings. Use :nuniqueall to report the number of unique values in all columns.\n\nMissing values are filtered in the calculation of all statistics, however the column :nmissing will report the number of missing values of that variable and :nnonmissing the number of non-missing values.\n\nIf custom functions are provided, they are called repeatedly with the vector corresponding to each column as the only argument. For columns allowing for missing values, the vector is wrapped in a call to skipmissing: custom functions must therefore support such objects (and not only vectors), and cannot access missing values.\n\nMetadata: this function drops all metadata.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=0.1:0.1:1.0, y='a':'j');\n\njulia> describe(df)\n3×7 DataFrame\n Row │ variable  mean    min  median  max  nmissing  eltype\n     │ Symbol    Union…  Any  Union…  Any  Int64     DataType\n─────┼────────────────────────────────────────────────────────\n   1 │ i         5.5     1    5.5     10          0  Int64\n   2 │ x         0.55    0.1  0.55    1.0         0  Float64\n   3 │ y                 a            j           0  Char\n\njulia> describe(df, :min, :max)\n3×3 DataFrame\n Row │ variable  min  max\n     │ Symbol    Any  Any\n─────┼────────────────────\n   1 │ i         1    10\n   2 │ x         0.1  1.0\n   3 │ y         a    j\n\njulia> describe(df, :min, sum => :sum)\n3×3 DataFrame\n Row │ variable  min  sum\n     │ Symbol    Any  Union…\n─────┼───────────────────────\n   1 │ i         1    55\n   2 │ x         0.1  5.5\n   3 │ y         a\n\njulia> describe(df, :min, sum => :sum, cols=:x)\n1×3 DataFrame\n Row │ variable  min      sum\n     │ Symbol    Float64  
Float64\n─────┼────────────────────────────\n   1 │ x             0.1      5.5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.isempty","page":"Functions","title":"Base.isempty","text":"isempty(df::AbstractDataFrame)\n\nReturn true if data frame df has zero rows, and false otherwise.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.length","page":"Functions","title":"Base.length","text":"length(dfr::DataFrameRow)\n\nReturn the number of elements of dfr.\n\nSee also: size\n\nExamples\n\njulia> dfr = DataFrame(a=1:3, b='a':'c')[1, :]\nDataFrameRow\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> length(dfr)\n2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.ncol","page":"Functions","title":"DataAPI.ncol","text":"ncol(df::AbstractDataFrame)\n\nReturn the number of columns in an AbstractDataFrame df.\n\nSee also nrow, size.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=rand(10), y=rand([\"a\", \"b\", \"c\"], 10));\n\njulia> ncol(df)\n3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.ndims","page":"Functions","title":"Base.ndims","text":"ndims(::AbstractDataFrame)\nndims(::Type{<:AbstractDataFrame})\n\nReturn the number of dimensions of a data frame, which is always 2.\n\n\n\n\n\nndims(::DataFrameRow)\nndims(::Type{<:DataFrameRow})\n\nReturn the number of dimensions of a data frame row, which is always 1.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.nrow","page":"Functions","title":"DataAPI.nrow","text":"nrow(df::AbstractDataFrame)\n\nReturn the number of rows in an AbstractDataFrame df.\n\nSee also: ncol, size.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=rand(10), y=rand([\"a\", \"b\", \"c\"], 10));\n\njulia> nrow(df)\n10\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.rownumber","page":"Functions","title":"DataAPI.rownumber","text":"rownumber(dfr::DataFrameRow)\n\nReturn a row number in the 
AbstractDataFrame that dfr was created from.\n\nNote that this differs from the first element in the tuple returned by parentindices. The latter gives the row number in the parent(dfr), which is the source DataFrame where data that dfr gives access to is stored.\n\nExamples\n\njulia> df = DataFrame(reshape(1:12, 3, 4), :auto)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\njulia> dfr = df[2, :]\nDataFrameRow\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   2 │     2      5      8     11\n\njulia> rownumber(dfr)\n2\n\njulia> parentindices(dfr)\n(2, Base.OneTo(4))\n\njulia> parent(dfr)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\njulia> dfv = @view df[2:3, 1:3]\n2×3 SubDataFrame\n Row │ x1     x2     x3\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      5      8\n   2 │     3      6      9\n\njulia> dfrv = dfv[2, :]\nDataFrameRow\n Row │ x1     x2     x3\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   3 │     3      6      9\n\njulia> rownumber(dfrv)\n2\n\njulia> parentindices(dfrv)\n(3, 1:3)\n\njulia> parent(dfrv)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.show","page":"Functions","title":"Base.show","text":"show([io::IO, ]df::AbstractDataFrame;\n     allrows::Bool = !get(io, :limit, false),\n     allcols::Bool = !get(io, :limit, false),\n     allgroups::Bool = !get(io, 
:limit, false),\n     rowlabel::Symbol = :Row,\n     summary::Bool = true,\n     eltypes::Bool = true,\n     truncate::Int = 32,\n     kwargs...)\n\nRender a data frame to an I/O stream. The specific visual representation chosen depends on the width of the display.\n\nIf io is omitted, the result is printed to stdout, and allrows, allcols and allgroups default to false.\n\nArguments\n\nio::IO: The I/O stream to which df will be printed.\ndf::AbstractDataFrame: The data frame to print.\nallrows::Bool: Whether to print all rows, rather than a subset that fits the device height. By default this is the case only if io does not have the IOContext property limit set.\nallcols::Bool: Whether to print all columns, rather than a subset that fits the device width. By default this is the case only if io does not have the IOContext property limit set.\nallgroups::Bool: Whether to print all groups rather than the first and last, when df is a GroupedDataFrame. By default this is the case only if io does not have the IOContext property limit set.\nrowlabel::Symbol = :Row: The label to use for the column containing row numbers.\nsummary::Bool = true: Whether to print a brief string summary of the data frame.\neltypes::Bool = true: Whether to print the column types under column names.\ntruncate::Int = 32: the maximal display width the output can use before being truncated (in the textwidth sense, excluding …). 
If truncate is 0 or less, no truncation is applied.\nkwargs...: Any keyword argument supported by the function pretty_table of PrettyTables.jl can be passed here to customize the output.\n\nExamples\n\njulia> using DataFrames\n\njulia> df = DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]);\n\njulia> show(df, show_row_number=false)\n3×2 DataFrame\n A      B\n Int64  String\n───────────────\n     1  x\n     2  y\n     3  z\n\n\n\n\n\nshow(io::IO, mime::MIME, df::AbstractDataFrame)\n\nRender a data frame to an I/O stream in MIME type mime.\n\nArguments\n\nio::IO: The I/O stream to which df will be printed.\nmime::MIME: supported MIME types are: \"text/plain\", \"text/html\", \"text/latex\", \"text/csv\", \"text/tab-separated-values\" (the last two MIME types do not support  showing #undef values)\ndf::AbstractDataFrame: The data frame to print.\n\nAdditionally selected MIME types support passing the following keyword arguments:\n\nMIME type \"text/plain\" accepts all listed keyword arguments and their behavior is identical as for show(::IO, ::AbstractDataFrame)\nMIME type \"text/html\" accepts the following keyword arguments:\neltypes::Bool = true: Whether to print the column types under column names.\nsummary::Bool = true: Whether to print a brief string summary of the data frame.\nmax_column_width::AbstractString = \"\": The maximum column width. It must     be a string containing a valid CSS length. For example, passing     \"100px\" will limit the width of all columns to 100 pixels. 
If empty,     the columns will be rendered without limits.\nkwargs...: Any keyword argument supported by the function pretty_table of PrettyTables.jl can be passed here to customize the output.\n\nExamples\n\njulia> show(stdout, MIME(\"text/latex\"), DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]))\n\\begin{tabular}{r|cc}\n\t& A & B\\\\\n\t\\hline\n\t& Int64 & String\\\\\n\t\\hline\n\t1 & 1 & x \\\\\n\t2 & 2 & y \\\\\n\t3 & 3 & z \\\\\n\\end{tabular}\n14\n\njulia> show(stdout, MIME(\"text/csv\"), DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]))\n\"A\",\"B\"\n1,\"x\"\n2,\"y\"\n3,\"z\"\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.size","page":"Functions","title":"Base.size","text":"size(df::AbstractDataFrame[, dim])\n\nReturn a tuple containing the number of rows and columns of df. Optionally a dimension dim can be specified, where 1 corresponds to rows and 2 corresponds to columns.\n\nSee also: nrow, ncol\n\nExamples\n\njulia> df = DataFrame(a=1:3, b='a':'c');\n\njulia> size(df)\n(3, 2)\n\njulia> size(df, 1)\n3\n\n\n\n\n\nsize(dfr::DataFrameRow[, dim])\n\nReturn a 1-tuple containing the number of elements of dfr. 
If an optional dimension dim is specified, it must be 1, and the number of elements is returned directly as a number.\n\nSee also: length\n\nExamples\n\njulia> dfr = DataFrame(a=1:3, b='a':'c')[1, :]\nDataFrameRow\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> size(dfr)\n(2,)\n\njulia> size(dfr, 1)\n2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Working-with-column-names","page":"Functions","title":"Working with column names","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"names\npropertynames\nrename\nrename!","category":"page"},{"location":"lib/functions/#Base.names","page":"Functions","title":"Base.names","text":"names(df::AbstractDataFrame, cols=:)\nnames(df::DataFrameRow, cols=:)\nnames(df::GroupedDataFrame, cols=:)\nnames(df::DataFrameRows, cols=:)\nnames(df::DataFrameColumns, cols=:)\nnames(df::GroupKey)\n\nReturn a freshly allocated Vector{String} of names of columns contained in df.\n\nIf cols is passed then restrict returned column names to those matching the selector (this is useful in particular with regular expressions, Cols, Not, and Between). cols can be:\n\nany column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers); these column selectors are documented in the General rules section of the Indexing part of the DataFrames.jl manual\na Type, in which case names of columns whose eltype is a subtype of T are returned\na Function predicate taking the column name as a string and returning true for columns that should be kept\n\nSee also propertynames which returns a Vector{Symbol} (except for GroupedDataFrame in which case use Symbol.(names(df))).\n\nExamples\n\njulia> df = DataFrame(x1=[1, missing, missing], x2=[3, 2, 4], x3=[3, missing, 2], x4=Union{Int, Missing}[2, 4, 4])\n3×4 DataFrame\n Row │ x1       x2     x3       x4\n     │ Int64?   Int64  Int64?   
Int64?\n─────┼─────────────────────────────────\n   1 │       1      3        3       2\n   2 │ missing      2  missing       4\n   3 │ missing      4        2       4\n\njulia> names(df)\n4-element Vector{String}:\n \"x1\"\n \"x2\"\n \"x3\"\n \"x4\"\n\njulia> names(df, Int) # pick columns whose element type is Int\n1-element Vector{String}:\n \"x2\"\n\njulia> names(df, x -> x[end] == '2') # pick columns for which last character in their name is '2'\n1-element Vector{String}:\n \"x2\"\n\njulia> fun(col) = sum(skipmissing(col)) >= 10\nfun (generic function with 1 method)\n\njulia> names(df, fun.(eachcol(df))) # pick columns for which sum of their elements is at least 10\n1-element Vector{String}:\n \"x4\"\n\njulia> names(df, eltype.(eachcol(df)) .>: Missing) # pick columns that allow missing values\n3-element Vector{String}:\n \"x1\"\n \"x3\"\n \"x4\"\n\njulia> names(df, any.(ismissing, eachcol(df))) # pick columns that contain missing values\n2-element Vector{String}:\n \"x1\"\n \"x3\"\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.propertynames","page":"Functions","title":"Base.propertynames","text":"propertynames(df::AbstractDataFrame)\n\nReturn a freshly allocated Vector{Symbol} of names of columns contained in df.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.rename","page":"Functions","title":"DataFrames.rename","text":"rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};\n       makeunique::Bool=false)\nrename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};\n       makeunique::Bool=false)\nrename(df::AbstractDataFrame, (from => to)::Pair...)\nrename(df::AbstractDataFrame, d::AbstractDict)\nrename(df::AbstractDataFrame, d::AbstractVector{<:Pair})\nrename(f::Function, df::AbstractDataFrame; cols=All())\n\nCreate a new data frame that is a copy of df with changed column names. Each name is changed at most once. 
Permutation of names is allowed.\n\nArguments\n\ndf : the AbstractDataFrame; if it is a SubDataFrame then renaming is only allowed if it was created using : as a column selector.\nd : an AbstractDict or an AbstractVector of Pairs that maps the original names or column numbers to new names\nf : a function which for each column selected by the cols keyword argument takes the old name as a String and returns the new name that gets converted to a Symbol; the cols column selector can be any value accepted as column selector by the names function\nvals : new column names as a vector of Symbols or AbstractStrings of the same length as the number of columns in df\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf pairs are passed to rename (as positional arguments or in a dictionary or a vector) then:\n\nfrom value can be a Symbol, an AbstractString or an Integer;\nto value can be a Symbol or an AbstractString.\n\nMixing symbols and strings in to and from is not allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nColumn-level :note-style metadata is considered to be attached to column number: when a column is renamed, its :note-style metadata becomes associated to its new name.\n\nSee also: rename!\n\nExamples\n\njulia> df = DataFrame(i=1, x=2, y=3)\n1×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, [:a, :b, :c])\n1×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, :i => \"A\", :x => \"X\")\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, :x => :y, :y => :x)\n1×3 DataFrame\n Row │ i      y      x\n     │ Int64  
Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, [1 => :A, 2 => :X])\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, Dict(\"i\" => \"A\", \"x\" => \"X\"))\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(uppercase, df)\n1×3 DataFrame\n Row │ I      X      Y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(uppercase, df, cols=contains('x'))\n1×3 DataFrame\n Row │ i      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.rename!","page":"Functions","title":"DataFrames.rename!","text":"rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};\n        makeunique::Bool=false)\nrename!(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};\n        makeunique::Bool=false)\nrename!(df::AbstractDataFrame, (from => to)::Pair...)\nrename!(df::AbstractDataFrame, d::AbstractDict)\nrename!(df::AbstractDataFrame, d::AbstractVector{<:Pair})\nrename!(f::Function, df::AbstractDataFrame; cols=All())\n\nRename columns of df in-place. Each name is changed at most once. 
Permutation of names is allowed.\n\nArguments\n\ndf : the AbstractDataFrame\nd : an AbstractDict or an AbstractVector of Pairs that maps the original names or column numbers to new names\nf : a function which for each column selected by the cols keyword argument takes the old name as a String and returns the new name that gets converted to a Symbol; the cols column selector can be any value accepted as column selector by the names function\nvals : new column names as a vector of Symbols or AbstractStrings of the same length as the number of columns in df\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf pairs are passed to rename! (as positional arguments or in a dictionary or a vector) then:\n\nfrom value can be a Symbol, an AbstractString or an Integer;\nto value can be a Symbol or an AbstractString.\n\nMixing symbols and strings in to and from is not allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame). Column-level :note-style metadata is considered to be attached to column number: when a column is renamed, its :note-style metadata becomes associated to its new name.\n\nSee also: rename\n\nExamples\n\njulia> df = DataFrame(i=1, x=2, y=3)\n1×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, Dict(:i => \"A\", :x => \"X\"))\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, [:a, :b, :c])\n1×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, [:a, :b, :a])\nERROR: ArgumentError: Duplicate variable names: :a. 
Pass makeunique=true to make them unique using a suffix automatically.\n\njulia> rename!(df, [:a, :b, :a], makeunique=true)\n1×3 DataFrame\n Row │ a      b      a_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(uppercase, df)\n1×3 DataFrame\n Row │ A      B      A_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(lowercase, df, cols=contains('A'))\n1×3 DataFrame\n Row │ a      B      a_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Mutating-and-transforming-data-frames-and-grouped-data-frames","page":"Functions","title":"Mutating and transforming data frames and grouped data frames","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"append!\ncombine\nfillcombinations\nflatten\nhcat\ninsert!\ninsertcols\ninsertcols!\ninvpermute!\nmapcols\nmapcols!\npermute!\nprepend!\npush!\npushfirst!\nreduce\nrepeat\nrepeat!\nreverse\nreverse!\nselect\nselect!\nshuffle\nshuffle!\ntable_transformation\ntransform\ntransform!\nvcat","category":"page"},{"location":"lib/functions/#Base.append!","page":"Functions","title":"Base.append!","text":"append!(df::DataFrame, tables...; cols::Symbol=:setequal,\n        promote::Bool=(cols in [:union, :subset]))\n\nAdd the rows of tables passed as tables to the end of df. If the table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being appended.\n\nThe exact behavior of append! 
depends on the cols argument:\n\nIf cols == :setequal (this is the default) then df2 must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then df2 must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if df2 is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then df2 may contain more columns than df, but all column names that are present in df must be present in df2 and only these are used.\nIf cols == :subset then append! behaves like for :intersect but if some column is missing in df2 then a missing value is pushed to df.\nIf cols == :union then append! adds columns missing in df that are present in df2, for columns present in df but missing in df2 a missing value is pushed.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. If promote=false an error is thrown.\n\nThe above rule has the following exceptions:\n\nIf df has no columns then copies of columns from df2 are added to it.\nIf df2 has no columns then calling append! leaves df unchanged.\n\nPlease note that append! must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for columns present in df are preserved. If new columns are added their :note-style metadata is copied from the appended table. Other metadata is dropped.\n\nSee also: use push! to add individual rows to a data frame, prepend! 
to add a table at the beginning, and vcat to vertically concatenate data frames.\n\nExamples\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4.0:6.0, B=4:6)\n3×2 DataFrame\n Row │ A        B\n     │ Float64  Int64\n─────┼────────────────\n   1 │     4.0      4\n   2 │     5.0      5\n   3 │     6.0      6\n\njulia> append!(df1, df2);\n\njulia> df1\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> append!(df2, DataFrame(A=1), (; C=1:2), cols=:union)\n6×3 DataFrame\n Row │ A          B        C\n     │ Float64?   Int64?   Int64?\n─────┼─────────────────────────────\n   1 │       4.0        4  missing\n   2 │       5.0        5  missing\n   3 │       6.0        6  missing\n   4 │       1.0  missing  missing\n   5 │ missing    missing        1\n   6 │ missing    missing        2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.combine","page":"Functions","title":"DataFrames.combine","text":"combine(df::AbstractDataFrame, args...;\n        renamecols::Bool=true, threads::Bool=true)\ncombine(f::Callable, df::AbstractDataFrame;\n        renamecols::Bool=true, threads::Bool=true)\ncombine(gd::GroupedDataFrame, args...;\n        keepkeys::Bool=true, ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\ncombine(f::Base.Callable, gd::GroupedDataFrame;\n        keepkeys::Bool=true, ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd specified by args and return it. 
The result can have any number of rows that is determined by the values returned by passed transformations.\n\nThe common rules for all transformation functions supported by DataFrames.jl are explained and compared in detail below.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target column names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be a single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be a single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If an expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if the specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if an empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in the target data frame is accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> combine(df, :a => sum, nrow, renamecols=false)\n1×2 DataFrame\n Row │ a      nrow\n     │ Int64  Int64\n─────┼──────────────\n   1 │     6      3\n\njulia> combine(df, :a => ByRow(sin) => :c, :b)\n3×2 DataFrame\n Row │ c         b\n     │ Float64   Int64\n─────┼─────────────────\n   1 │ 0.841471      4\n   2 │ 0.909297      5\n   3 │ 0.14112       6\n\njulia> combine(df, :, [:a, :b] => (a, b) -> a .+ b .- sum(b)/length(b))\n3×3 DataFrame\n Row │ a      b      a_b_function\n     │ Int64  Int64  Float64\n─────┼────────────────────────────\n   1 │     1      4           0.0\n   2 │     2      5           2.0\n   3 │     3      6           4.0\n\njulia> combine(df, All() .=> [minimum maximum])\n1×4 DataFrame\n Row │ a_minimum  b_minimum  a_maximum  b_maximum\n     │ Int64      Int64      Int64      Int64\n─────┼────────────────────────────────────────────\n   1 │         1          4          3          6\n\njulia> using Statistics\n\njulia> combine(df, AsTable(:) => ByRow(mean), renamecols=false)\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> 
combine(df, AsTable(:) => ByRow(mean) => x -> join(x, \"_\"))\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> combine(first, df)\n1×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n\njulia> df = DataFrame(a=1:3, b=4:6, c=7:9)\n3×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9\n\njulia> combine(df, AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => :stats,\n               AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => AsTable)\n3×3 DataFrame\n Row │ stats                    mean     std\n     │ NamedTup…                Float64  Float64\n─────┼───────────────────────────────────────────\n   1 │ (mean = 4.0, std = 3.0)      4.0      3.0\n   2 │ (mean = 5.0, std = 3.0)      5.0      3.0\n   3 │ (mean = 6.0, std = 3.0)      6.0      3.0\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :c => sum, nrow)\n4×3 DataFrame\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6      2\n   2 │     2      8      2\n   3 │     3     10      2\n   4 │     4     12      2\n\njulia> combine(gd, :c => sum, nrow, ungroup=false)\nGroupedDataFrame with 4 groups based on key: a\nFirst Group (1 row): a = 1\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6      2\n⋮\nLast Group (1 row): a = 4\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4     12      2\n\njulia> combine(gd) do d # do syntax for the slower variant\n           sum(d.c)\n       end\n4×2 DataFrame\n Row │ a      x1\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      
6\n   2 │     2      8\n   3 │     3     10\n   4 │     4     12\n\njulia> combine(gd, :c => (x -> sum(log, x)) => :sum_log_c) # specifying a name for target column\n4×2 DataFrame\n Row │ a      sum_log_c\n     │ Int64  Float64\n─────┼──────────────────\n   1 │     1    1.60944\n   2 │     2    2.48491\n   3 │     3    3.04452\n   4 │     4    3.46574\n\njulia> combine(gd, [:b, :c] .=> sum) # passing a vector of pairs\n4×3 DataFrame\n Row │ a      b_sum  c_sum\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      6\n   2 │     2      2      8\n   3 │     3      4     10\n   4 │     4      2     12\n\njulia> combine(gd) do sdf # dropping group when DataFrame() is returned\n          sdf.c[1] != 1 ? sdf : DataFrame()\n       end\n6×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      2\n   2 │     2      1      6\n   3 │     3      2      3\n   4 │     3      2      7\n   5 │     4      1      4\n   6 │     4      1      8\n\nauto-splatting, renaming and keepkeys\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :b => :b1, :c => :c1, [:b, :c] => +, keepkeys=false)\n8×3 DataFrame\n Row │ b1     c1     b_c_+\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      3\n   2 │     2      5      7\n   3 │     1      2      3\n   4 │     1      6      7\n   5 │     2      3      5\n   6 │     2      7      9\n   7 │     1      4      5\n   8 │     1      8      9\n\nbroadcasting and column expansion\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :b, AsTable([:b, :c]) => ByRow(extrema) => [:min, :max])\n8×4 DataFrame\n Row │ a      b      min    max\n     │ Int64  
Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      2\n   2 │     1      2      2      5\n   3 │     2      1      1      2\n   4 │     2      1      1      6\n   5 │     3      2      2      3\n   6 │     3      2      2      7\n   7 │     4      1      1      4\n   8 │     4      1      1      8\n\njulia> combine(gd, [:b, :c] .=> Ref) # preventing vector from being spread across multiple rows\n4×3 DataFrame\n Row │ a      b_Ref      c_Ref\n     │ Int64  SubArray…  SubArray…\n─────┼─────────────────────────────\n   1 │     1  [2, 2]     [1, 5]\n   2 │     2  [1, 1]     [2, 6]\n   3 │     3  [2, 2]     [3, 7]\n   4 │     4  [1, 1]     [4, 8]\n\njulia> combine(gd, AsTable(Not(:a)) => Ref) # protecting result\n4×2 DataFrame\n Row │ a      b_c_Ref\n     │ Int64  NamedTup…\n─────┼─────────────────────────────────\n   1 │     1  (b = [2, 2], c = [1, 5])\n   2 │     2  (b = [1, 1], c = [2, 6])\n   3 │     3  (b = [2, 2], c = [3, 7])\n   4 │     4  (b = [1, 1], c = [4, 8])\n\njulia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false)\n8×4 DataFrame\n Row │ a      b      c      b_c\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      3\n   2 │     1      2      5      7\n   3 │     2      1      2      3\n   4 │     2      1      6      7\n   5 │     3      2      3      5\n   6 │     3      2      7      9\n   7 │     4      1      4      5\n   8 │     4      1      8      9\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.fillcombinations","page":"Functions","title":"DataFrames.fillcombinations","text":"fillcombinations(df::AbstractDataFrame, indexcols;\n                     allowduplicates::Bool=false,\n                     fill=missing)\n\nGenerate all combinations of levels of column(s) indexcols in data frame df. Levels and their order are determined by the levels function (i.e. 
unique values sorted lexicographically by default, or a custom set of levels for e.g. CategoricalArray columns), in addition to missing if present.\n\nFor combinations of indexcols not present in df these columns are filled with the fill value (missing by default).\n\nIf allowduplicates=false (the default) indexcols may only contain unique combinations of indexcols values. If allowduplicates=true duplicates are allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:2, y='a':'b', z=[\"x\", \"y\"])\n2×3 DataFrame\n Row │ x      y     z\n     │ Int64  Char  String\n─────┼─────────────────────\n   1 │     1  a     x\n   2 │     2  b     y\n\njulia> fillcombinations(df, [:x, :y])\n4×3 DataFrame\n Row │ x      y     z\n     │ Int64  Char  String?\n─────┼──────────────────────\n   1 │     1  a     x\n   2 │     2  a     missing\n   3 │     1  b     missing\n   4 │     2  b     y\n\njulia> fillcombinations(df, [:y, :z], fill=0)\n4×3 DataFrame\n Row │ x       y     z\n     │ Int64?  Char  String\n─────┼──────────────────────\n   1 │      1  a     x\n   2 │      0  b     x\n   3 │      0  a     y\n   4 │      2  b     y\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.flatten","page":"Functions","title":"DataFrames.flatten","text":"flatten(df::AbstractDataFrame, cols; scalar::Type=Union{})\n\nWhen columns cols of data frame df have iterable elements that define length (for example a Vector of Vectors), return a DataFrame where each element of each col in cols is flattened, meaning the column corresponding to col becomes a longer vector where the original entries are concatenated. Elements of row i of df in columns other than cols will be repeated according to the length of df[i, col]. These lengths must therefore be the same for each col in cols, or else an error is raised. 
Note that these elements are not copied, and thus if they are mutable changing them in the returned DataFrame will affect df.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf scalar is passed then values that have this type in flattened columns are treated as scalars and broadcasted as many times as is needed to match lengths of values stored in other columns. If all values in a row are scalars, a single row is produced.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df1 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2×3 DataFrame\n Row │ a      b       c\n     │ Int64  Array…  Array…\n─────┼───────────────────────\n   1 │     1  [1, 2]  [5, 6]\n   2 │     2  [3, 4]  [7, 8]\n\njulia> flatten(df1, :b)\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Array…\n─────┼──────────────────────\n   1 │     1      1  [5, 6]\n   2 │     1      2  [5, 6]\n   3 │     2      3  [7, 8]\n   4 │     2      4  [7, 8]\n\njulia> flatten(df1, [:b, :c])\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      5\n   2 │     1      2      6\n   3 │     2      3      7\n   4 │     2      4      8\n\njulia> df2 = DataFrame(a=[1, 2], b=[(\"p\", \"q\"), (\"r\", \"s\")])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Tuple…\n─────┼───────────────────\n   1 │     1  (\"p\", \"q\")\n   2 │     2  (\"r\", \"s\")\n\njulia> flatten(df2, :b)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  String\n─────┼───────────────\n   1 │     1  p\n   2 │     1  q\n   3 │     2  r\n   4 │     2  s\n\njulia> df3 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7]])\n2×3 DataFrame\n Row │ a      b       c\n     │ Int64  Array…  Array…\n─────┼───────────────────────\n   1 │     1  [1, 2]  [5, 6]\n   2 │     2  [3, 4]  [7]\n\njulia> flatten(df3, 
[:b, :c])\nERROR: ArgumentError: Lengths of iterables stored in columns :b and :c are not the same in row 2\n\njulia> df4 = DataFrame(a=[1, 2, 3],\n                       b=[[1, 2], missing, missing],\n                       c=[[5, 6], missing, [7, 8]])\n3×3 DataFrame\n Row │ a      b        c\n     │ Int64  Array…?  Array…?\n─────┼─────────────────────────\n   1 │     1  [1, 2]   [5, 6]\n   2 │     2  missing  missing\n   3 │     3  missing  [7, 8]\n\njulia> flatten(df4, [:b, :c], scalar=Missing)\n5×3 DataFrame\n Row │ a      b        c\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     1        1        5\n   2 │     1        2        6\n   3 │     2  missing  missing\n   4 │     3  missing        7\n   5 │     3  missing        8\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.hcat","page":"Functions","title":"Base.hcat","text":"hcat(df::AbstractDataFrame...;\n     makeunique::Bool=false, copycols::Bool=true)\n\nHorizontally concatenate data frames.\n\nIf makeunique=false (the default) column names of passed objects must be unique. If makeunique=true then duplicate column names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf copycols=true (the default) then the DataFrame returned by hcat will contain copied columns from the source data frames. If copycols=false then it will contain columns as they are stored in the source (without copying). 
This option should be used with caution as mutating either the columns in sources or in the returned DataFrame might lead to the corruption of the other object.\n\nMetadata: hcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value; it propagates column-level :note-style metadata.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = hcat(df1, df2, makeunique=true)\n3×4 DataFrame\n Row │ A      B      A_1    B_1\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      1      4      4\n   2 │     2      2      5      5\n   3 │     3      3      6      6\n\njulia> df3.A === df1.A\nfalse\n\njulia> df3 = hcat(df1, df2, makeunique=true, copycols=false);\n\njulia> df3.A === df1.A\ntrue\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.insert!","page":"Functions","title":"Base.insert!","text":"insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};\n        cols::Symbol=:setequal, promote::Bool=false)\ninsert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple,\n                                                  AbstractDict, Tables.AbstractRow};\n        cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row to df at position index in-place, taking the values from row. index must be an integer between 1 and nrow(df)+1.\n\nColumn types of df are preserved, and new values are converted if necessary. 
An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: push!, pushfirst!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insert!(df, 2, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ true      0\n   3 │ b         2\n   4 │ c         3\n\njulia> insert!(df, 5, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ true      0\n   3 │ b         2\n   4 │ c         3\n   5 │ a         1\n\njulia> insert!(df, 1, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ 11       12\n   2 │ a         1\n   3 │ true      0\n   4 │ b         2\n   5 │ c         3\n   6 │ a         1\n\njulia> insert!(df, 7, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ 11         12  missing\n   2 │ a           1  missing\n   3 │ true        0  missing\n   4 │ b           2  missing\n   5 │ c           3  missing\n   6 │ a           1  missing\n   7 │ 1.0   missing        1.0\n\njulia> insert!(df, 3, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ 11            12  missing\n   2 │ a              1  missing\n   3 │ missing  missing  missing\n   4 │ true           0  missing\n   5 │ b              2  missing\n   6 │ c              3  missing\n   7 │ a              1  missing\n   8 │ 1.0      missing        1.0\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.insertcols","page":"Functions","title":"DataFrames.insertcols","text":"insertcols(df::AbstractDataFrame[, col], (name=>val)::Pair...;\n           after::Bool=false, makeunique::Bool=false, copycols::Bool=true)\n\nInsert a column into a copy of df data frame using the insertcols! function and return the newly created data frame.\n\nIf col is omitted it is set to ncol(df)+1 (the column is inserted as the last column).\n\nArguments\n\ndf : the data frame to which we want to add columns\ncol : a position at which we want to insert a column, passed as an integer or a column name (a string or a Symbol); the column selected with col and columns following it are shifted to the right in df after the operation\nname : the name of the new column\nval : an AbstractVector giving the contents of the new column or a value of any type other than AbstractArray which will be repeated to fill a new vector; as a particular rule, values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way\nafter : if true columns are inserted after col\nmakeunique : defines what to do if name already exists in df; if it is false an error will be thrown; if it is true a new unique name will be generated by adding a suffix\ncopycols : whether vectors passed as columns should be copied\n\nIf val is an AbstractRange then the result of collect(val) is inserted.\n\nIf df is a SubDataFrame then it must have been created with : as column selector (otherwise an error is thrown). In this case the copycols keyword argument is ignored (i.e. 
the added column is always copied) and the parent data frame's column is filled with missing in rows that are filtered out by df.\n\nIf df isa DataFrame that has no columns and only values other than AbstractVector are passed then it is used to create a one-element column. If df isa DataFrame that has no columns and at least one AbstractVector is passed then its length is used to determine the number of elements in all created columns. In all other cases the number of rows in all created columns must match nrow(df).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also insertcols!.\n\nExamples\n\njulia> df = DataFrame(a=1:3)\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> insertcols(df, 1, :b => 'a':'c')\n3×2 DataFrame\n Row │ b     a\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insertcols(df, :c => 2:4, :c => 3:5, makeunique=true)\n3×3 DataFrame\n Row │ a      c      c_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n   2 │     2      3      4\n   3 │     3      4      5\n\njulia> insertcols(df, :a, :d => 7:9, after=true)\n3×2 DataFrame\n Row │ a      d\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      7\n   2 │     2      8\n   3 │     3      9\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.insertcols!","page":"Functions","title":"DataFrames.insertcols!","text":"insertcols!(df::AbstractDataFrame[, col], (name=>val)::Pair...;\n            after::Bool=false, makeunique::Bool=false, copycols::Bool=true)\n\nInsert a column into a data frame in place. 
Return the updated data frame.\n\nIf col is omitted it is set to ncol(df)+1 (the column is inserted as the last column).\n\nArguments\n\ndf : the data frame to which we want to add columns\ncol : a position at which we want to insert a column, passed as an integer or a column name (a string or a Symbol); the column selected with col and columns following it are shifted to the right in df after the operation\nname : the name of the new column\nval : an AbstractVector giving the contents of the new column or a value of any type other than AbstractArray which will be repeated to fill a new vector; as a particular rule, values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way\nafter : if true columns are inserted after col\nmakeunique : defines what to do if name already exists in df; if it is false an error will be thrown; if it is true a new unique name will be generated by adding a suffix\ncopycols : whether vectors passed as columns should be copied\n\nIf val is an AbstractRange then the result of collect(val) is inserted.\n\nIf df is a SubDataFrame then it must have been created with : as column selector (otherwise an error is thrown). In this case the copycols keyword argument is ignored (i.e. the added column is always copied) and the parent data frame's column is filled with missing in rows that are filtered out by df.\n\nIf df isa DataFrame that has no columns and only values other than AbstractVector are passed then it is used to create a one-element column. If df isa DataFrame that has no columns and at least one AbstractVector is passed then its length is used to determine the number of elements in all created columns. 
In all other cases the number of rows in all created columns must match nrow(df).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nSee also insertcols.\n\nExamples\n\njulia> df = DataFrame(a=1:3)\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> insertcols!(df, 1, :b => 'a':'c')\n3×2 DataFrame\n Row │ b     a\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insertcols!(df, 2, :c => 2:4, :c => 3:5, makeunique=true)\n3×4 DataFrame\n Row │ b     c      c_1    a\n     │ Char  Int64  Int64  Int64\n─────┼───────────────────────────\n   1 │ a         2      3      1\n   2 │ b         3      4      2\n   3 │ c         4      5      3\n\njulia> insertcols!(df, :b, :d => 7:9, after=true)\n3×5 DataFrame\n Row │ b     d      c      c_1    a\n     │ Char  Int64  Int64  Int64  Int64\n─────┼──────────────────────────────────\n   1 │ a         7      2      3      1\n   2 │ b         8      3      4      2\n   3 │ c         9      4      5      3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.invpermute!","page":"Functions","title":"Base.invpermute!","text":"invpermute!(df::AbstractDataFrame, p)\n\nLike permute!, but the inverse of the given permutation is applied.\n\ninvpermute! will produce a correct result even if some columns of passed data frame or permutation p are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then invpermute! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> permute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     1      6     11\n   4 │     2      7     12\n   5 │     4      9     14\n\njulia> invpermute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.mapcols","page":"Functions","title":"DataFrames.mapcols","text":"mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())\n\nReturn a DataFrame where each column of df selected by cols (by default, all columns) is transformed using function f. Columns not selected by cols are copied.\n\nf must return AbstractVector objects all with the same length or scalars (all values other than AbstractVector are considered to be a scalar).\n\nThe cols column selector can be any value accepted as column selector by the names function.\n\nNote that mapcols guarantees not to reuse the columns from df in the returned DataFrame. 
If f returns its argument then it gets copied before being stored.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> mapcols(x -> x.^2, df)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n   4 │    16    196\n\njulia> mapcols(x -> x.^2, df, cols=r\"y\")\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     2    144\n   3 │     3    169\n   4 │     4    196\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.mapcols!","page":"Functions","title":"DataFrames.mapcols!","text":"mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())\n\nUpdate a DataFrame in-place where each column of df selected by cols (by default, all columns) is transformed using function f. Columns not selected by cols are left unchanged.\n\nf must return AbstractVector objects all with the same length or scalars (all values other than AbstractVector are considered to be a scalar).\n\nNote that mapcols! 
reuses the columns from df if they are returned by f.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> mapcols!(x -> x.^2, df);\n\njulia> df\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n   4 │    16    196\n\njulia> mapcols!(x -> 2 * x, df, cols=r\"x\");\n\njulia> df\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2    121\n   2 │     8    144\n   3 │    18    169\n   4 │    32    196\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.permute!","page":"Functions","title":"Base.permute!","text":"permute!(df::AbstractDataFrame, p)\n\nPermute data frame df in-place, according to permutation p. Throws ArgumentError if p is not a permutation.\n\nTo return a new data frame instead of permuting df in-place, use df[p, :].\n\npermute! will produce a correct result even if some columns of passed data frame or permutation p are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then permute! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> permute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     1      6     11\n   4 │     2      7     12\n   5 │     4      9     14\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.prepend!","page":"Functions","title":"Base.prepend!","text":"prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,\n         promote::Bool=(cols in [:union, :subset]))\n\nAdd the rows of tables passed as tables to the beginning of df. If the table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being prepended.\n\nAdd the rows of df2 to the beginning of df. If the second argument table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being prepended.\n\nThe exact behavior of prepend! 
depends on the cols argument:\n\nIf cols == :setequal (this is the default) then df2 must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then df2 must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if df2 is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then df2 may contain more columns than df, but all column names that are present in df must be present in df2 and only these are used.\nIf cols == :subset then prepend! behaves like for :intersect but if some column is missing in df2 then a missing value is pushed to df.\nIf cols == :union then prepend! adds columns missing in df that are present in df2, for columns present in df but missing in df2 a missing value is pushed.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. If promote=false an error is thrown.\n\nThe above rule has the following exceptions:\n\nIf df has no columns then copies of columns from df2 are added to it.\nIf df2 has no columns then calling prepend! leaves df unchanged.\n\nPlease note that prepend! must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for columns present in df are preserved. If new columns are added their :note-style metadata is copied from the prepended table. Other metadata is dropped.\n\nSee also: use pushfirst! to add individual rows at the beginning of a data frame, append! 
to add a table at the end, and vcat to vertically concatenate data frames.\n\nExamples\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4.0:6.0, B=4:6)\n3×2 DataFrame\n Row │ A        B\n     │ Float64  Int64\n─────┼────────────────\n   1 │     4.0      4\n   2 │     5.0      5\n   3 │     6.0      6\n\njulia> prepend!(df1, df2);\n\njulia> df1\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n   4 │     1      1\n   5 │     2      2\n   6 │     3      3\n\njulia> prepend!(df2, DataFrame(A=1), (; C=1:2), cols=:union)\n6×3 DataFrame\n Row │ A          B        C\n     │ Float64?   Int64?   Int64?\n─────┼─────────────────────────────\n   1 │       1.0  missing  missing\n   2 │ missing    missing        1\n   3 │ missing    missing        2\n   4 │       4.0        4  missing\n   5 │       5.0        5  missing\n   6 │       6.0        6  missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.push!","page":"Functions","title":"Base.push!","text":"push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;\n      cols::Symbol=:setequal, promote::Bool=false)\npush!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,\n                                Tables.AbstractRow}...;\n      cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row at the end of df in-place, taking the values from row. Several rows can be added by passing them as separate arguments.\n\nColumn types of df are preserved, and new values are converted if necessary. An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. 
In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: pushfirst!, insert!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> push!(df, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n\njulia> push!(df, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n   5 │ a         1\n\njulia> push!(df, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n   5 │ a         1\n   6 │ 11       12\n\njulia> push!(df, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ a           1  missing\n   2 │ b           2  missing\n   3 │ c           3  missing\n   4 │ true        0  missing\n   5 │ a           1  missing\n   6 │ 11         12  missing\n   7 │ 1.0   missing        1.0\n\njulia> push!(df, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ a              1  missing\n   2 │ b              2  missing\n   3 │ c              3  missing\n   4 │ true           0  missing\n   5 │ a              1  missing\n   6 │ 11            12  missing\n   7 │ 1.0      missing        1.0\n   8 │ missing  missing  missing\n\njulia> push!(DataFrame(a=1, b=2), (3, 4), (5, 6))\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n   3 │     5      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pushfirst!","page":"Functions","title":"Base.pushfirst!","text":"pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;\n           cols::Symbol=:setequal, promote::Bool=false)\npushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,\n                                     Tables.AbstractRow}...;\n           cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row at the beginning of df in-place, taking the values from row. Several rows can be added by passing them as separate arguments.\n\nColumn types of df are preserved, and new values are converted if necessary. An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. 
The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: push!, insert!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> pushfirst!(df, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ true      0\n   2 │ a         1\n   3 │ b         2\n   4 │ c         3\n\njulia> pushfirst!(df, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ true      0\n   2 │ true      0\n   3 │ a         1\n   4 │ b         2\n   5 │ c         3\n\njulia> pushfirst!(df, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ 11       12\n   2 │ true      0\n   3 │ true      0\n   4 │ a         1\n   5 │ b         2\n   6 │ c         3\n\njulia> pushfirst!(df, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ 1.0   missing        1.0\n   2 │ 11         12  missing\n   3 │ true        0  missing\n   4 │ true        0  missing\n   5 │ a           1  missing\n   6 │ b           2  missing\n   7 │ c           3  missing\n\njulia> pushfirst!(df, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ missing  missing  missing\n   2 │ 1.0      missing        1.0\n   3 │ 11            12  missing\n   4 │ true           0  missing\n   5 │ true           0  missing\n   6 │ a              1  missing\n   7 │ b              2  missing\n   8 │ c              3  missing\n\njulia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (5, 6))\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      4\n   2 │     5      6\n   3 │     1      2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reduce","page":"Functions","title":"Base.reduce","text":"reduce(::typeof(vcat),\n       dfs::Union{AbstractVector{<:AbstractDataFrame},\n                  Tuple{AbstractDataFrame, Vararg{AbstractDataFrame}}};\n       cols::Union{Symbol, AbstractVector{Symbol},\n                   AbstractVector{<:AbstractString}}=:setequal,\n       source::Union{Nothing, Symbol, AbstractString,\n                     Pair{<:Union{Symbol, AbstractString}, <:AbstractVector}}=nothing,\n       init::AbstractDataFrame=DataFrame())\n\nEfficiently reduce the given vector or tuple of AbstractDataFrames with vcat.\n\nSee the vcat docstring for a description of keyword arguments cols and source.\n\nThe keyword argument init is the initial value to use in the reductions. It must be a data frame that has zero rows. It is not taken into account when computing the value of the source column nor when determining metadata of the produced data frame.\n\nThe column order, names, and types of the resulting DataFrame, and the behavior of cols and source keyword arguments follow the rules specified for vcat of AbstractDataFrames.\n\nMetadata: vcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value. 
vcat propagates column-level :note-style metadata for keys that are present in all passed data frames that contain this column and have the same value.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = DataFrame(A=7:9, C=7:9)\n3×2 DataFrame\n Row │ A      C\n     │ Int64  Int64\n─────┼──────────────\n   1 │     7      7\n   2 │     8      8\n   3 │     9      9\n\njulia> reduce(vcat, (df1, df2))\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> reduce(vcat, [df1, df2, df3], cols=:union, source=:source)\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   Int64\n─────┼─────────────────────────────────\n   1 │     1        1  missing       1\n   2 │     2        2  missing       1\n   3 │     3        3  missing       1\n   4 │     4        4  missing       2\n   5 │     5        5  missing       2\n   6 │     6        6  missing       2\n   7 │     7  missing        7       3\n   8 │     8  missing        8       3\n   9 │     9  missing        9       3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.repeat","page":"Functions","title":"Base.repeat","text":"repeat(df::AbstractDataFrame; inner::Integer = 1, outer::Integer = 1)\n\nConstruct a data frame by repeating rows in df. 
inner specifies how many times each row is repeated, and outer specifies how many times the full set of rows is repeated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat(df, inner=2, outer=3)\n12×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     1      3\n   3 │     2      4\n   4 │     2      4\n   5 │     1      3\n   6 │     1      3\n   7 │     2      4\n   8 │     2      4\n   9 │     1      3\n  10 │     1      3\n  11 │     2      4\n  12 │     2      4\n\n\n\n\n\nrepeat(df::AbstractDataFrame, count::Integer)\n\nConstruct a data frame by repeating each row in df the number of times specified by count.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat(df, 2)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n   3 │     1      3\n   4 │     2      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.repeat!","page":"Functions","title":"DataFrames.repeat!","text":"repeat!(df::DataFrame; inner::Integer=1, outer::Integer=1)\n\nUpdate a data frame df in-place by repeating its rows. inner specifies how many times each row is repeated, and outer specifies how many times the full set of rows is repeated. 
Columns of df are freshly allocated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat!(df, inner=2, outer=3);\n\njulia> df\n12×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     1      3\n   3 │     2      4\n   4 │     2      4\n   5 │     1      3\n   6 │     1      3\n   7 │     2      4\n   8 │     2      4\n   9 │     1      3\n  10 │     1      3\n  11 │     2      4\n  12 │     2      4\n\n\n\n\n\nrepeat!(df::DataFrame, count::Integer)\n\nUpdate a data frame df in-place by repeating its rows the number of times specified by count. Columns of df are freshly allocated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat!(df, 2)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n   3 │     1      3\n   4 │     2      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reverse","page":"Functions","title":"Base.reverse","text":"reverse(df::AbstractDataFrame, start=1, stop=nrow(df))\n\nReturn a data frame containing the rows in df in reversed order. 
If start and stop are provided, only rows in the start:stop range are affected.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> reverse(df)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     4      9     14\n   3 │     3      8     13\n   4 │     2      7     12\n   5 │     1      6     11\n\njulia> reverse(df, 2, 3)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     3      8     13\n   3 │     2      7     12\n   4 │     4      9     14\n   5 │     5     10     15\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reverse!","page":"Functions","title":"Base.reverse!","text":"reverse!(df::AbstractDataFrame, start=1, stop=nrow(df))\n\nMutate data frame in-place to reverse its row order. If start and stop are provided, only rows in the start:stop range are affected.\n\nreverse! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then reverse! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> reverse!(df)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     4      9     14\n   3 │     3      8     13\n   4 │     2      7     12\n   5 │     1      6     11\n\njulia> reverse!(df, 2, 3)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     4      9     14\n   4 │     2      7     12\n   5 │     1      6     11\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.select","page":"Functions","title":"DataFrames.select","text":"select(df::AbstractDataFrame, args...;\n       copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)\nselect(args::Callable, df::DataFrame;\n       renamecols::Bool=true, threads::Bool=true)\nselect(gd::GroupedDataFrame, args...;\n       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n       renamecols::Bool=true, threads::Bool=true)\nselect(f::Base.Callable, gd::GroupedDataFrame;\n       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n       renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd specified by args and return it. 
The result is guaranteed to have the same number of rows as df, except when no columns are selected (in which case the result has zero rows).\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. 
In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\ncopycols::Bool=true : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    
As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> select(df, 2)\n3×1 DataFrame\n Row │ b\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     5\n   3 │     6\n\njulia> select(df, :a => ByRow(sin) => :c, :b)\n3×2 DataFrame\n Row │ c         b\n     │ Float64   Int64\n─────┼─────────────────\n   1 │ 0.841471      4\n   2 │ 0.909297      5\n   3 │ 0.14112       6\n\njulia> select(df, :, [:a, :b] => (a, b) -> a .+ b .- sum(b)/length(b))\n3×3 DataFrame\n Row │ a      b      a_b_function\n     │ Int64  Int64  Float64\n─────┼────────────────────────────\n   1 │     1      4           0.0\n   2 │     2      5           2.0\n   3 │     3      6           4.0\n\njulia> select(df, All() .=> [minimum maximum])\n3×4 DataFrame\n Row │ a_minimum  b_minimum  a_maximum  b_maximum\n     │ Int64      Int64      Int64      Int64\n─────┼────────────────────────────────────────────\n   1 │         1          4          3          6\n   2 │         1          4          3          6\n   3 │         1          4          3          6\n\njulia> using Statistics\n\njulia> select(df, AsTable(:) => ByRow(mean), renamecols=false)\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> select(df, AsTable(:) => ByRow(mean) => x -> join(x, \"_\"))\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> select(first, df)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     1      4\n   3 │     1      4\n\njulia> df = DataFrame(a=1:3, b=4:6, c=7:9)\n3×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  
Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9\n\njulia> select(df, AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => :stats,\n              AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => AsTable)\n3×3 DataFrame\n Row │ stats                    mean     std\n     │ NamedTup…                Float64  Float64\n─────┼───────────────────────────────────────────\n   1 │ (mean = 4.0, std = 3.0)      4.0      3.0\n   2 │ (mean = 5.0, std = 3.0)      5.0      3.0\n   3 │ (mean = 6.0, std = 3.0)      6.0      3.0\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8)\n8×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      1      2\n   3 │     1      2      3\n   4 │     2      1      4\n   5 │     2      2      5\n   6 │     1      1      6\n   7 │     1      2      7\n   8 │     2      1      8\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (5 rows): a = 1\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      1      2\n   3 │     1      2      3\n   4 │     1      1      6\n   5 │     1      2      7\n⋮\nLast Group (3 rows): a = 2\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      4\n   2 │     2      2      5\n   3 │     2      1      8\n\nspecifying a name for target column\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :c => (x -> sum(log, x)) => :sum_log_c)\n8×2 DataFrame\n Row │ a      sum_log_c\n     │ Int64  Float64\n─────┼──────────────────\n   1 │     1    5.52943\n   2 │     1    5.52943\n   3 │     1    
5.52943\n   4 │     2    5.07517\n   5 │     2    5.07517\n   6 │     1    5.52943\n   7 │     1    5.52943\n   8 │     2    5.07517\n\njulia> select(gd, [:b, :c] .=> sum) # passing a vector of pairs\n8×3 DataFrame\n Row │ a      b_sum  c_sum\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      8     19\n   2 │     1      8     19\n   3 │     1      8     19\n   4 │     2      4     17\n   5 │     2      4     17\n   6 │     1      8     19\n   7 │     1      8     19\n   8 │     2      4     17\n\nmultiple arguments, renaming and keepkeys\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :b => :b1, :c => :c1, [:b, :c] => +, keepkeys=false)\n8×3 DataFrame\n Row │ b1     c1     b_c_+\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      3\n   2 │     1      2      3\n   3 │     2      3      5\n   4 │     1      4      5\n   5 │     2      5      7\n   6 │     1      6      7\n   7 │     2      7      9\n   8 │     1      8      9\n\nbroadcasting and column expansion\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :b, AsTable([:b, :c]) => ByRow(extrema) => [:min, :max])\n8×4 DataFrame\n Row │ a      b      min    max\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      2\n   2 │     1      1      1      2\n   3 │     1      2      2      3\n   4 │     2      1      1      4\n   5 │     2      2      2      5\n   6 │     1      1      1      6\n   7 │     1      2      2      7\n   8 │     2      1      1      8\n\njulia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false)\n8×4 DataFrame\n Row │ a      b      c      b_c\n     │ Int64  Int64  Int64  
Int64\n─────┼────────────────────────────\n   1 │     1      2      1      3\n   2 │     1      1      2      3\n   3 │     1      2      3      5\n   4 │     2      1      4      5\n   5 │     2      2      5      7\n   6 │     1      1      6      7\n   7 │     1      2      7      9\n   8 │     2      1      8      9\n\ncolumn-independent operations\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, nrow, proprow, groupindices, eachindex)\n8×5 DataFrame\n Row │ a      nrow   proprow  groupindices  eachindex\n     │ Int64  Int64  Float64  Int64         Int64\n─────┼────────────────────────────────────────────────\n   1 │     1      5    0.625             1          1\n   2 │     1      5    0.625             1          2\n   3 │     1      5    0.625             1          3\n   4 │     2      3    0.375             2          1\n   5 │     2      3    0.375             2          2\n   6 │     1      5    0.625             1          4\n   7 │     1      5    0.625             1          5\n   8 │     2      3    0.375             2          3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.select!","page":"Functions","title":"DataFrames.select!","text":"select!(df::AbstractDataFrame, args...;\n        renamecols::Bool=true, threads::Bool=true)\nselect!(args::Base.Callable, df::DataFrame;\n        renamecols::Bool=true, threads::Bool=true)\nselect!(gd::GroupedDataFrame, args...; ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\nselect!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\n\nMutate df or gd in place to retain only columns or transformations specified by args... and return it. 
The result is guaranteed to have the same number of rows as df or parent of gd, except when no columns are selected (in which case the result has zero rows).\n\nIf a SubDataFrame or GroupedDataFrame{SubDataFrame} is passed, the parent data frame is updated using columns generated by args..., following the same rules as indexing:\n\nfor existing columns filtered-out rows are filled with values present in the old columns\nfor new columns (which is only allowed if SubDataFrame was created with : as column selector) filtered-out rows are filled with missing\ndropped columns (which are only allowed if SubDataFrame was created with : as column selector) are removed\nif SubDataFrame was not created with : as column selector then select! is only allowed if the transformations keep exactly the same sequence of column names as is in the passed df\n\nIf a GroupedDataFrame is passed then it is updated to reflect the new rows of its updated parent. If there are independent GroupedDataFrame objects constructed using the same parent data frame they might get corrupt.\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. 
This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). 
Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. 
The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. 
The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. 
In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. 
If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. 
Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nSee select for examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Random.shuffle","page":"Functions","title":"Random.shuffle","text":"shuffle([rng=GLOBAL_RNG,] df::AbstractDataFrame)\n\nReturn a copy of df with randomly permuted rows. The optional rng argument specifies a random number generator.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> using Random, StableRNGs\n\njulia> rng = StableRNG(1234);\n\njulia> shuffle(rng, DataFrame(a=1:5, b=1:5))\n5×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      2\n   2 │     1      1\n   3 │     3      3\n   4 │     5      5\n   5 │     4      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Random.shuffle!","page":"Functions","title":"Random.shuffle!","text":"shuffle!([rng=GLOBAL_RNG,] df::AbstractDataFrame)\n\nRandomly permute rows of df in-place. The optional rng argument specifies a random number generator.\n\nshuffle! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then shuffle! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> using Random, StableRNGs\n\njulia> rng = StableRNG(1234);\n\njulia> shuffle!(rng, DataFrame(a=1:5, b=1:5))\n5×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      2\n   2 │     1      1\n   3 │     3      3\n   4 │     5      5\n   5 │     4      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.table_transformation","page":"Functions","title":"DataFrames.table_transformation","text":"table_transformation(df_sel::AbstractDataFrame, fun)\n\nThis is the function called when AsTable(...) => fun is requested. The df_sel argument is a data frame storing columns selected by the AsTable(...) selector.\n\nBy default it calls default_table_transformation. However, it is allowed to add special methods for specific types of fun, as long as the result matches what would be produced by default_table_transformation, except that it is allowed to perform eltype conversion of the resulting vectors or value type promotions that are consistent with promote_type.\n\nIt is guaranteed that df_sel has at least one column.\n\nThe main use of special table_transformation methods is to provide more efficient than the default implementations of requested fun transformation.\n\nThis function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental.\n\nFast paths are implemented within DataFrames.jl for the following functions fun:\n\nsum, ByRow(sum), ByRow(sum∘skipmissing)\nlength, ByRow(length), ByRow(length∘skipmissing)\nmean, ByRow(mean), ByRow(mean∘skipmissing)\nByRow(var), ByRow(var∘skipmissing)\nByRow(std), ByRow(std∘skipmissing)\nByRow(median), ByRow(median∘skipmissing)\nminimum, ByRow(minimum), ByRow(minimum∘skipmissing)\nmaximum, 
ByRow(maximum), ByRow(maximum∘skipmissing)\nfun∘collect and ByRow(fun∘collect) where fun is any function\n\nNote that in order to improve the performance ByRow(sum), ByRow(sum∘skipmissing), ByRow(mean), and ByRow(mean∘skipmissing) perform all operations in the target element type. In some very rare cases (like mixing very large Int64 values and Float64 values) it can lead to a result different from the one that would be obtained by calling the function outside of DataFrames.jl. The way to avoid this precision loss is to use an anonymous function, e.g. instead of ByRow(sum) use ByRow(x -> sum(x)). However, in general for such scenarios even standard aggregation functions should not be considered to provide reliable output, and users are recommended to switch to higher precision calculations. An example of a case when standard sum is affected by the situation discussed is:\n\njulia> sum(Any[typemax(Int), typemax(Int), 1.0])\n-1.0\n\njulia> sum(Any[1.0, typemax(Int), typemax(Int)])\n1.8446744073709552e19\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.transform","page":"Functions","title":"DataFrames.transform","text":"transform(df::AbstractDataFrame, args...;\n          copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)\ntransform(f::Callable, df::DataFrame;\n          renamecols::Bool=true, threads::Bool=true)\ntransform(gd::GroupedDataFrame, args...;\n          copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n          renamecols::Bool=true, threads::Bool=true)\ntransform(f::Base.Callable, gd::GroupedDataFrame;\n          copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n          renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd plus columns specified by args and return it. The result is guaranteed to have the same number of rows as df. Equivalent to select(df, :, args...) 
or select(gd, :, args...).\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if an empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in the target data frame is accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. 
In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke an optimized implementation; see DataFrames.table_transformation for details.\n\nKeyword arguments\n\ncopycols::Bool=true : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nNote that when the first argument is a GroupedDataFrame, keepkeys=false is needed to be able to return a different value for the grouping column (see the example below).\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    
As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> gdf = groupby(DataFrame(x=1:2), :x)\nGroupedDataFrame with 2 groups based on key: x\nFirst Group (1 row): x = 1\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): x = 2\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> transform(gdf, x -> (x=10,), keepkeys=false)\n2×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │    10\n   2 │    10\n\njulia> transform(gdf, x -> (x=10,), keepkeys=true)\nERROR: ArgumentError: column :x in returned data frame is not equal to grouping key :x\n\nSee select for more examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.transform!","page":"Functions","title":"DataFrames.transform!","text":"transform!(df::AbstractDataFrame, args...;\n           renamecols::Bool=true, threads::Bool=true)\ntransform!(args::Callable, df::AbstractDataFrame;\n           renamecols::Bool=true, threads::Bool=true)\ntransform!(gd::GroupedDataFrame, args...;\n           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)\ntransform!(f::Base.Callable, gd::GroupedDataFrame;\n           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)\n\nMutate df or gd in place to add columns specified by args... and return it. The result is guaranteed to have the same number of rows as df. Equivalent to select!(df, :, args...) or select!(gd, :, args...), except that column renaming performs a copy.\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. 
This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). 
Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. 
The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. 
The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. 
In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. 
If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke an optimized implementation; see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. 
Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nSee select for examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.vcat","page":"Functions","title":"Base.vcat","text":"vcat(dfs::AbstractDataFrame...;\n     cols::Union{Symbol, AbstractVector{Symbol},\n                 AbstractVector{<:AbstractString}}=:setequal,\n     source::Union{Nothing, Symbol, AbstractString,\n                   Pair{<:Union{Symbol, AbstractString}, <:AbstractVector}}=nothing)\n\nVertically concatenate AbstractDataFrames.\n\nThe cols keyword argument determines the columns of the returned data frame:\n\n:setequal: require all data frames to have the same column names disregarding order. If they appear in different orders, the order of the first provided data frame is used.\n:orderequal: require all data frames to have the same column names and in the same order.\n:intersect: only the columns present in all provided data frames are kept. If the intersection is empty, an empty data frame is returned.\n:union: columns present in at least one of the provided data frames are kept. Columns not present in some data frames are filled with missing where necessary.\nA vector of Symbols or strings: only listed columns are kept. Columns not present in some data frames are filled with missing where necessary.\n\nThe source keyword argument, if not nothing (the default), specifies the additional column to be added in the last position in the resulting data frame that will identify the source data frame. 
It can be a Symbol or an AbstractString, in which case the identifier will be the number of the passed source data frame, or a Pair consisting of a Symbol or an AbstractString and of a vector specifying the data frame identifiers (which do not have to be unique). The name of the source column is not allowed to be present in any source data frame.\n\nThe order of columns is determined by the order they appear in the included data frames, searching through the header of the first data frame, then the second, etc.\n\nThe element types of columns are determined using promote_type, as with vcat for AbstractVectors.\n\nvcat ignores empty data frames when composing the result (except for metadata), making it possible to initialize an empty data frame at the beginning of a loop and vcat onto it.\n\nMetadata: vcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value. vcat propagates column-level :note-style metadata for keys that are present in all passed data frames that contain this column and have the same value.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = DataFrame(A=7:9, C=7:9)\n3×2 DataFrame\n Row │ A      C\n     │ Int64  Int64\n─────┼──────────────\n   1 │     7      7\n   2 │     8      8\n   3 │     9      9\n\njulia> df4 = DataFrame()\n0×0 DataFrame\n\njulia> vcat(df1, df2)\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> vcat(df1, df3, cols=:union)\n6×3 DataFrame\n Row │ A      B        C\n     │ Int64  
Int64?   Int64?\n─────┼─────────────────────────\n   1 │     1        1  missing\n   2 │     2        2  missing\n   3 │     3        3  missing\n   4 │     7  missing        7\n   5 │     8  missing        8\n   6 │     9  missing        9\n\njulia> vcat(df1, df3, cols=:intersect)\n6×1 DataFrame\n Row │ A\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n   4 │     7\n   5 │     8\n   6 │     9\n\njulia> vcat(df4, df1)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> vcat(df1, df2, df3, df4, cols=:union, source=\"source\")\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   Int64\n─────┼─────────────────────────────────\n   1 │     1        1  missing       1\n   2 │     2        2  missing       1\n   3 │     3        3  missing       1\n   4 │     4        4  missing       2\n   5 │     5        5  missing       2\n   6 │     6        6  missing       2\n   7 │     7  missing        7       3\n   8 │     8  missing        8       3\n   9 │     9  missing        9       3\n\njulia> vcat(df1, df2, df4, df3, cols=:union, source=:source => 'a':'d')\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   
Char\n─────┼─────────────────────────────────\n   1 │     1        1  missing  a\n   2 │     2        2  missing  a\n   3 │     3        3  missing  a\n   4 │     4        4  missing  b\n   5 │     5        5  missing  b\n   6 │     6        6  missing  b\n   7 │     7  missing        7  d\n   8 │     8  missing        8  d\n   9 │     9  missing        9  d\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Reshaping-data-frames-between-tall-and-wide-formats","page":"Functions","title":"Reshaping data frames between tall and wide formats","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"stack\nunstack\npermutedims","category":"page"},{"location":"lib/functions/#Base.stack","page":"Functions","title":"Base.stack","text":"stack(df::AbstractDataFrame[, measure_vars[, id_vars] ];\n      variable_name=:variable, value_name=:value,\n      view::Bool=false, variable_eltype::Type=String)\n\nStack a data frame df, i.e. convert it from wide to long format.\n\nReturn the long-format DataFrame with: columns for each of the id_vars, column value_name (:value by default) holding the values of the stacked columns (measure_vars), and column variable_name (:variable by default) a vector holding the name of the corresponding measure_vars variable.\n\nIf view=true then return a stacked view of a data frame (long format). The result is a view because the columns are special AbstractVectors that return views into the original data frame.\n\nArguments\n\ndf : the AbstractDataFrame to be stacked\nmeasure_vars : the columns to be stacked (the measurement variables), as a column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). 
If neither measure_vars nor id_vars are given, measure_vars defaults to all floating point columns.\nid_vars : the identifier columns that are repeated during stacking, as a column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). Defaults to all variables that are not measure_vars\nvariable_name : the name (Symbol or string) of the new stacked column that shall hold the names of each of measure_vars\nvalue_name : the name (Symbol or string) of the new stacked column containing the values from each of measure_vars\nview : whether the stacked data frame should be a view rather than contain freshly allocated vectors.\nvariable_eltype : determines the element type of column variable_name. By default a PooledArray{String} is created. If variable_eltype=Symbol a PooledVector{Symbol} is created, and if variable_eltype=CategoricalValue{String} a CategoricalArray{String} is produced (call using CategoricalArrays first if needed). Passing any other type T will produce a PooledVector{T} column as long as it supports conversion from String. 
When view=true, a RepeatedVector{T} is produced.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for identifier columns are preserved.\n\nExamples\n\njulia> df = DataFrame(a=repeat(1:3, inner=2),\n                      b=repeat(1:2, inner=3),\n                      c=repeat(1:1, inner=6),\n                      d=repeat(1:6, inner=1),\n                      e=string.('a':'f'))\n6×5 DataFrame\n Row │ a      b      c      d      e\n     │ Int64  Int64  Int64  Int64  String\n─────┼────────────────────────────────────\n   1 │     1      1      1      1  a\n   2 │     1      1      1      2  b\n   3 │     2      1      1      3  c\n   4 │     2      2      1      4  d\n   5 │     3      2      1      5  e\n   6 │     3      2      1      6  f\n\njulia> stack(df, [:c, :d])\n12×5 DataFrame\n Row │ a      b      e       variable  value\n     │ Int64  Int64  String  String    Int64\n─────┼───────────────────────────────────────\n   1 │     1      1  a       c             1\n   2 │     1      1  b       c             1\n   3 │     2      1  c       c             1\n   4 │     2      2  d       c             1\n   5 │     3      2  e       c             1\n   6 │     3      2  f       c             1\n   7 │     1      1  a       d             1\n   8 │     1      1  b       d             2\n   9 │     2      1  c       d             3\n  10 │     2      2  d       d             4\n  11 │     3      2  e       d             5\n  12 │     3      2  f       d             6\n\njulia> stack(df, [:c, :d], [:a])\n12×3 DataFrame\n Row │ a      variable  value\n     │ Int64  String    Int64\n─────┼────────────────────────\n   1 │     1  c             1\n   2 │     1  c             1\n   3 │     2  c             1\n   4 │     2  c             1\n   5 │     3  c             1\n   6 │     3  c             1\n   7 │     1  d             1\n   8 │     1  d             2\n   9 │     2  d             3\n  10 │     2  d             4\n  11 │     3  d       
      5\n  12 │     3  d             6\n\njulia> stack(df, Not([:a, :b, :e]))\n12×5 DataFrame\n Row │ a      b      e       variable  value\n     │ Int64  Int64  String  String    Int64\n─────┼───────────────────────────────────────\n   1 │     1      1  a       c             1\n   2 │     1      1  b       c             1\n   3 │     2      1  c       c             1\n   4 │     2      2  d       c             1\n   5 │     3      2  e       c             1\n   6 │     3      2  f       c             1\n   7 │     1      1  a       d             1\n   8 │     1      1  b       d             2\n   9 │     2      1  c       d             3\n  10 │     2      2  d       d             4\n  11 │     3      2  e       d             5\n  12 │     3      2  f       d             6\n\njulia> stack(df, Not([:a, :b, :e]), variable_name=:somemeasure)\n12×5 DataFrame\n Row │ a      b      e       somemeasure  value\n     │ Int64  Int64  String  String       Int64\n─────┼──────────────────────────────────────────\n   1 │     1      1  a       c                1\n   2 │     1      1  b       c                1\n   3 │     2      1  c       c                1\n   4 │     2      2  d       c                1\n   5 │     3      2  e       c                1\n   6 │     3      2  f       c                1\n   7 │     1      1  a       d                1\n   8 │     1      1  b       d                2\n   9 │     2      1  c       d                3\n  10 │     2      2  d       d                4\n  11 │     3      2  e       d                5\n  12 │     3      2  f       d                6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.unstack","page":"Functions","title":"DataFrames.unstack","text":"unstack(df::AbstractDataFrame, rowkeys, colkey, value;\n        renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\nunstack(df::AbstractDataFrame, colkey, value;\n        
renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\nunstack(df::AbstractDataFrame;\n        renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\n\nUnstack data frame df, i.e. convert it from long to wide format.\n\nRow and column keys are ordered in the order of their first appearance.\n\nPositional arguments\n\ndf : the AbstractDataFrame to be unstacked\nrowkeys : the columns with a unique key for each row, if not given, find a key by grouping on anything not a colkey or value. Can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If rowkeys contains no columns all rows are assumed to have the same key.\ncolkey : the column (Symbol, string or integer) holding the column names in wide format, defaults to :variable\nvalue : the column storing values (Symbol, string or integer), defaults to :value\n\nKeyword arguments\n\nrenamecols: a function called on each unique value in colkey; it must return the name of the column to be created (typically as a string or a Symbol). Duplicates in resulting names when converted to Symbol are not allowed. By default no transformation is performed.\nallowmissing: if false (the default) then an error is thrown if colkey contains missing values; if true then a column referring to missing value is created.\ncombine: if only (the default) then an error is thrown if combination of rowkeys and colkey contains duplicate entries. Otherwise the passed value must be a function that is called on a vector view containing all elements for each combination of rowkeys and colkey present in the data.\nfill: missing row/column combinations are filled with this value. The default is missing. 
If the value column is a CategoricalVector and fill is not missing then in order to keep unstacked value columns also CategoricalVector the fill must be passed as CategoricalValue\nthreads: whether combine function may be run in separate tasks which can execute in parallel (possibly being applied to multiple groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if combine requires serial execution or is not thread-safe.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for row keys columns are preserved.\n\nDeprecations\n\nallowduplicates keyword argument is deprecated; instead use combine keyword argument; an equivalent to allowduplicates=true is combine=last and to allowduplicates=false is combine=only (the default);\n\nExamples\n\njulia> wide = DataFrame(id=1:6,\n                        a=repeat(1:3, inner=2),\n                        b=repeat(1.0:2.0, inner=3),\n                        c=repeat(1.0:1.0, inner=6),\n                        d=repeat(1.0:3.0, inner=2))\n6×5 DataFrame\n Row │ id     a      b        c        d\n     │ Int64  Int64  Float64  Float64  Float64\n─────┼─────────────────────────────────────────\n   1 │     1      1      1.0      1.0      1.0\n   2 │     2      1      1.0      1.0      1.0\n   3 │     3      2      1.0      1.0      2.0\n   4 │     4      2      2.0      1.0      2.0\n   5 │     5      3      2.0      1.0      3.0\n   6 │     6      3      2.0      1.0      3.0\n\njulia> long = stack(wide)\n18×4 DataFrame\n Row │ id     a      variable  value\n     │ Int64  Int64  String    Float64\n─────┼─────────────────────────────────\n   1 │     1      1  b             1.0\n   2 │     2      1  b             1.0\n   3 │     3      2  b             1.0\n   4 │     4      2  b             2.0\n   5 │     5      3  b             2.0\n   6 │     6      3  b             2.0\n   7 │     1      1  c             1.0\n   8 │     2      1  
c             1.0\n  ⋮  │   ⋮      ⋮       ⋮         ⋮\n  12 │     6      3  c             1.0\n  13 │     1      1  d             1.0\n  14 │     2      1  d             1.0\n  15 │     3      2  d             2.0\n  16 │     4      2  d             2.0\n  17 │     5      3  d             3.0\n  18 │     6      3  d             3.0\n                         3 rows omitted\n\njulia> unstack(long)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :variable, :value)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :id, :variable, :value)\n6×4 DataFrame\n Row │ id     b         c         d\n     │ Int64  Float64?  Float64?  Float64?\n─────┼─────────────────────────────────────\n   1 │     1       1.0       1.0       1.0\n   2 │     2       1.0       1.0       1.0\n   3 │     3       1.0       1.0       2.0\n   4 │     4       2.0       1.0       2.0\n   5 │     5       2.0       1.0       3.0\n   6 │     6       2.0       1.0       3.0\n\njulia> unstack(long, [:id, :a], :variable, :value)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  
Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :id, :variable, :value, renamecols=x->Symbol(:_, x))\n6×4 DataFrame\n Row │ id     _b        _c        _d\n     │ Int64  Float64?  Float64?  Float64?\n─────┼─────────────────────────────────────\n   1 │     1       1.0       1.0       1.0\n   2 │     2       1.0       1.0       1.0\n   3 │     3       1.0       1.0       2.0\n   4 │     4       2.0       1.0       2.0\n   5 │     5       2.0       1.0       3.0\n   6 │     6       2.0       1.0       3.0\n\nNote that there are some differences between the widened results above.\n\njulia> df = DataFrame(id=[\"1\", \"1\", \"2\"],\n                      variable=[\"Var1\", \"Var2\", \"Var1\"],\n                      value=[1, 2, 3])\n3×3 DataFrame\n Row │ id      variable  value\n     │ String  String    Int64\n─────┼─────────────────────────\n   1 │ 1       Var1          1\n   2 │ 1       Var2          2\n   3 │ 2       Var1          3\n\njulia> unstack(df, :variable, :value, fill=0)\n2×3 DataFrame\n Row │ id      Var1   Var2\n     │ String  Int64  Int64\n─────┼──────────────────────\n   1 │ 1           1      2\n   2 │ 2           3      0\n\njulia> df = DataFrame(cols=[\"a\", \"a\", \"b\"], values=[1, 2, 4])\n3×2 DataFrame\n Row │ cols    values\n     │ String  Int64\n─────┼────────────────\n   1 │ a            1\n   2 │ a            2\n   3 │ b            4\n\njulia> unstack(df, :cols, :values, combine=copy)\n1×2 DataFrame\n Row │ a        b\n     │ Array…?  Array…?\n─────┼──────────────────\n   1 │ [1, 2]   [4]\n\njulia> unstack(df, :cols, :values, combine=sum)\n1×2 DataFrame\n Row │ a       b\n     │ Int64?  
Int64?\n─────┼────────────────\n   1 │      3       4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.permutedims","page":"Functions","title":"Base.permutedims","text":"permutedims(df::AbstractDataFrame,\n            [src_namescol::Union{Int, Symbol, AbstractString}],\n            [dest_namescol::Union{Symbol, AbstractString}];\n            makeunique::Bool=false, strict::Bool=true)\n\nTurn df on its side such that rows become columns and values in the column indexed by src_namescol become the names of new columns. In the resulting DataFrame, column names of df will become the first column with name specified by dest_namescol.\n\nArguments\n\ndf : the AbstractDataFrame\nsrc_namescol : the column that will become the new header.  If omitted then column names :x1, :x2, ... are generated automatically.\ndest_namescol : the name of the first column in the returned DataFrame. Defaults to the same name as src_namescol. Not supported when src_namescol is a vector or is omitted.\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate). Not supported when src_namescol is omitted.\nstrict : if true (the default), an error will be raised if the values contained in the src_namescol are not all Symbol or all AbstractString, or can all be converted to String using convert. If false then any values are accepted and they will be changed to strings using the string function. Not supported when src_namescol is a vector or is omitted.\n\nNote: The element types of columns in resulting DataFrame (other than the first column if it is created from df column names, which always has element type String) will depend on the element types of all input columns based on the result of promote_type. That is, if the source data frame contains Int and Float64 columns, resulting columns will have element type Float64. 
If the source has Int and String columns, resulting columns will have element type Any.\n\nMetadata: table-level :note-style metadata is preserved and column-level metadata is dropped.\n\nExamples\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> permutedims(df)\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n\njulia> permutedims(df, [:p, :q])\n2×2 DataFrame\n Row │ p      q\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n\njulia> df1 = DataFrame(a=[\"x\", \"y\"], b=[1.0, 2.0], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b        c      d\n     │ String  Float64  Int64  Bool\n─────┼───────────────────────────────\n   1 │ x           1.0      3   true\n   2 │ y           2.0      4  false\n\njulia> permutedims(df1, 1) # note the column types\n3×3 DataFrame\n Row │ a       x        y\n     │ String  Float64  Float64\n─────┼──────────────────────────\n   1 │ b           1.0      2.0\n   2 │ c           3.0      4.0\n   3 │ d           1.0      0.0\n\njulia> df2 = DataFrame(a=[\"x\", \"y\"], b=[1, \"two\"], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b    c      d\n     │ String  Any  Int64  Bool\n─────┼───────────────────────────\n   1 │ x       1        3   true\n   2 │ y       two      4  false\n\njulia> permutedims(df2, 1, \"different_name\")\n3×3 DataFrame\n Row │ different_name  x     y\n     │ String          Any   Any\n─────┼─────────────────────────────\n   1 │ b               1     two\n   2 │ c               3     4\n   3 │ d               true  
false\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Sorting","page":"Functions","title":"Sorting","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"issorted\norder\nsort\nsort!\nsortperm","category":"page"},{"location":"lib/functions/#Base.issorted","page":"Functions","title":"Base.issorted","text":"issorted(df::AbstractDataFrame, cols=All();\n         lt::Union{Function, AbstractVector{<:Function}}=isless,\n         by::Union{Function, AbstractVector{<:Function}}=identity,\n         rev::Union{Bool, AbstractVector{Bool}}=false,\n         order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n         checkunique::Bool=false)\n\nTest whether data frame df is sorted by column(s) cols. Checking against multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, check whether df is sorted on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. 
If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nExamples\n\njulia> df = DataFrame(a=[1, 2, 3, 4], b=[4, 3, 2, 1])\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      3\n   3 │     3      2\n   4 │     4      1\n\njulia> issorted(df)\ntrue\n\njulia> issorted(df, :a)\ntrue\n\njulia> issorted(df, :b)\nfalse\n\njulia> issorted(df, :b, rev=true)\ntrue\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.order","page":"Functions","title":"DataFrames.order","text":"order(col::ColumnIndex; kwargs...)\n\nSpecify sorting order for a column col in a data frame. 
kwargs can be lt, by, rev, and order with values following the rules defined in sort!.\n\nSee also: sort!, sort\n\nExamples\n\njulia> df = DataFrame(x=[-3, -1, 0, 2, 4], y=1:5)\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │    -3      1\n   2 │    -1      2\n   3 │     0      3\n   4 │     2      4\n   5 │     4      5\n\njulia> sort(df, order(:x, rev=true))\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      5\n   2 │     2      4\n   3 │     0      3\n   4 │    -1      2\n   5 │    -3      1\n\njulia> sort(df, order(:x, by=abs))\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     0      3\n   2 │    -1      2\n   3 │     2      4\n   4 │    -3      1\n   5 │     4      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sort","page":"Functions","title":"Base.sort","text":"sort(df::AbstractDataFrame, cols=All();\n     alg::Union{Algorithm, Nothing}=nothing,\n     lt::Union{Function, AbstractVector{<:Function}}=isless,\n     by::Union{Function, AbstractVector{<:Function}}=identity,\n     rev::Union{Bool, AbstractVector{Bool}}=false,\n     order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n     view::Bool=false,\n     checkunique::Bool=false)\n\nReturn a data frame containing the rows in df sorted by column(s) cols. Sorting on multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, sort df on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. 
To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nIf view=false a freshly allocated DataFrame is returned. 
If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sort(df, :x)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\njulia> sort(df, [:x, :y])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  b\n   2 │     1  c\n   3 │     2  a\n   4 │     3  b\n\njulia> sort(df, [:x, :y], rev=true)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n   3 │     1  c\n   4 │     1  b\n\njulia> sort(df, [:x, order(:y, rev=true)])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sort!","page":"Functions","title":"Base.sort!","text":"sort!(df::AbstractDataFrame, cols=All();\n      alg::Union{Algorithm, Nothing}=nothing,\n      lt::Union{Function, AbstractVector{<:Function}}=isless,\n      by::Union{Function, AbstractVector{<:Function}}=identity,\n      rev::Union{Bool, AbstractVector{Bool}}=false,\n      order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n      checkunique::Bool=false)\n\nSort data frame df by column(s) cols by permuting its rows in-place. Sorting on multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). 
If cols selects no columns, sort df on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nsort! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then sort! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sort!(df, :x)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\njulia> sort!(df, [:x, :y])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  b\n   2 │     1  c\n   3 │     2  a\n   4 │     3  b\n\njulia> sort!(df, [:x, :y], rev=true)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n   3 │     1  c\n   4 │     1  b\n\njulia> sort!(df, [:x, order(:y, rev=true)])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sortperm","page":"Functions","title":"Base.sortperm","text":"sortperm(df::AbstractDataFrame, cols=All();\n         alg::Union{Algorithm, Nothing}=nothing,\n         lt::Union{Function, AbstractVector{<:Function}}=isless,\n         by::Union{Function, AbstractVector{<:Function}}=identity,\n         rev::Union{Bool, AbstractVector{Bool}}=false,\n         order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n         checkunique::Bool=false)\n\nReturn a permutation vector of row indices of data frame df that puts them in sorted order according to column(s) cols. 
Order on multiple columns is computed lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, return permutation vector based on sorting all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. 
When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sortperm(df, :x)\n4-element Vector{Int64}:\n 2\n 4\n 3\n 1\n\njulia> sortperm(df, [:x, :y])\n4-element Vector{Int64}:\n 4\n 2\n 3\n 1\n\njulia> sortperm(df, [:x, :y], rev=true)\n4-element Vector{Int64}:\n 1\n 3\n 2\n 4\n\njulia> sortperm(df, [:x, order(:y, rev=true)])\n4-element Vector{Int64}:\n 2\n 4\n 3\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Joining","page":"Functions","title":"Joining","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"antijoin\ncrossjoin\ninnerjoin\nleftjoin\nleftjoin!\nouterjoin\nrightjoin\nsemijoin","category":"page"},{"location":"lib/functions/#DataAPI.antijoin","page":"Functions","title":"DataAPI.antijoin","text":"antijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)\n\nPerform an anti join of two data frame objects and return a DataFrame containing the result. An anti join returns the subset of rows of df1 that do not match with the keys in df2.\n\nThe order of rows in the result is kept from df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). 
A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : ignored as no columns are added to df1 columns (it is provided for consistency with other functions).\nvalidate : whether to check that columns passed as the on argument  define unique keys in each input data frame (according to isequal).  Can be a tuple or a pair, with the first element indicating whether to  run check for df1 and the second element for df2.  By default no check is performed.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, semijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> antijoin(name, job, on = 
:ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> antijoin(name, job2, on = :ID => :identifier)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\njulia> antijoin(name, job2, on = [:ID => :identifier])\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.crossjoin","page":"Functions","title":"DataAPI.crossjoin","text":"crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;\n          makeunique::Bool=false, renamecols=identity => identity)\ncrossjoin(df1, df2, dfs...; makeunique = false)\n\nPerform a cross join of two or more data frame objects and return a DataFrame containing the result. A cross join returns the cartesian product of rows from all passed data frames, where the first passed data frame is assigned to the dimension that changes the slowest and the last data frame is assigned to the dimension that changes the fastest.\n\nArguments\n\ndf1, df2, dfs... : the AbstractDataFrames to be joined\n\nKeyword Arguments\n\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. 
Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity.\n\nMetadata: table-level :note-style metadata is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved from both tables.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, semijoin, antijoin.\n\nExamples\n\njulia> df1 = DataFrame(X=1:3)\n3×1 DataFrame\n Row │ X\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> df2 = DataFrame(Y=[\"a\", \"b\"])\n2×1 DataFrame\n Row │ Y\n     │ String\n─────┼────────\n   1 │ a\n   2 │ b\n\njulia> crossjoin(df1, df2)\n6×2 DataFrame\n Row │ X      Y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  a\n   2 │     1  b\n   3 │     2  a\n   4 │     2  b\n   5 │     3  a\n   6 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.innerjoin","page":"Functions","title":"DataAPI.innerjoin","text":"innerjoin(df1, df2; on, makeunique=false, validate=(false, false),\n          renamecols=(identity => identity), matchmissing=:error,\n          order=:undefined)\ninnerjoin(df1, df2, dfs...; on, makeunique=false,\n          validate=(false, false), matchmissing=:error,\n          order=:undefined)\n\nPerform an inner join of two or more data frame objects and return a DataFrame containing the result. An inner join includes rows with keys that match in all passed data frames.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df1. 
This behavior may change in future releases.\n\nArguments\n\ndf1, df2, dfs...: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). When joining only two data frames, a left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df1 and df2 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained. 
If :right then the order of rows  from the right data frame is retained.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity. In this case the validate keyword argument is applied recursively with left associativity.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for key columns is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved for all other columns.\n\nSee also: leftjoin, rightjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> innerjoin(name, job, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4 
 Farmer\n\njulia> innerjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n2×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n\njulia> innerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n2×3 DataFrame\n Row │ ID     NAME      job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.leftjoin","page":"Functions","title":"DataAPI.leftjoin","text":"leftjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),\n         renamecols=(identity => identity), matchmissing=:error, order=:undefined)\n\nPerform a left join of two data frame objects and return a DataFrame containing the result. A left join includes all rows from df1.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df1. This behavior may change in future releases.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. 
If a Symbol or string, adds indicator column with the given name, for whether a row appeared in only df1 (\"left_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained. If :right then the order of rows  from the right data frame is retained (non-matching rows are put at the end).\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata is taken from df1 (including key columns), except for columns added to it from df2, whose column-level :note-style metadata is taken from df2.\n\nSee also: innerjoin, rightjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> leftjoin(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> leftjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n3×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String?\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> leftjoin(name, job2, on = [:ID => :identifier], 
renamecols = uppercase => lowercase)\n3×3 DataFrame\n Row │ ID     NAME       job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.leftjoin!","page":"Functions","title":"DataFrames.leftjoin!","text":"leftjoin!(df1, df2; on, makeunique=false, source=nothing,\n          matchmissing=:error)\n\nPerform a left join of two data frame objects by updating the df1 with the joined columns from df2.\n\nA left join includes all rows from df1 and leaves all rows and columns from df1 untouched. Note that each row in df1 must have at most one match in df2. Otherwise, this function would not be able to execute the join in-place since new rows would need to be added to df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. If a Symbol or string, adds indicator column with the given name, for whether a row appeared in only df1 (\"left_only\") or in both (\"both\"). 
If the name is already in use, the column name will be modified if makeunique=true.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nThe columns added to df1 from df2 will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1 (including key columns), except for columns added to it from df2, whose column-level :note-style metadata is taken from df2.\n\nSee also: leftjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> leftjoin!(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> leftjoin!(name, job2, on = :ID => :identifier, makeunique=true, source=:source)\n3×5 DataFrame\n Row │ ID     Name       Job      
Job_1    source\n     │ Int64  String     String?  String?  String\n─────┼───────────────────────────────────────────────\n   1 │     1  John Doe   Lawyer   Lawyer   both\n   2 │     2  Jane Doe   Doctor   Doctor   both\n   3 │     3  Joe Blogs  missing  missing  left_only\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.outerjoin","page":"Functions","title":"DataAPI.outerjoin","text":"outerjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),\n          renamecols=(identity => identity), matchmissing=:error, order=:undefined)\nouterjoin(df1, df2, dfs...; on, makeunique = false,\n          validate = (false, false), matchmissing=:error, order=:undefined)\n\nPerform an outer join of two or more data frame objects and return a DataFrame containing the result. An outer join includes rows with keys that appear in any of the passed data frames.\n\nThe order of rows in the result is undefined and may change in future releases.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the element type of these columns in both df1 and df2. This behavior may change in future releases.\n\nArguments\n\ndf1, df2, dfs... : the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). When joining only two data frames, a left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. 
If a Symbol or string, adds indicator column with the given name for whether a row appeared in only df1 (\"left_only\"), only df2 (\"right_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true. This argument is only supported when joining exactly two data frames.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If :right then the order of rows from the right data frame is retained  (non-matching rows are put at the end).\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity. In this case the source keyword argument is not supported and validate keyword argument is applied recursively with left associativity.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for key columns is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved for all other columns.\n\nSee also: innerjoin, leftjoin, rightjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> outerjoin(name, job, on = :ID)\n4×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String?    
String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> outerjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n4×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String?    String?\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\njulia> outerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n4×3 DataFrame\n Row │ ID     NAME       job\n     │ Int64  String?    String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.rightjoin","page":"Functions","title":"DataAPI.rightjoin","text":"rightjoin(df1, df2; on, makeunique=false, source=nothing,\n          validate=(false, false), renamecols=(identity => identity),\n          matchmissing=:error, order=:undefined)\n\nPerform a right join on two data frame objects and return a DataFrame containing the result. A right join includes all rows from df2.\n\nThe order of rows in the result is undefined and may change in future releases.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df2. 
This behavior may change in future releases.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. If a Symbol or string, adds indicator column with the given name for whether a row appeared in only df2 (\"right_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. 
Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df1 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If :right then the order of rows from the right data frame is retained.\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata is taken from df2 (including key columns), except for columns added to it from df1, whose column-level :note-style metadata is taken from df1.\n\nSee also: innerjoin, leftjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> rightjoin(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name 
     Job\n     │ Int64  String?   String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n   3 │     4  missing   Farmer\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> rightjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n3×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String?    String\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     4  missing    Farmer\n\njulia> rightjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n3×3 DataFrame\n Row │ ID     NAME      job\n     │ Int64  String?   String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n   3 │     4  missing   Farmer\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.semijoin","page":"Functions","title":"DataAPI.semijoin","text":"semijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)\n\nPerform a semi join of two data frame objects and return a DataFrame containing the result. A semi join returns the subset of rows of df1 that match with the keys in df2.\n\nThe order of rows in the result is kept from df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. 
on is a required argument.\nmakeunique : ignored as no columns are added to df1 columns (it is provided for consistency with other functions).\nindicator : Default: nothing. If a Symbol or string, adds categorical indicator  column with the given name for whether a row appeared in only df1 (\"left_only\"),  only df2 (\"right_only\") or in both (\"both\"). If the name is already in use,  the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument  define unique keys in each input data frame (according to isequal).  Can be a tuple or a pair, with the first element indicating whether to  run check for df1 and the second element for df2.  By default no check is performed.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> semijoin(name, job, on = :ID)\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> semijoin(name, job2, on = :ID => :identifier)\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n\njulia> semijoin(name, job2, on = [:ID => :identifier])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane 
Doe\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Grouping","page":"Functions","title":"Grouping","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"get\ngroupby\ngroupcols\ngroupindices\nkeys\nparent\nproprow\nvaluecols","category":"page"},{"location":"lib/functions/#Base.get","page":"Functions","title":"Base.get","text":"get(gd::GroupedDataFrame, key, default)\n\nGet a group based on the values of the grouping columns.\n\nkey may be a GroupKey, NamedTuple or Tuple of grouping column values (in the same order as the cols argument to groupby). It may also be an AbstractDict, in which case the order of the arguments does not matter.\n\nExamples\n\njulia> df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[2]),\n                      b=repeat([2, 1], outer=[3]),\n                      c=1:6);\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 3 groups based on key: a\nFirst Group (2 rows): a = :foo\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         1      4\n⋮\nLast Group (2 rows): a = :baz\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         2      3\n   2 │ baz         1      6\n\njulia> get(gd, (a=:bar,), nothing)\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ bar         1      2\n   2 │ bar         2      5\n\njulia> get(gd, (:baz,), nothing)\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         2      3\n   2 │ baz         1      6\n\njulia> get(gd, (:qux,), nothing)\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.groupby","page":"Functions","title":"DataAPI.groupby","text":"groupby(df::AbstractDataFrame, cols;\n        sort::Union{Bool, Nothing, NamedTuple}=nothing,\n        
skipmissing::Bool=false)\n\nReturn a GroupedDataFrame representing a view of an AbstractDataFrame split into row groups.\n\nArguments\n\ndf : an AbstractDataFrame to split\ncols : data frame columns to group by. Can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). In particular if the selector picks no columns then a single-group GroupedDataFrame is created. As a special case, if cols is a single column or a vector of columns then it can contain columns wrapped in order that will be used to determine the order of groups if sort is true or a NamedTuple (if sort is false, then passing order is an error; if sort is nothing then it is set to true when order is passed).\nsort : if sort=true sort groups according to the values of the grouping columns cols; if sort=false groups are created in their order of appearance in df; if sort=nothing (the default) then the fastest available grouping algorithm is picked and in consequence the order of groups in the result is undefined and may change in future releases; below a description of the current implementation is provided. Additionally sort can be a NamedTuple having some or all of alg, lt, by, rev, and order fields. In this case the groups are sorted and their order follows the sortperm order.\nskipmissing : whether to skip groups with missing values in one of the grouping columns cols\n\nDetails\n\nAn iterator over a GroupedDataFrame returns a SubDataFrame view for each grouping into df. Within each group, the order of rows in df is preserved.\n\nA GroupedDataFrame also supports indexing by groups, select, transform, and combine (which applies a function to each group and combines the result into a data frame).\n\nGroupedDataFrame also supports the dictionary interface. The keys are GroupKey objects returned by keys(::GroupedDataFrame), which can also be used to get the values of the grouping columns for each group. 
Tuples and NamedTuples containing the values of the grouping columns (in the same order as the cols argument) are also accepted as indices. Finally, an AbstractDict can be used to index into a grouped data frame where the keys are column names of the data frame. The order of the keys does not matter in this case.\n\nIn the current implementation if sort=nothing groups are ordered following the order of appearance of values in the grouping columns, except when all grouping columns provide non-nothing DataAPI.refpool, in which case the order of groups follows the order of values returned by DataAPI.refpool. As a particular application of this rule if all cols are CategoricalVectors then groups are always sorted. Integer columns with a narrow range also use this optimization, so the order of groups when grouping on integer columns is undefined. A column is considered to be an integer column when deciding on the grouping algorithm choice if its eltype is a subtype of Union{Missing, Real}, all its elements are either missing or pass isinteger test, and none of them is equal to -0.0.\n\nSee also\n\ncombine, select, select!, transform, transform!\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 4 groups based on key: a\nFirst Group (2 rows): a = 1\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n⋮\nLast Group (2 rows): a = 4\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\njulia> gd[1]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\njulia> last(gd)\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  
Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\njulia> gd[(a=3,)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> gd[Dict(\"a\" => 3)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> gd[(3,)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> k = first(keys(gd))\nGroupKey: (a = 1,)\n\njulia> gd[k]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\njulia> for g in gd\n           println(g)\n       end\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      2\n   2 │     2      1      6\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.groupcols","page":"Functions","title":"DataFrames.groupcols","text":"groupcols(gd::GroupedDataFrame)\n\nReturn a vector of Symbol column names in parent(gd) used for grouping.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.groupindices","page":"Functions","title":"DataFrames.groupindices","text":"groupindices(gd::GroupedDataFrame)\n\nReturn a vector of group indices 
for each row of parent(gd).\n\nRows appearing in group gd[i] are attributed index i. Rows not present in any group are attributed missing (this can happen if skipmissing=true was passed when creating gd, or if gd is a subset from a larger GroupedDataFrame).\n\nThe groupindices => target_col_name syntax (or just groupindices without specifying the target column name) is also supported in the transformation mini-language when passing a GroupedDataFrame to transformation functions (combine, select, etc.).\n\nExamples\n\njulia> df = DataFrame(id=[\"a\", \"c\", \"b\", \"b\", \"a\"])\n5×1 DataFrame\n Row │ id\n     │ String\n─────┼────────\n   1 │ a\n   2 │ c\n   3 │ b\n   4 │ b\n   5 │ a\n\njulia> gdf = groupby(df, :id);\n\njulia> combine(gdf, groupindices)\n3×2 DataFrame\n Row │ id      groupindices\n     │ String  Int64\n─────┼──────────────────────\n   1 │ a                  1\n   2 │ c                  2\n   3 │ b                  3\n\njulia> select(gdf, groupindices => :gid)\n5×2 DataFrame\n Row │ id      gid\n     │ String  Int64\n─────┼───────────────\n   1 │ a           1\n   2 │ c           2\n   3 │ b           3\n   4 │ b           3\n   5 │ a           1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.keys","page":"Functions","title":"Base.keys","text":"keys(gd::GroupedDataFrame)\n\nGet the set of keys for each group of the GroupedDataFrame gd as a GroupKeys object. Each key is a GroupKey, which behaves like a NamedTuple holding the values of the grouping columns for a given group. Unlike the equivalent Tuple, NamedTuple, and AbstractDict, these keys can be used to index into gd efficiently. 
The ordering of the keys is identical to the ordering of the groups of gd under iteration and integer indexing.\n\nExamples\n\njulia> df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[4]),\n                      b=repeat([2, 1], outer=[6]),\n                      c=1:12);\n\njulia> gd = groupby(df, [:a, :b])\nGroupedDataFrame with 6 groups based on keys: a, b\nFirst Group (2 rows): a = :foo, b = 2\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         2      7\n⋮\nLast Group (2 rows): a = :baz, b = 1\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         1      6\n   2 │ baz         1     12\n\njulia> keys(gd)\n6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (a = :foo, b = 2)\n GroupKey: (a = :bar, b = 1)\n GroupKey: (a = :baz, b = 2)\n GroupKey: (a = :foo, b = 1)\n GroupKey: (a = :bar, b = 2)\n GroupKey: (a = :baz, b = 1)\n\njulia> k = keys(gd)[1]\nGroupKey: (a = :foo, b = 2)\n\njulia> keys(k)\n2-element Vector{Symbol}:\n :a\n :b\n\njulia> values(k)  # Same as Tuple(k)\n(:foo, 2)\n\njulia> NamedTuple(k)\n(a = :foo, b = 2)\n\njulia> k.a\n:foo\n\njulia> k[:a]\n:foo\n\njulia> k[1]\n:foo\n\nKeys can be used as indices to retrieve the corresponding group from their GroupedDataFrame:\n\njulia> gd[k]\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         2      7\n\njulia> gd[keys(gd)[1]] == gd[1]\ntrue\n\n\n\n\n\nkeys(dfc::DataFrameColumns)\n\nGet a vector of column names of dfc as Symbols.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.parent","page":"Functions","title":"Base.parent","text":"parent(gd::GroupedDataFrame)\n\nReturn the parent data frame of 
gd.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.proprow","page":"Functions","title":"DataFrames.proprow","text":"proprow\n\nCompute the proportion of rows which belong to each group, i.e. its number of rows divided by the total number of rows in a GroupedDataFrame.\n\nThis function can only be used in the transformation mini-language via the proprow => target_col_name syntax (or just proprow without specifying the target column name), when passing a GroupedDataFrame to transformation functions (combine, select, etc.).\n\nExamples\n\njulia> df = DataFrame(id=[\"a\", \"c\", \"b\", \"b\", \"a\", \"b\"])\n6×1 DataFrame\n Row │ id\n     │ String\n─────┼────────\n   1 │ a\n   2 │ c\n   3 │ b\n   4 │ b\n   5 │ a\n   6 │ b\n\njulia> gdf = groupby(df, :id);\n\njulia> combine(gdf, proprow)\n3×2 DataFrame\n Row │ id      proprow\n     │ String  Float64\n─────┼──────────────────\n   1 │ a       0.333333\n   2 │ c       0.166667\n   3 │ b       0.5\n\njulia> select(gdf, proprow => :frac)\n6×2 DataFrame\n Row │ id      frac\n     │ String  Float64\n─────┼──────────────────\n   1 │ a       0.333333\n   2 │ c       0.166667\n   3 │ b       0.5\n   4 │ b       0.5\n   5 │ a       0.333333\n   6 │ b       0.5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.valuecols","page":"Functions","title":"DataFrames.valuecols","text":"valuecols(gd::GroupedDataFrame)\n\nReturn a vector of Symbol column names in parent(gd) not used for grouping.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Filtering-rows","page":"Functions","title":"Filtering 
rows","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allunique\ndeleteat!\nempty\nempty!\nfilter\nfilter!\nkeepat!\nfirst\nlast\nnonunique\nonly\npop!\npopat!\npopfirst!\nresize!\nsubset\nsubset!\nunique\nunique!","category":"page"},{"location":"lib/functions/#Base.allunique","page":"Functions","title":"Base.allunique","text":"allunique(df::AbstractDataFrame, cols=:)\n\nReturn true if none of the rows of df are duplicated. Two rows are duplicates if all their columns contain equal values (according to isequal) for all columns in cols (by default, all columns).\n\nArguments\n\ndf : AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by select.\n\nSee also unique and nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> allunique(df)\ntrue\n\njulia> allunique(df, :x)\nfalse\n\njulia> allunique(df, :i => ByRow(isodd))\nfalse\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.deleteat!","page":"Functions","title":"Base.deleteat!","text":"deleteat!(df::DataFrame, inds)\n\nDelete rows specified by inds from a DataFrame df in place and return it.\n\nInternally deleteat! 
is called for all columns so inds must be: a vector of sorted and unique integers, a boolean vector, an integer, or Not wrapping any valid selector.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> deleteat!(df, 2)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.empty","page":"Functions","title":"Base.empty","text":"empty(df::AbstractDataFrame)\n\nCreate a new DataFrame with the same column names and column element types as df but with zero rows.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.empty!","page":"Functions","title":"Base.empty!","text":"empty!(df::DataFrame)\n\nRemove all rows from df, making each of its columns empty.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> empty!(df)\n0×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┴──────────────\n\njulia> df.a, df.b\n(Int64[], Int64[])\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.filter","page":"Functions","title":"Base.filter","text":"filter(fun, df::AbstractDataFrame; view::Bool=false)\nfilter(cols => fun, df::AbstractDataFrame; view::Bool=false)\n\nReturn a data frame containing only rows from df for which fun returns true.\n\nIf cols is not specified then the predicate fun is passed DataFrameRows. 
Elements of a DataFrameRow may be accessed with dot syntax or column indexing inside fun.\n\nIf cols is specified then the predicate fun is passed elements of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned.\n\nPassing cols leads to a more efficient execution of the operation for large data frames.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset function instead as it is consistent with other DataFrames.jl functions (as opposed to filter).\n\nnote: Note\nDue to type stability the filter(cols => fun, df::AbstractDataFrame; view::Bool=false) call is preferred in performance critical applications.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: filter!\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> filter(row -> row.x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter(row -> row[\"x\"] > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter(:x => x -> x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter([:x, :y] => 
(x, y) -> x == 1 || y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\njulia> filter(AsTable(:) => nt -> nt.x == 1 || nt.y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\n\n\n\n\nfilter(fun, gdf::GroupedDataFrame; ungroup::Bool=false)\nfilter(cols => fun, gdf::GroupedDataFrame; ungroup::Bool=false)\n\nReturn only groups in gdf for which fun returns true as a GroupedDataFrame if ungroup=false (the default), or as a data frame if ungroup=true.\n\nIf cols is not specified then the predicate fun is called with a SubDataFrame for each group.\n\nIf cols is specified then the predicate fun is called for each group with views of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset function instead as it is consistent with other DataFrames.jl functions (as opposed to filter).\n\nExamples\n\njulia> df = DataFrame(g=[1, 2], x=['a', 'b']);\n\njulia> gd = groupby(df, :g)\nGroupedDataFrame with 2 groups based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n⋮\nLast Group (1 row): g = 2\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     2  b\n\njulia> filter(x -> x.x[1] == 'a', gd)\nGroupedDataFrame with 1 group based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │  
   1  a\n\njulia> filter(:x => x -> x[1] == 'a', gd)\nGroupedDataFrame with 1 group based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> filter(:x => x -> x[1] == 'a', gd, ungroup=true)\n1×2 DataFrame\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.filter!","page":"Functions","title":"Base.filter!","text":"filter!(fun, df::AbstractDataFrame)\nfilter!(cols => fun, df::AbstractDataFrame)\n\nRemove rows from data frame df for which fun returns false.\n\nIf cols is not specified then the predicate fun is passed DataFrameRows. Elements of a DataFrameRow may be accessed with dot syntax or column indexing inside fun.\n\nIf cols is specified then the predicate fun is passed elements of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nPassing cols leads to a more efficient execution of the operation for large data frames.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset! 
function instead as it is consistent with other DataFrames.jl functions (as opposed to filter!).\n\nnote: Note\nDue to type stability the filter!(cols => fun, df::AbstractDataFrame) call is preferred in performance critical applications.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: filter\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> filter!(row -> row.x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter!(row -> row[\"x\"] > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter!(:x => x -> x == 3, df)\n1×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"]);\n\njulia> filter!([:x, :y] => (x, y) -> x == 1 || y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"]);\n\njulia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.keepat!","page":"Functions","title":"Base.keepat!","text":"keepat!(df::DataFrame, inds)\n\nDelete rows at all indices not specified by inds from a DataFrame df in place and return it.\n\nInternally deleteat! 
is called for all columns so inds must be: a vector of sorted and unique integers, a boolean vector, an integer, or Not wrapping any valid selector.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> keepat!(df, [1, 3])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.first","page":"Functions","title":"Base.first","text":"first(df::AbstractDataFrame)\n\nGet the first row of df as a DataFrameRow.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\nfirst(df::AbstractDataFrame, n::Integer; view::Bool=false)\n\nGet a data frame with the n first rows of df. Get all rows if n is greater than the number of rows in df. Error if n is negative.\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.last","page":"Functions","title":"Base.last","text":"last(df::AbstractDataFrame)\n\nGet the last row of df as a DataFrameRow.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\nlast(df::AbstractDataFrame, n::Integer; view::Bool=false)\n\nGet a data frame with the n last rows of df. Get all rows if n is greater than the number of rows in df. Error if n is negative.\n\nIf view=false a freshly allocated DataFrame is returned. 
If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.nonunique","page":"Functions","title":"DataFrames.nonunique","text":"nonunique(df::AbstractDataFrame; keep::Symbol=:first)\nnonunique(df::AbstractDataFrame, cols; keep::Symbol=:first)\n\nReturn a Vector{Bool} in which true entries indicate duplicate rows.\n\nDuplicate rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is indicated with a false entry. If keep=:last, only the last occurrence of a set of duplicate rows is indicated with a false entry. If keep=:noduplicates, only rows without any duplicates are indicated with a false entry.\n\nArguments\n\ndf : AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. 
Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nSee also unique and unique!.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> nonunique(df)\n8-element Vector{Bool}:\n 0\n 0\n 0\n 0\n 1\n 1\n 1\n 1\n\njulia> nonunique(df, keep=:last)\n8-element Vector{Bool}:\n 1\n 1\n 1\n 1\n 0\n 0\n 0\n 0\n\njulia> nonunique(df, 2)\n8-element Vector{Bool}:\n 0\n 0\n 1\n 1\n 1\n 1\n 1\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.Iterators.only","page":"Functions","title":"Base.Iterators.only","text":"only(df::AbstractDataFrame)\n\nIf df has a single row return it as a DataFrameRow; otherwise throw ArgumentError.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pop!","page":"Functions","title":"Base.pop!","text":"pop!(df::DataFrame)\n\nRemove the last row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> pop!(df)\n(a = 3, b = 6)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     
2      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.popat!","page":"Functions","title":"Base.popat!","text":"popat!(df::DataFrame, i::Integer)\n\nRemove the i-th row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> popat!(df, 2)\n(a = 2, b = 5)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.popfirst!","page":"Functions","title":"Base.popfirst!","text":"popfirst!(df::DataFrame)\n\nRemove the first row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> popfirst!(df)\n(a = 1, b = 4)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      5\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.resize!","page":"Functions","title":"Base.resize!","text":"resize!(df::DataFrame, n::Integer)\n\nResize df to have n rows by calling resize! 
on all columns of df.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> resize!(df, 2)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.subset","page":"Functions","title":"DataFrames.subset","text":"subset(df::AbstractDataFrame, args...;\n       skipmissing::Bool=false, view::Bool=false, threads::Bool=true)\nsubset(gdf::GroupedDataFrame, args...;\n       skipmissing::Bool=false, view::Bool=false,\n       ungroup::Bool=true, threads::Bool=true)\n\nReturn a copy of data frame df or parent of gdf containing only rows for which all values produced by transformation(s) args for a given row are true. All transformations must produce vectors containing true or false. When the first argument is a GroupedDataFrame, transformations are also allowed to return a single true or false value, which results in including or excluding a whole group.\n\nIf skipmissing=false (the default) args are required to produce results containing only Bool values. If skipmissing=true, additionally missing is allowed and it is treated as false (i.e. 
rows for which one of the conditions returns missing are skipped).\n\nEach argument passed in args can be any specifier following the rules described for select with the restriction that:\n\nspecifying target column name is not allowed as subset does not create new columns;\nevery passed transformation must return a scalar or a vector (returning AbstractDataFrame, NamedTuple, DataFrameRow or AbstractMatrix is not supported).\n\nIf view=true a SubDataFrame view  is returned instead of a DataFrame.\n\nIf ungroup=false the resulting data frame is re-grouped based on the same grouping columns as gdf and a GroupedDataFrame is returned (preserving the order of groups from gdf).\n\nIf threads=true (the default) transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nIf a GroupedDataFrame is passed then it must include all groups present in the parent data frame, like in select!.\n\nnote: Note\nNote that as the subset function works in exactly the same way as other transformation functions defined in DataFrames.jl this is the preferred way to subset rows of a data frame or grouped data frame. In particular it uses a different set of rules for specifying transformations than filter which is implemented in DataFrames.jl to ensure support for the standard Julia API for collections.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: subset!, filter, select\n\nExamples\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false],\n                      y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12])\n4×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     1   true   true     true      1\n   2 │     2  false   true     true      2\n   3 │     3   true  false  missing     11\n   4 │     4  false  false  missing     12\n\njulia> subset(df, :x)\n2×5 DataFrame\n Row │ id     x     y      z        v\n     │ Int64  Bool  Bool   Bool?    Int64\n─────┼────────────────────────────────────\n   1 │     1  true   true     true      1\n   2 │     3  true  false  missing     11\n\njulia> subset(df, :v => x -> x .> 3)\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\njulia> subset(df, :x, :y => ByRow(!))\n1×5 DataFrame\n Row │ id     x     y      z        v\n     │ Int64  Bool  Bool   Bool?    Int64\n─────┼────────────────────────────────────\n   1 │     3  true  false  missing     11\n\njulia> subset(df, :x, :z, skipmissing=true)\n1×5 DataFrame\n Row │ id     x     y     z      v\n     │ Int64  Bool  Bool  Bool?  Int64\n─────┼─────────────────────────────────\n   1 │     1  true  true   true      1\n\njulia> subset(df, :x, :z)\nERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values\n\njulia> subset(groupby(df, :y), :v => x -> x .> minimum(x))\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     2  false   true     true      2\n   2 │     4  false  false  missing     12\n\njulia> subset(groupby(df, :y), :v => x -> minimum(x) > 5)\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.subset!","page":"Functions","title":"DataFrames.subset!","text":"subset!(df::AbstractDataFrame, args...;\n        skipmissing::Bool=false, threads::Bool=true)\nsubset!(gdf::GroupedDataFrame{DataFrame}, args...;\n        skipmissing::Bool=false, ungroup::Bool=true, threads::Bool=true)\n\nUpdate data frame df or the parent of gdf in place to contain only rows for which all values produced by transformation(s) args for a given row are true. All transformations must produce vectors containing true or false. When the first argument is a GroupedDataFrame, transformations are also allowed to return a single true or false value, which results in including or excluding a whole group.\n\nIf skipmissing=false (the default) args are required to produce results containing only Bool values. If skipmissing=true, additionally missing is allowed and it is treated as false (i.e. rows for which one of the conditions returns missing are skipped).\n\nEach argument passed in args can be any specifier following the rules described for select with the restriction that:\n\nspecifying target column name is not allowed as subset! does not create new columns;\nevery passed transformation must return a scalar or a vector (returning AbstractDataFrame, NamedTuple, DataFrameRow or AbstractMatrix is not supported).\n\nIf ungroup=false the passed GroupedDataFrame gdf is updated (preserving the order of its groups) and returned.\n\nIf threads=true (the default) transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to false if some transformations require serial execution or are not thread-safe.\n\nIf GroupedDataFrame is subsetted then it must include all groups present in the parent data frame, like in select!. In this case the passed GroupedDataFrame is updated to have correct groups after its parent is updated.\n\nnote: Note\nNote that as the subset! function works in exactly the same way as other transformation functions defined in DataFrames.jl this is the preferred way to subset rows of a data frame or grouped data frame. In particular it uses a different set of rules for specifying transformations than filter! which is implemented in DataFrames.jl to ensure support for the standard Julia API for collections.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: subset, filter!, select!\n\nExamples\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false])\n4×3 DataFrame\n Row │ id     x      y\n     │ Int64  Bool   Bool\n─────┼─────────────────────\n   1 │     1   true   true\n   2 │     2  false   true\n   3 │     3   true  false\n   4 │     4  false  false\n\njulia> subset!(df, :x, :y => ByRow(!));\n\njulia> df\n1×3 DataFrame\n Row │ id     x     y\n     │ Int64  Bool  Bool\n─────┼────────────────────\n   1 │     3  true  false\n\njulia> df = DataFrame(id=1:4, y=[true, true, false, false], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));\n\njulia> df\n2×3 DataFrame\n Row │ id     y      v\n     │ Int64  Bool   Int64\n─────┼─────────────────────\n   1 │     2   true      2\n   2 │     4  false     12\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false],\n                      z=[true, true, missing, missing], v=1:4)\n4×4 DataFrame\n Row │ id     x      z        v\n     │ Int64  Bool   Bool?    
Int64\n─────┼──────────────────────────────\n   1 │     1   true     true      1\n   2 │     2  false     true      2\n   3 │     3   true  missing      3\n   4 │     4  false  missing      4\n\njulia> subset!(df, :x, :z)\nERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values\n\njulia> subset!(df, :x, :z, skipmissing=true);\n\njulia> df\n1×4 DataFrame\n Row │ id     x     z      v\n     │ Int64  Bool  Bool?  Int64\n─────┼───────────────────────────\n   1 │     1  true   true      1\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));\n\njulia> df\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     2  false   true     true      2\n   2 │     4  false  false  missing     12\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> minimum(x) > 5);\n\njulia> df\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.unique","page":"Functions","title":"Base.unique","text":"unique(df::AbstractDataFrame; view::Bool=false, keep::Symbol=:first)\nunique(df::AbstractDataFrame, cols; view::Bool=false, keep::Symbol=:first)\n\nReturn a data frame containing only unique rows in df.\n\nNon-unique (duplicate) rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is kept. If keep=:last, only the last occurrence of a set of duplicate rows is kept. If keep=:noduplicates, only rows without any duplicates are kept.\n\nIf view=false a freshly allocated DataFrame is returned, and if view=true then a SubDataFrame view into df is returned.\n\nArguments\n\ndf : the AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. 
Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: unique!, nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> unique(df)   # doesn't modify df\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> unique(df, 2)\n2×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n\njulia> unique(df, keep=:noduplicates)\n0×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┴──────────────\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.unique!","page":"Functions","title":"Base.unique!","text":"unique!(df::AbstractDataFrame; keep::Symbol=:first)\nunique!(df::AbstractDataFrame, cols; keep::Symbol=:first)\n\nUpdate df in-place to contain only unique rows.\n\nNon-unique (duplicate) rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is kept. If keep=:last, only the last occurrence of a set of duplicate rows is kept. 
If keep=:noduplicates, only rows without any duplicates are kept.\n\nArguments\n\ndf : the AbstractDataFrame\ncols : column indicator (Symbol, Int, Vector{Symbol}, Regex, etc.) specifying the column(s) to compare. Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: unique, nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> unique!(copy(df))  # modifies the copy in place; df is unchanged\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> unique(df, keep=:noduplicates)\n0×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┴──────────────\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Working-with-missing-values","page":"Functions","title":"Working with missing values","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allowmissing\nallowmissing!\ncompletecases\ndisallowmissing\ndisallowmissing!\ndropmissing\ndropmissing!","category":"page"},{"location":"lib/functions/#Missings.allowmissing","page":"Functions","title":"Missings.allowmissing","text":"allowmissing(df::AbstractDataFrame, cols=:)\n\nReturn a copy of data frame df with columns cols converted to element type Union{T, Missing} from T to allow support for missing values.\n\ncols can be any column selector (Symbol, string 
or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=[1, 2])\n2×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> allowmissing(df)\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼────────\n   1 │      1\n   2 │      2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.allowmissing!","page":"Functions","title":"DataFrames.allowmissing!","text":"allowmissing!(df::DataFrame, cols=:)\n\nConvert columns cols of data frame df from element type T to Union{T, Missing} to support missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.completecases","page":"Functions","title":"DataFrames.completecases","text":"completecases(df::AbstractDataFrame, cols=:)\n\nReturn a Boolean vector with true entries indicating rows without missing values (complete cases) in data frame df.\n\nIf cols is provided, only missing values in the corresponding columns are considered. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers) that returns at least one column if df has at least one column.\n\nSee also: dropmissing and dropmissing!. 
Use findall(completecases(df)) to get the indices of the rows.\n\nExamples\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> completecases(df)\n5-element BitVector:\n 0\n 0\n 0\n 1\n 1\n\njulia> completecases(df, :x)\n5-element BitVector:\n 0\n 1\n 0\n 1\n 1\n\njulia> completecases(df, [:x, :y])\n5-element BitVector:\n 0\n 0\n 0\n 1\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Missings.disallowmissing","page":"Functions","title":"Missings.disallowmissing","text":"disallowmissing(df::AbstractDataFrame, cols=:; error::Bool=true)\n\nReturn a copy of data frame df with columns cols converted from element type Union{T, Missing} to T to drop support for missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nIf error=false then columns containing a missing value will be skipped instead of throwing an error.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=Union{Int, Missing}[1, 2])\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼────────\n   1 │      1\n   2 │      2\n\njulia> disallowmissing(df)\n2×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df = DataFrame(a=[1, missing])\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │ missing\n\njulia> disallowmissing(df, error=false)\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │ 
missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.disallowmissing!","page":"Functions","title":"DataFrames.disallowmissing!","text":"disallowmissing!(df::DataFrame, cols=:; error::Bool=true)\n\nConvert columns cols of data frame df from element type Union{T, Missing} to T to drop support for missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nIf error=false then columns containing a missing value will be skipped instead of throwing an error.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.dropmissing","page":"Functions","title":"DataFrames.dropmissing","text":"dropmissing(df::AbstractDataFrame, cols=:; view::Bool=false, disallowmissing::Bool=!view)\n\nReturn a data frame excluding rows with missing values in df.\n\nIf cols is provided, only missing values in the corresponding columns are considered. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned. 
In this case disallowmissing must be false.\n\nIf disallowmissing is true (the default when view is false) then columns specified in cols will be converted so as not to allow for missing values using disallowmissing!.\n\nSee also: completecases and dropmissing!.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing(df)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\njulia> dropmissing(df, disallowmissing=false)\n2×3 DataFrame\n Row │ i      x       y\n     │ Int64  Int64?  String?\n─────┼────────────────────────\n   1 │     4       2  d\n   2 │     5       1  e\n\njulia> dropmissing(df, :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e\n\njulia> dropmissing(df, [:x, :y])\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.dropmissing!","page":"Functions","title":"DataFrames.dropmissing!","text":"dropmissing!(df::AbstractDataFrame, cols=:; disallowmissing::Bool=true)\n\nRemove rows with missing values from data frame df and return it.\n\nIf cols is provided, only missing values in the corresponding columns are considered. 
cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf disallowmissing is true (the default) then the cols columns will get converted using disallowmissing!.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: dropmissing and completecases.\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing!(copy(df))\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\njulia> dropmissing!(copy(df), disallowmissing=false)\n2×3 DataFrame\n Row │ i      x       y\n     │ Int64  Int64?  
String?\n─────┼────────────────────────\n   1 │     4       2  d\n   2 │     5       1  e\n\njulia> dropmissing!(copy(df), :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e\n\njulia> dropmissing!(df, [:x, :y])\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Iteration","page":"Functions","title":"Iteration","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"eachcol\neachrow\nvalues\npairs\nIterators.partition","category":"page"},{"location":"lib/functions/#Base.eachcol","page":"Functions","title":"Base.eachcol","text":"eachcol(df::AbstractDataFrame)\n\nReturn a DataFrameColumns object that is a vector-like that allows iterating an AbstractDataFrame column by column.\n\nIndexing into DataFrameColumns objects using integer, Symbol or string returns the corresponding column (without copying). Indexing into DataFrameColumns objects using a multiple column selector returns a subsetted DataFrameColumns object with a new parent containing only the selected columns (without copying).\n\nDataFrameColumns supports most of the AbstractVector API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).\n\nIn particular findnext, findprev, findfirst, findlast, and findall functions are supported, and in findnext and findprev functions it is allowed to pass an integer, string, or Symbol as a reference index.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> eachcol(df)\n4×2 DataFrameColumns\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> collect(eachcol(df))\n2-element Vector{AbstractVector}:\n [1, 2, 3, 4]\n [11, 12, 13, 14]\n\njulia> map(eachcol(df)) do col\n           maximum(col) - minimum(col)\n       end\n2-element Vector{Int64}:\n 3\n 3\n\njulia> sum.(eachcol(df))\n2-element Vector{Int64}:\n 10\n 50\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.eachrow","page":"Functions","title":"Base.eachrow","text":"eachrow(df::AbstractDataFrame)\n\nReturn a DataFrameRows that iterates a data frame row by row, with each row represented as a DataFrameRow.\n\nBecause DataFrameRows have an eltype of Any, use copy(dfr::DataFrameRow) to obtain a named tuple, which supports iteration and property access like a DataFrameRow, but also passes information on the eltypes of the columns of df.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> eachrow(df)\n4×2 DataFrameRows\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> copy.(eachrow(df))\n4-element 
Vector{@NamedTuple{x::Int64, y::Int64}}:\n (x = 1, y = 11)\n (x = 2, y = 12)\n (x = 3, y = 13)\n (x = 4, y = 14)\n\njulia> eachrow(view(df, [4, 3], [2, 1]))\n2×2 DataFrameRows\n Row │ y      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │    14      4\n   2 │    13      3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.values","page":"Functions","title":"Base.values","text":"values(dfc::DataFrameColumns)\n\nGet a vector of columns from dfc.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pairs","page":"Functions","title":"Base.pairs","text":"pairs(dfc::DataFrameColumns)\n\nReturn an iterator of pairs associating the name of each column of dfc with the corresponding column vector, i.e. name => col where name is the column name of the column col.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.Iterators.partition","page":"Functions","title":"Base.Iterators.partition","text":"Iterators.partition(df::AbstractDataFrame, n::Integer)\n\nIterate over df data frame n rows at a time, returning each block as a SubDataFrame.\n\nExamples\n\njulia> collect(Iterators.partition(DataFrame(x=1:5), 2))\n3-element Vector{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}:\n 2×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n 2×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     4\n 1×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     5\n\n\n\n\n\nIterators.partition(dfr::DataFrameRows, n::Integer)\n\nIterate over DataFrameRows dfr n rows at a time, returning each block as a DataFrameRows over a view of rows of parent of dfr.\n\nExamples\n\njulia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))\n3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:\n 2×1 DataFrameRows\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n 2×1 DataFrameRows\n Row │ x\n     │ 
Int64\n─────┼───────\n   1 │     3\n   2 │     4\n 1×1 DataFrameRows\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Equality","page":"Functions","title":"Equality","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"isapprox","category":"page"},{"location":"lib/functions/#Base.isapprox","page":"Functions","title":"Base.isapprox","text":"isapprox(df1::AbstractDataFrame, df2::AbstractDataFrame;\n         rtol::Real=atol>0 ? 0 : √eps, atol::Real=0,\n         nans::Bool=false, norm::Function=norm)\n\nInexact equality comparison. df1 and df2 must have the same size and column names. Return  true if isapprox with given keyword arguments applied to all pairs of columns stored in df1 and df2 returns true.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Metadata","page":"Functions","title":"Metadata","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"metadata\nmetadatakeys\nmetadata!\ndeletemetadata!\nemptymetadata!\ncolmetadata\ncolmetadatakeys\ncolmetadata!\ndeletecolmetadata!\nemptycolmetadata!","category":"page"},{"location":"lib/functions/#DataAPI.metadata","page":"Functions","title":"DataAPI.metadata","text":"metadata(df::AbstractDataFrame, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfr::DataFrameRow, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfc::DataFrameColumns, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfr::DataFrameRows, key::AbstractString, [default]; style::Bool=false)\n\nReturn table-level metadata value associated with df for key key. 
If style=true return a tuple of metadata value and metadata style.\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nIf default is passed then return it if key does not exist; if style=true return (default, :default).\n\nSee also: metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.metadatakeys","page":"Functions","title":"DataAPI.metadatakeys","text":"metadatakeys(df::AbstractDataFrame)\nmetadatakeys(dfr::DataFrameRow)\nmetadatakeys(dfc::DataFrameColumns)\nmetadatakeys(dfr::DataFrameRows)\n\nReturn an iterator of table-level metadata keys which are set in the object.\n\nValues can be accessed using metadata(df, key).\n\nSubDataFrame and DataFrameRow expose only :note-style metadata keys of their parent.\n\nSee also: metadata, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.metadata!","page":"Functions","title":"DataAPI.metadata!","text":"metadata!(df::AbstractDataFrame, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfr::DataFrameRow, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfc::DataFrameColumns, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfr::DataFrameRows, key::AbstractString, value; style::Symbol=:default)\n\nSet table-level metadata for object df for key key to have value value and style style (:default by default) and return df.\n\nFor SubDataFrame and DataFrameRow only :note-style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.\n\nSee also: metadata, metadatakeys, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.deletemetadata!","page":"Functions","title":"DataAPI.deletemetadata!","text":"deletemetadata!(df::AbstractDataFrame, key::AbstractString)\ndeletemetadata!(dfr::DataFrameRow, key::AbstractString)\ndeletemetadata!(dfc::DataFrameColumns, key::AbstractString)\ndeletemetadata!(dfr::DataFrameRows, key::AbstractString)\n\nDelete table-level metadata from object df for key key and return df. If key does not exist, return df without modification.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.emptymetadata!","page":"Functions","title":"DataAPI.emptymetadata!","text":"emptymetadata!(df::AbstractDataFrame)\nemptymetadata!(dfr::DataFrameRow)\nemptymetadata!(dfc::DataFrameColumns)\nemptymetadata!(dfr::DataFrameRows)\n\nDelete all table-level metadata from object df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> emptymetadata!(df);\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadata","page":"Functions","title":"DataAPI.colmetadata","text":"colmetadata(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\n\nReturn column-level metadata value associated with df for column col and key key.\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nIf default is passed then return it if key does not exist for column col; if style=true return (default, :default). If col does not exist in df always throw an error.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadatakeys","page":"Functions","title":"DataAPI.colmetadatakeys","text":"colmetadatakeys(df::AbstractDataFrame, [col::ColumnIndex])\ncolmetadatakeys(dfr::DataFrameRow, [col::ColumnIndex])\ncolmetadatakeys(dfc::DataFrameColumns, [col::ColumnIndex])\ncolmetadatakeys(dfr::DataFrameRows, [col::ColumnIndex])\n\nIf col is passed return an iterator of column-level metadata keys which are set for column col. If col is not passed return an iterator of col => colmetadatakeys(x, col) pairs for all columns that have metadata, where each col is a Symbol.\n\nValues can be accessed using colmetadata(df, col, key).\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadata!","page":"Functions","title":"DataAPI.colmetadata!","text":"colmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\n\nSet column-level metadata in df for column col and key key to have value value and style style (:default by default) and return df.\n\nFor SubDataFrame and DataFrameRow only :note style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.deletecolmetadata!","page":"Functions","title":"DataAPI.deletecolmetadata!","text":"deletecolmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString)\n\nDelete column-level metadata set in df for column col and key key and return df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.emptycolmetadata!","page":"Functions","title":"DataAPI.emptycolmetadata!","text":"emptycolmetadata!(df::AbstractDataFrame, [col::ColumnIndex])\nemptycolmetadata!(dfr::DataFrameRow, [col::ColumnIndex])\nemptycolmetadata!(dfc::DataFrameColumns, [col::ColumnIndex])\nemptycolmetadata!(dfr::DataFrameRows, [col::ColumnIndex])\n\nDelete all column-level metadata set in df for column col and return df. If col is not passed, delete column-level metadata for all columns of df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> emptycolmetadata!(df, :a);\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"man/split_apply_combine/#The-Split-Apply-Combine-Strategy","page":"Split-apply-combine","title":"The Split-Apply-Combine Strategy","text":"","category":"section"},{"location":"man/split_apply_combine/#Design-of-the-split-apply-combine-support","page":"Split-apply-combine","title":"Design of the split-apply-combine support","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Many data analysis tasks involve three steps:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"splitting a data set into groups,\napplying some functions to each of the groups,\ncombining the results.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that steps 1 and 3 of this general procedure can be dropped, in which case we just transform a data frame without grouping it and later combining the result.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"A standardized framework for handling this sort of computation is described in the paper \"The Split-Apply-Combine Strategy for Data Analysis\", written by Hadley Wickham.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The DataFrames package supports the split-apply-combine strategy through the groupby function that creates a GroupedDataFrame, followed by combine, select/select! 
or transform/transform!.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All operations described in this section of the manual are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"In order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Operations can then be applied on each group using one of the following functions:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"combine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! 
is an in-place version of select;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"standard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target column names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be a single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be a single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note! If an expression of the form x => y is passed then, except for the special convenience forms of the column-independent operations listed above (such as nrow => target_cols), it is always interpreted as cols => function. In particular, for any other function the expression function => target_cols is not a valid transformation specification.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note! 
If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"What is allowed for function to return is determined by the target_cols value:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. 
In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then the rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If function returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"In all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"select/select! and transform/transform! 
always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"For combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"It is allowed to mix single values and vectors if multiple transformations are requested. In this case a single value will be repeated to match the length of columns specified by returned vectors.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"By default (threads=true) a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. 
they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The following keyword arguments are supported by the transformation functions (not all keyword arguments are supported in all cases; in general they are allowed in situations when they are meaningful, see the documentation of the specific functions for details):","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"keepkeys : whether grouping columns should be kept in the returned data frame.\nungroup : whether the return value of the operation should be a data frame or a GroupedDataFrame.\ncopycols : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nthreads : whether transformations may be run in separate tasks which can execute in parallel","category":"page"},{"location":"man/split_apply_combine/#Examples-of-the-split-apply-combine-operations","page":"Split-apply-combine","title":"Examples of the split-apply-combine 
operations","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"We show several examples of these functions applied to the iris dataset below:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> using DataFrames, CSV, Statistics\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> iris_gdf = 
groupby(iris, :Species)\nGroupedDataFrame with 3 groups based on key: Species\nFirst Group (50 rows): Species = \"Iris-setosa\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      46 rows omitted\n⋮\nLast Group (50 rows): Species = \"Iris-virginica\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         47 rows omitted\n\njulia> combine(iris_gdf, :PetalLength => mean)\n3×2 DataFrame\n Row │ Species          PetalLength_mean\n     │ String15         Float64\n─────┼───────────────────────────────────\n   1 │ Iris-setosa                 1.464\n   2 │ Iris-versicolor             4.26\n   3 │ Iris-virginica              5.552\n\njulia> combine(iris_gdf, nrow, proprow, groupindices)\n3×4 DataFrame\n Row │ Species          nrow   proprow   groupindices\n     │ String15         Int64  Float64   Int64\n─────┼────────────────────────────────────────────────\n   1 │ Iris-setosa         50  0.333333             1\n   2 │ Iris-versicolor     50  
0.333333             2\n   3 │ Iris-virginica      50  0.333333             3\n\njulia> combine(iris_gdf, nrow, :PetalLength => mean => :mean)\n3×3 DataFrame\n Row │ Species          nrow   mean\n     │ String15         Int64  Float64\n─────┼─────────────────────────────────\n   1 │ Iris-setosa         50    1.464\n   2 │ Iris-versicolor     50    4.26\n   3 │ Iris-virginica      50    5.552\n\njulia> combine(iris_gdf,\n               [:PetalLength, :SepalLength] =>\n               ((p, s) -> (a=mean(p)/mean(s), b=sum(p))) =>\n               AsTable) # multiple columns are passed as arguments\n3×3 DataFrame\n Row │ Species          a         b\n     │ String15         Float64   Float64\n─────┼────────────────────────────────────\n   1 │ Iris-setosa      0.292449     73.2\n   2 │ Iris-versicolor  0.717655    213.0\n   3 │ Iris-virginica   0.842744    277.6\n\njulia> combine(iris_gdf,\n               AsTable([:PetalLength, :SepalLength]) =>\n               x -> std(x.PetalLength) / std(x.SepalLength)) # passing a NamedTuple\n3×2 DataFrame\n Row │ Species          PetalLength_SepalLength_function\n     │ String15         Float64\n─────┼───────────────────────────────────────────────────\n   1 │ Iris-setosa                              0.492245\n   2 │ Iris-versicolor                          0.910378\n   3 │ Iris-virginica                           0.867923\n\njulia> combine(x -> std(x.PetalLength) / std(x.SepalLength), iris_gdf) # passing a SubDataFrame\n3×2 DataFrame\n Row │ Species          x1\n     │ String15         Float64\n─────┼───────────────────────────\n   1 │ Iris-setosa      0.492245\n   2 │ Iris-versicolor  0.910378\n   3 │ Iris-virginica   0.867923\n\njulia> combine(iris_gdf, 1:2 => cor, nrow)\n3×3 DataFrame\n Row │ Species          SepalLength_SepalWidth_cor  nrow\n     │ String15         Float64                     Int64\n─────┼────────────────────────────────────────────────────\n   1 │ Iris-setosa                        0.74678      50\n   2 │ 
Iris-versicolor                    0.525911     50\n   3 │ Iris-virginica                     0.457228     50\n\njulia> combine(iris_gdf, :PetalLength => (x -> [extrema(x)]) => [:min, :max])\n3×3 DataFrame\n Row │ Species          min      max\n     │ String15         Float64  Float64\n─────┼───────────────────────────────────\n   1 │ Iris-setosa          1.0      1.9\n   2 │ Iris-versicolor      3.0      5.1\n   3 │ Iris-virginica       4.5      6.9","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To get row number for each observation within each group use the eachindex function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, eachindex)\n150×2 DataFrame\n Row │ Species         eachindex\n     │ String15        Int64\n─────┼───────────────────────────\n   1 │ Iris-setosa             1\n   2 │ Iris-setosa             2\n   3 │ Iris-setosa             3\n  ⋮  │       ⋮             ⋮\n 148 │ Iris-virginica         48\n 149 │ Iris-virginica         49\n 150 │ Iris-virginica         50\n                 144 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Contrary to combine, the select and transform functions always return a data frame with the same number and order of rows as the source. 
In the example below the return values in columns :SepalLength_SepalWidth_cor and :nrow are broadcasted to match the number of elements in each group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> select(iris_gdf, 1:2 => cor)\n150×2 DataFrame\n Row │ Species         SepalLength_SepalWidth_cor\n     │ String          Float64\n─────┼────────────────────────────────────────────\n   1 │ Iris-setosa                       0.74678\n   2 │ Iris-setosa                       0.74678\n   3 │ Iris-setosa                       0.74678\n   4 │ Iris-setosa                       0.74678\n  ⋮  │       ⋮                     ⋮\n 148 │ Iris-virginica                    0.457228\n 149 │ Iris-virginica                    0.457228\n 150 │ Iris-virginica                    0.457228\n                                  143 rows omitted\n\njulia> transform(iris_gdf, :Species => x -> chop.(x, head=5, tail=0))\n150×6 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species         Species_function\n     │ Float64      Float64     Float64      Float64     String          SubString…\n─────┼────────────────────────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa     setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa     setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa     setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa     setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮                ⋮\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica  virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica  virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica  virginica\n                                                                          
143 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All functions also support the do block form. However, as noted above, this form is slow and should therefore be avoided when performance matters.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf) do df\n           (m = mean(df.PetalLength), s² = var(df.PetalLength))\n       end\n3×3 DataFrame\n Row │ Species          m        s²\n     │ String15         Float64  Float64\n─────┼─────────────────────────────────────\n   1 │ Iris-setosa        1.464  0.0301061\n   2 │ Iris-versicolor    4.26   0.220816\n   3 │ Iris-virginica     5.552  0.304588","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To apply a function to each non-grouping column of a GroupedDataFrame you can write:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, valuecols(iris_gdf) .=> mean)\n3×5 DataFrame\n Row │ Species          SepalLength_mean  SepalWidth_mean  PetalLength_mean  P ⋯\n     │ String15         Float64           Float64          Float64           F ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa                 5.006            3.418             1.464    ⋯\n   2 │ Iris-versicolor             5.936            2.77              4.26\n   3 │ Iris-virginica              6.588            2.974             5.552\n                                                                1 column omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that GroupedDataFrame is a view: therefore grouping columns of its parent data frame must not be mutated, 
and rows must not be added nor removed from it. If the number of rows of the parent changes then an error is thrown when a child GroupedDataFrame is used:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(id=1:2)\n2×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> gd = groupby(df, :id)\nGroupedDataFrame with 2 groups based on key: id\nFirst Group (1 row): id = 1\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): id = 2\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> push!(df, [3])\n3×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> gd[1]\nERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, push!, or append! functions).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Sometimes it is useful to append rows to the source data frame of a GroupedDataFrame, without affecting the rows used for grouping. 
In such a scenario you can create the grouped data frame using a view of the parent data frame to avoid the error:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(id=1:2)\n2×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> gd = groupby(view(df, :, :), :id)\nGroupedDataFrame with 2 groups based on key: id\nFirst Group (1 row): id = 1\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): id = 2\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> push!(df, [3])\n3×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> gd[1]\n1×1 SubDataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1","category":"page"},{"location":"man/split_apply_combine/#Using-GroupedDataFrame-as-an-iterable-and-indexable-object","page":"Split-apply-combine","title":"Using GroupedDataFrame as an iterable and indexable object","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you only want to split the data set into subsets, use the groupby function. 
You can then iterate SubDataFrames that constitute the identified groups:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> for subdf in iris_gdf\n           println(size(subdf, 1))\n       end\n50\n50\n50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To also get the values of the grouping columns along with each group, use the pairs function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> for (key, subdf) in pairs(iris_gdf)\n           println(\"Number of data points for $(key.Species): $(nrow(subdf))\")\n       end\nNumber of data points for Iris-setosa: 50\nNumber of data points for Iris-versicolor: 50\nNumber of data points for Iris-virginica: 50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The value of key in the example above where we iterated pairs(iris_gdf) is a DataFrames.GroupKey object, which can be used in a similar fashion to a NamedTuple.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Grouping a data frame using the groupby function can be seen as adding a lookup key to it. Such lookups can be performed efficiently by indexing the resulting GroupedDataFrame with a DataFrames.GroupKey (as presented above), a Tuple, a NamedTuple, or a dictionary. 
Here are some more examples of such indexing.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> iris_gdf[(Species=\"Iris-virginica\",)]  # a NamedTuple\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> iris_gdf[[(\"Iris-virginica\",), (\"Iris-setosa\",)]] # a vector of Tuples\nGroupedDataFrame with 2 groups based on key: Species\nFirst Group (50 rows): Species = \"Iris-virginica\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      
Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         46 rows omitted\n⋮\nLast Group (50 rows): Species = \"Iris-setosa\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      47 rows omitted\n\njulia> key = keys(iris_gdf) |> last # last key in iris_gdf\nGroupKey: (Species = String15(\"Iris-virginica\"),)\n\njulia> iris_gdf[key]\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  
Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> iris_gdf[Dict(\"Species\" => \"Iris-setosa\")] # a dictionary\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  44 │         5.0         3.5          1.6         0.6  Iris-setosa\n  45 │         5.1         3.8          1.9         0.4  Iris-setosa\n  46 │         4.8         3.0          1.4         0.3  Iris-setosa\n  47 │         5.1         3.8          1.6         0.2  Iris-setosa\n  48 │         4.6         3.2          1.4         0.2  
Iris-setosa\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      35 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that although GroupedDataFrame is iterable and indexable it is not an AbstractVector. For this reason currently it was decided that it does not support map nor broadcasting (to allow for making a decision in the future what result type they should produce). To apply a function to all groups of a data frame and get a vector of results either use a comprehension or collect GroupedDataFrame into a vector first. Here are examples of both approaches:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> sdf_vec = collect(iris_gdf)\n3-element Vector{Any}:\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  44 │         5.0         3.5          1.6         0.6  Iris-setosa\n  45 │         5.1         3.8          1.9         0.4  Iris-setosa\n  46 │         4.8   
      3.0          1.4         0.3  Iris-setosa\n  47 │         5.1         3.8          1.6         0.2  Iris-setosa\n  48 │         4.6         3.2          1.4         0.2  Iris-setosa\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      35 rows omitted\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────────\n   1 │         7.0         3.2          4.7         1.4  Iris-versicolor\n   2 │         6.4         3.2          4.5         1.5  Iris-versicolor\n   3 │         6.9         3.1          4.9         1.5  Iris-versicolor\n   4 │         5.5         2.3          4.0         1.3  Iris-versicolor\n   5 │         6.5         2.8          4.6         1.5  Iris-versicolor\n   6 │         5.7         2.8          4.5         1.3  Iris-versicolor\n   7 │         6.3         3.3          4.7         1.6  Iris-versicolor\n   8 │         4.9         2.4          3.3         1.0  Iris-versicolor\n  ⋮  │      ⋮           ⋮            ⋮           ⋮              ⋮\n  44 │         5.0         2.3          3.3         1.0  Iris-versicolor\n  45 │         5.6         2.7          4.2         1.3  Iris-versicolor\n  46 │         5.7         3.0          4.2         1.2  Iris-versicolor\n  47 │         5.7         2.9          4.2         1.3  Iris-versicolor\n  48 │         6.2         2.9          4.3         1.3  Iris-versicolor\n  49 │         5.1         2.5          3.0         1.1  Iris-versicolor\n  50 │         5.7         2.8          4.1         1.3  Iris-versicolor\n                                                          35 rows omitted\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     
Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> map(nrow, sdf_vec)\n3-element Vector{Int64}:\n 50\n 50\n 50\n\njulia> nrow.(sdf_vec)\n3-element Vector{Int64}:\n 50\n 50\n 50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Since GroupedDataFrame is iterable, you can achieve the same result with a comprehension:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> [nrow(sdf) for sdf in iris_gdf]\n3-element Vector{Int64}:\n 50\n 50\n 
50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that using the split-apply-combine strategy with the operation specification syntax in combine, select or transform will usually be faster for large GroupedDataFrame objects than iterating them, with the difference that they produce a data frame. An operation corresponding to the example above is:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, nrow)\n3×2 DataFrame\n Row │ Species          nrow\n     │ String15         Int64\n─────┼────────────────────────\n   1 │ Iris-setosa         50\n   2 │ Iris-versicolor     50\n   3 │ Iris-virginica      50","category":"page"},{"location":"man/split_apply_combine/#Simulating-the-SQL-where-clause","page":"Split-apply-combine","title":"Simulating the SQL where clause","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can conveniently work on subsets of a data frame by using SubDataFrames. Operations performed on such objects can either create a new data frame or be performed in-place. 
Here are some examples:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(a=1:5)\n5×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n   4 │     4\n   5 │     5\n\njulia> sdf = @view df[2:3, :]\n2×1 SubDataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     2\n   2 │     3\n\njulia> transform(sdf, :a => ByRow(string)) # create a new data frame\n2×2 DataFrame\n Row │ a      a_string\n     │ Int64  String\n─────┼─────────────────\n   1 │     2  2\n   2 │     3  3\n\njulia> transform!(sdf, :a => ByRow(string)) # update the source df in-place\n2×2 SubDataFrame\n Row │ a      a_string\n     │ Int64  String?\n─────┼─────────────────\n   1 │     2  2\n   2 │     3  3\n\njulia> df # new column was created filled with missing in filtered-out rows\n5×2 DataFrame\n Row │ a      a_string\n     │ Int64  String?\n─────┼─────────────────\n   1 │     1  missing\n   2 │     2  2\n   3 │     3  3\n   4 │     4  missing\n   5 │     5  missing\n\njulia> select!(sdf, :a => -, renamecols=false) # update the source df in-place\n2×1 SubDataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │    -2\n   2 │    -3\n\njulia> df # the column replaced an existing column; previously stored values are re-used in filtered-out rows\n5×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │    -2\n   3 │    -3\n   4 │     4\n   5 │     5","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Similar operations can be performed on GroupedDataFrame as well:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(a=[1, 1, 1, 2, 2, 3], b=1:6)\n6×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     1      2\n   3 │     1      3\n   
4 │     2      4\n   5 │     2      5\n   6 │     3      6\n\njulia> sdf = @view df[2:4, :]\n3×2 SubDataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     1      3\n   3 │     2      4\n\njulia> gsdf = groupby(sdf, :a)\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (2 rows): a = 1\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     1      3\n⋮\nLast Group (1 row): a = 2\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      4\n\njulia> transform(gsdf, nrow) # create a new data frame\n3×3 DataFrame\n Row │ a      b      nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      2\n   2 │     1      3      2\n   3 │     2      4      1\n\njulia> transform!(gsdf, nrow, :b => :b_copy)\n3×4 SubDataFrame\n Row │ a      b      nrow    b_copy\n     │ Int64  Int64  Int64?  Int64?\n─────┼──────────────────────────────\n   1 │     1      2       2       2\n   2 │     1      3       2       3\n   3 │     2      4       1       4\n\njulia> df\n6×4 DataFrame\n Row │ a      b      nrow     b_copy\n     │ Int64  Int64  Int64?   Int64?\n─────┼────────────────────────────────\n   1 │     1      1  missing  missing\n   2 │     1      2        2        2\n   3 │     1      3        2        3\n   4 │     2      4        1        4\n   5 │     2      5  missing  missing\n   6 │     3      6  missing  missing\n\njulia> select!(gsdf, :b_copy, :b => sum, renamecols=false)\n3×3 SubDataFrame\n Row │ a      b_copy  b\n     │ Int64  Int64?  Int64\n─────┼──────────────────────\n   1 │     1       2      5\n   2 │     1       3      5\n   3 │     2       4      4\n\njulia> df\n6×3 DataFrame\n Row │ a      b_copy   b\n     │ Int64  Int64?   
Int64\n─────┼───────────────────────\n   1 │     1  missing      1\n   2 │     1        2      5\n   3 │     1        3      5\n   4 │     2        4      4\n   5 │     2  missing      5\n   6 │     3  missing      6","category":"page"},{"location":"man/split_apply_combine/#Column-independent-operations","page":"Split-apply-combine","title":"Column-independent operations","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The operation specification language used with combine, select and transform supports the following column-independent operations:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"getting the number of rows in a group (nrow);\ngetting the proportion of rows in a group (proprow);\ngetting the group number (groupindices);\ngetting a vector of indices within groups (eachindex).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"These operations are column-independent, because they do not require specifying the input column name in the operation specification syntax.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"These four exceptions to the standard operation specification syntax were introduced for user convenience as these operations are often needed in practice.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Below each of them is explained by example.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"First create a data frame we will work with:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = 
DataFrame(customer_id=[\"a\", \"b\", \"b\", \"b\", \"c\", \"c\"],\n                      transaction_id=[12, 15, 19, 17, 13, 11],\n                      volume=[2, 3, 1, 4, 5, 9])\n6×3 DataFrame\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\n   2 │ b                        15       3\n   3 │ b                        19       1\n   4 │ b                        17       4\n   5 │ c                        13       5\n   6 │ c                        11       9\n\njulia> gdf = groupby(df, :customer_id, sort=true);\n\njulia> show(gdf, allgroups=true)\nGroupedDataFrame with 3 groups based on key: customer_id\nGroup 1 (1 row): customer_id = \"a\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\nGroup 2 (3 rows): customer_id = \"b\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ b                        15       3\n   2 │ b                        19       1\n   3 │ b                        17       4\nGroup 3 (2 rows): customer_id = \"c\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ c                        13       5\n   2 │ c                        11       9","category":"page"},{"location":"man/split_apply_combine/#Getting-the-number-of-rows","page":"Split-apply-combine","title":"Getting the number of rows","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can get the number of rows per group in a GroupedDataFrame by just writing nrow, in which case the generated column name with the number of rows is 
:nrow:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow)\n3×2 DataFrame\n Row │ customer_id  nrow\n     │ String       Int64\n─────┼────────────────────\n   1 │ a                1\n   2 │ b                3\n   3 │ c                2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Additionally you are allowed to pass target column name:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow => \"transaction_count\")\n3×2 DataFrame\n Row │ customer_id  transaction_count\n     │ String       Int64\n─────┼────────────────────────────────\n   1 │ a                            1\n   2 │ b                            3\n   3 │ c                            2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that in both cases we did not pass source column name as it is not needed to determine the number of rows per group. This is the reason why column-independent operations are exceptions to standard operation specification syntax.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The nrow expression also works in the operation specification syntax applied to a data frame. 
Here is an example:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(df, nrow => \"transaction_count\")\n1×1 DataFrame\n Row │ transaction_count\n     │ Int64\n─────┼───────────────────\n   1 │                 6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Finally, recall that nrow is also a regular function that returns a number of rows in a data frame:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> nrow(df)\n6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"This dual use of nrow does not lead to ambiguities, and is meant to make it easier to remember this exception.","category":"page"},{"location":"man/split_apply_combine/#Getting-the-proportion-of-rows","page":"Split-apply-combine","title":"Getting the proportion of rows","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you want to get a proportion of rows per group in a GroupedDataFrame you can use the proprow and proprow => [target column name] column-independent operations. 
Here are some examples:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, proprow)\n3×2 DataFrame\n Row │ customer_id  proprow\n     │ String       Float64\n─────┼───────────────────────\n   1 │ a            0.166667\n   2 │ b            0.5\n   3 │ c            0.333333\n\njulia> combine(gdf, proprow => \"transaction_fraction\")\n3×2 DataFrame\n Row │ customer_id  transaction_fraction\n     │ String       Float64\n─────┼───────────────────────────────────\n   1 │ a                        0.166667\n   2 │ b                        0.5\n   3 │ c                        0.333333","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As opposed to nrow, proprow cannot be used outside of the operation specification syntax and is only allowed when processing a GroupedDataFrame.","category":"page"},{"location":"man/split_apply_combine/#Getting-the-group-number","page":"Split-apply-combine","title":"Getting the group number","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Another common operation is getting group number. 
Use the groupindices and groupindices => [target column name] column-independent operations to get it:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, groupindices)\n3×2 DataFrame\n Row │ customer_id  groupindices\n     │ String       Int64\n─────┼───────────────────────────\n   1 │ a                       1\n   2 │ b                       2\n   3 │ c                       3\n\njulia> transform(gdf, groupindices)\n6×4 DataFrame\n Row │ customer_id  transaction_id  volume  groupindices\n     │ String       Int64           Int64   Int64\n─────┼───────────────────────────────────────────────────\n   1 │ a                        12       2             1\n   2 │ b                        15       3             2\n   3 │ b                        19       1             2\n   4 │ b                        17       4             2\n   5 │ c                        13       5             3\n   6 │ c                        11       9             3\n\njulia> combine(gdf, groupindices => \"group_number\")\n3×2 DataFrame\n Row │ customer_id  group_number\n     │ String       Int64\n─────┼───────────────────────────\n   1 │ a                       1\n   2 │ b                       2\n   3 │ c                       3","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Outside of the operation specification syntax, groupindices is also a regular function which returns group indices for each row in the parent data frame of the passed GroupedDataFrame:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> groupindices(gdf)\n6-element Vector{Union{Missing, Int64}}:\n 1\n 2\n 2\n 2\n 3\n 3","category":"page"},{"location":"man/split_apply_combine/#Getting-a-vector-of-indices-within-groups","page":"Split-apply-combine","title":"Getting a vector 
of indices within groups","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The last column-independent operation supported by the operation specification syntax is getting the index of each row within each group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex)\n6×2 DataFrame\n Row │ customer_id  eachindex\n     │ String       Int64\n─────┼────────────────────────\n   1 │ a                    1\n   2 │ b                    1\n   3 │ b                    2\n   4 │ b                    3\n   5 │ c                    1\n   6 │ c                    2\n\njulia> select(gdf, eachindex, groupindices)\n6×3 DataFrame\n Row │ customer_id  eachindex  groupindices\n     │ String       Int64      Int64\n─────┼──────────────────────────────────────\n   1 │ a                    1             1\n   2 │ b                    1             2\n   3 │ b                    2             2\n   4 │ b                    3             2\n   5 │ c                    1             3\n   6 │ c                    2             3\n\njulia> combine(gdf, eachindex => \"transaction_number\")\n6×2 DataFrame\n Row │ customer_id  transaction_number\n     │ String       Int64\n─────┼─────────────────────────────────\n   1 │ a                             1\n   2 │ b                             1\n   3 │ b                             2\n   4 │ b                             3\n   5 │ c                             1\n   6 │ c                             2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that this operation also makes sense in a data frame context, where all rows are considered to be in the same 
group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> transform(df, eachindex)\n6×4 DataFrame\n Row │ customer_id  transaction_id  volume  eachindex\n     │ String       Int64           Int64   Int64\n─────┼────────────────────────────────────────────────\n   1 │ a                        12       2          1\n   2 │ b                        15       3          2\n   3 │ b                        19       1          3\n   4 │ b                        17       4          4\n   5 │ c                        13       5          5\n   6 │ c                        11       9          6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Finally recall that eachindex is a standard function for getting all indices in an array. This similarity of functionality was the reason why this name was picked:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> collect(eachindex(df.customer_id))\n6-element Vector{Int64}:\n 1\n 2\n 3\n 4\n 5\n 6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"This, for example, means that in the following example the two created columns have the same contents:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex, :customer_id => eachindex)\n6×3 DataFrame\n Row │ customer_id  eachindex  customer_id_eachindex\n     │ String       Int64      Int64\n─────┼───────────────────────────────────────────────\n   1 │ a                    1                      1\n   2 │ b                    1                      1\n   3 │ b                    2                      2\n   4 │ b                    3                      3\n   5 │ c                    
1                      1\n   6 │ c                    2                      2","category":"page"},{"location":"man/split_apply_combine/#Column-independent-operations-versus-functions","page":"Split-apply-combine","title":"Column-independent operations versus functions","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"When discussing column-independent operations it is important to remember that operation specification syntax allows you to pass a function (without source and target column names), in which case such a function gets passed a SubDataFrame that represents a group in a GroupedDataFrame. Here is an example comparing a column-independent operation and a function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex, sdf -> axes(sdf, 1))\n6×3 DataFrame\n Row │ customer_id  eachindex  x1\n     │ String       Int64      Int64\n─────┼───────────────────────────────\n   1 │ a                    1      1\n   2 │ b                    1      1\n   3 │ b                    2      2\n   4 │ b                    3      3\n   5 │ c                    1      1\n   6 │ c                    2      2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Notice that the column-independent operation eachindex produces the same result as using the anonymous function sdf -> axes(sdf, 1) that takes a SubDataFrame as its first argument and returns indices along its first axes. 
Importantly if it wasn't defined as a column-independent operation the eachindex function would fail when being passed as you can see here:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, sdf -> eachindex(sdf))\nERROR: MethodError: no method matching keys(::SubDataFrame{DataFrame, DataFrames.Index, Vector{Int64}})","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The reason for this error is that the eachindex function does not allow passing a SubDataFrame as its argument.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The same applies to proprow and groupindices: they would not work with a SubDataFrame as stand-alone functions.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The nrow column-independent operation is a different case, as the nrow function accepts SubDataFrame as an argument:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow, sdf -> nrow(sdf))\n3×3 DataFrame\n Row │ customer_id  nrow   x1\n     │ String       Int64  Int64\n─────┼───────────────────────────\n   1 │ a                1      1\n   2 │ b                3      3\n   3 │ c                2      2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Notice that columns :nrow and :x1 have identical contents, but the difference is that they do not have the same names. nrow is a column-independent operation generating the :nrow column name by default with number of rows per group. 
On the other hand, the sdf -> nrow(sdf) anonymous function gets a SubDataFrame as its argument and returns its number of rows. The :x1 column name is the default auto-generated column name when processing anonymous functions.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Passing a function taking a SubDataFrame is a flexible functionality allowing you to perform complex operations on your data. However, you should bear in mind two aspects:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Using the full operation specification syntax (where source and target column names are passed) or column-independent operations will lead to faster execution of your code (as the Julia compiler is able to better optimize execution of such operations) in comparison to passing a function taking a SubDataFrame.\nAlthough writing nrow, proprow, groupindices, and eachindex looks like just passing a function they internally do not take a SubDataFrame as their argument. As we explained in this section, proprow, groupindices, and eachindex would not work with SubDataFrame as their argument, and nrow would work, but would produce a different column name. Instead, these four operations are special column-independent operations that are exceptions to the standard operation specification syntax rules. They were added for user convenience.","category":"page"},{"location":"man/split_apply_combine/#Specifying-group-order-in-groupby","page":"Split-apply-combine","title":"Specifying group order in groupby","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"By default order of groups produced by groupby is undefined. 
If you want the order of groups to follow the order of first appearance in the source data frame of a grouping key then pass the sort=false keyword argument to groupby:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> push!(df, [\"a\", 100, 100]) # push row with large integer values to disable default sorting\n7×3 DataFrame\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\n   2 │ b                        15       3\n   3 │ b                        19       1\n   4 │ b                        17       4\n   5 │ c                        13       5\n   6 │ c                        11       9\n   7 │ a                       100     100\n\njulia> keys(groupby(df, :volume))\n7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (volume = 2,)\n GroupKey: (volume = 3,)\n GroupKey: (volume = 1,)\n GroupKey: (volume = 4,)\n GroupKey: (volume = 5,)\n GroupKey: (volume = 9,)\n GroupKey: (volume = 100,)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you want to have them sorted in ascending order pass sort=true:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> keys(groupby(df, :volume, sort=true))\n7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (volume = 1,)\n GroupKey: (volume = 2,)\n GroupKey: (volume = 3,)\n GroupKey: (volume = 4,)\n GroupKey: (volume = 5,)\n GroupKey: (volume = 9,)\n GroupKey: (volume = 100,)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can also use the order wrapper when passing a column name to group by or pass a named tuple as sort keyword argument 
containing one or more of alg, lt, by, rev, and order fields that will be treated just like in sortperm:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)]))\n6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (customer_id = \"a\", volume = 2)\n GroupKey: (customer_id = \"b\", volume = 4)\n GroupKey: (customer_id = \"b\", volume = 3)\n GroupKey: (customer_id = \"b\", volume = 1)\n GroupKey: (customer_id = \"c\", volume = 9)\n GroupKey: (customer_id = \"c\", volume = 5)\n\njulia> keys(groupby(df, :customer_id, sort=(rev=true,)))\n3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (customer_id = \"c\",)\n GroupKey: (customer_id = \"b\",)\n GroupKey: (customer_id = \"a\",)","category":"page"},{"location":"man/getting_started/#Getting-Started","page":"Getting Started","title":"Getting Started","text":"","category":"section"},{"location":"man/getting_started/#Installation","page":"Getting Started","title":"Installation","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The DataFrames package is available through the Julia package system and can be installed using the following commands:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"using Pkg\nPkg.add(\"DataFrames\")","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Throughout the rest of this tutorial, we will assume that you have installed the DataFrames package and have already typed using DataFrames to bring all of the relevant variables into your current namespace.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"note: Note\nBy default DataFrames.jl limits the number 
of rows and columns when displaying a data frame in a Jupyter Notebook to 25 and 100, respectively. You can override this behavior by changing the values of the ENV[\"DATAFRAMES_COLUMNS\"] and ENV[\"DATAFRAMES_ROWS\"] variables to hold the maximum number of columns and rows of the output. All columns or rows will be printed if those numbers are equal to or lower than 0. Alternatively, you may want to set the maximum number of data frame rows to print to 100 and the maximum number of columns to print to 1000 for every Julia session using some Jupyter kernel file (numbers 100 and 1000 are only examples and can be adjusted). In such a case add a \"DATAFRAMES_COLUMNS\": \"1000\", \"DATAFRAMES_ROWS\": \"100\" entry to the \"env\" variable in this Jupyter kernel file. See here for information about location and specification of Jupyter kernels. The package PrettyTables.jl renders the DataFrame in the Jupyter notebook. Users can customize the output by passing keyword arguments kwargs... to the function show: show(stdout, MIME(\"text/html\"), df; kwargs...), where df is the DataFrame. Any argument supported by PrettyTables.jl in the HTML backend can be used here. Hence, for example, if the user wants to change the color of all numbers smaller than 0 to red in Jupyter, they can execute: show(stdout, MIME(\"text/html\"), df; highlighters = hl_lt(0, HtmlDecoration(color = \"red\"))) after using PrettyTables. For more information about the available options, check PrettyTables.jl documentation.","category":"page"},{"location":"man/getting_started/#The-DataFrame-Type","page":"Getting Started","title":"The DataFrame Type","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Objects of the DataFrame type represent a data table as a series of vectors, each corresponding to a column or variable. 
The simplest way of constructing a DataFrame is to pass column vectors using keyword arguments or pairs:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> using DataFrames\n\njulia> DataFrame(a=1:4, b=[\"M\", \"F\", \"F\", \"M\"]) # keyword argument constructor\n4×2 DataFrame\n Row │ a      b\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Here are examples of other commonly used ways to construct a data frame:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> DataFrame((a=[1, 2], b=[3, 4])) # Tables.jl table constructor from a named tuple of vectors\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)]) # Tables.jl table constructor from a vector of named tuples\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(\"a\" => 1:2, \"b\" => 0) # Pair constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([:a => 1:2, :b => 0]) # vector of Pairs constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(Dict(:a => 1:2, :b => 0)) # dictionary constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([[1, 2], [0, 0]], [:a, :b]) # vector of vectors constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([1 0; 
2 0], :auto) # matrix constructor\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Columns can be directly (i.e. without copying) extracted using df.col, df.\"col\", df[!, :col] or df[!, \"col\"] (this rule applies to getting data from a data frame, not writing data to a data frame). The two latter syntaxes are more flexible as they allow passing a variable holding the name of the column, and not only a literal name. Note that column names can be either symbols (written as :col, :var\"col\" or Symbol(\"col\")) or strings (written as \"col\"). In the forms df.\"col\" and :var\"col\" variable interpolation into a string using $ does not work. Columns can also be extracted using an integer index specifying their position.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Since df[!, :col] does not make a copy, changing the elements of the column vector returned by this syntax will affect the values stored in the original df. 
To get a copy of the column use df[:, :col]: changing the vector returned by this syntax does not change df.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame(A=1:4, B=[\"M\", \"F\", \"F\", \"M\"])\n4×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M\n\njulia> df.A\n4-element Vector{Int64}:\n 1\n 2\n 3\n 4\n\njulia> df.\"A\"\n4-element Vector{Int64}:\n 1\n 2\n 3\n 4\n\njulia> df.A === df[!, :A]\ntrue\n\njulia> df.A === df[:, :A]\nfalse\n\njulia> df.A == df[:, :A]\ntrue\n\njulia> df.A === df[!, \"A\"]\ntrue\n\njulia> df.A === df[:, \"A\"]\nfalse\n\njulia> df.A == df[:, \"A\"]\ntrue\n\njulia> df.A === df[!, 1]\ntrue\n\njulia> df.A === df[:, 1]\nfalse\n\njulia> df.A == df[:, 1]\ntrue\n\njulia> firstcolumn = :A\n:A\n\njulia> df[!, firstcolumn] === df.A\ntrue\n\njulia> df[:, firstcolumn] === df.A\nfalse\n\njulia> df[:, firstcolumn] == df.A\ntrue","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Column names can be obtained as strings using the names function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> names(df)\n2-element Vector{String}:\n \"A\"\n \"B\"","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also filter column names by passing a column selector condition as a second argument. See the names docstring for a detailed list of available conditions. 
Here we give some selected examples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> names(df, r\"A\") # a regular expression selector\n1-element Vector{String}:\n \"A\"\n\njulia> names(df, Int) # a selector using column element type\n1-element Vector{String}:\n \"A\"\n\njulia> names(df, Not(:B)) # selector keeping all columns except :B\n1-element Vector{String}:\n \"A\"","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"To get column names as Symbols use the propertynames function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> propertynames(df)\n2-element Vector{Symbol}:\n :A\n :B","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"note: Note\nDataFrames.jl allows using Symbols (like :A) and strings (like \"A\") for all column indexing operations for convenience. 
However, using Symbols is slightly faster and should generally be preferred, if not generating them via string manipulation.","category":"page"},{"location":"man/getting_started/#Constructing-Column-by-Column","page":"Getting Started","title":"Constructing Column by Column","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"It is also possible to start with an empty DataFrame and add columns to it one by one:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame()\n0×0 DataFrame\n\njulia> df.A = 1:8\n1:8\n\njulia> df[:, :B] = [\"M\", \"F\", \"F\", \"M\", \"F\", \"M\", \"M\", \"F\"]\n8-element Vector{String}:\n \"M\"\n \"F\"\n \"F\"\n \"M\"\n \"F\"\n \"M\"\n \"M\"\n \"F\"\n\njulia> df[!, :C] .= 0\n8-element Vector{Int64}:\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n\njulia> df\n8×3 DataFrame\n Row │ A      B       C\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  M           0\n   2 │     2  F           0\n   3 │     3  F           0\n   4 │     4  M           0\n   5 │     5  F           0\n   6 │     6  M           0\n   7 │     7  M           0\n   8 │     8  F           0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The DataFrame we build in this way has 8 rows and 3 columns. 
This can be checked using the size function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> size(df, 1)\n8\n\njulia> size(df, 2)\n3\n\njulia> size(df)\n(8, 3)","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"In the above example notice that the df[!, :C] .= 0 expression created a new column in the data frame by broadcasting a scalar.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"When setting a column of a data frame the df[!, :C] and df.C syntaxes are equivalent and they would replace (or create) the :C column in df. This is different from using df[:, :C] to set a column in a data frame, which updates the contents of column in-place if it already exists.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Here is an example showing this difference. 
Let us try changing the :B column to a binary variable.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df[:, :B] = df.B .== \"F\"\nERROR: MethodError: Cannot `convert` an object of type Bool to an object of type String\n\njulia> df[:, :B] .= df.B .== \"F\"\nERROR: MethodError: Cannot `convert` an object of type Bool to an object of type String","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The above operations did not work because when you use : as row selector the :B column is updated in-place, and it only supports storing strings.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"On the other hand the following works:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df.B = df.B .== \"F\"\n8-element BitVector:\n 0\n 1\n 1\n 0\n 1\n 0\n 0\n 1\n\njulia> df\n8×3 DataFrame\n Row │ A      B      C\n     │ Int64  Bool   Int64\n─────┼─────────────────────\n   1 │     1  false      0\n   2 │     2   true      0\n   3 │     3   true      0\n   4 │     4  false      0\n   5 │     5   true      0\n   6 │     6  false      0\n   7 │     7  false      0\n   8 │     8   true      0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"As you can see because we used df.B on the left-hand side of the assignment the :B column was replaced. 
The same effect would be achieved if we used df[!, :B] instead or if we used broadcasted assignment .=.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"In the Indexing section of the manual you can find all details about all the available indexing options.","category":"page"},{"location":"man/getting_started/#Constructing-Row-by-Row","page":"Getting Started","title":"Constructing Row by Row","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"It is also possible to fill a DataFrame row by row. Let us construct an empty data frame with two columns (note that the first column can only contain integers and the second one can only contain strings):","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame(A=Int[], B=String[])\n0×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┴───────────────","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Rows can then be added as tuples or vectors, where the order of elements matches that of columns. 
To add new rows at the end of a data frame use push!:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> push!(df, (1, \"M\"))\n1×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n\njulia> push!(df, [2, \"N\"])\n2×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  N","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Rows can also be added as Dicts, where the dictionary keys match the column names:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> push!(df, Dict(:B => \"F\", :A => 3))\n3×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  N\n   3 │     3  F","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Note that constructing a DataFrame row by row is significantly less performant than constructing it all at once, or column by column. For many use-cases this will not matter, but for very large DataFrames  this may be a consideration.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"If you want to add rows at the beginning of a data frame use pushfirst! and to insert a row in an arbitrary location use insert!.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also add whole tables to a data frame using the append! and prepend! 
functions.","category":"page"},{"location":"man/getting_started/#Constructing-from-another-table-type","page":"Getting Started","title":"Constructing from another table type","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"DataFrames supports the Tables.jl interface for interacting with tabular data. This means that a DataFrame can be used as a \"source\" to any package that expects a Tables.jl interface input, (file format packages, data manipulation packages, etc.). A DataFrame can also be a sink for any Tables.jl interface input. Some example uses are:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"df = DataFrame(a=[1, 2, 3], b=[:a, :b, :c])\n\n# write DataFrame out to CSV file\nCSV.write(\"dataframe.csv\", df)\n\n# store DataFrame in an SQLite database table\nSQLite.load!(df, db, \"dataframe_table\")\n\n# transform a DataFrame through Query.jl package\ndf = df |> @map({a=_.a + 1, _.b}) |> DataFrame","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"A particular common case of a collection that supports the Tables.jl interface is a vector of NamedTuples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> v = [(a=1, b=2), (a=3, b=4)]\n2-element Vector{@NamedTuple{a::Int64, b::Int64}}:\n (a = 1, b = 2)\n (a = 3, b = 4)\n\njulia> df = DataFrame(v)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also easily convert a data frame back to a vector of NamedTuples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> using Tables\n\njulia> 
Tables.rowtable(df)\n2-element Vector{@NamedTuple{a::Int64, b::Int64}}:\n (a = 1, b = 2)\n (a = 3, b = 4)","category":"page"},{"location":"man/missing/#Missing-Data","page":"Missing Data","title":"Missing Data","text":"","category":"section"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"In Julia, missing values in data are represented using the special object missing, which is the single instance of the type Missing.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> missing\nmissing\n\njulia> typeof(missing)\nMissing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The Missing type lets users create vectors and DataFrame columns with missing values. Here we create a vector with a missing value and the element-type of the returned vector is Union{Missing, Int64}.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> x = [1, 2, missing]\n3-element Vector{Union{Missing, Int64}}:\n 1\n 2\n  missing\n\njulia> eltype(x)\nUnion{Missing, Int64}\n\njulia> Union{Missing, Int}\nUnion{Missing, Int64}\n\njulia> eltype(x) == Union{Missing, Int}\ntrue","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"missing values can be excluded when performing operations by using skipmissing, which returns a memory-efficient iterator.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> skipmissing(x)\nskipmissing(Union{Missing, Int64}[1, 2, missing])","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The output of skipmissing can be passed directly into functions as an argument. 
For example, we can find the sum of all non-missing values or collect the non-missing values into a new missing-free vector.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> sum(skipmissing(x))\n3\n\njulia> collect(skipmissing(x))\n2-element Vector{Int64}:\n 1\n 2","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function coalesce can be used to replace missing values with another value (note the dot, indicating that the replacement should be applied to all entries in x):","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> coalesce.(x, 0)\n3-element Vector{Int64}:\n 1\n 2\n 0","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The functions dropmissing and dropmissing! can be used to remove the rows containing missing values from a data frame and either create a new DataFrame or mutate the original in-place respectively.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> using DataFrames\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   
String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing(df)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"One can specify the column(s) in which to search for rows containing missing values to be removed.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> dropmissing(df, :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"By default the dropmissing and dropmissing! functions keep the Union{T, Missing} element type in columns selected for row removal. To remove the Missing part, if present, set the disallowmissing keyword argument to true (it will become the default behavior in the future).","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> dropmissing(df, disallowmissing=true)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Sometimes it is useful to allow or disallow support of missing values in some columns of a data frame. These operations are supported by the allowmissing, allowmissing!, disallowmissing, and disallowmissing! functions. 
Here is an example:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> df = DataFrame(x=1:3, y=4:6)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> allowmissing!(df)\n3×2 DataFrame\n Row │ x       y\n     │ Int64?  Int64?\n─────┼────────────────\n   1 │      1       4\n   2 │      2       5\n   3 │      3       6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Now df allows missing values in all its columns. We can take advantage of this fact and set some of the values in df to missing, e.g.:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> df[1, 1] = missing\nmissing\n\njulia> df\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64?\n─────┼─────────────────\n   1 │ missing       4\n   2 │       2       5\n   3 │       3       6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Note that a column selector can be passed as the second positional argument to allowmissing and allowmissing! to restrict the change to only some columns in our data frame.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Now let us perform the reverse operation by disallowing missing values in df. We know that column :y does not contain missing values so we can use the disallowmissing function passing a column selector as the second positional argument:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> disallowmissing(df, :y)\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   
Int64\n─────┼────────────────\n   1 │ missing      4\n   2 │       2      5\n   3 │       3      6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"This operation created a new DataFrame. If we wanted to update the df in-place the disallowmissing! function should be used.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"If we tried to disallow missings in the whole data frame using disallowmissing(df) we would get an error. However, it is often useful to disallow missings in all columns that actually do not contain them but keep the columns that have some missing values unchanged without having to list them explicitly. This can be accomplished by passing the error=false keyword argument:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> disallowmissing(df, error=false)\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64\n─────┼────────────────\n   1 │ missing      4\n   2 │       2      5\n   3 │       3      6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The Missings.jl package provides a few convenience functions to work with missing values.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"One of the most commonly used is passmissing. It is a higher order function that takes some function f as its argument and returns a new function which returns missing if any of its positional arguments are missing and otherwise applies the function f to these arguments. This functionality is useful in combination with functions that do not support passing missing values as their arguments. 
For example, trying uppercase(missing) would produce an error, while the following works:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> passmissing(uppercase)(\"a\")\n\"A\"\n\njulia> passmissing(uppercase)(missing)\nmissing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function Missings.replace returns an iterator which replaces missing elements with another value:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> using Missings\n\njulia> Missings.replace(x, 1)\nMissings.EachReplaceMissing{Vector{Union{Missing, Int64}}, Int64}(Union{Missing, Int64}[1, 2, missing], 1)\n\njulia> collect(Missings.replace(x, 1))\n3-element Vector{Int64}:\n 1\n 2\n 1\n\njulia> collect(Missings.replace(x, 1)) == coalesce.(x, 1)\ntrue","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function nonmissingtype returns the element-type T in Union{T, Missing}.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> eltype(x)\nUnion{Missing, Int64}\n\njulia> nonmissingtype(eltype(x))\nInt64","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The missings function constructs Vectors and Arrays supporting missing values, using the optional first argument to specify the element-type.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> missings(1)\n1-element Vector{Missing}:\n missing\n\njulia> missings(3)\n3-element Vector{Missing}:\n missing\n missing\n missing\n\njulia> missings(1, 3)\n1×3 Matrix{Missing}:\n missing  missing  missing\n\njulia> missings(Int, 1, 3)\n1×3 Matrix{Union{Missing, Int64}}:\n missing  missing  missing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing 
Data","text":"See the Julia manual for more information about missing values.","category":"page"},{"location":"#DataFrames.jl","page":"Introduction","title":"DataFrames.jl","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Welcome to the DataFrames.jl documentation!","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"This resource aims to teach you everything you need to know to get up and running with tabular data manipulation using the DataFrames.jl package.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"For more illustrations of DataFrames.jl usage, in particular in conjunction with other packages you can check-out the following resources (they are kept up to date with the released version of DataFrames.jl):","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl: Flexible and Fast Tabular Data in Julia article published in the Journal of Statistical Software\nData Wrangling with DataFrames.jl Cheat Sheet\nDataFrames Tutorial using Jupyter Notebooks\nJulia Academy DataFrames.jl tutorial\nJuliaCon 2023, JuliaCon 2022, JuliaCon 2021, JuliaCon 2020, JuliaCon 2019, ODSC Europe 2021 tutorials, and PyData Global 2020\nDataFrames.jl showcase","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"If you prefer to learn DataFrames.jl from a book you can consider reading:","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Julia for Data Analysis;\nJulia Data Science.","category":"page"},{"location":"#What-is-DataFrames.jl?","page":"Introduction","title":"What is DataFrames.jl?","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl provides a set of tools for working with tabular data in Julia. 
Its design and functionality are similar to those of pandas (in Python) and data.frame, data.table and dplyr (in R), making it a great general purpose data science tool.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl plays a central role in the Julia Data ecosystem, and has tight integrations with a range of different libraries. DataFrames.jl isn't the only tool for working with tabular data in Julia – as noted below, there are some other great libraries for certain use-cases – but it provides great data wrangling functionality through a familiar interface.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"To understand the toolchain in more detail, have a look at the tutorials in this manual. New users can start with the First Steps with DataFrames.jl section.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"You may find the DataFramesMeta.jl package or one of the other convenience packages discussed in the Data manipulation frameworks section of this manual helpful when writing more advanced data transformations, especially if you do not have significant programming experience. These packages provide convenience syntax similar to dplyr in R.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"If you use metadata when working with DataFrames.jl you might find the TableMetadataTools.jl package useful. 
This package defines several convenience functions for performing typical metadata operations.","category":"page"},{"location":"#DataFrames.jl-and-the-Julia-Data-Ecosystem","page":"Introduction","title":"DataFrames.jl and the Julia Data Ecosystem","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"The Julia data ecosystem can be a difficult space for new users to navigate, in part because the Julia ecosystem tends to distribute functionality across different libraries more than some other languages. Because many people coming to DataFrames.jl are just starting to explore the Julia data ecosystem, below is a list of well-supported libraries that provide different data science tools, along with a few notes about what makes each library special, and how well integrated they are with DataFrames.jl.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Statistics\nStatsKit.jl: A convenience meta-package which loads a set of essential packages for statistics, including those mentioned below in this section and DataFrames.jl itself.\nStatistics: The Julia standard library comes with a wide range of statistics functionality, but to gain access to these functions you must call using Statistics.\nLinearAlgebra: Like Statistics, many linear algebra features (factorizations, inversions, etc.) live in a library you have to load to use.\nSparseArrays are also in the standard library but must be loaded to be used.\nFreqTables.jl: Create frequency tables / cross-tabulations. Tightly integrated with DataFrames.jl.\nHypothesisTests.jl: A range of hypothesis testing tools.\nGLM.jl: Tools for estimating linear and generalized linear models. Tightly integrated with DataFrames.jl.\nStatsModels.jl: For converting heterogeneous DataFrame into homogeneous matrices for use with linear algebra libraries or machine learning applications that don't directly support DataFrames. 
Will do things like convert categorical variables into indicators/one-hot-encodings, create interaction terms, etc.\nMultivariateStats.jl: linear regression, ridge regression, PCA, component analyses tools. Not well integrated with DataFrames.jl, but easily used in combination with StatsModels.\nMachine Learning\nMLJ.jl: if you're more of an applied user, there is a single package that pulls from all these different libraries and provides a single, scikit-learn inspired API: MLJ.jl. MLJ.jl provides a common interface for a wide range of machine learning algorithms.\nScikitLearn.jl: A Julia wrapper around the full Python scikit-learn machine learning library. Not well integrated with DataFrames.jl, but can be combined using StatsModels.jl.\nAutoMLPipeline: A package that makes it trivial to create complex ML pipeline structures using simple expressions. It leverages the built-in macro programming features of Julia to symbolically process, manipulate pipeline expressions, and makes it easy to discover optimal structures for machine learning regression and classification.\nDeep learning: KNet.jl and Flux.jl.\nPlotting\nPlots.jl: Powerful, modern plotting library with a syntax akin to that of matplotlib (in Python) or plot (in R). 
StatsPlots.jl provides Plots.jl with recipes for many standard statistical plots.\nGadfly.jl: High-level plotting library with a \"grammar of graphics\" syntax akin to that of ggplot (in R).\nAlgebraOfGraphics.jl: A \"grammar of graphics\" library built upon Makie.jl.\nVegaLite.jl: High-level plotting library that uses a different \"grammar of graphics\" syntax and has an emphasis on interactive graphics.\nData Wrangling:\nImpute.jl: various methods for handling missing data in vectors, matrices and tables.\nDataFramesMeta.jl: A range of convenience functions for DataFrames.jl that augment select and transform to provide a user experience similar to that provided by dplyr in R.\nDataFrameMacros.jl: Provides macro versions of the common DataFrames.jl functions similar to DataFramesMeta.jl, with convenient syntax for the manipulation of multiple columns at once.\nQuery.jl: Query.jl provides a single framework for data wrangling that works with a range of libraries, including DataFrames.jl, other tabular data libraries (more on those below), and even non-tabular data. Provides many convenience functions analogous to those in dplyr in R or LINQ.\nYou can find more information on these packages in the Data manipulation frameworks section of this manual.\nAnd More!\nGraphs.jl: A pure-Julia, high performance network analysis library. 
Edgelists in DataFrames can be easily converted into graphs using the GraphDataFrameBridge.jl package.\nIO:\nDataFrames.jl works well with a range of formats, including:\nCSV files (using CSV.jl),\nApache Arrow (using Arrow.jl)\nreading Stata, SAS and SPSS files (using ReadStatTables.jl; alternatively Queryverse users can choose StatFiles.jl),\nParquet files (using Parquet2.jl),\nreading R data files (.rda, .RData) (using RData.jl).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"While not all of these libraries are tightly integrated with DataFrames.jl, because DataFrames are essentially collections of aligned Julia vectors, it is easy to (a) pull out a vector for use with a non-DataFrames-integrated library, or (b) convert your table into a homogeneously-typed matrix using the Matrix constructor or StatsModels.jl.","category":"page"},{"location":"#Other-Julia-Tabular-Libraries","page":"Introduction","title":"Other Julia Tabular Libraries","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl is a great general purpose tool for data manipulation and wrangling, but it's not ideal for all applications. For users with more specialized needs, consider using:","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"TypedTables.jl: Type-stable heterogeneous tables. 
Useful for improved performance when the structure of your table is relatively stable and does not feature thousands of columns.\nJuliaDB.jl: For users working with data that is too large to fit in memory, we suggest JuliaDB.jl, which offers better performance for large datasets, and can handle out-of-core data manipulations (Python users can think of JuliaDB.jl as the Julia version of dask).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Note that most tabular data libraries in the Julia ecosystem (including DataFrames.jl) support a common interface (defined in the Tables.jl package). As a result, some libraries are capable of working with a range of tabular data structures, making it easy to move between tabular libraries as your needs change. A user of Query.jl, for example, can use the same code to manipulate data in a DataFrame, a Table (defined by TypedTables.jl), or a JuliaDB table.","category":"page"},{"location":"#Questions?","page":"Introduction","title":"Questions?","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"If there is something you expect DataFrames to be capable of, but cannot figure out how to do, please reach out with questions in Domains/Data on Discourse. 
Additionally you might want to listen to an introduction to DataFrames.jl on JuliaAcademy.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please report bugs by opening an issue.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"You can follow the source links throughout the documentation to jump right to the source files on GitHub to make pull requests for improving the documentation and function capabilities.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please review DataFrames contributing guidelines before submitting your first PR!","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Information on specific versions can be found on the Release page.","category":"page"},{"location":"#Package-Manual","page":"Introduction","title":"Package Manual","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"man/basics.md\",\n         \"man/getting_started.md\",\n         \"man/joins.md\",\n         \"man/split_apply_combine.md\",\n         \"man/reshaping_and_pivoting.md\",\n         \"man/sorting.md\",\n         \"man/categorical.md\",\n         \"man/missing.md\",\n         \"man/comparisons.md\",\n         \"man/querying_frameworks.md\"]\nDepth = 2","category":"page"},{"location":"#API","page":"Introduction","title":"API","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Only exported (i.e. available for use without DataFrames. qualifier after loading the DataFrames.jl package with using DataFrames) types and functions are considered a part of the public API of the DataFrames.jl package. 
In general all such objects are documented in this manual (in case some documentation is missing please kindly report an issue here).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"note: Note\nBreaking changes to public and documented API are avoided in DataFrames.jl where possible.The following changes are not considered breaking:specific floating point values computed by operations may change at any time; users should rely only on approximate accuracy;\nin functions that use the default random number generator provided by Base Julia the specific random numbers computed may change across Julia versions;\nif the changed functionality is classified as a bug;\nif the changed behavior was not documented; two major cases are:\nin its implementation some function accepted a wider range of arguments that it was documented to handle - changes in handling of undocumented arguments are not considered as breaking;\nthe type of the value returned by a function changes, but it still follows the contract specified in the documentation; for example if a function is documented to return a vector then changing its type from Vector to PooledVector is not considered as breaking;\nerror behavior: code that threw an exception can change exception type thrown or stop throwing an exception;\nchanges in display (how objects are printed);\nchanges to the state of global objects from Base Julia whose state normally is considered volatile (e.g. state of global random number generator).All types and functions that are part of public API are guaranteed to go through a deprecation period before a breaking change is made to them or they would be removed.The standard practice is that breaking changes are implemented when a major release of DataFrames.jl is made (e.g. functionalities deprecated in a 1.x release would be changed in the 2.0 release).In rare cases a breaking change might be introduced in a minor release. 
In such a case the changed behavior still goes through one minor release during which it is deprecated. The situations where such a breaking change might be allowed are (still such breaking changes will be avoided if possible):the affected functionality was previously clearly identified in the documentation as being subject to changes (for example in DataFrames.jl 1.4 release propagation rules of :note-style metadata are documented as such);\nthe change is on the border of being classified as a bug (in rare cases even if a behavior of some function was documented its consequences for certain argument combinations could be decided to be unintended and not wanted);\nthe change is needed to adjust DataFrames.jl functionality to changes in Base Julia.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please be warned that while Julia allows you to access internal functions or types of DataFrames.jl these can change without warning between versions of DataFrames.jl. In particular it is not safe to directly access fields of types that are a part of public API of the DataFrames.jl package using e.g. the getfield function. 
Whenever some operation on fields of defined types is considered allowed an appropriate exported function should be used instead.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"lib/types.md\", \"lib/functions.md\", \"lib/indexing.md\"]\nDepth = 2","category":"page"},{"location":"#Index","page":"Introduction","title":"Index","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"lib/types.md\", \"lib/functions.md\"]","category":"page"},{"location":"assets/README/#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"In this folder we store the following data sets:","category":"page"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"german_credit.csv\niris.csv","category":"page"},{"location":"assets/README/#German-Credit-data-set","page":"Introduction","title":"German Credit data set","text":"","category":"section"},{"location":"assets/README/#License:","page":"Introduction","title":"License:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://opendatacommons.org/licenses/dbcl/1-0/","category":"page"},{"location":"assets/README/#Source:","page":"Introduction","title":"Source:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data) Professor Dr. 
Hans Hofmann Institut für Statistik und Ökonometrie Universität Hamburg FB Wirtschaftswissenschaften Von-Melle-Park 5 2000 Hamburg 13","category":"page"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"The original data is from UCI, and the file stored here is from Kaggle","category":"page"},{"location":"assets/README/#Iris-data-set","page":"Introduction","title":"Iris data set","text":"","category":"section"},{"location":"assets/README/#License","page":"Introduction","title":"License","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://creativecommons.org/publicdomain/zero/1.0/","category":"page"},{"location":"assets/README/#Source:-2","page":"Introduction","title":"Source:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://archive.ics.uci.edu/ml/datasets/Iris Creator: R.A. Fisher","category":"page"}]
+[{"location":"lib/internals/","page":"Internals","title":"Internals","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/internals/#Internals","page":"Internals","title":"Internals","text":"","category":"section"},{"location":"lib/internals/","page":"Internals","title":"Internals","text":"warning: Internal API\nThe functions, methods and types listed on this page are internal to DataFrames and are not considered to be part of the public API.","category":"page"},{"location":"lib/internals/","page":"Internals","title":"Internals","text":"compacttype\ngennames\ngetmaxwidths\nourshow\nourstrwidth\n@spawn_for_chunks\n@spawn_or_run_task\n@spawn_or_run\ndefault_table_transformation\nisreadonly","category":"page"},{"location":"lib/internals/#DataFrames.compacttype","page":"Internals","title":"DataFrames.compacttype","text":"compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)\n\nReturn compact string representation of type T.\n\nFor displaying data frame we do not want string representation of type to be longer than maxwidth. This function implements rules how type names are cropped if they are longer than maxwidth.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.gennames","page":"Internals","title":"DataFrames.gennames","text":"gennames(n::Integer)\n\nGenerate standardized names for columns of a DataFrame. 
The first name will be :x1, the second :x2, etc.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.getmaxwidths","page":"Internals","title":"DataFrames.getmaxwidths","text":"DataFrames.getmaxwidths(df::AbstractDataFrame,\n                        io::IO,\n                        rowindices1::AbstractVector{Int},\n                        rowindices2::AbstractVector{Int},\n                        rowlabel::Symbol,\n                        rowid::Union{Integer, Nothing},\n                        show_eltype::Bool,\n                        buffer::IOBuffer)\n\nCalculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the longest entry in that column – among the rows of the data frame will be rendered to IO. The widths for all columns are returned as a vector.\n\nReturn a Vector{Int} giving the maximum string widths required to render each column, including that column's name and type.\n\nNOTE: The last entry of the result vector is the string width of the implicit row ID column contained in every AbstractDataFrame.\n\nArguments\n\ndf::AbstractDataFrame: The data frame whose columns will be printed.\nio::IO: The IO to which df is to be printed\n`rowindices1::AbstractVector{Int}: A set of indices of the first chunk of the AbstractDataFrame that would be rendered to IO.\n`rowindices2::AbstractVector{Int}: A set of indices of the second chunk of the AbstractDataFrame that would be rendered to IO. Can be empty if the AbstractDataFrame would be printed without any ellipses.\nrowlabel::AbstractString: The label that will be used when rendered the numeric ID's of each row. 
Typically, this will be set to \"Row\".\nrowid: Used to handle showing DataFrameRow.\nshow_eltype: Whether to print the column type  under the column name in the heading.\nbuffer: buffer passed around to avoid reallocations in ourstrwidth\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.ourshow","page":"Internals","title":"DataFrames.ourshow","text":"DataFrames.ourshow(io::IO, x::Any, truncstring::Int)\n\nRender a value to an IO object compactly using print. truncstring indicates the approximate number of text characters width to truncate the output (if it is a non-positive value then no truncation is applied).\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.ourstrwidth","page":"Internals","title":"DataFrames.ourstrwidth","text":"DataFrames.ourstrwidth(io::IO, x::Any, buffer::IOBuffer, truncstring::Int)\n\nDetermine the number of characters that would be used to print a value.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.@spawn_for_chunks","page":"Internals","title":"DataFrames.@spawn_for_chunks","text":"@spawn_for_chunks basesize for i in range ... 
end\n\nParallelize a for loop by spawning separate tasks iterating each over a chunk of at least basesize elements in range.\n\nA number of tasks higher than Threads.nthreads() may be spawned, since that can allow for a more efficient load balancing in case some threads are busy (nested parallelism).\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.@spawn_or_run_task","page":"Internals","title":"DataFrames.@spawn_or_run_task","text":"@spawn_or_run_task threads expr\n\nEquivalent to Threads.@spawn if threads === true, otherwise run expr and return a Task that returns its value.\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.@spawn_or_run","page":"Internals","title":"DataFrames.@spawn_or_run","text":"@spawn_or_run threads expr\n\nEquivalent to Threads.@spawn if threads === true, otherwise run expr.\n\n\n\n\n\n","category":"macro"},{"location":"lib/internals/#DataFrames.default_table_transformation","page":"Internals","title":"DataFrames.default_table_transformation","text":"default_table_transformation(df_sel::AbstractDataFrame, fun)\n\nThis is a default implementation called when AsTable(...) => fun is requested. The df_sel argument is a data frame storing columns selected by AsTable(...) selector.\n\n\n\n\n\n","category":"function"},{"location":"lib/internals/#DataFrames.isreadonly","page":"Internals","title":"DataFrames.isreadonly","text":"isreadonly(fun)\n\nTrait returning a Bool indicator if function fun is only reading the passed argument. Such a function guarantees not to modify nor return in any form the passed argument. By default false is returned.\n\nThis function might become a part of the public API of DataFrames.jl in the future, currently it should be considered experimental. Adding a method to isreadonly for a specific function fun will improve performance of AsTable(...) 
=> ByRow(fun∘collect) operation.\n\n\n\n\n\n","category":"function"},{"location":"man/basics/#First-Steps-with-DataFrames.jl","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"","category":"section"},{"location":"man/basics/#Setting-up-the-Environment","page":"First Steps with DataFrames.jl","title":"Setting up the Environment","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to use the DataFrames.jl package you need to install it first. You can do it using the following commands:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.add(\"DataFrames\")","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> ] # ']' should be pressed\n\n(@v1.9) pkg> add DataFrames","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to make sure everything works as expected you can run the tests bundled with DataFrames.jl, but be warned that it will take more than 30 minutes:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.test(\"DataFrames\") # Warning! 
This will take more than 30 minutes.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Additionally, it is recommended to check the version of DataFrames.jl that you have installed with the status command.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> ]\n\n(@v1.9) pkg> status DataFrames\n      Status `~\\v1.6\\Project.toml`\n  [a93c6f00] DataFrames v1.5.0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Throughout the rest of the tutorial we will assume that you have installed the DataFrames.jl package and have already typed using DataFrames which loads the package:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using DataFrames","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The most fundamental type provided by DataFrames.jl is DataFrame, where typically each row is interpreted as an observation and each column as a feature.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Advanced installation configuration\nDataFrames.jl puts in extra time and effort when the package is being built (precompiled) to make sure it is more responsive when you are using it. However, in some scenarios users might want to avoid this extra precompilation effort to reduce the time needed to build the package and later to load it. 
To disable precompilation of DataFrames.jl in your current project follow the instructions given in the PrecompileTools.jl documentation","category":"page"},{"location":"man/basics/#Constructors-and-Basic-Utility-Functions","page":"First Steps with DataFrames.jl","title":"Constructors and Basic Utility Functions","text":"","category":"section"},{"location":"man/basics/#Constructors","page":"First Steps with DataFrames.jl","title":"Constructors","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this section you will see several ways to create a DataFrame using the constructor. You can find a detailed list of supported constructors along with more examples in the documentation of the DataFrame object.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We start by creating an empty DataFrame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame()\n0×0 DataFrame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Now let us initialize a DataFrame with several columns. 
A basic way to do it is the following:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(A=1:3, B=5:7, fixed=1)\n3×3 DataFrame\n Row │ A      B      fixed\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      1\n   2 │     2      6      1\n   3 │     3      7      1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Observe that using this constructor scalars, like 1 for the column :fixed get automatically broadcasted to fill all rows of the created DataFrame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes one needs to create a data frame whose column names are not valid Julia identifiers. In such a case the following form, where = is replaced by => is handy:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(\"customer age\" => [15, 20, 25],\n                 \"first name\" => [\"Rohit\", \"Rahul\", \"Akshat\"])\n3×2 DataFrame\n Row │ customer age  first name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Notice that this time we have passed column names as strings.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Often you have your source data stored in a dictionary. 
Provided that the keys of the dictionary are strings or Symbols you can also easily create a DataFrame from it:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> dict = Dict(\"customer age\" => [15, 20, 25],\n                   \"first name\" => [\"Rohit\", \"Rahul\", \"Akshat\"])\nDict{String, Vector} with 2 entries:\n  \"first name\"   => [\"Rohit\", \"Rahul\", \"Akshat\"]\n  \"customer age\" => [15, 20, 25]\n\njulia> DataFrame(dict)\n3×2 DataFrame\n Row │ customer age  first name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat\n\njulia> dict = Dict(:customer_age => [15, 20, 25],\n                   :first_name => [\"Rohit\", \"Rahul\", \"Akshat\"])\nDict{Symbol, Vector} with 2 entries:\n  :customer_age => [15, 20, 25]\n  :first_name   => [\"Rohit\", \"Rahul\", \"Akshat\"]\n\njulia> DataFrame(dict)\n3×2 DataFrame\n Row │ customer_age  first_name\n     │ Int64         String\n─────┼──────────────────────────\n   1 │           15  Rohit\n   2 │           20  Rahul\n   3 │           25  Akshat","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using Symbols, e.g. :customer_age rather than strings, e.g. \"customer age\" to denote column names is preferred as it is faster. However, as you can see in the example above if our column name contains a space it is not very convenient to pass it as a Symbol (you would have to write Symbol(\"customer age\"), which is verbose) so using a string is more convenient.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"It is also quite common to create a DataFrame from a NamedTuple of vectors or a vector of NamedTuples. 
Here are some examples of these operations:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame((a=[1, 2], b=[3, 4]))\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes your source data might have a heterogeneous set of columns for each observation. Here is an example:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> source = [(type=\"circle\", radius=10), (type=\"square\", side=20)]\n2-element Vector{NamedTuple{names, Tuple{String, Int64}} where names}:\n (type = \"circle\", radius = 10)\n (type = \"square\", side = 20)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to create a data frame from such data containing all columns present in at least one of the source observations, with a missing entry if some column is not present then you can use Tables.dictcolumntable function to help you create the desired data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(Tables.dictcolumntable(source))\n2×3 DataFrame\n Row │ type    radius   side\n     │ String  Int64?   
Int64?\n─────┼──────────────────────────\n   1 │ circle       10  missing\n   2 │ square  missing       20","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The role of Tables.dictcolumntable is to make sure that the DataFrame constructor gets information about all columns present in the source data and properly instantiates them. If we did not use this function the DataFrame constructor would assume that the first row of data contains the set of columns present in the source, which would lead to an error in our example:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame(source)\nERROR: type NamedTuple has no field radius","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us finish our review of constructors by showing how to create a DataFrame from a matrix. In this case you pass a matrix as a first argument. If the second argument is just :auto then column names x1, x2, ... 
will be auto generated.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> DataFrame([1 0; 2 0], :auto)\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Alternatively you can pass a vector of column names as a second argument to the DataFrame constructor:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> mat = [1 2 4 5; 15 58 69 41; 23 21 26 69]\n3×4 Matrix{Int64}:\n  1   2   4   5\n 15  58  69  41\n 23  21  26  69\n\njulia> nms = [\"a\", \"b\", \"c\", \"d\"]\n4-element Vector{String}:\n \"a\"\n \"b\"\n \"c\"\n \"d\"\n\njulia> DataFrame(mat, nms)\n3×4 DataFrame\n Row │ a      b      c      d\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      4      5\n   2 │    15     58     69     41\n   3 │    23     21     26     69","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You now know how to create a DataFrame from data that you already have in your Julia session. 
In the next section we show how to load data to a DataFrame from disk.","category":"page"},{"location":"man/basics/#Reading-Data-From-CSV-Files","page":"First Steps with DataFrames.jl","title":"Reading Data From CSV Files","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Here we focus on one of the most common scenarios, where one has data stored on disk in the CSV format.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"First make sure you have CSV.jl installed. You can do it using the following instructions:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Pkg\n\njulia> Pkg.add(\"CSV\")","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In order to read the file in we will use the CSV.read function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"german.csv\");\n\njulia> german_ref = CSV.read(path, DataFrame)\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little         ⋯\n    2 │     1     22  female       2  own      little           moderate\n    3 │     2     49  male         1  own      little           NA\n    4 │     3     45  male         2  free     little           little\n    5 │     4     53  male         2  free     little           
little         ⋯\n    6 │     5     35  male         1  free     NA               NA\n    7 │     6     53  male         2  own      quite rich       NA\n    8 │     7     35  male         3  rent     little           moderate\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │   993     30  male         3  own      little           little         ⋯\n  995 │   994     50  male         2  own      NA               NA\n  996 │   995     31  female       1  own      little           NA\n  997 │   996     40  male         3  own      little           little\n  998 │   997     38  male         2  own      little           NA             ⋯\n  999 │   998     23  male         2  free     little           little\n 1000 │   999     27  male         2  own      moderate         moderate\n                                                  4 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the data frame is wider and taller than the display width, so it got cropped and its 4 rightmost columns and middle 985 rows were not printed. Later in the tutorial we will discuss how to force Julia to show the whole data frame if we wanted so.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Also observe that DataFrames.jl displays the data type of the column below its name. In our case, it is an Int64, or String7 and String15.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us mention here the difference between the standard String type in Julia and e.g. the String7 or String15 types. The types with number suffix denote strings that have a fixed width (similar CHAR(N) type provided by many data bases). 
Such strings are much faster to work with (especially if you have many of them) than the standard String type because their instances are not heap allocated. For this reason CSV.read by default reads in narrow string columns using these fixed-width types.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us now explain in detail the following code block:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"german.csv\");\n\ngerman_ref = CSV.read(path, DataFrame)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"we are storing the german.csv file in the DataFrames.jl repository to make user's life easier and avoid having to download it each time;\npkgdir(DataFrames) gives us the full path to the root of the DataFrames.jl package.\nthen from this directory we need to move to the directory where the german.csv file is stored; we use joinpath as this is a recommended way to compose paths to resources stored on disk in an operating system independent way (remember that Windows and Unix differ as they use either / or \\ as path separator; the joinpath function ensures we are not running into issues with this);\nthen we read the CSV file; the second argument to CSV.read is DataFrame to indicate that we want to read in the file into a DataFrame (as CSV.read allows for many different target formats of data it can read-into).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before proceeding copy the reference data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german = 
copy(german_ref); # we copy the data frame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this way we can always easily restore our data even if we mess up the german data frame by modifying it.","category":"page"},{"location":"man/basics/#Basic-Operations-on-Data-Frames","page":"First Steps with DataFrames.jl","title":"Basic Operations on Data Frames","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To extract the columns of a data frame directly (i.e. without copying) you can use one of the following syntaxes: german.Sex, german.\"Sex\", german[!, :Sex] or german[!, \"Sex\"].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The two latter syntaxes using indexing are more flexible as they allow us passing a variable holding the name of the column, and not only a literal name as in the case of the syntax using a ..","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german.Sex\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n\njulia> colname = \"Sex\"\n\"Sex\"\n\njulia> german[!, colname]\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"","category":"page"},{"location":"man/basics/","page":"First Steps with 
DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Since german.Sex does not make a copy when extracting a column from the data frame, changing the elements of the vector returned by this operation will affect the values stored in the original german data frame. To get a copy of the column you can use german[:, :Sex] or german[:, \"Sex\"]. In this case changing the vector returned by this operation does not affect the data stored in the german data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The === function allows us to check if both expressions produce the same object and confirm the behavior described above:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german.Sex === german[!, :Sex]\ntrue\n\njulia> german.Sex === german[:, :Sex]\nfalse","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can obtain a vector of column names of the data frame as Strings using the names function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> names(german)\n10-element Vector{String}:\n \"id\"\n \"Age\"\n \"Sex\"\n \"Job\"\n \"Housing\"\n \"Saving accounts\"\n \"Checking account\"\n \"Credit amount\"\n \"Duration\"\n \"Purpose\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Sometimes you are interested in names of columns that meet a particular condition.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For example you can get column names with a given element type by passing this type as a second argument to the names 
function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> names(german, AbstractString)\n5-element Vector{String}:\n \"Sex\"\n \"Housing\"\n \"Saving accounts\"\n \"Checking account\"\n \"Purpose\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can explore more options of filtering column names in the documentation of the names function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If instead you wanted to get column names of a data frame as Symbols use the propertynames function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> propertynames(german)\n10-element Vector{Symbol}:\n :id\n :Age\n :Sex\n :Job\n :Housing\n Symbol(\"Saving accounts\")\n Symbol(\"Checking account\")\n Symbol(\"Credit amount\")\n :Duration\n :Purpose","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the column names containing spaces are not very convenient to work with as Symbols because they require more typing and introduce some visual noise.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you were interested in element types of the columns instead, you can use the eachcol(german) function to get an iterator over the columns of the data frame. 
Then you can broadcast the eltype function over it to get the desired result:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> eltype.(eachcol(german))\n10-element Vector{DataType}:\n Int64\n Int64\n String7\n Int64\n String7\n String15\n String15\n Int64\n Int64\n String31","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nRemember that DataFrames.jl allows you to use Symbols (like :id) and strings (like \"id\") for all column indexing operations for convenience. Using Symbols is slightly faster, while strings are simpler to work with when non-standard characters are present in column names or one wants to manipulate them.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before we wrap up let us discuss the empty and empty! functions that remove all rows from a DataFrame. Understanding the difference between the behavior of these two functions will help you to understand the function naming scheme in DataFrames.jl in general.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us start with the example of using the empty and empty! 
functions:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> empty(german)\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted\n\njulia> german\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little         ⋯\n    2 │     1     22  female       2  own      little           moderate\n    3 │     2     49  male         1  own      little           NA\n    4 │     3     45  male         2  free     little           little\n    5 │     4     53  male         2  free     little           little         ⋯\n    6 │     5     35  male         1  free     NA               NA\n    7 │     6     53  male         2  own      quite rich       NA\n    8 │     7     35  male         3  rent     little           moderate\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮       ⋱\n  994 │   993     30  male         3  own      little           little         ⋯\n  995 │   994     50  male         2  own      NA               NA\n  996 │   995     31  female       1  own      little           NA\n  997 │   996     40  male         3  own      little           little\n  998 │   997     38  male         2  own      little           NA             ⋯\n  999 │   998     23  male         2  free     little           little\n 1000 │   999     27  male         2  own      moderate         moderate\n                  
                                4 columns and 985 rows omitted\n\njulia> empty!(german)\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted\n\njulia> german\n0×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┴──────────────────────────────────────────────────────────────────────────\n                                                               4 columns omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the above example empty function created a new DataFrame with the same column names and column element types as german but with zero rows. On the other hand empty! function removed all rows from german in-place and made each of its columns empty.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The difference between the behavior of the empty and empty! functions is an application of the stylistic convention employed in the Julia language. 
This convention is followed in all functions provided by the DataFrames.jl package.","category":"page"},{"location":"man/basics/#Getting-Basic-Information-about-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Getting Basic Information about a Data Frame","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this section we will learn about how to get basic information on our german DataFrame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The size function returns the dimensions of the data frame. First we restore the german data frame, as we have just emptied it above.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german = copy(german_ref);\n\njulia> size(german)\n(1000, 10)\n\njulia> size(german, 1)\n1000\n\njulia> size(german, 2)\n10","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Additionally the nrow and ncol functions can be used to get the number of rows and columns in a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> nrow(german)\n1000\n\njulia> ncol(german)\n10","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To get basic statistics of data in your data frame use the describe function (check out the help of describe for information on how to customize the shown statistics).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> describe(german)\n10×7 DataFrame\n Row │ variable          mean     min       
median  max              nmissing  ⋯\n     │ Symbol            Union…   Any       Union…  Any              Int64     ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ id                499.5    0         499.5   999                     0  ⋯\n   2 │ Age               35.546   19        33.0    75                      0\n   3 │ Sex                        female            male                    0\n   4 │ Job               1.904    0         2.0     3                       0\n   5 │ Housing                    free              rent                    0  ⋯\n   6 │ Saving accounts            NA                rich                    0\n   7 │ Checking account           NA                rich                    0\n   8 │ Credit amount     3271.26  250       2319.5  18424                   0\n   9 │ Duration          20.903   4         18.0    72                      0  ⋯\n  10 │ Purpose                    business          vacation/others         0\n                                                                1 column omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To limit the columns processed by describe use cols keyword argument, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> describe(german, cols=1:3)\n3×7 DataFrame\n Row │ variable  mean    min     median  max   nmissing  eltype\n     │ Symbol    Union…  Any     Union…  Any   Int64     DataType\n─────┼────────────────────────────────────────────────────────────\n   1 │ id        499.5   0       499.5   999          0  Int64\n   2 │ Age       35.546  19      33.0    75           0  Int64\n   3 │ Sex               female          male         0  String7","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with 
DataFrames.jl","text":"The default statistics reported are mean, min, median, max, number of missing values, and element type of the column. missing values are skipped when computing the summary statistics.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can adjust how data frame is displayed by calling the show function manually: show(german, allrows=true) prints all rows even if they do not fit on screen and show(german, allcols=true) does the same for columns, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> show(german, allcols=true)\n1000×10 DataFrame\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking account  Credit amount  Duration  Purpose\n      │ Int64  Int64  String7  Int64  String7  String15         String15          Int64          Int64     String31\n──────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n    1 │     0     67  male         2  own      NA               little                     1169         6  radio/TV\n    2 │     1     22  female       2  own      little           moderate                   5951        48  radio/TV\n    3 │     2     49  male         1  own      little           NA                         2096        12  education\n    4 │     3     45  male         2  free     little           little                     7882        42  furniture/equipment\n    5 │     4     53  male         2  free     little           little                     4870        24  car\n    6 │     5     35  male         1  free     NA               NA                         9055        36  education\n    7 │     6     53  male         2  own      quite rich       NA                         2835        24  furniture/equipment\n    8 │     7     35  male         3  rent    
 little           moderate                   6948        36  car\n  ⋮   │   ⋮      ⋮       ⋮       ⋮       ⋮            ⋮                ⋮                ⋮           ⋮               ⋮\n  994 │   993     30  male         3  own      little           little                     3959        36  furniture/equipment\n  995 │   994     50  male         2  own      NA               NA                         2390        12  car\n  996 │   995     31  female       1  own      little           NA                         1736        12  furniture/equipment\n  997 │   996     40  male         3  own      little           little                     3857        30  car\n  998 │   997     38  male         2  own      little           NA                          804        12  radio/TV\n  999 │   998     23  male         2  free     little           little                     1845        45  radio/TV\n 1000 │   999     27  male         2  own      moderate         moderate                   4576        45  car\n                                                                                                               985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"It is easy to compute descriptive statistics directly on individual columns using the functions defined in the Statistics module:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using Statistics\n\njulia> mean(german.Age)\n35.546","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If instead we want to apply some function to all columns of a data frame we can use the mapcols function. It returns a DataFrame where each column of the source data frame is transformed using a function passed as a first argument. 
Note that mapcols guarantees not to reuse the columns from german in the returned DataFrame. If the transformation returns its argument then it gets copied before being stored.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> mapcols(id -> id .^ 2, german)\n1000×10 DataFrame\n  Row │ id      Age    Sex           Job    Housing   Saving accounts       Ch ⋯\n      │ Int64   Int64  String        Int64  String    String                St ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │      0   4489  malemale          4  ownown    NANA                  li ⋯\n    2 │      1    484  femalefemale      4  ownown    littlelittle          mo\n    3 │      4   2401  malemale          1  ownown    littlelittle          NA\n    4 │      9   2025  malemale          4  freefree  littlelittle          li\n    5 │     16   2809  malemale          4  freefree  littlelittle          li ⋯\n    6 │     25   1225  malemale          1  freefree  NANA                  NA\n    7 │     36   2809  malemale          4  ownown    quite richquite rich  NA\n    8 │     49   1225  malemale          9  rentrent  littlelittle          mo\n  ⋮   │   ⋮       ⋮         ⋮          ⋮       ⋮               ⋮               ⋱\n  994 │ 986049    900  malemale          9  ownown    littlelittle          li ⋯\n  995 │ 988036   2500  malemale          4  ownown    NANA                  NA\n  996 │ 990025    961  femalefemale      1  ownown    littlelittle          NA\n  997 │ 992016   1600  malemale          9  ownown    littlelittle          li\n  998 │ 994009   1444  malemale          4  ownown    littlelittle          NA ⋯\n  999 │ 996004    529  malemale          4  freefree  littlelittle          li\n 1000 │ 998001    729  malemale          4  ownown    moderatemoderate      mo\n                                                  4 columns and 985 rows 
omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to look at first and last rows of a data frame then you can do this using the first and last functions respectively:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> first(german, 6)\n6×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     1     22  female       2  own      little           moderate\n   3 │     2     49  male         1  own      little           NA\n   4 │     3     45  male         2  free     little           little\n   5 │     4     53  male         2  free     little           little          ⋯\n   6 │     5     35  male         1  free     NA               NA\n                                                               4 columns omitted\n\njulia> last(german, 5)\n5×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │   995     31  female       1  own      little           NA              ⋯\n   2 │   996     40  male         3  own      little           little\n   3 │   997     38  male         2  own      little           NA\n   4 │   998     23  male         2  free     little           little\n   5 │   999     27  male         2  own      moderate         moderate        ⋯\n                                                               4 columns 
omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using first and last without passing the number of rows will return a first/last DataFrameRow in the data frame. DataFrameRow is a view into a single row of an AbstractDataFrame. It stores a reference to a parent DataFrame and information about which row and columns from the parent are selected. You can think of DataFrameRow as a NamedTuple that is mutable, i.e. allows to update the source data frame, which is often useful.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> first(german)\nDataFrameRow\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n                                                               4 columns omitted\n\njulia> last(german)\nDataFrameRow\n  Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accou ⋯\n      │ Int64  Int64  String7  Int64  String7  String15         String15       ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n 1000 │   999     27  male         2  own      moderate         moderate       ⋯\n                                                               4 columns omitted","category":"page"},{"location":"man/basics/#Getting-and-Setting-Data-in-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Getting and Setting Data in a Data Frame","text":"","category":"section"},{"location":"man/basics/#Indexing-Syntax","page":"First Steps with DataFrames.jl","title":"Indexing Syntax","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with 
DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Data frame can be indexed in a similar way to matrices. In the Indexing section of the manual you can find all details about all the available options. Here we highlight the basic ones.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The general syntax for indexing is data_frame[selected_rows, selected_columns]. Observe that, as opposed to matrices in Julia Base, it is required to always pass both row and column selector. The colon : indicates that all items (rows or columns depending on its position) should be retained. Here are a few examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[1:5, [:Sex, :Age]]\n5×2 DataFrame\n Row │ Sex      Age\n     │ String7  Int64\n─────┼────────────────\n   1 │ male        67\n   2 │ female      22\n   3 │ male        49\n   4 │ male        45\n   5 │ male        53\n\njulia> german[1:5, :]\n5×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15         String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     1     22  female       2  own      little           moderate\n   3 │     2     49  male         1  own      little           NA\n   4 │     3     45  male         2  free     little           little\n   5 │     4     53  male         2  free     little           little          ⋯\n                                                               4 columns omitted\n\njulia> german[[1, 6, 15], :]\n3×10 DataFrame\n Row │ id     Age    Sex      Job    Housing  Saving accounts  Checking accoun ⋯\n     │ Int64  Int64  String7  Int64  String7  String15  
       String15        ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │     0     67  male         2  own      NA               little          ⋯\n   2 │     5     35  male         1  free     NA               NA\n   3 │    14     28  female       2  rent     little           little\n                                                               4 columns omitted\n\njulia> german[:, [:Age, :Sex]]\n1000×2 DataFrame\n  Row │ Age    Sex\n      │ Int64  String7\n──────┼────────────────\n    1 │    67  male\n    2 │    22  female\n    3 │    49  male\n    4 │    45  male\n    5 │    53  male\n    6 │    35  male\n    7 │    53  male\n    8 │    35  male\n  ⋮   │   ⋮       ⋮\n  994 │    30  male\n  995 │    50  male\n  996 │    31  female\n  997 │    40  male\n  998 │    38  male\n  999 │    23  male\n 1000 │    27  male\n       985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Pay attention that german[!, [:Sex]] and german[:, [:Sex]] returns a data frame object, while german[!, :Sex] and german[:, :Sex] returns a vector. In the first case, [:Sex] is a vector, indicating that the resulting object should be a data frame. On the other hand, :Sex is a single Symbol, indicating that a single column vector should be extracted. Note that in the first case a vector is required to be passed (not just any iterable), so e.g. german[:, (:Age, :Sex)] is not allowed, but german[:, [:Age, :Sex]] is valid. 
Below we show both operations to highlight this difference:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[!, [:Sex]]\n1000×1 DataFrame\n  Row │ Sex\n      │ String7\n──────┼─────────\n    1 │ male\n    2 │ female\n    3 │ male\n    4 │ male\n    5 │ male\n    6 │ male\n    7 │ male\n    8 │ male\n  ⋮   │    ⋮\n  994 │ male\n  995 │ male\n  996 │ female\n  997 │ male\n  998 │ male\n  999 │ male\n 1000 │ male\n985 rows omitted\n\njulia> german[!, :Sex]\n1000-element PooledArrays.PooledVector{String7, UInt32, Vector{UInt32}}:\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n ⋮\n \"male\"\n \"male\"\n \"male\"\n \"male\"\n \"female\"\n \"male\"\n \"male\"\n \"male\"\n \"male\"","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As it was explained earlier in this tutorial the difference between using ! and : when passing a row index is that ! does not perform a copy of columns, while : does when reading data from a data frame. Therefore german[!, [:Sex]] data frame stores the same vector as the source german data frame, while german[:, [:Sex]] stores its copy.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The ! selector normally should be avoided as using it can lead to hard to catch bugs. 
However, when working with very large data frames it can be useful to save memory and improve performance of operations.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Recapping what we have already learned, to get the column :Age from the german data frame you can do the following:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"to copy the vector: german[:, :Age], german[:, \"Age\"] or german[:, 2];\nto get a vector without copying: german.Age, german.\"Age\", german[!, :Age], german[!, \"Age\"] or german[!, 2].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To get the first two columns as a DataFrame, we can index as follows:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"to get the copied columns: german[:, 1:2], german[:, [:id, :Age]], or german[:, [\"id\", \"Age\"]];\nto reuse the columns without copying: german[!, 1:2], german[!, [:id, :Age]], or german[!, [\"id\", \"Age\"]].","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If you want to get a single cell of a data frame, use the same syntax as the one that gets a cell of a matrix:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[4, 4]\n2","category":"page"},{"location":"man/basics/#Views","page":"First Steps with DataFrames.jl","title":"Views","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We can also create a view of a data frame. 
It is often useful as it is more memory efficient than creating a materialized selection. You can create it using a view function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> view(german, :, 2:5)\n1000×4 SubDataFrame\n  Row │ Age    Sex      Job    Housing\n      │ Int64  String7  Int64  String7\n──────┼────────────────────────────────\n    1 │    67  male         2  own\n    2 │    22  female       2  own\n    3 │    49  male         1  own\n    4 │    45  male         2  free\n    5 │    53  male         2  free\n    6 │    35  male         1  free\n    7 │    53  male         2  own\n    8 │    35  male         3  rent\n  ⋮   │   ⋮       ⋮       ⋮       ⋮\n  994 │    30  male         3  own\n  995 │    50  male         2  own\n  996 │    31  female       1  own\n  997 │    40  male         3  own\n  998 │    38  male         2  own\n  999 │    23  male         2  free\n 1000 │    27  male         2  own\n                       985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or using a @view macro:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[end:-1:1, [1, 4]]\n1000×2 SubDataFrame\n  Row │ id     Job\n      │ Int64  Int64\n──────┼──────────────\n    1 │   999      2\n    2 │   998      2\n    3 │   997      2\n    4 │   996      3\n    5 │   995      1\n    6 │   994      2\n    7 │   993      3\n    8 │   992      1\n  ⋮   │   ⋮      ⋮\n  994 │     6      2\n  995 │     5      1\n  996 │     4      2\n  997 │     3      2\n  998 │     2      1\n  999 │     1      2\n 1000 │     0      2\n     985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similarly we 
can get a view of one column of a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[1:5, 1]\n5-element view(::Vector{Int64}, 1:5) with eltype Int64:\n 0\n 1\n 2\n 3\n 4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"its single cell:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[2, 2]\n0-dimensional view(::Vector{Int64}, 2) with eltype Int64:\n22","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"or a single row:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> @view german[3, 2:5]\nDataFrameRow\n Row │ Age    Sex      Job    Housing\n     │ Int64  String7  Int64  String7\n─────┼────────────────────────────────\n   3 │    49  male         1  own","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see the row and column indexing syntax is exactly the same as for indexing. 
The only difference is that we do not create a new object, but a view into an existing one.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In order to compare the performance of indexing vs creation of a view let us run the following benchmark using the BenchmarkTools.jl package (please install it if you want to re-run this comparison):","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> using BenchmarkTools\n\njulia> @btime $german[1:end-1, 1:end-1];\n  9.900 μs (44 allocations: 57.56 KiB)\n\njulia> @btime @view $german[1:end-1, 1:end-1];\n  67.332 ns (2 allocations: 32 bytes)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As you can see creation of a view is:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"an order of magnitude faster;\nallocates much less memory.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The downside of the view is that:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"it points to the same memory as its parent (so changing a view changes the parent, which is sometimes undesirable);\nsome operations might be a bit slower (as DataFrames.jl needs to perform a mapping of indices of a view to indices of the parent).","category":"page"},{"location":"man/basics/#Changing-the-Data-Stored-in-a-Data-Frame","page":"First Steps with DataFrames.jl","title":"Changing the Data Stored in a Data Frame","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with 
DataFrames.jl","text":"In order to show how to perform mutating operations on a data frame we make a subset of a german data frame first:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1 = german[1:6, 2:4]\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    67  male         2\n   2 │    22  female       2\n   3 │    49  male         1\n   4 │    45  male         2\n   5 │    53  male         2\n   6 │    35  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the following example we replace the column :Age in our df1 data frame with a new vector:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> val = [80, 85, 98, 95, 78, 89]\n6-element Vector{Int64}:\n 80\n 85\n 98\n 95\n 78\n 89\n\njulia> df1.Age = val\n6-element Vector{Int64}:\n 80\n 85\n 98\n 95\n 78\n 89\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    80  male         2\n   2 │    85  female       2\n   3 │    98  male         1\n   4 │    95  male         2\n   5 │    78  male         2\n   6 │    89  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This is a non-copying operation. 
One can perform it only if the val vector has the same length as the number of rows of df1 or, as a special case, if df1 has no columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1.Age === val # no copy is performed\ntrue","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If in indexing you select a subset of rows from a data frame, the mutation is performed in place, i.e. by writing to an existing vector. Below we set the values of column :Job in rows 1:3 to [2, 3, 2]:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[1:3, :Job] = [2, 3, 2]\n3-element Vector{Int64}:\n 2\n 3\n 2\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex      Job\n     │ Int64  String7  Int64\n─────┼───────────────────────\n   1 │    80  male         2\n   2 │    85  female       3\n   3 │    98  male         2\n   4 │    95  male         2\n   5 │    78  male         2\n   6 │    89  male         1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As a special rule, using ! as the row selector replaces the column without copying (just like in the df1.Age = val example above). 
For example below we replace the :Sex column:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Sex] = [\"male\", \"female\", \"female\", \"transgender\", \"female\", \"male\"]\n6-element Vector{String}:\n \"male\"\n \"female\"\n \"female\"\n \"transgender\"\n \"female\"\n \"male\"\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    80  male             2\n   2 │    85  female           3\n   3 │    98  female           2\n   4 │    95  transgender      2\n   5 │    78  female           2\n   6 │    89  male             1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similarly to setting selected rows of a single column we can also set selected columns of a given row of a data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[3, 1:3] = [78, \"male\", 4]\n3-element Vector{Any}:\n 78\n   \"male\"\n  4\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    80  male             2\n   2 │    85  female           3\n   3 │    78  male             4\n   4 │    95  transgender      2\n   5 │    78  female           2\n   6 │    89  male             1","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We have already mentioned that DataFrameRow can be used to mutate its parent data frame. 
Here are a few examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> dfr = df1[2, :] # DataFrameRow with the second row and all columns of df1\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    85  female      3\n\njulia> dfr.Age = 98 # set value of col `:Age` in row `2` to `98` in-place\n98\n\njulia> dfr\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    98  female      3\n\njulia> dfr[2:3] = [\"male\", 2] # set values of entries in columns `:Sex` and `:Job`\n2-element Vector{Any}:\n  \"male\"\n 2\n\njulia> dfr\nDataFrameRow\n Row │ Age    Sex     Job\n     │ Int64  String  Int64\n─────┼──────────────────────\n   2 │    98  male        2","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"These operations updated the data stored in the df1 data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In a similar fashion views can be used to update data stored in their parent data frame. 
Here are some examples:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> sdf = view(df1, :, 2:3)\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ male             2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ male             1\n\njulia> sdf[2, :Sex] = \"female\" # set value of col `:Sex` in second row to `female` in-place\n\"female\"\n\njulia> sdf\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ female           2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ male             1\n\njulia> sdf[6, 1:2] = [\"female\", 3]\n2-element Vector{Any}:\n  \"female\"\n 3\n\njulia> sdf\n6×2 SubDataFrame\n Row │ Sex          Job\n     │ String       Int64\n─────┼────────────────────\n   1 │ male             2\n   2 │ female           2\n   3 │ male             4\n   4 │ transgender      2\n   5 │ female           2\n   6 │ female           3","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In all these cases the parent of sdf view was also updated.","category":"page"},{"location":"man/basics/#Broadcasting-Assignment","page":"First Steps with DataFrames.jl","title":"Broadcasting Assignment","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Apart from normal assignment one can perform broadcasting assignment using the .= operation.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Before we move forward let us explain how broadcasting works in Julia. 
The standard syntax to perform broadcasting is to use the dot (.). For example, as opposed to R this operation fails:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> s = [25, 26, 35, 56]\n4-element Vector{Int64}:\n 25\n 26\n 35\n 56\n\njulia> s[2:3] = 0\nERROR: ArgumentError: indexed assignment with a single value to possibly many locations is not supported; perhaps use broadcasting `.=` instead?","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Instead we have to write:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> s[2:3] .= 0\n2-element view(::Vector{Int64}, 2:3) with eltype Int64:\n 0\n 0\n\njulia> s\n4-element Vector{Int64}:\n 25\n  0\n  0\n 56","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Similar syntax is fully supported in DataFrames.jl. Here, column :Age is replaced with a freshly allocated vector because of broadcasting assignment:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Age] .= [85, 89, 78, 58, 96, 68] # col `:Age` is replaced freshly allocated vector\n6-element Vector{Int64}:\n 85\n 89\n 78\n 58\n 96\n 68\n\njulia> df1\n6×3 DataFrame\n Row │ Age    Sex          Job\n     │ Int64  String       Int64\n─────┼───────────────────────────\n   1 │    85  male             2\n   2 │    89  female           2\n   3 │    78  male             4\n   4 │    58  transgender      2\n   5 │    96  female           2\n   6 │    68  female           3","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Using the : instead of ! 
above would perform a broadcasting assignment in-place into an existing column. The major difference between in-place and replace operations is that replacing columns is needed if new values have a different type than the old ones.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the examples below we operate on columns :Customers and :City that are not present in df1. In this case using ! and : are equivalent and a new column is allocated:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Customers] .= [\"Rohit\", \"Akshat\", \"Rahul\", \"Aayush\", \"Prateek\", \"Anam\"]\n6-element Vector{String}:\n \"Rohit\"\n \"Akshat\"\n \"Rahul\"\n \"Aayush\"\n \"Prateek\"\n \"Anam\"\n\njulia> df1[:, :City] .= [\"Kanpur\", \"Lucknow\", \"Bhuvneshwar\", \"Jaipur\", \"Ranchi\", \"Dehradoon\"]\n6-element Vector{String}:\n \"Kanpur\"\n \"Lucknow\"\n \"Bhuvneshwar\"\n \"Jaipur\"\n \"Ranchi\"\n \"Dehradoon\"\n\njulia> df1\n6×5 DataFrame\n Row │ Age    Sex          Job    Customers  City\n     │ Int64  String       Int64  String     String\n─────┼───────────────────────────────────────────────────\n   1 │    85  male             2  Rohit      Kanpur\n   2 │    89  female           2  Akshat     Lucknow\n   3 │    78  male             4  Rahul      Bhuvneshwar\n   4 │    58  transgender      2  Aayush     Jaipur\n   5 │    96  female           2  Prateek    Ranchi\n   6 │    68  female           3  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The most common broadcasting assignment operation is when a scalar is used on the right-hand side, e.g.:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[:, 
3] .= 4 # an in-place replacement of values stored in column number 3 by 4\n6-element view(::Vector{Int64}, :) with eltype Int64:\n 4\n 4\n 4\n 4\n 4\n 4\n\njulia> df1\n6×5 DataFrame\n Row │ Age    Sex          Job    Customers  City\n     │ Int64  String       Int64  String     String\n─────┼───────────────────────────────────────────────────\n   1 │    85  male             4  Rohit      Kanpur\n   2 │    89  female           4  Akshat     Lucknow\n   3 │    78  male             4  Rahul      Bhuvneshwar\n   4 │    58  transgender      4  Aayush     Jaipur\n   5 │    96  female           4  Prateek    Ranchi\n   6 │    68  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For : row selector the broadcasting assignment operation works in-place, so the following operation throws an error:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[:, :Age] .= \"Economics\"\nERROR: MethodError: Cannot `convert` an object of type String to an object of type Int64","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We need to use ! 
instead as it replaces the old vector with a freshly allocated one:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df1[!, :Age] .= \"Economics\"\n6-element Vector{String}:\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n \"Economics\"\n\njulia> df1\n6×5 DataFrame\n Row │ Age        Sex          Job    Customers  City\n     │ String     String       Int64  String     String\n─────┼───────────────────────────────────────────────────────\n   1 │ Economics  male             4  Rohit      Kanpur\n   2 │ Economics  female           4  Akshat     Lucknow\n   3 │ Economics  male             4  Rahul      Bhuvneshwar\n   4 │ Economics  transgender      4  Aayush     Jaipur\n   5 │ Economics  female           4  Prateek    Ranchi\n   6 │ Economics  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"There are some scenarios in DataFrames.jl where we naturally want broadcasting-like behaviour, but the . operation cannot be used. In such cases so-called pseudo-broadcasting is performed for user convenience. We have already seen it in the examples of the DataFrame constructor. Below we show pseudo-broadcasting at work in the insertcols! function, which inserts a column into a data frame at an arbitrary position.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the example below we are creating a column :Country with the insertcols! function. 
Since we pass a scalar \"India\" value of the column it is broadcasted to all rows in the output data frame:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> insertcols!(df1, 1, :Country => \"India\")\n6×6 DataFrame\n Row │ Country  Age        Sex          Job    Customers  City\n     │ String   String     String       Int64  String     String\n─────┼────────────────────────────────────────────────────────────────\n   1 │ India    Economics  male             4  Rohit      Kanpur\n   2 │ India    Economics  female           4  Akshat     Lucknow\n   3 │ India    Economics  male             4  Rahul      Bhuvneshwar\n   4 │ India    Economics  transgender      4  Aayush     Jaipur\n   5 │ India    Economics  female           4  Prateek    Ranchi\n   6 │ India    Economics  female           4  Anam       Dehradoon","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can pass a column location where you want to put the inserted column as a second argument to the insertcols! 
function:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> insertcols!(df1, 4, :b => exp(4))\n6×7 DataFrame\n Row │ Country  Age        Sex          b        Job    Customers  City        ⋯\n     │ String   String     String       Float64  Int64  String     String      ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ India    Economics  male         54.5982      4  Rohit      Kanpur      ⋯\n   2 │ India    Economics  female       54.5982      4  Akshat     Lucknow\n   3 │ India    Economics  male         54.5982      4  Rahul      Bhuvneshwar\n   4 │ India    Economics  transgender  54.5982      4  Aayush     Jaipur\n   5 │ India    Economics  female       54.5982      4  Prateek    Ranchi      ⋯\n   6 │ India    Economics  female       54.5982      4  Anam       Dehradoon","category":"page"},{"location":"man/basics/#Not,-Between,-Cols,-and-All-Column-Selectors","page":"First Steps with DataFrames.jl","title":"Not, Between, Cols, and All Column Selectors","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can use Not, Between, Cols, and All selectors in more complex column selection scenarios:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Not selector (from the InvertedIndices.jl package) allows us to specify the columns we want to exclude from the resulting data frame. We can put any valid other column selector inside Not;\nBetween selector allows us to specify a range of columns (we can pass the start and stop column using any of the single column selector syntaxes);\nCols(...) 
selector picks a union of other selectors passed as its arguments;\nAll() allows us to select all columns of DataFrame; this is the same as passing :;\nregular expression to select columns whose names match it.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Let us give some examples of these selectors.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Drop :Age column:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Not(:Age)]\n1000×9 DataFrame\n  Row │ id     Sex      Job    Housing  Saving accounts  Checking account  Cre ⋯\n      │ Int64  String7  Int64  String7  String15         String15          Int ⋯\n──────┼─────────────────────────────────────────────────────────────────────────\n    1 │     0  male         2  own      NA               little                ⋯\n    2 │     1  female       2  own      little           moderate\n    3 │     2  male         1  own      little           NA\n    4 │     3  male         2  free     little           little\n    5 │     4  male         2  free     little           little                ⋯\n    6 │     5  male         1  free     NA               NA\n    7 │     6  male         2  own      quite rich       NA\n    8 │     7  male         3  rent     little           moderate\n  ⋮   │   ⋮       ⋮       ⋮       ⋮            ⋮                ⋮              ⋱\n  994 │   993  male         3  own      little           little                ⋯\n  995 │   994  male         2  own      NA               NA\n  996 │   995  female       1  own      little           NA\n  997 │   996  male         3  own      little           little\n  998 │   997  male         2  own      little           NA                    ⋯\n  999 │   998  male         2  free     little  
         little\n 1000 │   999  male         2  own      moderate         moderate\n                                                  3 columns and 985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Select columns starting from :Sex and ending at :Housing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Between(:Sex, :Housing)]\n1000×3 DataFrame\n  Row │ Sex     Job    Housing\n      │ String  Int64  String\n──────┼────────────────────────\n    1 │ male        2  own\n    2 │ female      2  own\n    3 │ male        1  own\n    4 │ male        2  free\n    5 │ male        2  free\n    6 │ male        1  free\n    7 │ male        2  own\n    8 │ male        3  rent\n  ⋮   │   ⋮       ⋮       ⋮\n  994 │ male        3  own\n  995 │ male        2  own\n  996 │ female      1  own\n  997 │ male        3  own\n  998 │ male        2  own\n  999 │ male        2  free\n 1000 │ male        2  own\n               985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the example below Cols selector is picking a union of \"Age\" and Between(\"Sex\", \"Job\") selectors passed as its arguments:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[:, Cols(\"Age\", Between(\"Sex\", \"Job\"))]\n1000×3 DataFrame\n  Row │ Age    Sex      Job\n      │ Int64  String7  Int64\n──────┼───────────────────────\n    1 │    67  male         2\n    2 │    22  female       2\n    3 │    49  male         1\n    4 │    45  male         2\n    5 │    53  male         2\n    6 │    35  male         1\n    7 │    53  male         2\n    8 │    35  male         3\n  ⋮   │   ⋮       ⋮       ⋮\n  994 │    30  
male         3\n  995 │    50  male         2\n  996 │    31  female       1\n  997 │    40  male         3\n  998 │    38  male         2\n  999 │    23  male         2\n 1000 │    27  male         2\n              985 rows omitted","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can also use Regex (regular expressions) to select columns. In the example below we select columns that have \"S\" in their name and also we use Not to drop row number 5:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> german[Not(5), r\"S\"]\n999×2 DataFrame\n Row │ Sex      Saving accounts\n     │ String7  String15\n─────┼──────────────────────────\n   1 │ male     NA\n   2 │ female   little\n   3 │ male     little\n   4 │ male     little\n   5 │ male     NA\n   6 │ male     quite rich\n   7 │ male     little\n   8 │ male     rich\n  ⋮  │    ⋮            ⋮\n 993 │ male     little\n 994 │ male     NA\n 995 │ female   little\n 996 │ male     little\n 997 │ male     little\n 998 │ male     little\n 999 │ male     moderate\n                984 rows omitted","category":"page"},{"location":"man/basics/#Manipulation-Functions","page":"First Steps with DataFrames.jl","title":"Manipulation Functions","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The seven functions below can be used to manipulate data frames by applying operations to them.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The functions without a ! in their name will create a new data frame based on the source data frame, so you will probably want to store the new data frame to a new variable name, e.g. new_df = transform(source_df, operation). The functions with a ! 
at the end of their name will modify an existing data frame in-place, so there is typically no need to assign the result to a variable, e.g. transform!(source_df, operation) instead of source_df = transform(source_df, operation).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The number of columns and rows in the resultant data frame varies depending on the manipulation function employed.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Function Memory Usage Column Retention Row Retention\ntransform Creates a new data frame. Retains original and resultant columns. Retains same number of rows as original data frame.\ntransform! Modifies an existing data frame. Retains original and resultant columns. Retains same number of rows as original data frame.\nselect Creates a new data frame. Retains only resultant columns. Retains same number of rows as original data frame.\nselect! Modifies an existing data frame. Retains only resultant columns. Retains same number of rows as original data frame.\nsubset Creates a new data frame. Retains original columns. Retains only rows where condition is true.\nsubset! Modifies an existing data frame. Retains original columns. Retains only rows where condition is true.\ncombine Creates a new data frame. Retains only resultant columns. Retains only resultant rows.","category":"page"},{"location":"man/basics/#Constructing-Operations","page":"First Steps with DataFrames.jl","title":"Constructing Operations","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"All of the functions above use the same syntax which is commonly manipulation_function(dataframe, operation). 
The operation argument defines the operation to be applied to the source dataframe, and it can take any of the following common forms explained below:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector : selects source column(s) without manipulating or renaming them","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Examples: :a, [:a, :b], All(), Not(:a)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector => operation_function : passes source column(s) as arguments to a function and automatically names the resulting column(s)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Examples: :a => sum, [:a, :b] => +, :a => ByRow(==(3))","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector => operation_function => new_column_names : passes source column(s) as arguments to a function and names the resulting column(s) new_column_names","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Examples: :a => sum => :sum_of_a, [:a, :b] => (+) => :a_plus_b","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"(Not available for subset or subset!)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector => new_column_names : renames a source column, or splits a column containing collection elements into multiple new 
columns","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Examples: :a => :new_a, :a_b => [:a, :b], :nt => AsTable","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"(Not available for subset or subset!)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The => operator constructs a Pair, which is a type to link one object to another. (Pairs are commonly used to create elements of a Dictionary.) In DataFrames.jl manipulation functions, Pair arguments are used to define column operations to be performed. The examples shown above will be explained in more detail later.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The manipulation functions also have methods for applying multiple operations. See the later sections Applying Multiple Operations per Manipulation and Broadcasting Operation Pairs for more information.","category":"page"},{"location":"man/basics/#source_column_selector","page":"First Steps with DataFrames.jl","title":"source_column_selector","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Inside an operation, source_column_selector is usually a column name or column index which identifies a data frame column.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector may be used as the entire operation with select or select! 
to isolate or reorder columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a = [1, 2, 3], b = [4, 5, 6], c = [7, 8, 9])\n3×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9\n\njulia> select(df, :b)\n3×1 DataFrame\n Row │ b\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     5\n   3 │     6\n\njulia> select(df, \"b\")\n3×1 DataFrame\n Row │ b\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     5\n   3 │     6\n\njulia> select(df, 2)\n3×1 DataFrame\n Row │ b\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     5\n   3 │     6","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector may also be used as the entire operation with subset or subset! if the source column contains Bool values.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(\n           name = [\"Scott\", \"Jill\", \"Erica\", \"Jimmy\"],\n           minor = [false, true, false, true],\n       )\n4×2 DataFrame\n Row │ name    minor\n     │ String  Bool\n─────┼───────────────\n   1 │ Scott   false\n   2 │ Jill     true\n   3 │ Erica   false\n   4 │ Jimmy    true\n\njulia> subset(df, :minor)\n2×2 DataFrame\n Row │ name    minor\n     │ String  Bool\n─────┼───────────────\n   1 │ Jill     true\n   2 │ Jimmy    true","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"source_column_selector may instead be a collection of columns such as a vector, a regular expression, a Not, Between, All, or Cols expression, or a :. 
See the Indexing API for the full list of possible values with references.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nThe Julia parser sometimes prevents : from being used by itself. If you get ERROR: syntax: whitespace not allowed after \":\" used for quoting, try using All(), Cols(:), or (:) instead to select all columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(\n           id = [1, 2, 3],\n           first_name = [\"José\", \"Emma\", \"Nathan\"],\n           last_name = [\"Garcia\", \"Marino\", \"Boyer\"],\n           age = [61, 24, 33]\n       )\n3×4 DataFrame\n Row │ id     first_name  last_name  age\n     │ Int64  String      String     Int64\n─────┼─────────────────────────────────────\n   1 │     1  José        Garcia        61\n   2 │     2  Emma        Marino        24\n   3 │     3  Nathan      Boyer         33\n\njulia> select(df, [:last_name, :first_name])\n3×2 DataFrame\n Row │ last_name  first_name\n     │ String     String\n─────┼───────────────────────\n   1 │ Garcia     José\n   2 │ Marino     Emma\n   3 │ Boyer      Nathan\n\njulia> select(df, r\"name\")\n3×2 DataFrame\n Row │ first_name  last_name\n     │ String      String\n─────┼───────────────────────\n   1 │ José        Garcia\n   2 │ Emma        Marino\n   3 │ Nathan      Boyer\n\njulia> select(df, Not(:id))\n3×3 DataFrame\n Row │ first_name  last_name  age\n     │ String      String     Int64\n─────┼──────────────────────────────\n   1 │ José        Garcia        61\n   2 │ Emma        Marino        24\n   3 │ Nathan      Boyer         33\n\njulia> select(df, Between(2,4))\n3×3 DataFrame\n Row │ first_name  last_name  age\n     │ String      String     Int64\n─────┼──────────────────────────────\n   1 │ José        Garcia        61\n   2 │ Emma        Marino        24\n   
3 │ Nathan      Boyer         33\n\njulia> df2 = DataFrame(\n           name = [\"Scott\", \"Jill\", \"Erica\", \"Jimmy\"],\n           minor = [false, true, false, true],\n           male = [true, false, false, true],\n       )\n4×3 DataFrame\n Row │ name    minor  male\n     │ String  Bool   Bool\n─────┼──────────────────────\n   1 │ Scott   false   true\n   2 │ Jill     true  false\n   3 │ Erica   false  false\n   4 │ Jimmy    true   true\n\njulia> subset(df2, [:minor, :male])\n1×3 DataFrame\n Row │ name    minor  male\n     │ String  Bool   Bool\n─────┼─────────────────────\n   1 │ Jimmy    true  true","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nUsing Symbol in source_column_selector will perform slightly faster than using string. However, a string is convenient when column names contain spaces.All elements of source_column_selector must be the same type (unless wrapped in Cols), e.g. subset(df2, [:minor, \"male\"]) will error since Symbol and string are used simultaneously.","category":"page"},{"location":"man/basics/#operation_function","page":"First Steps with DataFrames.jl","title":"operation_function","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Inside an operation pair, operation_function is a function which operates on data frame columns passed as vectors. When multiple columns are selected by source_column_selector, the operation_function will receive the columns as separate positional arguments in the order they were selected, e.g. 
f(column1, column2, column3).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a = [1, 2, 3], b = [4, 5, 4])\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      4\n\njulia> combine(df, :a => sum)\n1×1 DataFrame\n Row │ a_sum\n     │ Int64\n─────┼───────\n   1 │     6\n\njulia> transform(df, :b => maximum) # `transform` and `select` copy scalar result to all rows\n3×3 DataFrame\n Row │ a      b      b_maximum\n     │ Int64  Int64  Int64\n─────┼─────────────────────────\n   1 │     1      4          5\n   2 │     2      5          5\n   3 │     3      4          5\n\njulia> transform(df, [:b, :a] => -) # vector subtraction is okay\n3×3 DataFrame\n Row │ a      b      b_a_-\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      3\n   2 │     2      5      3\n   3 │     3      4      1\n\njulia> transform(df, [:a, :b] => *) # vector multiplication is not defined\nERROR: MethodError: no method matching *(::Vector{Int64}, ::Vector{Int64})","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Don't worry! There is a quick fix for the previous error. If you want to apply a function to each element in a column instead of to the entire column vector, then you can wrap your element-wise function in ByRow like ByRow(my_elementwise_function). 
This will apply my_elementwise_function to every element in the column and then collect the results back into a vector.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(df, [:a, :b] => ByRow(*))\n3×3 DataFrame\n Row │ a      b      a_b_*\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      4\n   2 │     2      5     10\n   3 │     3      4     12\n\njulia> transform(df, Cols(:) => ByRow(max))\n3×3 DataFrame\n Row │ a      b      a_b_max\n     │ Int64  Int64  Int64\n─────┼───────────────────────\n   1 │     1      4        4\n   2 │     2      5        5\n   3 │     3      4        4\n\njulia> f(x) = x + 1\nf (generic function with 1 method)\n\njulia> transform(df, :a => ByRow(f))\n3×3 DataFrame\n Row │ a      b      a_f\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      2\n   2 │     2      5      3\n   3 │     3      4      4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Alternatively, you may just want to define the function itself so it broadcasts over vectors.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> g(x) = x .+ 1\ng (generic function with 1 method)\n\njulia> transform(df, :a => g)\n3×3 DataFrame\n Row │ a      b      a_g\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      2\n   2 │     2      5      3\n   3 │     3      4      4\n\njulia> h(x, y) = x .+ y .+ 1\nh (generic function with 1 method)\n\njulia> transform(df, [:a, :b] => h)\n3×3 DataFrame\n Row │ a      b      a_b_h\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      6\n   2 │     2      5      8\n   3 │     3      4      8","category":"page"},{"location":"man/basics/","page":"First Steps 
with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Anonymous functions are a convenient way to define and use an operation_function all within the manipulation function call.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> select(df, :a => ByRow(x -> x + 1))\n3×1 DataFrame\n Row │ a_function\n     │ Int64\n─────┼────────────\n   1 │          2\n   2 │          3\n   3 │          4\n\njulia> transform(df, [:a, :b] => ByRow((x, y) -> 2x + y))\n3×3 DataFrame\n Row │ a      b      a_b_function\n     │ Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4             6\n   2 │     2      5             9\n   3 │     3      4            10\n\njulia> subset(df, :b => ByRow(x -> x < 5))\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      4\n\njulia> subset(df, :b => ByRow(<(5))) # shorter version of the previous\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\noperation_functions within subset or subset! function calls must return a Boolean vector. true elements in the Boolean vector will determine which rows are retained in the resulting data frame.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As demonstrated above, DataFrame columns are usually passed from source_column_selector to operation_function as one or more vector arguments. 
However, when AsTable(source_column_selector) is used, the selected columns are collected and passed as a single NamedTuple to operation_function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This is often useful when your operation_function is defined to operate on a single collection argument rather than on multiple positional arguments. The distinction is somewhat similar to the difference between the built-in min and minimum functions. min is defined to find the minimum value among multiple positional arguments, while minimum is defined to find the minimum value among the elements of a single collection argument.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a = 1:2, b = 3:4, c = 5:6, d = 2:-1:1)\n2×4 DataFrame\n Row │ a      b      c      d\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      3      5      2\n   2 │     2      4      6      1\n\njulia> select(df, Cols(:) => ByRow(min)) # min operates on multiple arguments\n2×1 DataFrame\n Row │ a_b_etc_min\n     │ Int64\n─────┼─────────────\n   1 │           1\n   2 │           1\n\njulia> select(df, AsTable(:) => ByRow(minimum)) # minimum operates on a collection\n2×1 DataFrame\n Row │ a_b_etc_minimum\n     │ Int64\n─────┼─────────────────\n   1 │               1\n   2 │               1\n\njulia> select(df, [:a,:b] => ByRow(+)) # `+` operates on multiple arguments\n2×1 DataFrame\n Row │ a_b_+\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     6\n\njulia> select(df, AsTable([:a,:b]) => ByRow(sum)) # `sum` operates on a collection\n2×1 DataFrame\n Row │ a_b_sum\n     │ Int64\n─────┼─────────\n   1 │       4\n   2 │       6\n\njulia> using Statistics # contains the `mean` function\n\njulia> select(df, AsTable(Between(:b, :d)) => ByRow(mean)) # `mean` operates on a 
collection\n2×1 DataFrame\n Row │ b_c_d_mean\n     │ Float64\n─────┼────────────\n   1 │    3.33333\n   2 │    3.66667","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"AsTable can also be used to pass columns to a function which operates on fields of a NamedTuple.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a = 1:2, b = 3:4, c = 5:6, d = 7:8)\n2×4 DataFrame\n Row │ a      b      c      d\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      3      5      7\n   2 │     2      4      6      8\n\njulia> f(nt) = nt.a + nt.d\nf (generic function with 1 method)\n\njulia> transform(df, AsTable(:) => ByRow(f))\n2×5 DataFrame\n Row │ a      b      c      d      a_b_etc_f\n     │ Int64  Int64  Int64  Int64  Int64\n─────┼───────────────────────────────────────\n   1 │     1      3      5      7          8\n   2 │     2      4      6      8         10","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"As demonstrated above, in the source_column_selector => operation_function operation pair form, the results of an operation will be placed into a new column with an automatically-generated name based on the operation; the new column name will be the operation_function name appended to the source column name(s) with an underscore.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This automatic column naming behavior can be avoided in two ways. First, the operation result can be placed back into the original column with the original column name by switching the keyword argument renamecols from its default value (true) to renamecols=false. 
This option prevents the function name from being appended to the column name as it usually would be.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> transform(df, :a => ByRow(x->x+10), renamecols=false) # add 10 in-place\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │    11      5\n   2 │    12      6\n   3 │    13      7\n   4 │    14      8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The second method to avoid the default manipulation column naming is to specify your own new_column_names.","category":"page"},{"location":"man/basics/#new_column_names","page":"First Steps with DataFrames.jl","title":"new_column_names","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"new_column_names can be included at the end of an operation pair to specify the name of the new column(s). 
new_column_names may be a symbol, string, function, vector of symbols, vector of strings, or AsTable.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> transform(df, Cols(:) => ByRow(+) => :c)\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      6\n   2 │     2      6      8\n   3 │     3      7     10\n   4 │     4      8     12\n\njulia> transform(df, Cols(:) => ByRow(+) => \"a+b\")\n4×3 DataFrame\n Row │ a      b      a+b\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      6\n   2 │     2      6      8\n   3 │     3      7     10\n   4 │     4      8     12\n\njulia> transform(df, :a => ByRow(x->x+10) => \"a+10\")\n4×3 DataFrame\n Row │ a      b      a+10\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5     11\n   2 │     2      6     12\n   3 │     3      7     13\n   4 │     4      8     14","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"The source_column_selector => new_column_names operation form can be used to rename columns without an intermediate function. However, there are rename and rename! 
functions, which accept similar syntax, that tend to be more useful for this operation.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> transform(df, :a => :apple) # adds column `apple`\n4×3 DataFrame\n Row │ a      b      apple\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      1\n   2 │     2      6      2\n   3 │     3      7      3\n   4 │     4      8      4\n\njulia> select(df, :a => :apple) # retains only column `apple`\n4×1 DataFrame\n Row │ apple\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n   4 │     4\n\njulia> rename(df, :a => :apple) # renames column `a` to `apple` in-place\n4×2 DataFrame\n Row │ apple  b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If new_column_names already exist in the source data frame, those columns will be replaced in the existing column location rather than being added to the end. 
This can be done by manually specifying an existing column name or by using the renamecols=false keyword argument.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> transform(df, :b => (x -> x .+ 10))  # automatic new column and column name\n4×3 DataFrame\n Row │ a      b      b_function\n     │ Int64  Int64  Int64\n─────┼──────────────────────────\n   1 │     1      5          15\n   2 │     2      6          16\n   3 │     3      7          17\n   4 │     4      8          18\n\njulia> transform(df, :b => (x -> x .+ 10), renamecols=false)  # transform column in-place\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     15\n   2 │     2     16\n   3 │     3     17\n   4 │     4     18\n\njulia> transform(df, :b => (x -> x .+ 10) => :a)  # replace column :a\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │    15      5\n   2 │    16      6\n   3 │    17      7\n   4 │    18      8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Actually, renamecols=false just prevents the function name from being appended to the final column name such that the operation is usually returned to the same column.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(df, [:a, :b] => +)  # new column name is all source columns and function name\n4×3 DataFrame\n Row │ a      b      a_b_+\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      6\n   2 │     2      6      8\n   3 │     3      7     10\n   4 │     4      8     
12\n\njulia> transform(df, [:a, :b] => +, renamecols=false)  # same as above but with no function name\n4×3 DataFrame\n Row │ a      b      a_b\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      6\n   2 │     2      6      8\n   3 │     3      7     10\n   4 │     4      8     12\n\njulia> transform(df, [:a, :b] => (+) => :a)  # manually overwrite column :a (see Note below about parentheses)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     6      5\n   2 │     8      6\n   3 │    10      7\n   4 │    12      8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the source_column_selector => operation_function => new_column_names operation form, new_column_names may also be a renaming function which operates on a string to create the destination column names programmatically.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> add_prefix(s) = \"new_\" * s\nadd_prefix (generic function with 1 method)\n\njulia> transform(df, :a => (x -> 10 .* x) => add_prefix) # with named renaming function\n4×3 DataFrame\n Row │ a      b      new_a\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5     10\n   2 │     2      6     20\n   3 │     3      7     30\n   4 │     4      8     40\n\njulia> transform(df, :a => (x -> 10 .* x) => (s -> \"new_\" * s)) # with anonymous renaming function\n4×3 DataFrame\n Row │ a      b      new_a\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5     10\n   2 │     2      6     20\n   3 │     3      7     30\n   4 │     4      8     
40","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nIt is a good idea to wrap anonymous functions in parentheses to avoid the => operator accidentally becoming part of the anonymous function. The examples above do not work correctly without the parentheses!\njulia> transform(df, :a => x -> 10 .* x => add_prefix)  # Not what we wanted!\n4×3 DataFrame\n Row │ a      b      a_function\n     │ Int64  Int64  Pair…\n─────┼────────────────────────────────────────────\n   1 │     1      5  [10, 20, 30, 40]=>add_prefix\n   2 │     2      6  [10, 20, 30, 40]=>add_prefix\n   3 │     3      7  [10, 20, 30, 40]=>add_prefix\n   4 │     4      8  [10, 20, 30, 40]=>add_prefix\njulia> transform(df, :a => x -> 10 .* x => s -> \"new_\" * s)  # Not what we wanted!\n4×3 DataFrame\n Row │ a      b      a_function\n     │ Int64  Int64  Pair…\n─────┼─────────────────────────────────────\n   1 │     1      5  [10, 20, 30, 40]=>#18\n   2 │     2      6  [10, 20, 30, 40]=>#18\n   3 │     3      7  [10, 20, 30, 40]=>#18\n   4 │     4      8  [10, 20, 30, 40]=>#18","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"A renaming function will not work in the source_column_selector => new_column_names operation form because a function in the second element of the operation pair is assumed to take the source_column_selector => operation_function operation form. 
To work around this limitation, use the source_column_selector => operation_function => new_column_names operation form with identity as the operation_function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(df, :a => add_prefix)\nERROR: MethodError: no method matching *(::String, ::Vector{Int64})\n\njulia> transform(df, :a => identity => add_prefix)\n4×3 DataFrame\n Row │ a      b      new_a\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      5      1\n   2 │     2      6      2\n   3 │     3      7      3\n   4 │     4      8      4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this case though, it is probably again more useful to use the rename or rename! function rather than one of the manipulation functions in order to rename in-place and avoid the intermediate operation_function.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> rename(add_prefix, df)  # rename all columns with a function\n4×2 DataFrame\n Row │ new_a  new_b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> rename(add_prefix, df; cols=:a)  # rename some columns with a function\n4×2 DataFrame\n Row │ new_a  b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the source_column_selector => new_column_names operation form, only a single source column may be selected per operation, so why is new_column_names plural? 
It is possible to split the data contained inside a single column into multiple new columns by supplying a vector of strings or symbols as new_column_names.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(data = [(1,2), (3,4)]) # vector of tuples\n2×1 DataFrame\n Row │ data\n     │ Tuple…\n─────┼────────\n   1 │ (1, 2)\n   2 │ (3, 4)\n\njulia> transform(df, :data => [:first, :second]) # manual naming\n2×3 DataFrame\n Row │ data    first  second\n     │ Tuple…  Int64  Int64\n─────┼───────────────────────\n   1 │ (1, 2)      1       2\n   2 │ (3, 4)      3       4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"This kind of data splitting can even be done automatically with AsTable.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(df, :data => AsTable) # default automatic naming with tuples\n2×3 DataFrame\n Row │ data    x1     x2\n     │ Tuple…  Int64  Int64\n─────┼──────────────────────\n   1 │ (1, 2)      1      2\n   2 │ (3, 4)      3      4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"If a data frame column contains NamedTuples, then AsTable will preserve the field names.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(data = [(a=1,b=2), (a=3,b=4)]) # vector of named tuples\n2×1 DataFrame\n Row │ data\n     │ NamedTup…\n─────┼────────────────\n   1 │ (a = 1, b = 2)\n   2 │ (a = 3, b = 4)\n\njulia> transform(df, :data => AsTable) # keeps names from named tuples\n2×3 DataFrame\n Row │ data            a      b\n     │ NamedTup…       Int64  
Int64\n─────┼──────────────────────────────\n   1 │ (a = 1, b = 2)      1      2\n   2 │ (a = 3, b = 4)      3      4","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nTo pack multiple columns into a single column of NamedTuples (reverse of the above operation) apply the identity function ByRow, e.g. transform(df, AsTable([:a, :b]) => ByRow(identity) => :data).","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Renaming functions also work for multi-column transformations, but they must operate on a vector of strings.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(data = [(1,2), (3,4)])\n2×1 DataFrame\n Row │ data\n     │ Tuple…\n─────┼────────\n   1 │ (1, 2)\n   2 │ (3, 4)\n\njulia> new_names(v) = [\"primary \", \"secondary \"] .* v\nnew_names (generic function with 1 method)\n\njulia> transform(df, :data => identity => new_names)\n2×3 DataFrame\n Row │ data    primary data  secondary data\n     │ Tuple…  Int64         Int64\n─────┼──────────────────────────────────────\n   1 │ (1, 2)             1               2\n   2 │ (3, 4)             3               4","category":"page"},{"location":"man/basics/#Applying-Multiple-Operations-per-Manipulation","page":"First Steps with DataFrames.jl","title":"Applying Multiple Operations per Manipulation","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"All data frame manipulation functions can accept multiple operation pairs at once using any of the following methods:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"manipulation_function(dataframe, operation1, 
operation2)   : multiple arguments\nmanipulation_function(dataframe, [operation1, operation2]) : vector argument\nmanipulation_function(dataframe, [operation1 operation2])  : matrix argument","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Passing multiple operations is especially useful for the select, select!, and combine manipulation functions, since they only retain columns which are a result of the passed operations.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a = 1:4, b = [50,50,60,60], c = [\"hat\",\"bat\",\"cat\",\"dog\"])\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     1     50  hat\n   2 │     2     50  bat\n   3 │     3     60  cat\n   4 │     4     60  dog\n\njulia> combine(df, :a => maximum, :b => sum, :c => join) # 3 combine operations\n1×3 DataFrame\n Row │ a_maximum  b_sum  c_join\n     │ Int64      Int64  String\n─────┼────────────────────────────────\n   1 │         4    220  hatbatcatdog\n\njulia> select(df, :c, :b, :a) # re-order columns\n4×3 DataFrame\n Row │ c       b      a\n     │ String  Int64  Int64\n─────┼──────────────────────\n   1 │ hat        50      1\n   2 │ bat        50      2\n   3 │ cat        60      3\n   4 │ dog        60      4\n\njulia> select(df, :b, :) # `:` here means all other columns\n4×3 DataFrame\n Row │ b      a      c\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │    50      1  hat\n   2 │    50      2  bat\n   3 │    60      3  cat\n   4 │    60      4  dog\n\njulia> select(\n           df,\n           :c => (x -> \"a \" .* x) => :one_c,\n           :a => (x -> 100x),\n           :b,\n           renamecols=false\n       ) # can mix operation forms\n4×3 DataFrame\n Row │ one_c   a      b\n     │ String  Int64  
Int64\n─────┼──────────────────────\n   1 │ a hat     100     50\n   2 │ a bat     200     50\n   3 │ a cat     300     60\n   4 │ a dog     400     60\n\njulia> select(\n           df,\n           :c => ByRow(reverse),\n           :c => ByRow(uppercase)\n       ) # multiple operations on same column\n4×2 DataFrame\n Row │ c_reverse  c_uppercase\n     │ String     String\n─────┼────────────────────────\n   1 │ tah        HAT\n   2 │ tab        BAT\n   3 │ tac        CAT\n   4 │ god        DOG","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the last two examples, the manipulation function arguments were split across multiple lines. This is a good way to make manipulations with many operations more readable.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Passing multiple operations to subset or subset! is an easy way to narrow in on a particular row of data.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> subset(\n           df,\n           :b => ByRow(==(60)),\n           :c => ByRow(contains(\"at\"))\n       ) # rows with 60 and \"at\"\n1×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     3     60  cat","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Note that all operations within a single manipulation must use the data as it existed before the function call i.e. 
you cannot use newly created columns for subsequent operations within the same manipulation.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(\n           df,\n           [:a, :b] => ByRow(+) => :d,\n           :d => (x -> x ./ 2),\n       ) # requires two separate transformations\nERROR: ArgumentError: column name :d not found in the data frame; existing most similar names are: :a, :b and :c\n\njulia> new_df = transform(df, [:a, :b] => ByRow(+) => :d)\n4×4 DataFrame\n Row │ a      b      c       d\n     │ Int64  Int64  String  Int64\n─────┼─────────────────────────────\n   1 │     1     50  hat        51\n   2 │     2     50  bat        52\n   3 │     3     60  cat        63\n   4 │     4     60  dog        64\n\njulia> transform!(new_df, :d => (x -> x ./ 2) => :d_2)\n4×5 DataFrame\n Row │ a      b      c       d      d_2\n     │ Int64  Int64  String  Int64  Float64\n─────┼──────────────────────────────────────\n   1 │     1     50  hat        51     25.5\n   2 │     2     50  bat        52     26.0\n   3 │     3     60  cat        63     31.5\n   4 │     4     60  dog        64     32.0","category":"page"},{"location":"man/basics/#Broadcasting-Operation-Pairs","page":"First Steps with DataFrames.jl","title":"Broadcasting Operation Pairs","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Broadcasting pairs with .=> is often a convenient way to generate multiple similar operations to be applied within a single manipulation. Broadcasting within the Pair of an operation is no different than broadcasting in base Julia. The broadcasting .=> will be expanded into a vector of pairs ([operation1, operation2, ...]), and this expansion will occur before the manipulation function is invoked. 
Then the manipulation function will use the manipulation_function(dataframe, [operation1, operation2, ...]) method. This process will be explained in more detail below.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To illustrate these concepts, let us first examine the Type of a basic Pair. In DataFrames.jl, a symbol, string, or integer may be used to select a single column. Some Pairs with these types are below.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> typeof(:x => :a)\nPair{Symbol, Symbol}\n\njulia> typeof(\"x\" => \"a\")\nPair{String, String}\n\njulia> typeof(1 => \"a\")\nPair{Int64, String}","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Any of the Pairs above could be used to rename the first column of the data frame below to a.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(x = 1:3, y = 4:6)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> select(df, :x => :a)\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> select(df, 1 => \"a\")\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"What should we do if we want to keep and rename both the x and y column? One option is to supply a Vector of operation Pairs to select. 
select will process all of these operations in order.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> [\"x\" => \"a\", \"y\" => \"b\"]\n2-element Vector{Pair{String, String}}:\n \"x\" => \"a\"\n \"y\" => \"b\"\n\njulia> select(df, [\"x\" => \"a\", \"y\" => \"b\"])\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We can use broadcasting to simplify the syntax above.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> [\"x\", \"y\"] .=> [\"a\", \"b\"]\n2-element Vector{Pair{String, String}}:\n \"x\" => \"a\"\n \"y\" => \"b\"\n\njulia> select(df, [\"x\", \"y\"] .=> [\"a\", \"b\"])\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Notice that select sees the same Vector{Pair{String, String}} operation argument whether the individual pairs are written out explicitly or constructed with broadcasting. The broadcasting is applied before the call to select.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> [\"x\" => \"a\", \"y\" => \"b\"] == ([\"x\", \"y\"] .=> [\"a\", \"b\"])\ntrue","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"note: Note\nThese operation pairs (or vector of pairs) can be given variable names. 
This is uncommon in practice but could be helpful for intermediate inspection and testing.\ndf = DataFrame(x = 1:3, y = 4:6)       # create data frame\noperation = [\"x\", \"y\"] .=> [\"a\", \"b\"]  # save operation to variable\ntypeof(operation)                      # check type of operation\nfirst(operation)                       # check first pair in operation\nlast(operation)                        # check last pair in operation\nselect(df, operation)                  # manipulate `df` with `operation`","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In Julia, a non-vector broadcasted with a vector will be repeated in each resultant pair element.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> [\"x\", \"y\"] .=> :a    # :a is repeated\n2-element Vector{Pair{String, Symbol}}:\n \"x\" => :a\n \"y\" => :a\n\njulia> 1 .=> [:a, :b]       # 1 is repeated\n2-element Vector{Pair{Int64, Symbol}}:\n 1 => :a\n 1 => :b","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"We can use this fact to easily broadcast an operation_function to multiple columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> f(x) = 2 * x\nf (generic function with 1 method)\n\njulia> [\"x\", \"y\"] .=> f  # f is repeated\n2-element Vector{Pair{String, typeof(f)}}:\n \"x\" => f\n \"y\" => f\n\njulia> select(df, [\"x\", \"y\"] .=> f)  # apply f with automatic column renaming\n3×2 DataFrame\n Row │ x_f    y_f\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      8\n   2 │     4     10\n   3 │     6     12\n\njulia> [\"x\", \"y\"] .=> f .=> [\"a\", \"b\"]  # f is repeated\n2-element Vector{Pair{String, Pair{typeof(f), String}}}:\n \"x\" => (f => 
\"a\")\n \"y\" => (f => \"b\")\n\njulia> select(df, [\"x\", \"y\"] .=> f .=> [\"a\", \"b\"])  # apply f with manual column renaming\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      8\n   2 │     4     10\n   3 │     6     12","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"A renaming function can be applied to multiple columns in the same way. It will also be repeated in each operation Pair.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> newname(s::String) = s * \"_new\"\nnewname (generic function with 1 method)\n\njulia> [\"x\", \"y\"] .=> f .=> newname  # both f and newname are repeated\n2-element Vector{Pair{String, Pair{typeof(f), typeof(newname)}}}:\n \"x\" => (f => newname)\n \"y\" => (f => newname)\n\njulia> select(df, [\"x\", \"y\"] .=> f .=> newname)  # apply f then rename column with newname\n3×2 DataFrame\n Row │ x_new  y_new\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      8\n   2 │     4     10\n   3 │     6     12","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You can see from the type output above that a three element pair does not actually exist. A Pair (as the name implies) can only contain two elements. 
Thus, :x => :y => :z becomes a nested Pair, where :x is the first element and points to the Pair :y => :z, which is the second element.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> p = :x => :y => :z\n:x => (:y => :z)\n\njulia> p[1]\n:x\n\njulia> p[2]\n:y => :z\n\njulia> p[2][1]\n:y\n\njulia> p[2][2]\n:z\n\njulia> p[3] # there is no index 3 for a pair\nERROR: BoundsError: attempt to access Pair{Symbol, Pair{Symbol, Symbol}} at index [3]","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In the previous examples, the source columns have been individually selected. When broadcasting multiple columns to the same function, often similarities in the column names or position can be exploited to avoid tedious selection. Consider a data frame with temperature data at three different locations taken over time.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(Time = 1:4,\n                      Temperature1 = [20, 23, 25, 28],\n                      Temperature2 = [33, 37, 41, 44],\n                      Temperature3 = [15, 10, 4, 0])\n4×4 DataFrame\n Row │ Time   Temperature1  Temperature2  Temperature3\n     │ Int64  Int64         Int64         Int64\n─────┼─────────────────────────────────────────────────\n   1 │     1            20            33            15\n   2 │     2            23            37            10\n   3 │     3            25            41             4\n   4 │     4            28            44             0","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"To convert all of the temperature data in one transformation, we just need to define a conversion function and broadcast it to all of 
the \"Temperature\" columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> celsius_to_kelvin(x) = x + 273\ncelsius_to_kelvin (generic function with 1 method)\n\njulia> transform(\n           df,\n           Cols(r\"Temp\") .=> ByRow(celsius_to_kelvin),\n           renamecols = false\n       )\n4×4 DataFrame\n Row │ Time   Temperature1  Temperature2  Temperature3\n     │ Int64  Int64         Int64         Int64\n─────┼─────────────────────────────────────────────────\n   1 │     1           293           306           288\n   2 │     2           296           310           283\n   3 │     3           298           314           277\n   4 │     4           301           317           273","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Or, simultaneously changing the column names:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> rename_function(s) = \"Temperature $(last(s)) (K)\"\nrename_function (generic function with 1 method)\n\njulia> select(\n           df,\n           \"Time\",\n           Cols(r\"Temp\") .=> ByRow(celsius_to_kelvin) .=> rename_function\n       )\n4×4 DataFrame\n Row │ Time   Temperature 1 (K)  Temperature 2 (K)  Temperature 3 (K)\n     │ Int64  Int64              Int64              Int64\n─────┼────────────────────────────────────────────────────────────────\n   1 │     1                293                306                288\n   2 │     2                296                310                283\n   3 │     3                298                314                277\n   4 │     4                301                317                273","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with 
DataFrames.jl","text":"note: Notes\nNot(\"Time\") or 2:4 would have been equally good choices for source_column_selector in the above operations.\nDon't forget ByRow if your function is to be applied to elements rather than entire column vectors. Without ByRow, the manipulations above would have thrown ERROR: MethodError: no method matching +(::Vector{Int64}, ::Int64).\nRegular expression (r\"\") and : source_column_selectors must be wrapped in Cols to be properly broadcasted because otherwise the broadcasting occurs before the expression is expanded into a vector of matches.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"You could also broadcast different columns to different functions by supplying a vector of functions.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(a=1:4, b=5:8)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6\n   3 │     3      7\n   4 │     4      8\n\njulia> f1(x) = x .+ 1\nf1 (generic function with 1 method)\n\njulia> f2(x) = x ./ 10\nf2 (generic function with 1 method)\n\njulia> transform(df, [:a, :b] .=> [f1, f2])\n4×4 DataFrame\n Row │ a      b      a_f1   b_f2\n     │ Int64  Int64  Int64  Float64\n─────┼──────────────────────────────\n   1 │     1      5      2      0.5\n   2 │     2      6      3      0.6\n   3 │     3      7      4      0.7\n   4 │     4      8      5      0.8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"However, this form is not much more convenient than supplying multiple individual operations.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform(df, [:a => f1, :b => f2]) # 
same manipulation as previous\n4×4 DataFrame\n Row │ a      b      a_f1   b_f2\n     │ Int64  Int64  Int64  Float64\n─────┼──────────────────────────────\n   1 │     1      5      2      0.5\n   2 │     2      6      3      0.6\n   3 │     3      7      4      0.7\n   4 │     4      8      5      0.8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Perhaps more useful for broadcasting syntax is to apply multiple functions to multiple columns by changing the vector of functions to a 1-by-x matrix of functions. (Recall that a list, a vector, or a matrix of operation pairs are all valid for passing to the manipulation functions.)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> [:a, :b] .=> [f1 f2] # No comma `,` between f1 and f2\n2×2 Matrix{Pair{Symbol}}:\n :a=>f1  :a=>f2\n :b=>f1  :b=>f2\n\njulia> transform(df, [:a, :b] .=> [f1 f2]) # No comma `,` between f1 and f2\n4×6 DataFrame\n Row │ a      b      a_f1   b_f1   a_f2     b_f2\n     │ Int64  Int64  Int64  Int64  Float64  Float64\n─────┼──────────────────────────────────────────────\n   1 │     1      5      2      6      0.1      0.5\n   2 │     2      6      3      7      0.2      0.6\n   3 │     3      7      4      8      0.3      0.7\n   4 │     4      8      5      9      0.4      0.8","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"In this way, every combination of selected columns and functions will be applied.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Pair broadcasting is a simple but powerful tool that can be used in any of the manipulation functions listed under Manipulation Functions. 
Experiment for yourself to discover other useful operations.","category":"page"},{"location":"man/basics/#Additional-Resources","page":"First Steps with DataFrames.jl","title":"Additional Resources","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"More details and examples of operation pair syntax can be found in this blog post. (The official wording describing the syntax has changed since the blog post was written, but the examples are still illustrative. The operation pair syntax is sometimes referred to as the DataFrames.jl mini-language or Domain-Specific Language.)","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For additional syntax niceties, many users find the Chain.jl and DataFramesMeta.jl packages useful to help simplify manipulations that may be tedious with operation pairs alone.","category":"page"},{"location":"man/basics/#Approach-Comparison","page":"First Steps with DataFrames.jl","title":"Approach Comparison","text":"","category":"section"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"After that deep dive into Manipulation Functions, it is a good idea to review the alternative approaches covered in Getting and Setting Data in a Data Frame. Let us compare the approaches with a few examples.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"For simple operations, often getting/setting data with dot syntax is simpler than the equivalent data frame manipulation. 
Here we will add the two columns of our data frame together and place the result in a new third column.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Setup:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(x = 1:3, y = 4:6)  # define a data frame\n3×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Manipulation:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform!(df, [:x, :y] => (+) => :z)\n3×3 DataFrame\n Row │ x      y      z\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      5\n   2 │     2      5      7\n   3 │     3      6      9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Dot Syntax:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df.z = df.x + df.y\n3-element Vector{Int64}:\n 5\n 7\n 9\n\njulia> df  # see that the previous expression updated the data frame `df`\n3×3 DataFrame\n Row │ x      y      z\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      5\n   2 │     2      5      7\n   3 │     3      6      9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Recall that the return type from a data frame manipulation function call is always a data frame. 
The return type of a data frame column accessed with dot syntax is a Vector. Thus the expression df.x + df.y gets the column data as vectors and returns the result of the vector addition. However, in that same line, we assigned the resultant Vector to a new column z in the data frame df. We could have instead assigned the resultant Vector to some other variable, and then df would not have been altered. The approach with dot syntax is very versatile since the data getting, mathematics, and data setting can be separate steps.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df.x  # dot syntax returns a vector\n3-element Vector{Int64}:\n 1\n 2\n 3\n\njulia> v = df.x + df.y  # assign mathematical result to a vector `v`\n3-element Vector{Int64}:\n 5\n 7\n 9\n\njulia> df.z = v  # place `v` into the data frame `df` with the column name `z`\n3-element Vector{Int64}:\n 5\n 7\n 9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"However, one way in which dot syntax is less versatile is that the column name must be explicitly written in the code. Indexing syntax is a good alternative in these cases which is only slightly longer to write than dot syntax. 
Both indexing syntax and manipulation functions can operate on dynamic column names stored in variables.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Setup:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Imagine this setup data was read from a file and/or entered by a user at runtime.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(\"My First Column\" => 1:3, \"My Second Column\" => 4:6)  # define a data frame\n3×2 DataFrame\n Row │ My First Column  My Second Column\n     │ Int64            Int64\n─────┼───────────────────────────────────\n   1 │               1                 4\n   2 │               2                 5\n   3 │               3                 6\n\njulia> c1 = \"My First Column\"; c2 = \"My Second Column\"; c3 = \"My Third Column\";  # define column names","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Dot Syntax:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df.c1  # dot syntax expects an explicit column name and cannot be used to access variable column name\nERROR: ArgumentError: column name :c1 not found in the data frame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Indexing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df[:, c3] = df[:, c1] + df[:, c2]  # access columns with names stored in variables\n3-element Vector{Int64}:\n 5\n 7\n 9\n\njulia> df  # see that the previous expression updated the 
data frame `df`\n3×3 DataFrame\n Row │ My First Column  My Second Column  My Third Column\n     │ Int64            Int64             Int64\n─────┼────────────────────────────────────────────────────\n   1 │               1                 4                5\n   2 │               2                 5                7\n   3 │               3                 6                9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Manipulation:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform!(df, [c1, c2] => (+) => c3)  # access columns with names stored in variables\n3×3 DataFrame\n Row │ My First Column  My Second Column  My Third Column\n     │ Int64            Int64             Int64\n─────┼────────────────────────────────────────────────────\n   1 │               1                 4                5\n   2 │               2                 5                7\n   3 │               3                 6                9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Additionally, manipulation functions only require the name of the data frame to be written once. 
This can be helpful when dealing with long variable and column names.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Setup:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> my_very_long_data_frame_name = DataFrame(\n           \"My First Column\" => 1:3,\n           \"My Second Column\" => 4:6\n       )  # define a data frame\n3×2 DataFrame\n Row │ My First Column  My Second Column\n     │ Int64            Int64\n─────┼───────────────────────────────────\n   1 │               1                 4\n   2 │               2                 5\n   3 │               3                 6\n\njulia> c1 = \"My First Column\"; c2 = \"My Second Column\"; c3 = \"My Third Column\";  # define column names","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Manipulation:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"\njulia> transform!(my_very_long_data_frame_name, [c1, c2] => (+) => c3)\n3×3 DataFrame\n Row │ My First Column  My Second Column  My Third Column\n     │ Int64            Int64             Int64\n─────┼────────────────────────────────────────────────────\n   1 │               1                 4                5\n   2 │               2                 5                7\n   3 │               3                 6                9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Indexing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> my_very_long_data_frame_name[:, c3] = my_very_long_data_frame_name[:, c1] + my_very_long_data_frame_name[:, 
c2]\n3-element Vector{Int64}:\n 5\n 7\n 9\n\njulia> my_very_long_data_frame_name  # see that the previous expression updated the data frame `my_very_long_data_frame_name`\n3×3 DataFrame\n Row │ My First Column  My Second Column  My Third Column\n     │ Int64            Int64             Int64\n─────┼────────────────────────────────────────────────────\n   1 │               1                 4                5\n   2 │               2                 5                7\n   3 │               3                 6                9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Another benefit of manipulation functions and indexing over dot syntax is that it is easier to operate on a subset of columns.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Setup:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df = DataFrame(x = 1:3, y = 4:6, z = 7:9)  # define data frame\n3×3 DataFrame\n Row │ x      y      z\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Dot Syntax:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df.Not(:x)  # will not work; requires a literal column name\nERROR: ArgumentError: column name :Not not found in the data frame","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Indexing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> df[:, :y_z_max] = 
maximum.(eachrow(df[:, Not(:x)]))  # find maximum value across all rows except for column `x`\n3-element Vector{Int64}:\n 7\n 8\n 9\n\njulia> df  # see that the previous expression updated the data frame `df`\n3×4 DataFrame\n Row │ x      y      z      y_z_max\n     │ Int64  Int64  Int64  Int64\n─────┼──────────────────────────────\n   1 │     1      4      7        7\n   2 │     2      5      8        8\n   3 │     3      6      9        9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Manipulation:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> transform!(df, Not(:x) => ByRow(max))  # find maximum value across all rows except for column `x`\n3×4 DataFrame\n Row │ x      y      z      y_z_max\n     │ Int64  Int64  Int64  Int64\n─────┼──────────────────────────────\n   1 │     1      4      7        7\n   2 │     2      5      8        8\n   3 │     3      6      9        9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Moreover, indexing can operate on a subset of columns and rows.","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Indexing:","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"julia> y_z_max_row3 = maximum(df[3, Not(:x)])  # find maximum value across row 3 except for column `x`\n9","category":"page"},{"location":"man/basics/","page":"First Steps with DataFrames.jl","title":"First Steps with DataFrames.jl","text":"Hopefully this small comparison has illustrated some of the benefits and drawbacks of the various syntaxes available in DataFrames.jl. 
The best syntax to use depends on the situation.","category":"page"},{"location":"man/importing_and_exporting/#Importing-and-Exporting-Data-(I/O)","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"","category":"section"},{"location":"man/importing_and_exporting/#CSV-Files","page":"Importing and Exporting Data (I/O)","title":"CSV Files","text":"","category":"section"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"For reading and writing tabular data from CSV and other delimited text files, use the CSV.jl package.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"If you have not used the CSV.jl package before then you may need to install it first:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"using Pkg\nPkg.add(\"CSV\")","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"The CSV.jl functions are not loaded automatically and must be imported into the session.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"using CSV","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"A dataset can now be read from a CSV file at path input using","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data 
(I/O)","text":"DataFrame(CSV.File(input))","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"A DataFrame can be written to a CSV file at path output using","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"df = DataFrame(x=1, y=2)\nCSV.write(output, df)","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"The behavior of CSV functions can be adapted via keyword arguments. For more information, see ?CSV.File, ?CSV.read and ?CSV.write, or check out the online CSV.jl documentation.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"In simple cases, when compilation latency of CSV.jl might be an issue, using the DelimitedFiles module from the Julia standard library can be considered. 
Here is an example showing how to read in the data and perform its post-processing:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"julia> using DelimitedFiles, DataFrames\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> data, header = readdlm(path, ',', header=true);\n\njulia> iris_raw = DataFrame(data, vec(header))\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Any          Any         Any          Any         Any\n─────┼──────────────────────────────────────────────────────────────────\n   1 │ 5.1          3.5         1.4          0.2         Iris-setosa\n   2 │ 4.9          3.0         1.4          0.2         Iris-setosa\n   3 │ 4.7          3.2         1.3          0.2         Iris-setosa\n   4 │ 4.6          3.1         1.5          0.2         Iris-setosa\n   5 │ 5.0          3.6         1.4          0.2         Iris-setosa\n   6 │ 5.4          3.9         1.7          0.4         Iris-setosa\n   7 │ 4.6          3.4         1.4          0.3         Iris-setosa\n   8 │ 5.0          3.4         1.5          0.2         Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │ 6.8          3.2         5.9          2.3         Iris-virginica\n 145 │ 6.7          3.3         5.7          2.5         Iris-virginica\n 146 │ 6.7          3.0         5.2          2.3         Iris-virginica\n 147 │ 6.3          2.5         5.0          1.9         Iris-virginica\n 148 │ 6.5          3.0         5.2          2.0         Iris-virginica\n 149 │ 6.2          3.4         5.4          2.3         Iris-virginica\n 150 │ 5.9          3.0         5.1          1.8         Iris-virginica\n                                                        135 rows omitted\n\njulia> iris = identity.(iris_raw)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth 
 PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     SubStrin…\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Observe that in our example:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"header is a Matrix therefore we had to pass vec(header) to the DataFrame constructor;\nwe broadcasted the identity function over the iris_raw data frame to perform narrowing of eltype of columns of iris_raw; the reason is that the data read in by the readdlm function is 
stored into a data Matrix so all columns in iris_raw initially have the same eltype – in this case it had to be Any as some of the columns are numeric and some are string.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"All such operations (and many more) are automatically handled by CSV.jl.","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Similarly, you can use the writedlm function from the DelimitedFiles module to save a data frame like this:","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"writedlm(\"test.csv\", Iterators.flatten(([names(iris)], eachrow(iris))), ',')","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"As you can see the code required to transform iris into a proper input to the writedlm function so that you can create the CSV file having the expected format is not easy. 
Therefore CSV.jl is the preferred package to write CSV files for data stored in data frames.","category":"page"},{"location":"man/importing_and_exporting/#Other-formats","page":"Importing and Exporting Data (I/O)","title":"Other formats","text":"","category":"section"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Other data formats are supported for reading and writing in the following packages (non exhaustive list):","category":"page"},{"location":"man/importing_and_exporting/","page":"Importing and Exporting Data (I/O)","title":"Importing and Exporting Data (I/O)","text":"Apache Arrow (including Feather v2): Arrow.jl\nApache Feather (v1): Feather.jl\nApache Avro: Avro.jl\nJSON: JSONTables.jl\nParquet: Parquet2.jl\nStata, SAS and SPSS: ReadStatTables.jl (alternatively Queryverse  users can choose StatFiles.jl)\nreading R data files (.rda, .RData): RData.jl\nMicrosoft Excel (XLSX): XLSX.jl\nCopying/pasting to clipboard, for sending data to and from spreadsheets: ClipData.jl","category":"page"},{"location":"man/querying_frameworks/#Data-manipulation-frameworks","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Four frameworks provide convenience methods to manipulate DataFrames: TidierData.jl, DataFramesMeta.jl, DataFrameMacros.jl and Query.jl. 
They implement a functionality similar to dplyr or LINQ.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"These frameworks are designed both to make it easier for new users to start working with data frames in Julia and to allow advanced users to write more compact code.","category":"page"},{"location":"man/querying_frameworks/#TidierData.jl","page":"Data manipulation frameworks","title":"TidierData.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl, part of  the Tidier ecosystem, is a macro-based  data analysis interface that wraps DataFrames.jl.  The instructions below are for version  0.16.0 of TidierData.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First, install the TidierData.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"TidierData\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl enables clean, readable, and fast code for all major data transformation  functions including  aggregating,  pivoting,  nesting,  and joining  data frames. TidierData re-exports DataFrame from DataFrames.jl, @chain from Chain.jl, and  Statistics.jl to streamline data operations. ","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl is heavily inspired by the dplyr and tidyr R packages (part of the R  tidyverse), which it aims to implement using pure Julia by wrapping DataFrames.jl. 
While TidierData.jl borrows conventions from the tidyverse, it is important to note that the  tidyverse itself is often not considered idiomatic R code. TidierData.jl brings  data analysis conventions from tidyverse into Julia to have the best of both worlds:  tidy syntax and the speed and flexibility of the Julia language.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"TidierData.jl has two major differences from other macro-based packages. First, TidierData.jl  uses tidy expressions. An example of a tidy expression is a = mean(b), where b refers  to an existing column in the data frame, and a refers to either a new or existing column.  Referring to variables outside of the data frame requires prefixing variables with !!.  For example, a = mean(!!b) refers to a variable b outside the data frame. Second,  TidierData.jl aims to make broadcasting mostly invisible through  auto-vectorization. TidierData.jl currently uses a lookup table to decide which functions not to  vectorize; all other functions are automatically vectorized. This allows for  writing of concise expressions: @mutate(df, a = a - mean(a)) transforms the a column  by subtracting the mean of the column from each value. Behind the scenes, the right-hand  expression is converted to a .- mean(a) because mean() is in the lookup table as a  function that should not be vectorized. 
Take a look at the  auto-vectorization documentation for details.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"One major benefit of combining tidy expressions with auto-vectorization is that  TidierData.jl code (which uses DataFrames.jl as its backend) can work directly on  databases using TidierDB.jl,  which converts tidy expressions into SQL, supporting DuckDB and several other backends.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using TidierData\n\njulia> df = DataFrame(\n                name = [\"John\", \"Sally\", \"Roger\"],\n                age = [54.0, 34.0, 79.0],\n                children = [0, 2, 4]\n            )\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @filter(children != 2)\n           @select(name, num_children = children)\n       end\n2×2 DataFrame\n Row │ name    num_children \n     │ String  Int64        \n─────┼──────────────────────\n   1 │ John               0\n   2 │ Roger              4","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Below are examples showcasing @group_by with @summarize or @mutate - analogous to the split, apply, combine pattern.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(\n                groups = repeat('a':'e', inner = 2), \n                b_col = 1:10, \n                c_col = 11:20, \n                d_col = 111:120\n            )\n10×4 DataFrame\n Row │ groups  b_col  c_col  d_col \n     
│ Char    Int64  Int64  Int64 \n─────┼─────────────────────────────\n   1 │ a           1     11    111\n   2 │ a           2     12    112\n   3 │ b           3     13    113\n   4 │ b           4     14    114\n   5 │ c           5     15    115\n   6 │ c           6     16    116\n   7 │ d           7     17    117\n   8 │ d           8     18    118\n   9 │ e           9     19    119\n  10 │ e          10     20    120\n\njulia> @chain df begin\n           @filter(b_col > 2)\n           @group_by(groups)\n           @summarise(median_b = median(b_col), \n                      across((b_col:d_col), mean))   \n       end\n4×5 DataFrame\n Row │ groups  median_b  b_col_mean  c_col_mean  d_col_mean \n     │ Char    Float64   Float64     Float64     Float64    \n─────┼──────────────────────────────────────────────────────\n   1 │ b            3.5         3.5        13.5       113.5\n   2 │ c            5.5         5.5        15.5       115.5\n   3 │ d            7.5         7.5        17.5       117.5\n   4 │ e            9.5         9.5        19.5       119.5\n\njulia> @chain df begin\n           @filter(b_col > 4 && c_col <= 18)\n           @group_by(groups)\n           @mutate(\n               new_col = b_col + maximum(d_col),\n               new_col2 = c_col - maximum(d_col),\n               new_col3 = case_when(c_col >= 18  => \"high\",\n                                    c_col > 15   => \"medium\",\n                                    true         => \"low\"))\n           @select(starts_with(\"new\"))\n           @ungroup # required because `@mutate` does not ungroup\n       end\n4×4 DataFrame\n Row │ groups  new_col  new_col2  new_col3 \n     │ Char    Int64    Int64     String   \n─────┼─────────────────────────────────────\n   1 │ c           121      -101  low\n   2 │ c           122      -100  medium\n   3 │ d           125      -101  medium\n   4 │ d           126      -100  high","category":"page"},{"location":"man/querying_frameworks/","page":"Data 
manipulation frameworks","title":"Data manipulation frameworks","text":"For more examples, please visit the TidierData.jl documentation.","category":"page"},{"location":"man/querying_frameworks/#DataFramesMeta.jl","page":"Data manipulation frameworks","title":"DataFramesMeta.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The DataFramesMeta.jl package provides a convenient yet fast macro-based interface to work with DataFrames. The instructions below are for version 0.10.0 of DataFramesMeta.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First install the DataFramesMeta.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"DataFramesMeta\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The major benefit of the package is it provides a more convenient syntax for the transformation functions transform, select, and combine  via the macros @transform, @select, @combine, and more.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"DataFramesMeta.jl also reexports the @chain macro from  Chain.jl, allowing users to pipe the output of one transformation as an input to another, as with  |> and %>% in R. 
","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Below we present several selected examples of usage of the package.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First we subset rows of the source data frame using a logical condition and select two of its columns, renaming one of them:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFramesMeta\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @rsubset :age > 40 \n           @select(:number_of_children = :children, :name)\n       end\n2×2 DataFrame\n Row │ number_of_children  name\n     │ Int64               String\n─────┼────────────────────────────\n   1 │                  0  John\n   2 │                  4  Roger","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In the following examples we show that DataFramesMeta.jl also supports the split-apply-combine pattern:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(key=repeat(1:3, 4), value=1:12)\n12×2 DataFrame\n Row │ key    value\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     1      4\n   5 │     2      5\n   6 │     3      6\n   7 
│     1      7\n   8 │     2      8\n   9 │     3      9\n  10 │     1     10\n  11 │     2     11\n  12 │     3     12\n\njulia> @chain df begin\n           @rsubset :value > 3 \n           @by(:key, :min = minimum(:value), :max = maximum(:value))\n           @select(:key, :range = :max - :min)\n        end\n3×2 DataFrame\n Row │ key    range\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      6\n   2 │     2      6\n   3 │     3      6\n\njulia> @chain df begin\n           groupby(:key)\n           @transform :value0 = :value .- minimum(:value)\n       end\n12×3 DataFrame\n Row │ key    value  value0\n     │ Int64  Int64  Int64\n─────┼──────────────────────\n   1 │     1      1       0\n   2 │     2      2       0\n   3 │     3      3       0\n   4 │     1      4       3\n   5 │     2      5       3\n   6 │     3      6       3\n   7 │     1      7       6\n   8 │     2      8       6\n   9 │     3      9       6\n  10 │     1     10       9\n  11 │     2     11       9\n  12 │     3     12       9","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"You can find more details about how this package can be used on the DataFramesMeta.jl GitHub page.","category":"page"},{"location":"man/querying_frameworks/#DataFrameMacros.jl","page":"Data manipulation frameworks","title":"DataFrameMacros.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"DataFrameMacros.jl is an alternative to DataFramesMeta.jl with an additional focus on convenient solutions for the transformation of multiple columns at once. 
The instructions below are for version 0.3 of DataFrameMacros.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"First, install the DataFrameMacros.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"DataFrameMacros\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In DataFrameMacros.jl, all but the @combine macro are row-wise by default. There is also a @groupby which allows creating grouping columns on the fly using the same syntax as @transform, for grouping by new columns without writing them out twice.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"In the example below, you can also see some of DataFrameMacros.jl's multi-column features, where mean is applied to both age columns at once by selecting them with the r\"age\" regex. 
The new column names are then derived using the \"{}\" shortcut which splices the transformed column names into a string.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFrames, DataFrameMacros, Chain, Statistics\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children \n     │ String  Float64  Int64    \n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> @chain df begin\n           @transform :age_months = :age * 12\n           @groupby :has_child = :children > 0\n           @combine \"mean_{}\" = mean({r\"age\"})\n       end\n2×3 DataFrame\n Row │ has_child  mean_age  mean_age_months \n     │ Bool       Float64   Float64         \n─────┼──────────────────────────────────────\n   1 │     false      54.0            648.0\n   2 │      true      56.5            678.0","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"There's also the capability to reference a group of multiple columns as a single unit, for example to run aggregations over them, with the {{ }} syntax. 
In the following example, the first quarter is compared to the maximum of the other three:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> df = DataFrame(q1 = [12.0, 0.4, 42.7],\n                      q2 = [6.4, 2.3, 40.9],\n                      q3 = [9.5, 0.2, 13.6],\n                      q4 = [6.3, 5.4, 39.3])\n3×4 DataFrame\n Row │ q1       q2       q3       q4      \n     │ Float64  Float64  Float64  Float64 \n─────┼────────────────────────────────────\n   1 │    12.0      6.4      9.5      6.3\n   2 │     0.4      2.3      0.2      5.4\n   3 │    42.7     40.9     13.6     39.3\n\njulia> @transform df :q1_best = :q1 > maximum({{Not(:q1)}})\n3×5 DataFrame\n Row │ q1       q2       q3       q4       q1_best \n     │ Float64  Float64  Float64  Float64  Bool    \n─────┼─────────────────────────────────────────────\n   1 │    12.0      6.4      9.5      6.3     true\n   2 │     0.4      2.3      0.2      5.4    false\n   3 │    42.7     40.9     13.6     39.3     true","category":"page"},{"location":"man/querying_frameworks/#Query.jl","page":"Data manipulation frameworks","title":"Query.jl","text":"","category":"section"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The Query.jl package provides advanced data manipulation capabilities for DataFrames (and many other data structures). This section provides a short introduction to the package; the Query.jl documentation describes the package more comprehensively. 
The instructions here are for version 1.0.0 of Query.jl.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"To get started, install the Query.jl package:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"using Pkg\nPkg.add(\"Query\")","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query is started with the @from macro and consists of a series of query commands. Query.jl provides commands that can filter, project, join, flatten and group data from a DataFrame. A query can return an iterator, or one can materialize the results of a query into a variety of data structures, including a new DataFrame.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A simple example of a query looks like this:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> using DataFrames, Query\n\njulia> df = DataFrame(name=[\"John\", \"Sally\", \"Roger\"],\n                      age=[54.0, 34.0, 79.0],\n                      children=[0, 2, 4])\n3×3 DataFrame\n Row │ name    age      children\n     │ String  Float64  Int64\n─────┼───────────────────────────\n   1 │ John       54.0         0\n   2 │ Sally      34.0         2\n   3 │ Roger      79.0         4\n\njulia> q1 = @from i in df begin\n            @where i.age > 40\n            @select {number_of_children=i.children, i.name}\n            @collect DataFrame\n       end\n2×2 DataFrame\n Row │ number_of_children  name\n     │ Int64               String\n─────┼────────────────────────────\n   1 │                  0  John\n   2 │                  4  
Roger","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The query starts with the @from macro. The first argument i is the name of the range variable that will be used to refer to an individual row in later query commands. The next argument df is the data source that one wants to query. The @where command in this query will filter the source data by applying the filter condition i.age > 40. This filters out any rows in which the age column is not larger than 40. The @select command then projects the columns of the source data onto a new column structure. The example here applies three specific modifications: 1) it only keeps a subset of the columns in the source DataFrame, i.e. the age column will not be part of the transformed data; 2) it changes the order of the two columns that are selected; and 3) it renames one of the columns that is selected from children to number_of_children. The example query uses the {} syntax to achieve this. A {} in a Query.jl expression instantiates a new NamedTuple, i.e. it is a shortcut for writing @NT(number_of_children=>i.children, name=>i.name). The @collect statement determines the data structure that the query returns. In this example the results are returned as a DataFrame.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query without a @collect statement returns a standard julia iterator that can be used with any normal julia language construct that can deal with iterators. 
The following code returns a julia iterator for the query results:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> q2 = @from i in df begin\n                   @where i.age > 40\n                   @select {number_of_children=i.children, i.name}\n              end; # suppress printing the iterator type\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"One can loop over the results using a standard julia for statement:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> total_children = 0\n0\n\njulia> for i in q2\n           global total_children += i.number_of_children\n       end\n\njulia> total_children\n4\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"Or one can use a comprehension to extract the name of a subset of rows:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> y = [i.name for i in q2 if i.number_of_children > 0]\n1-element Vector{String}:\n \"Roger\"\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"The last example (extracting only the name and applying a second filter) could of course be completely expressed as a query expression:","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"julia> q3 = @from i in df begin\n            @where i.age > 40 && i.children > 0\n            @select i.name\n            @collect\n       end\n1-element Vector{String}:\n 
\"Roger\"\n","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"A query that ends with a @collect statement without a specific type will materialize the query results into an array. Note also the difference in the @select statement: The previous queries all used the {} syntax in the @select statement to project results into a tabular format. The last query instead just selects a single value from each row in the @select statement.","category":"page"},{"location":"man/querying_frameworks/","page":"Data manipulation frameworks","title":"Data manipulation frameworks","text":"These examples only scratch the surface of what one can do with Query.jl, and the interested reader is referred to the Query.jl documentation for more information.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/indexing/#Indexing","page":"Indexing","title":"Indexing","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Pages = [\"indexing.md\"]","category":"page"},{"location":"lib/indexing/#General-rules","page":"Indexing","title":"General rules","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following rules explain target functionality of how getindex, setindex!, view, and broadcasting are intended to work with DataFrame, SubDataFrame and DataFrameRow objects.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following values are a valid column index:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"a scalar, later denoted as col:\na Symbol;\nan AbstractString;\nan Integer that is not Bool;\na vector, later denoted as cols:\na vector of Symbol (does not have to be a subtype of 
AbstractVector{Symbol});\na vector of AbstractString (does not have to be a subtype of AbstractVector{<:AbstractString});\na vector of Integer that are not Bool (does not have to be a subtype of AbstractVector{<:Integer});\na vector of Bool (must be a subtype of AbstractVector{Bool});\na regular expression (will be expanded to a vector of matching column names);\na Not expression (see InvertedIndices.jl); Not(idx) selects all indices not in the passed idx; when passed as column selector Not(idx...) is equivalent to Not(Cols(idx...)).\na Cols expression (see DataAPI.jl); Cols(idxs...) selects the union of the selections in idxs; in particular Cols() selects no columns and Cols(:) selects all columns; a special rule is Cols(predicate), where predicate is a predicate function; in this case the columns whose names passed to predicate as strings return true are selected.\na Between expression (see DataAPI.jl); Between(first, last) selects the columns between first and last inclusively;\nan All expression (see DataAPI.jl); All() selects all columns, equivalent to :;\na literal colon : (selects all columns).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following values are a valid row index:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"a scalar, later denoted as row:\nan Integer that is not Bool;\na vector, later denoted as rows:\na vector of Integer that are not Bool (does not have to be a subtype of AbstractVector{<:Integer});\na vector of Bool (must be a subtype of AbstractVector{Bool});\na Not expression (see InvertedIndices.jl);\na literal colon : (selects all rows with copying);\na literal exclamation mark ! 
(selects all rows without copying).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additionally it is allowed to index into an AbstractDataFrame using a two-dimensional CartesianIndex.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In the descriptions below df represents a DataFrame, sdf is a SubDataFrame and dfr is a DataFrameRow.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":": always expands to axes(df, 1) or axes(sdf, 1).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df.col works like df[!, col] and sdf.col works like sdf[!, col] in all cases. An exception is that under Julia 1.6 or earlier df.col .= v and sdf.col .= v performs in-place broadcasting if col is present in df/sdf and is a valid identifier (this inconsistency is not present under Julia 1.7 and later).","category":"page"},{"location":"lib/indexing/#getindex-and-view","page":"Indexing","title":"getindex and view","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following list specifies the behavior of getindex and view operations depending on argument types.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In particular a description explicitly mentions that the data is copied or reused without copying.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"For performance reasons, accessing, via getindex or view, a single row and multiple cols of a DataFrame, a SubDataFrame or a DataFrameRow always returns a DataFrameRow (which is a view type).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df[row, col] -> the value 
contained in row row of column col, the same as df[!, col][row];\ndf[CartesianIndex(row, col)] -> the same as df[row, col];\ndf[row, cols] -> a DataFrameRow with parent df;\ndf[rows, col] -> a copy of the vector df[!, col] with only the entries                    corresponding to rows selected, the same as df[!, col][rows];\ndf[rows, cols] -> a DataFrame containing copies of columns cols with                     only the entries corresponding to rows selected;\ndf[!, col] -> the vector contained in column col returned without copying;                 the same as df.col if col is a valid identifier.\ndf[!, cols] -> create a new DataFrame with columns cols without copying                  of columns; the same as select(df, cols, copycols=false).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view df[row, col] -> a 0-dimensional view into df[!, col] in row row,                         the same as view(df[!, col], row);\n@view df[CartesianIndex(row, col)] -> the same as @view df[row, col];\n@view df[row, cols] -> the same as df[row, cols];\n@view df[rows, col] -> a view into df[!, col] with rows selected, the                          same as view(df[!, col], rows);\n@view df[rows, cols] -> a SubDataFrame with rows selected with parent df;\n@view df[!, col] -> a view into df[!, col]  with all rows.\n@view df[!, cols] -> the same as @view df[:, cols].","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"sdf[row, col] -> a value contained in row row of column col;\nsdf[CartesianIndex(row, col)] -> the same as sdf[row, col];\nsdf[row, cols] -> a DataFrameRow with parent parent(sdf);\nsdf[rows, col] -> a copy of sdf[!, col] with only rows rows selected,          
           the same as sdf[!, col][rows];\nsdf[rows, cols] -> a DataFrame containing columns cols and sdf[rows, col] as a vector for each col in cols;\nsdf[!, col] -> a view of entries corresponding to sdf in the vector                  parent(sdf)[!, col]; the same as sdf.col if col is a                  valid identifier.\nsdf[!, cols] -> create a new SubDataFrame with columns cols, the same                   parent as sdf, and the same rows selected; the same as                   select(sdf, cols, copycols=false).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view sdf[row, col] -> a 0-dimensional view into df[!, col] at row                          row, the same as view(sdf[!, col], row);\n@view sdf[CartesianIndex(row, col)] -> the same as @view sdf[row, col];\n@view sdf[row, cols] -> a DataFrameRow with parent parent(sdf);\n@view sdf[rows, col] -> a view into sdf[!, col] vector with rows                           selected, the same as view(sdf[!, col], rows);\n@view sdf[rows, cols] -> a SubDataFrame with parent parent(sdf);\n@view sdf[!, col] -> a view into sdf[!, col] vector with all rows.\n@view sdf[!, cols] -> the same as @view sdf[:, cols].","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"getindex on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"dfr[col] -> the value contained in column col of dfr; the same as               dfr.col if col is a valid identifier;\ndfr[cols] -> a DataFrameRow with parent parent(dfr);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"view on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"@view dfr[col] -> a 0-dimensional view into                     
parent(dfr)[DataFrames.row(dfr), col];\n@view dfr[cols] -> a DataFrameRow with parent parent(dfr);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that views created with columns selector set to : change their columns' count if columns are added/removed/renamed in the parent; if column selector is other than : then view points to selected columns by their number at the moment of creation of the view.","category":"page"},{"location":"lib/indexing/#setindex!","page":"Indexing","title":"setindex!","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following list specifies the behavior of setindex! operations depending on argument types.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"In particular a description explicitly mentions if the assignment is in-place.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that if a setindex! operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! 
on DataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"df[row, col] = v -> set value of col in row row to v in-place;\ndf[CartesianIndex(row, col)] = v -> the same as df[row, col] = v;\ndf[row, cols] = v -> set row row of columns cols in-place; the same as                        dfr = df[row, cols]; dfr[:] = v;\ndf[rows, col] = v -> set rows rows of column col in-place; v must be                        an AbstractVector; if rows is : and col is a                        Symbol or AbstractString that is not present in                        df then a new column in df is created and holds a                        copy of v; equivalent to df.col = copy(v) if                        col is a valid identifier;\ndf[rows, cols] = v -> set rows rows of columns cols in-place; v must                         be an AbstractMatrix or an AbstractDataFrame (in                         this case column names must match);\ndf[!, col] = v -> replaces col with v without copying (with the                     exception that if v is an AbstractRange it gets                     converted to a Vector); also if col is a Symbol or                     AbstractString that is not present in df then a new                     column in df is created and holds v; equivalent to                     df.col = v if col is a valid identifier; this is                     allowed if ncol(df) == 0 || length(v) == nrow(df);\ndf[!, cols] = v -> replaces existing columns cols in data frame df with                      copying; v must be an AbstractMatrix or an                      AbstractDataFrame (in the latter case column names must                      match);","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! 
on SubDataFrame:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"sdf[row, col] = v -> set value of col in row row to v in-place;\nsdf[CartesianIndex(row, col)] = v -> the same as sdf[row, col] = v;\nsdf[row, cols] = v -> the same as dfr = df[row, cols]; dfr[:] = v in-place;\nsdf[rows, col] = v -> set rows rows of column col, in-place; v must be                         an abstract vector;\nsdf[rows, cols] = v -> set rows rows of columns cols in-place; v can                          be an AbstractMatrix or v can be                          AbstractDataFrame in which case column names must                          match;\nsdf[!, col] = v -> replaces col with v with copying; if col is present                      in sdf then filtered-out rows in newly created vector                      are filled with values already present in that column and                      promote_type is used to determine the eltype of the                      new column; if col is not present in sdf then the                      operation is only allowed if sdf was created with :                      as column selector, in which case filtered-out rows are                      filled with missing; equivalent to sdf.col = v if                      col is a valid identifier; operation is allowed if                      length(v) == nrow(sdf);\nsdf[!, cols] = v -> replaces existing columns cols in data frame sdf                       with copying; v must be an AbstractMatrix or an                       AbstractDataFrame (in the latter case column names                       must match); filtered-out rows in newly created vectors                       are filled with values already present in respective                       columns and promote_type is used to determine the                       eltype of the new columns;","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"note: Note\nThe rules above mean that 
sdf[:, col] = v is an in-place operation if col is present in sdf, therefore it will be fast in general. On the other hand using sdf[!, col] = v or sdf.col = v will always allocate a new vector, which is more expensive computationally.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"setindex! on DataFrameRow:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"dfr[col] = v -> set value of col in row row to v in-place;                   equivalent to dfr.col = v if col is a valid identifier;\ndfr[cols] = v -> set values of entries in columns cols in dfr by                    elements of v in place; v can be: 1) a Tuple or an                    AbstractArray, in which cases it must have a number of                    elements equal to length(dfr), 2) an AbstractDict, in                    which case column names must match, 3) a NamedTuple or                    DataFrameRow, in which case column names and order must                    match;","category":"page"},{"location":"lib/indexing/#Broadcasting","page":"Indexing","title":"Broadcasting","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The following broadcasting rules apply to AbstractDataFrame objects:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"AbstractDataFrame behaves in broadcasting like a two-dimensional collection compatible with matrices.\nIf an AbstractDataFrame takes part in broadcasting then a DataFrame is always produced as a result. In this case the requested broadcasting operation produces an object with exactly two dimensions. 
An exception is when an AbstractDataFrame is used only as a source of broadcast assignment into an object of dimensionality higher than two.\nIf multiple AbstractDataFrame objects take part in broadcasting then they have to have identical column names.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that if broadcasting assignment operation throws an error the target data frame may be partially changed so it is unsafe to use it afterwards (the column length correctness will be preserved).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Broadcasting DataFrameRow is currently not allowed (which is consistent with NamedTuple).","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"It is possible to assign a value to AbstractDataFrame and DataFrameRow objects using the .= operator. In such an operation AbstractDataFrame is considered as two-dimensional and DataFrameRow as single-dimensional.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"note: Note\nThe rule above means that, similar to single-dimensional objects in Base (e.g. 
vectors), DataFrameRow is considered to be column-oriented.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additional rules:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"in the df[CartesianIndex(row, col)] .= v, df[row, col] .= v syntaxes v is broadcasted into the contents of df[row, col] (this is consistent with Julia Base);\nin the df[row, cols] .= v syntaxes the assignment to df is performed in-place;\nin the df[rows, col] .= v and df[rows, cols] .= v syntaxes the assignment to df is performed in-place; if rows is : and col is Symbol or AbstractString and it is missing from df then a new column is allocated and added; the length of the column is always the value of nrow(df) before the assignment takes place;\nin the df[!, col] .= v syntax column col is replaced by a freshly allocated vector; if col is Symbol or AbstractString and it is missing from df then a new column is allocated and added; the length of the column is always the value of nrow(df) before the assignment takes place;\nthe df[!, cols] .= v syntax replaces existing columns cols in data frame df with freshly allocated vectors;\ndf.col .= v syntax currently performs in-place assignment to an existing vector df.col; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if :col is not present in df then a new column will be created in df.\nin the sdf[CartesianIndex(row, col)] .= v, sdf[row, col] .= v and sdf[row, cols] .= v syntaxes the assignment to sdf is performed in-place;\nin the sdf[rows, col] .= v and sdf[rows, cols] .= v syntaxes the assignment to sdf is performed in-place; if rows is : and col is a Symbol or AbstractString referring to a column missing from sdf and sdf was created with : as column selector then a new column is allocated and added; the filtered-out rows are filled with missing;\nin the sdf[!, col] .= v syntax column col is replaced by a freshly allocated vector; the filtered-out rows are filled with values already present in col; if col is a Symbol or AbstractString referring to a column missing from sdf and sdf was created with : as column selector then a new column is allocated and added; in this case the filtered-out rows are filled with missing;\nthe sdf[!, cols] .= v syntax replaces existing columns cols in data frame sdf with freshly allocated vectors; the filtered-out rows are filled with values already present in cols;\nsdf.col .= v syntax currently performs in-place assignment to an existing vector sdf.col; this behavior is deprecated and a new column will be allocated in the future. 
Starting from Julia 1.7 if :col is not present in sdf then a new column will be created in sdf if sdf was created with : as a column selector.\ndfr.col .= v syntax is allowed and performs in-place assignment to a value extracted by dfr.col.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Note that sdf[!, col] .= v and sdf[!, cols] .= v syntaxes are not allowed as sdf can be only modified in-place.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"If column indexing using Symbol or AbstractString names in cols is performed, the order of columns in the operation is specified by the order of names.","category":"page"},{"location":"lib/indexing/#Indexing-GroupedDataFrames","page":"Indexing","title":"Indexing GroupedDataFrames","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"A GroupedDataFrame can behave as either an AbstractVector or AbstractDict depending on the type of index used. Integers (or arrays of them) trigger vector-like indexing while Tuples and NamedTuples trigger dictionary-like indexing. An intermediate between the two is the GroupKey type returned by keys(::GroupedDataFrame), which behaves similarly to a NamedTuple but has performance on par with integer indexing.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"The elements of a GroupedDataFrame are SubDataFrames of its parent.","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"gd[i::Integer] -> Get the ith group.\ngd[key::NamedTuple] -> Get the group corresponding to the given values of the grouping columns. 
The fields of the NamedTuple must match the grouping columns passed to groupby (including order).\ngd[key::Tuple] -> Same as previous, but omitting the names on key.\nget(gd, key::Union{Tuple, NamedTuple}, default) -> Get group for key key, returning default if it does not exist.\ngd[key::GroupKey] -> Get the group corresponding to the GroupKey key (one of the elements of the vector returned by keys(::GroupedDataFrame)). This should be nearly as fast as integer indexing.\ngd[a::AbstractVector] -> Select multiple groups and return them in a new GroupedDataFrame object. Groups may be selected by integer position using an array of Integers or Bools, similar to a standard array. Alternatively the array may contain keys of any of the types supported for dictionary-like indexing (GroupKey, Tuple, or NamedTuple). Selected groups must be unique, and different types of indices cannot be mixed.\ngd[n::Not] -> Any of the above types wrapped in Not. The result will be a  new GroupedDataFrame containing all groups in gd not selected by the  wrapped index.","category":"page"},{"location":"lib/indexing/#Common-API-for-types-defined-in-DataFrames.jl","page":"Indexing","title":"Common API for types defined in DataFrames.jl","text":"","category":"section"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"This table presents return value types of calling names, propertynames, keys, length and ndims on types exposed to the user by DataFrames.jl:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Type names propertynames keys length ndims\nAbstractDataFrame Vector{String} Vector{Symbol} undefined undefined 2\nDataFrameRow Vector{String} Vector{Symbol} Vector{Symbol} Int 1\nDataFrameRows Vector{String} Vector{Symbol} vector of Int Int 1\nDataFrameColumns Vector{String} Vector{Symbol} Vector{Symbol} Int 1\nGroupedDataFrame Vector{String} tuple of fields GroupKeys Int 1\nGroupKeys undefined tuple of fields vector of 
Int Int 1\nGroupKey Vector{String} Vector{Symbol} Vector{Symbol} Int 1","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"Additionally, for the above types T (i.e. AbstractDataFrame, DataFrameRow, DataFrameRows, DataFrameColumns, GroupedDataFrame, GroupKeys, GroupKey) the following methods are defined:","category":"page"},{"location":"lib/indexing/","page":"Indexing","title":"Indexing","text":"size(::T) returning a Tuple of Int.\nsize(::T, ::Integer) returning an Int.\naxes(::T) returning a Tuple of Int vectors.\naxes(::T, ::Integer) returning an Int vector for a valid dimension (except  DataFrameRows and GroupKeys for which Base.OneTo(1) is also returned  for a dimension higher than a valid one because they are AbstractVector).\nfirstindex(::T) returning 1 (except AbstractDataFrame for which it is undefined).\nfirstindex(::T, ::Integer) returning 1 for a valid dimension (except  DataFrameRows and GroupKeys for which 1 is also returned for a  dimension higher than a valid one because they are AbstractVector).\nlastindex(::T) returning Int (except AbstractDataFrame for which it is undefined).\nlastindex(::T, ::Integer) returning Int for a valid dimension  (except  DataFrameRows and GroupKeys for which 1 is also returned for a  dimension higher than a valid one because they are AbstractVector).","category":"page"},{"location":"man/reshaping_and_pivoting/#Reshaping-and-Pivoting-Data","page":"Reshaping","title":"Reshaping and Pivoting Data","text":"","category":"section"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Reshape data from wide to long format using the stack function:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> using DataFrames, CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  
SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> stack(iris, 1:4)\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica 
 PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"The second optional argument to stack indicates the columns to be stacked. These are normally referred to as the measured variables. Column names can also be given:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth])\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica  PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Note that all columns can be of different types. 
Type promotion follows the rules of vcat.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"The stacked DataFrame that results includes all of the columns not specified to be stacked. These are repeated for each stacked column. These are normally referred to as identifier (id) columns. In addition to the id columns, two additional columns labeled :variable and :value contain the column identifier and the stacked columns.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"A third optional argument to stack represents the id columns that are repeated. This makes it easier to specify which variables you want included in the long format:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, [:SepalLength, :SepalWidth], :Species)\n300×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 294 │ Iris-virginica  SepalWidth       3.2\n 295 │ Iris-virginica  SepalWidth       3.3\n 296 │ Iris-virginica  SepalWidth       3.0\n 297 │ Iris-virginica  SepalWidth       2.5\n 298 │ Iris-virginica  SepalWidth       3.0\n 299 │ Iris-virginica  SepalWidth       3.4\n 300 │ Iris-virginica  SepalWidth       3.0\n                            285 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"If you prefer to specify the id columns then use 
Not with stack like this:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, Not(:Species))\n600×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 594 │ Iris-virginica  PetalWidth       2.3\n 595 │ Iris-virginica  PetalWidth       2.5\n 596 │ Iris-virginica  PetalWidth       2.3\n 597 │ Iris-virginica  PetalWidth       1.9\n 598 │ Iris-virginica  PetalWidth       2.0\n 599 │ Iris-virginica  PetalWidth       2.3\n 600 │ Iris-virginica  PetalWidth       1.8\n                            585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"unstack converts from a long format to a wide format. 
By default it requires specifying which columns are id variables, column variable names, and column values:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> iris.id = 1:size(iris, 1)\n1:150\n\njulia> longdf = stack(iris, Not([:Species, :id]))\n600×4 DataFrame\n Row │ Species         id     variable     value\n     │ String15        Int64  String       Float64\n─────┼─────────────────────────────────────────────\n   1 │ Iris-setosa         1  SepalLength      5.1\n   2 │ Iris-setosa         2  SepalLength      4.9\n   3 │ Iris-setosa         3  SepalLength      4.7\n   4 │ Iris-setosa         4  SepalLength      4.6\n   5 │ Iris-setosa         5  SepalLength      5.0\n   6 │ Iris-setosa         6  SepalLength      5.4\n   7 │ Iris-setosa         7  SepalLength      4.6\n   8 │ Iris-setosa         8  SepalLength      5.0\n  ⋮  │       ⋮           ⋮         ⋮          ⋮\n 594 │ Iris-virginica    144  PetalWidth       2.3\n 595 │ Iris-virginica    145  PetalWidth       2.5\n 596 │ Iris-virginica    146  PetalWidth       2.3\n 597 │ Iris-virginica    147  PetalWidth       1.9\n 598 │ Iris-virginica    148  PetalWidth       2.0\n 599 │ Iris-virginica    149  PetalWidth       2.3\n 600 │ Iris-virginica    150  PetalWidth       1.8\n                                   585 rows omitted\n\njulia> unstack(longdf, :id, :variable, :value)\n150×5 DataFrame\n Row │ id     SepalLength  SepalWidth  PetalLength  PetalWidth\n     │ Int64  Float64?     Float64?    Float64?     
Float64?\n─────┼─────────────────────────────────────────────────────────\n   1 │     1          5.1         3.5          1.4         0.2\n   2 │     2          4.9         3.0          1.4         0.2\n   3 │     3          4.7         3.2          1.3         0.2\n   4 │     4          4.6         3.1          1.5         0.2\n   5 │     5          5.0         3.6          1.4         0.2\n   6 │     6          5.4         3.9          1.7         0.4\n   7 │     7          4.6         3.4          1.4         0.3\n   8 │     8          5.0         3.4          1.5         0.2\n  ⋮  │   ⋮         ⋮           ⋮            ⋮           ⋮\n 144 │   144          6.8         3.2          5.9         2.3\n 145 │   145          6.7         3.3          5.7         2.5\n 146 │   146          6.7         3.0          5.2         2.3\n 147 │   147          6.3         2.5          5.0         1.9\n 148 │   148          6.5         3.0          5.2         2.0\n 149 │   149          6.2         3.4          5.4         2.3\n 150 │   150          5.9         3.0          5.1         1.8\n                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"If the remaining columns are unique, you can skip the id variable and use:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> unstack(longdf, :variable, :value)\n150×6 DataFrame\n Row │ Species         id     SepalLength  SepalWidth  PetalLength  PetalWidth ⋯\n     │ String15        Int64  Float64?     Float64?    Float64?     Float64?   
⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa         1          5.1         3.5          1.4         0.2 ⋯\n   2 │ Iris-setosa         2          4.9         3.0          1.4         0.2\n   3 │ Iris-setosa         3          4.7         3.2          1.3         0.2\n   4 │ Iris-setosa         4          4.6         3.1          1.5         0.2\n   5 │ Iris-setosa         5          5.0         3.6          1.4         0.2 ⋯\n   6 │ Iris-setosa         6          5.4         3.9          1.7         0.4\n   7 │ Iris-setosa         7          4.6         3.4          1.4         0.3\n   8 │ Iris-setosa         8          5.0         3.4          1.5         0.2\n  ⋮  │       ⋮           ⋮         ⋮           ⋮            ⋮           ⋮      ⋱\n 144 │ Iris-virginica    144          6.8         3.2          5.9         2.3 ⋯\n 145 │ Iris-virginica    145          6.7         3.3          5.7         2.5\n 146 │ Iris-virginica    146          6.7         3.0          5.2         2.3\n 147 │ Iris-virginica    147          6.3         2.5          5.0         1.9\n 148 │ Iris-virginica    148          6.5         3.0          5.2         2.0 ⋯\n 149 │ Iris-virginica    149          6.2         3.4          5.4         2.3\n 150 │ Iris-virginica    150          5.9         3.0          5.1         1.8\n                                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"You can even skip passing the :variable and :value values as positional arguments, as they will be used by default, and write:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> unstack(longdf)\n150×6 DataFrame\n Row │ Species         id     SepalLength  SepalWidth  PetalLength  PetalWidth ⋯\n     │ String15        Int64  Float64?     Float64?    Float64?     Float64? 
  ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa         1          5.1         3.5          1.4         0.2 ⋯\n   2 │ Iris-setosa         2          4.9         3.0          1.4         0.2\n   3 │ Iris-setosa         3          4.7         3.2          1.3         0.2\n   4 │ Iris-setosa         4          4.6         3.1          1.5         0.2\n   5 │ Iris-setosa         5          5.0         3.6          1.4         0.2 ⋯\n   6 │ Iris-setosa         6          5.4         3.9          1.7         0.4\n   7 │ Iris-setosa         7          4.6         3.4          1.4         0.3\n   8 │ Iris-setosa         8          5.0         3.4          1.5         0.2\n  ⋮  │       ⋮           ⋮         ⋮           ⋮            ⋮           ⋮      ⋱\n 144 │ Iris-virginica    144          6.8         3.2          5.9         2.3 ⋯\n 145 │ Iris-virginica    145          6.7         3.3          5.7         2.5\n 146 │ Iris-virginica    146          6.7         3.0          5.2         2.3\n 147 │ Iris-virginica    147          6.3         2.5          5.0         1.9\n 148 │ Iris-virginica    148          6.5         3.0          5.2         2.0 ⋯\n 149 │ Iris-virginica    149          6.2         3.4          5.4         2.3\n 150 │ Iris-virginica    150          5.9         3.0          5.1         1.8\n                                                               135 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Passing view=true to stack returns a data frame whose columns are views into the original wide data frame. 
Here is an example:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> stack(iris, view=true)\n600×4 DataFrame\n Row │ Species         id     variable     value\n     │ String15        Int64  String       Float64\n─────┼─────────────────────────────────────────────\n   1 │ Iris-setosa         1  SepalLength      5.1\n   2 │ Iris-setosa         2  SepalLength      4.9\n   3 │ Iris-setosa         3  SepalLength      4.7\n   4 │ Iris-setosa         4  SepalLength      4.6\n   5 │ Iris-setosa         5  SepalLength      5.0\n   6 │ Iris-setosa         6  SepalLength      5.4\n   7 │ Iris-setosa         7  SepalLength      4.6\n   8 │ Iris-setosa         8  SepalLength      5.0\n  ⋮  │       ⋮           ⋮         ⋮          ⋮\n 594 │ Iris-virginica    144  PetalWidth       2.3\n 595 │ Iris-virginica    145  PetalWidth       2.5\n 596 │ Iris-virginica    146  PetalWidth       2.3\n 597 │ Iris-virginica    147  PetalWidth       1.9\n 598 │ Iris-virginica    148  PetalWidth       2.0\n 599 │ Iris-virginica    149  PetalWidth       2.3\n 600 │ Iris-virginica    150  PetalWidth       1.8\n                                   585 rows omitted","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"This saves memory. 
To create the view, several AbstractVectors are defined:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":":variable column – EachRepeatedVector This repeats the variables N times where N is the number of rows of the original AbstractDataFrame.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":":value column – StackedVector This provides a view of the original columns stacked together.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Id columns – RepeatedVector This repeats the original columns N times where N is the number of columns stacked.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"To do aggregation, use the split-apply-combine functions in combination with unstack or use the combine keyword argument in unstack. Here is an example:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> using Statistics\n\njulia> d = stack(iris, Not(:Species))\n750×3 DataFrame\n Row │ Species         variable     value\n     │ String15        String       Float64\n─────┼──────────────────────────────────────\n   1 │ Iris-setosa     SepalLength      5.1\n   2 │ Iris-setosa     SepalLength      4.9\n   3 │ Iris-setosa     SepalLength      4.7\n   4 │ Iris-setosa     SepalLength      4.6\n   5 │ Iris-setosa     SepalLength      5.0\n   6 │ Iris-setosa     SepalLength      5.4\n   7 │ Iris-setosa     SepalLength      4.6\n   8 │ Iris-setosa     SepalLength      5.0\n  ⋮  │       ⋮              ⋮          ⋮\n 744 │ Iris-virginica  id             144.0\n 745 │ Iris-virginica  id             145.0\n 746 │ Iris-virginica  id             146.0\n 747 │ Iris-virginica  id             147.0\n 748 │ Iris-virginica  id             148.0\n 749 │ Iris-virginica  id             149.0\n 750 │ 
Iris-virginica  id             150.0\n                            735 rows omitted\n\njulia> agg = combine(groupby(d, [:variable, :Species]), :value => mean => :vmean)\n15×3 DataFrame\n Row │ variable     Species          vmean\n     │ String       String15         Float64\n─────┼───────────────────────────────────────\n   1 │ SepalLength  Iris-setosa        5.006\n   2 │ SepalLength  Iris-versicolor    5.936\n   3 │ SepalLength  Iris-virginica     6.588\n   4 │ SepalWidth   Iris-setosa        3.418\n   5 │ SepalWidth   Iris-versicolor    2.77\n   6 │ SepalWidth   Iris-virginica     2.974\n   7 │ PetalLength  Iris-setosa        1.464\n   8 │ PetalLength  Iris-versicolor    4.26\n   9 │ PetalLength  Iris-virginica     5.552\n  10 │ PetalWidth   Iris-setosa        0.244\n  11 │ PetalWidth   Iris-versicolor    1.326\n  12 │ PetalWidth   Iris-virginica     2.026\n  13 │ id           Iris-setosa       25.5\n  14 │ id           Iris-versicolor   75.5\n  15 │ id           Iris-virginica   125.5\n\njulia> unstack(agg, :variable, :Species, :vmean)\n5×4 DataFrame\n Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica\n     │ String       Float64?     Float64?         Float64?\n─────┼───────────────────────────────────────────────────────────\n   1 │ SepalLength        5.006            5.936           6.588\n   2 │ SepalWidth         3.418            2.77            2.974\n   3 │ PetalLength        1.464            4.26            5.552\n   4 │ PetalWidth         0.244            1.326           2.026\n   5 │ id                25.5             75.5           125.5\n\njulia> unstack(d, :variable, :Species, :value, combine=mean)\n5×4 DataFrame\n Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica\n     │ String       Float64?     Float64?         
Float64?\n─────┼───────────────────────────────────────────────────────────\n   1 │ SepalLength        5.006            5.936           6.588\n   2 │ SepalWidth         3.418            2.77            2.974\n   3 │ PetalLength        1.464            4.26            5.552\n   4 │ PetalWidth         0.244            1.326           2.026\n   5 │ id                25.5             75.5           125.5","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"To turn an AbstractDataFrame on its side, use permutedims.","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> df1 = DataFrame(a=[\"x\", \"y\"], b=[1.0, 2.0], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b        c      d\n     │ String  Float64  Int64  Bool\n─────┼───────────────────────────────\n   1 │ x           1.0      3   true\n   2 │ y           2.0      4  false\n\njulia> permutedims(df1, 1)\n3×3 DataFrame\n Row │ a       x        y\n     │ String  Float64  Float64\n─────┼──────────────────────────\n   1 │ b           1.0      2.0\n   2 │ c           3.0      4.0\n   3 │ d           1.0      0.0","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"Note that the column indexed by src_namescol in the original df becomes the column names in the permuted result, and the column names of the original become a new column. Typically, this would be used on columns with homogeneous element types, since the element types of the other columns are the result of promote_type on all the permuted columns. Note also that, by default, the new column created from the column names of the original df has the same name as src_namescol. 
An optional positional argument dest_namescol can alter this:","category":"page"},{"location":"man/reshaping_and_pivoting/","page":"Reshaping","title":"Reshaping","text":"julia> df2 = DataFrame(a=[\"x\", \"y\"], b=[1, \"two\"], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b    c      d\n     │ String  Any  Int64  Bool\n─────┼───────────────────────────\n   1 │ x       1        3   true\n   2 │ y       two      4  false\n\njulia> permutedims(df2, 1, \"different_name\")\n3×3 DataFrame\n Row │ different_name  x     y\n     │ String          Any   Any\n─────┼─────────────────────────────\n   1 │ b               1     two\n   2 │ c               3     4\n   3 │ d               true  false","category":"page"},{"location":"man/categorical/#man-categorical","page":"Categorical Data","title":"Categorical Data","text":"","category":"section"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"Often, we have to deal with columns in a data frame that take on a small number of levels:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> v = [\"Group A\", \"Group A\", \"Group A\", \"Group B\", \"Group B\", \"Group B\"]\n6-element Vector{String}:\n \"Group A\"\n \"Group A\"\n \"Group A\"\n \"Group B\"\n \"Group B\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The naive encoding used in a Vector represents every entry of this vector as a full string. In contrast, we can represent the data more efficiently by replacing the strings with indices into a small pool of levels. There are two benefits of doing this. The first is that such vectors will tend to use less memory. 
The second is that they can be efficiently grouped using the groupby function.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"There are two common types that allow performing level pooling:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"PooledVector from PooledArrays.jl;\nCategoricalVector from CategoricalArrays.jl.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The difference between PooledVector and CategoricalVector is the following:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"PooledVector is intended for cases where data compression is the only objective;\nCategoricalVector is designed to additionally provide full support for working with categorical variables, both with unordered (nominal variables) and ordered categories (ordinal variables) at the expense of allowing only AbstractString, AbstractChar, or Number element types (optionally in a union with Missing).","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"CategoricalVector is useful in particular when unique values in the array (levels) should respect a meaningful ordering, like when printing tables, drawing plots or fitting regression models. CategoricalArrays.jl provides functions to set and retrieve this order and compare values according to it. On the contrary, the PooledVector type is essentially a drop-in replacement for Vector with almost no user-visible differences except for lower memory use and higher performance. ","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"Below we show selected examples of working with CategoricalArrays.jl. 
See the CategoricalArrays.jl package documentation for more information regarding categorical arrays. Also note that in this section we discuss only vectors because we are considering a data frame context. However, in general both packages allow working with arrays of any dimensionality.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"In order to follow the examples below you need to install the CategoricalArrays.jl package first.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> using CategoricalArrays\n\njulia> cv = categorical(v)\n6-element CategoricalArray{String,1,UInt32}:\n \"Group A\"\n \"Group A\"\n \"Group A\"\n \"Group B\"\n \"Group B\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"CategoricalVectors support missing values.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv = categorical([\"Group A\", missing, \"Group A\",\n                         \"Group B\", \"Group B\", missing])\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"In addition to representing repeated data efficiently, the CategoricalArray type allows us to determine efficiently the allowed levels of the variable at any time using the levels function (note that levels may or may not be actually used in the data):","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> levels(cv)\n2-element Vector{String}:\n \"Group A\"\n \"Group B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The 
levels! function also allows changing the order of appearance of the levels, which can be useful for display purposes or when working with ordered variables.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> levels!(cv, [\"Group B\", \"Group A\"])\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing\n\njulia> levels(cv)\n2-element Vector{String}:\n \"Group B\"\n \"Group A\"\n\njulia> sort(cv)\n6-element CategoricalArray{Union{Missing, String},1,UInt32}:\n \"Group B\"\n \"Group B\"\n \"Group A\"\n \"Group A\"\n missing\n missing","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"By default, a CategoricalVector is able to represent 2^32 different levels. You can use less memory by calling the compress function:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv = compress(cv)\n6-element CategoricalArray{Union{Missing, String},1,UInt8}:\n \"Group A\"\n missing\n \"Group A\"\n \"Group B\"\n \"Group B\"\n missing\n","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"The categorical function additionally accepts a keyword argument compress which when set to true is equivalent to calling compress on the new vector:","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv1 = categorical([\"A\", \"B\"], compress=true)\n2-element CategoricalArray{String,1,UInt8}:\n \"A\"\n \"B\"","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"If the ordered keyword argument is set to true, the resulting CategoricalVector will be ordered, which means that its levels can be tested for order (rather than throwing an 
error):","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> cv2 = categorical([\"A\", \"B\"], ordered=true)\n2-element CategoricalArray{String,1,UInt32}:\n \"A\"\n \"B\"\n\njulia> cv1[1] < cv1[2]\nERROR: ArgumentError: Unordered CategoricalValue objects cannot be tested for order using <. Use isless instead, or call the ordered! function on the parent array to change this\n\njulia> cv2[1] < cv2[2]\ntrue","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"You can check if a CategoricalVector is ordered using the isordered function and change between ordered and unordered using the ordered! function.","category":"page"},{"location":"man/categorical/","page":"Categorical Data","title":"Categorical Data","text":"julia> isordered(cv1)\nfalse\n\njulia> ordered!(cv1, true)\n2-element CategoricalArray{String,1,UInt8}:\n \"A\"\n \"B\"\n\njulia> isordered(cv1)\ntrue\n\njulia> cv1[1] < cv1[2]\ntrue","category":"page"},{"location":"man/sorting/#Sorting","page":"Sorting","title":"Sorting","text":"","category":"section"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Sorting is a fundamental component of data analysis. Basic sorting is trivial: just calling sort! 
will sort all columns, in place:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> using DataFrames, CSV\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> sort!(iris)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.3         3.0 
         1.1         0.1  Iris-setosa\n   2 │         4.4         2.9          1.4         0.2  Iris-setosa\n   3 │         4.4         3.0          1.3         0.2  Iris-setosa\n   4 │         4.4         3.2          1.3         0.2  Iris-setosa\n   5 │         4.5         2.3          1.3         0.3  Iris-setosa\n   6 │         4.6         3.1          1.5         0.2  Iris-setosa\n   7 │         4.6         3.2          1.4         0.2  Iris-setosa\n   8 │         4.6         3.4          1.4         0.3  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         7.4         2.8          6.1         1.9  Iris-virginica\n 145 │         7.6         3.0          6.6         2.1  Iris-virginica\n 146 │         7.7         2.6          6.9         2.3  Iris-virginica\n 147 │         7.7         2.8          6.7         2.0  Iris-virginica\n 148 │         7.7         3.0          6.1         2.3  Iris-virginica\n 149 │         7.7         3.8          6.7         2.2  Iris-virginica\n 150 │         7.9         3.8          6.4         2.0  Iris-virginica\n                                                        135 rows omitted","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Observe that all columns are taken into account lexicographically when sorting the DataFrame.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"You can also call the sort function to create a new DataFrame with freshly allocated sorted vectors.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"In sorting DataFrames, you may want to sort different columns with different options. 
Here are some examples showing most of the possible options:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> sort!(iris, rev = true)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         7.9         3.8          6.4         2.0  Iris-virginica\n   2 │         7.7         3.8          6.7         2.2  Iris-virginica\n   3 │         7.7         3.0          6.1         2.3  Iris-virginica\n   4 │         7.7         2.8          6.7         2.0  Iris-virginica\n   5 │         7.7         2.6          6.9         2.3  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         7.4         2.8          6.1         1.9  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         4.6         3.2          1.4         0.2  Iris-setosa\n 145 │         4.6         3.1          1.5         0.2  Iris-setosa\n 146 │         4.5         2.3          1.3         0.3  Iris-setosa\n 147 │         4.4         3.2          1.3         0.2  Iris-setosa\n 148 │         4.4         3.0          1.3         0.2  Iris-setosa\n 149 │         4.4         2.9          1.4         0.2  Iris-setosa\n 150 │         4.3         3.0          1.1         0.1  Iris-setosa\n                                                        135 rows omitted\n\njulia> sort!(iris, [:Species, :SepalWidth])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.5         2.3          1.3         0.3  Iris-setosa\n   2 │         4.4         2.9        
  1.4         0.2  Iris-setosa\n   3 │         5.0         3.0          1.6         0.2  Iris-setosa\n   4 │         4.9         3.0          1.4         0.2  Iris-setosa\n   5 │         4.8         3.0          1.4         0.3  Iris-setosa\n   6 │         4.8         3.0          1.4         0.1  Iris-setosa\n   7 │         4.4         3.0          1.3         0.2  Iris-setosa\n   8 │         4.3         3.0          1.1         0.1  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.7         3.3          5.7         2.1  Iris-virginica\n 145 │         6.3         3.3          6.0         2.5  Iris-virginica\n 146 │         6.3         3.4          5.6         2.4  Iris-virginica\n 147 │         6.2         3.4          5.4         2.3  Iris-virginica\n 148 │         7.2         3.6          6.1         2.5  Iris-virginica\n 149 │         7.9         3.8          6.4         2.0  Iris-virginica\n 150 │         7.7         3.8          6.7         2.2  Iris-virginica\n                                                        135 rows omitted\n\njulia> sort!(iris, [order(:Species, by=length), order(:SepalLength, rev=true)])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────────\n   1 │         5.8         4.0          1.2         0.2  Iris-setosa\n   2 │         5.7         3.8          1.7         0.3  Iris-setosa\n   3 │         5.7         4.4          1.5         0.4  Iris-setosa\n   4 │         5.5         3.5          1.3         0.2  Iris-setosa\n   5 │         5.5         4.2          1.4         0.2  Iris-setosa\n   6 │         5.4         3.4          1.7         0.2  Iris-setosa\n   7 │         5.4         3.4          1.5         0.4  Iris-setosa\n   8 │         5.4         3.7          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮    
        ⋮           ⋮              ⋮\n 144 │         5.5         2.6          4.4         1.2  Iris-versicolor\n 145 │         5.4         3.0          4.5         1.5  Iris-versicolor\n 146 │         5.2         2.7          3.9         1.4  Iris-versicolor\n 147 │         5.1         2.5          3.0         1.1  Iris-versicolor\n 148 │         5.0         2.0          3.5         1.0  Iris-versicolor\n 149 │         5.0         2.3          3.3         1.0  Iris-versicolor\n 150 │         4.9         2.4          3.3         1.0  Iris-versicolor\n                                                         135 rows omitted","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"Keywords used above include rev (to sort in reverse), and by (to apply a function to values before comparing them). Each keyword can either be a single value, a vector with values corresponding to individual columns, or a selector: :, Cols, All, Not, Between, or Regex.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"As an alternative to using a vector values you can use order to specify an ordering for a particular column within a set of columns.","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"The following two examples show two ways to sort the iris dataset with the same result: :Species will be ordered in reverse order, and within groups, rows will be sorted by increasing :PetalLength:","category":"page"},{"location":"man/sorting/","page":"Sorting","title":"Sorting","text":"julia> sort!(iris, [:Species, :PetalLength], rev=[true, false])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.9         2.5          4.5         1.7  Iris-virginica\n   2 │         6.2         2.8          4.8    
     1.8  Iris-virginica\n   3 │         6.0         3.0          4.8         1.8  Iris-virginica\n   4 │         6.3         2.7          4.9         1.8  Iris-virginica\n   5 │         6.1         3.0          4.9         1.8  Iris-virginica\n   6 │         5.6         2.8          4.9         2.0  Iris-virginica\n   7 │         6.3         2.5          5.0         1.9  Iris-virginica\n   8 │         6.0         2.2          5.0         1.5  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         4.7         3.2          1.6         0.2  Iris-setosa\n 145 │         5.7         3.8          1.7         0.3  Iris-setosa\n 146 │         5.4         3.4          1.7         0.2  Iris-setosa\n 147 │         5.4         3.9          1.7         0.4  Iris-setosa\n 148 │         5.1         3.3          1.7         0.5  Iris-setosa\n 149 │         5.1         3.8          1.9         0.4  Iris-setosa\n 150 │         4.8         3.4          1.9         0.2  Iris-setosa\n                                                        135 rows omitted\n\njulia> sort!(iris, [order(:Species, rev=true), :PetalLength])\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         4.9         2.5          4.5         1.7  Iris-virginica\n   2 │         6.2         2.8          4.8         1.8  Iris-virginica\n   3 │         6.0         3.0          4.8         1.8  Iris-virginica\n   4 │         6.3         2.7          4.9         1.8  Iris-virginica\n   5 │         6.1         3.0          4.9         1.8  Iris-virginica\n   6 │         5.6         2.8          4.9         2.0  Iris-virginica\n   7 │         6.3         2.5          5.0         1.9  Iris-virginica\n   8 │         6.0         2.2          5.0         1.5  Iris-virginica\n  ⋮  │      ⋮           ⋮        
    ⋮           ⋮             ⋮\n 144 │         4.7         3.2          1.6         0.2  Iris-setosa\n 145 │         5.7         3.8          1.7         0.3  Iris-setosa\n 146 │         5.4         3.4          1.7         0.2  Iris-setosa\n 147 │         5.4         3.9          1.7         0.4  Iris-setosa\n 148 │         5.1         3.3          1.7         0.5  Iris-setosa\n 149 │         5.1         3.8          1.9         0.4  Iris-setosa\n 150 │         4.8         3.4          1.9         0.2  Iris-setosa\n                                                        135 rows omitted","category":"page"},{"location":"man/working_with_dataframes/#Working-with-Data-Frames","page":"Working with DataFrames","title":"Working with Data Frames","text":"","category":"section"},{"location":"man/working_with_dataframes/#Examining-the-Data","page":"Working with DataFrames","title":"Examining the Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The default printing of DataFrame objects only includes a sample of rows and columns that fits on screen:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using DataFrames\n\njulia> df = DataFrame(A=1:2:1000, B=repeat(1:10, inner=50), C=1:500)\n500×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n   7 │    13      1      7\n   8 │    15      1      8\n  ⋮  │   ⋮      ⋮      ⋮\n 494 │   987     10    494\n 495 │   989     10    495\n 496 │   991     10    496\n 497 │   993     10    497\n 498 │   995     10    498\n 499 │   997     10    499\n 500 │   999     10    500\n           485 rows 
omitted","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Printing options can be adjusted by calling the show function manually: show(df, allrows=true) prints all rows even if they do not fit on screen and show(df, allcols=true) does the same for columns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The first and last functions can be used to look at the first and last rows of a data frame (respectively):","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> first(df, 6)\n6×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n\njulia> last(df, 6)\n6×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   989     10    495\n   2 │   991     10    496\n   3 │   993     10    497\n   4 │   995     10    498\n   5 │   997     10    499\n   6 │   999     10    500","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Also notice that when DataFrame is printed to the console or rendered in HTML (e.g. in Jupyter Notebook) you get an information about type of elements held in its columns. For example in this case:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using CategoricalArrays\n\njulia> DataFrame(a=1:2, b=[1.0, missing],\n                 c=categorical('a':'b'), d=[1//2, missing])\n2×4 DataFrame\n Row │ a      b          c     d\n     │ Int64  Float64?   
Cat…  Rational…?\n─────┼────────────────────────────────────\n   1 │     1        1.0  a           1//2\n   2 │     2  missing    b        missing\n","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"we can observe that:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"the first column :a can hold elements of type Int64;\nthe second column :b can hold Float64 or Missing, which is indicated by ? printed after the name of type;\nthe third column :c can hold categorical data; here we notice …, which indicates that the actual name of the type was long and got truncated;\nthe type information in fourth column :d presents a situation where the name is both truncated and the type allows Missing.","category":"page"},{"location":"man/working_with_dataframes/#Taking-a-Subset","page":"Working with DataFrames","title":"Taking a Subset","text":"","category":"section"},{"location":"man/working_with_dataframes/#Indexing-syntax","page":"Working with DataFrames","title":"Indexing syntax","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Specific subsets of a data frame can be extracted using the indexing syntax, similar to matrices. In the Indexing section of the manual you can find all the details about the available options. 
Here we highlight the basic options.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The colon : indicates that all items (rows or columns depending on its position) should be retained:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[1:3, :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n\njulia> df[[1, 5, 10], :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     9      1      5\n   3 │    19      1     10\n\njulia> df[:, [:A, :B]]\n500×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     3      1\n   3 │     5      1\n   4 │     7      1\n   5 │     9      1\n   6 │    11      1\n   7 │    13      1\n   8 │    15      1\n  ⋮  │   ⋮      ⋮\n 494 │   987     10\n 495 │   989     10\n 496 │   991     10\n 497 │   993     10\n 498 │   995     10\n 499 │   997     10\n 500 │   999     10\n    485 rows omitted\n\njulia> df[1:3, [:B, :A]]\n3×2 DataFrame\n Row │ B      A\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     1      3\n   3 │     1      5\n\njulia> df[[3, 1], [:C]]\n2×1 DataFrame\n Row │ C\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     1","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that df[!, [:A]] and df[:, [:A]] return a DataFrame object, while df[!, :A] and df[:, :A] return a vector:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[!, [:A]]\n500×1 DataFrame\n Row │ A\n     │ 
Int64\n─────┼───────\n   1 │     1\n   2 │     3\n   3 │     5\n   4 │     7\n   5 │     9\n   6 │    11\n   7 │    13\n   8 │    15\n  ⋮  │   ⋮\n 494 │   987\n 495 │   989\n 496 │   991\n 497 │   993\n 498 │   995\n 499 │   997\n 500 │   999\n485 rows omitted\n\njulia> df[!, [:A]] == df[:, [:A]]\ntrue\n\njulia> df[!, :A]\n500-element Vector{Int64}:\n   1\n   3\n   5\n   7\n   9\n  11\n  13\n  15\n  17\n  19\n   ⋮\n 983\n 985\n 987\n 989\n 991\n 993\n 995\n 997\n 999\n\njulia> df[!, :A] == df[:, :A]\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the first case, [:A] is a vector, indicating that the resulting object should be a DataFrame. On the other hand, :A is a single symbol, indicating that a single column vector should be extracted. Note that in the first case a vector is required to be passed (not just any iterable), so e.g. df[:, (:x1, :x2)] is not allowed, but df[:, [:x1, :x2]] is valid.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is also possible to use a regular expression as a selector of columns matching it:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=1, x2=2, y=3)\n1×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> df[!, r\"x\"]\n1×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"A Not selector (from the InvertedIndices package) can be used to select all columns excluding a specific subset:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with 
DataFrames","title":"Working with DataFrames","text":"julia> df[!, Not(:x1)]\n1×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Finally, you can use Not, Between, Cols and All selectors in more complex column selection scenarios (note that Cols() selects no columns while All() selects all columns therefore Cols is a preferred selector if you write generic code). Here are examples of using each of these selectors:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(r=1, x1=2, x2=3, y=4)\n1×4 DataFrame\n Row │ r      x1     x2     y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      3      4\n\njulia> df[:, Not(:r)] # drop :r column\n1×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      3      4\n\njulia> df[:, Between(:r, :x2)] # keep columns between :r and :x2\n1×3 DataFrame\n Row │ r      x1     x2\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> df[:, All()] # keep all columns\n1×4 DataFrame\n Row │ r      x1     x2     y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      3      4\n\njulia> df[:, Cols(x -> startswith(x, \"x\"))] # keep columns whose name starts with \"x\"\n1×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The following examples show a more complex use of the Cols selector, which moves all columns whose names match r\"x\" regular expression respectively to the front and to the end of the 
data frame:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[:, Cols(r\"x\", :)]\n1×4 DataFrame\n Row │ x1     x2     r      y\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     2      3      1      4\n\njulia> df[:, Cols(Not(r\"x\"), :)]\n1×4 DataFrame\n Row │ r      y      x1     x2\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      2      3","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The indexing syntax can also be used to select rows based on conditions on variables:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:2:1000, B=repeat(1:10, inner=50), C=1:500)\n500×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     3      1      2\n   3 │     5      1      3\n   4 │     7      1      4\n   5 │     9      1      5\n   6 │    11      1      6\n   7 │    13      1      7\n   8 │    15      1      8\n  ⋮  │   ⋮      ⋮      ⋮\n 494 │   987     10    494\n 495 │   989     10    495\n 496 │   991     10    496\n 497 │   993     10    497\n 498 │   995     10    498\n 499 │   997     10    499\n 500 │   999     10    500\n           485 rows omitted\n\njulia> df[df.A .> 500, :]\n250×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   501      6    251\n   2 │   503      6    252\n   3 │   505      6    253\n   4 │   507      6    254\n   5 │   509      6    255\n   6 │   511      6    256\n   7 │   513      6    257\n   8 │   515      6    258\n  ⋮  │   ⋮      ⋮      ⋮\n 244 │   987     10    494\n 245 │   989     10    495\n 246 │   991     10    496\n 247 │   
993     10    497\n 248 │   995     10    498\n 249 │   997     10    499\n 250 │   999     10    500\n           235 rows omitted\n\njulia> df[(df.A .> 500) .& (300 .< df.C .< 400), :]\n99×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │   601      7    301\n   2 │   603      7    302\n   3 │   605      7    303\n   4 │   607      7    304\n   5 │   609      7    305\n   6 │   611      7    306\n   7 │   613      7    307\n   8 │   615      7    308\n  ⋮  │   ⋮      ⋮      ⋮\n  93 │   785      8    393\n  94 │   787      8    394\n  95 │   789      8    395\n  96 │   791      8    396\n  97 │   793      8    397\n  98 │   795      8    398\n  99 │   797      8    399\n            84 rows omitted","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Where a specific subset of values needs to be matched, the in() function can be applied:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df[in.(df.A, Ref([1, 5, 601])), :]\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     5      1      3\n   3 │   601      7    301","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The Ref wrapper to [1, 5, 601] is needed to protect the vector against being broadcasted over (the vector will be treated as a scalar when wrapped in Ref). 
You could write this operation using a comprehension like this (note that it would be slower so it is not recommended): [a in [1, 5, 601] for a in df.A].","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Equivalently, the in function can be called with a single argument to create a function object that tests whether each value belongs to the subset (partial application of in): df[in([1, 5, 601]).(df.A), :].","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"note: Note\nAs with matrices, subsetting from a data frame will usually return a copy of columns, not a view or direct reference.The only indexing situations where data frames will not return a copy are:when a ! is placed in the first indexing position (df[!, :A], or df[!, [:A, :B]]),\nwhen using . (getpropery) notation (df.A),\nwhen a single row is selected using an integer (df[1, [:A, :B]])\nwhen view or @view is used (e.g. @view df[1:3, :A]).More details on copies, views, and references can be found in the getindex and view section.","category":"page"},{"location":"man/working_with_dataframes/#Subsetting-functions","page":"Working with DataFrames","title":"Subsetting functions","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"An alternative approach to row subsetting in a data frame is to use the subset function, or the subset! function, which is its in-place variant.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"These functions take a data frame as their first argument. The following positional arguments (one or more) are filtering condition specifications that must be jointly met. 
Each condition should be passed as a Pair consisting of source column(s) and a function specifying the filtering condition taking this or these column(s) as arguments:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> subset(df, :A => a -> a .< 10, :C => c -> isodd.(c))\n3×3 DataFrame\n Row │ A      B      C\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      1\n   2 │     5      1      3\n   3 │     9      1      5","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is a frequent situation that missing values might be present in the filtering columns, which could then lead the filtering condition to return missing instead of the expected true or false. In order to handle this situation one can either use the coalesce function or pass the skipmissing=true keyword argument to subset. Here is an example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x=[1, 2, missing, 4])\n4×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │       2\n   3 │ missing\n   4 │       4\n\njulia> subset(df, :x => x -> coalesce.(iseven.(x), false))\n2×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼────────\n   1 │      2\n   2 │      4\n\njulia> subset(df, :x => x -> iseven.(x), skipmissing=true)\n2×1 DataFrame\n Row │ x\n     │ Int64?\n─────┼────────\n   1 │      2\n   2 │      4","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The subset function has been designed in a way that is consistent with how column transformations are specified in functions like combine, select, and transform. 
Examples of column transformations accepted by these functions are provided in the following section.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Additionally DataFrames.jl extends the filter and filter! functions provided in Julia Base, which also allow subsetting a data frame. These methods are defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset and subset! functions instead, as they are consistent with other DataFrames.jl functions (as opposed to filter and filter!).","category":"page"},{"location":"man/working_with_dataframes/#Selecting-and-transforming-columns","page":"Working with DataFrames","title":"Selecting and transforming columns","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You can also use the select/select! and transform/transform! 
functions to select, rename and transform columns in a data frame.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The select function creates a new data frame:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=[1, 2], x2=[3, 4], y=[5, 6])\n2×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      3      5\n   2 │     2      4      6\n\njulia> select(df, Not(:x1)) # drop column :x1 in a new data frame\n2×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      5\n   2 │     4      6\n\njulia> select(df, r\"x\") # select columns containing 'x' character\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> select(df, :x1 => :a1, :x2 => :a2) # rename columns\n2×2 DataFrame\n Row │ a1     a2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> select(df, :x1, :x2 => (x -> x .- minimum(x)) => :x2) # transform columns\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      1\n\njulia> select(df, :x2, :x2 => ByRow(sqrt)) # transform columns by row\n2×2 DataFrame\n Row │ x2     x2_sqrt\n     │ Int64  Float64\n─────┼────────────────\n   1 │     3  1.73205\n   2 │     4  2.0\n\njulia> select(df, :x1, :x2, [:x1, :x2] => ((x1, x2) -> x1 ./ x2) => :z) # transform multiple columns\n2×3 DataFrame\n Row │ x1     x2     z\n     │ Int64  Int64  Float64\n─────┼────────────────────────\n   1 │     1      3  0.333333\n   2 │     2      4  0.5\n\njulia> select(df, :x1, :x2, [:x1, :x2] => ByRow((x1, x2) -> x1 / x2) => :z)  # transform multiple columns by row\n2×3 DataFrame\n Row │ x1     x2     z\n     │ Int64  Int64  
Float64\n─────┼────────────────────────\n   1 │     1      3  0.333333\n   2 │     2      4  0.5\n\njulia> select(df, AsTable(:) => ByRow(extrema) => [:lo, :hi]) # return multiple columns\n2×2 DataFrame\n Row │ lo     hi\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      5\n   2 │     2      6","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is important to note that select always returns a data frame, even if a single column is selected (as opposed to indexing syntax).","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> select(df, :x1)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df[:, :x1]\n2-element Vector{Int64}:\n 1\n 2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"By default select copies columns of a passed source data frame. 
In order to avoid copying, pass copycols=false:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = select(df, :x1)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df2.x1 === df.x1\nfalse\n\njulia> df2 = select(df, :x1, copycols=false)\n2×1 DataFrame\n Row │ x1\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df2.x1 === df.x1\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"To perform the selection operation in-place use select!:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> select!(df, Not(:x1));\n\njulia> df\n2×2 DataFrame\n Row │ x2     y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      5\n   2 │     4      6","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"transform and transform! functions work identically to select and select!, with the only difference that they retain all columns that are present in the source data frame. 
Here are some more advanced examples.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"First we show how to generate a column that is a sum of all other columns in the data frame using the All() selector:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(x1=[1, 2], x2=[3, 4], y=[5, 6])\n2×3 DataFrame\n Row │ x1     x2     y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      3      5\n   2 │     2      4      6\n\njulia> transform(df, All() => +)\n2×4 DataFrame\n Row │ x1     x2     y      x1_x2_y_+\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────────\n   1 │     1      3      5          9\n   2 │     2      4      6         12","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Using the ByRow wrapper, we can easily compute for each row the name of column with the highest score:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Random\n\njulia> Random.seed!(1);\n\njulia> df = DataFrame(rand(10, 3), [:a, :b, :c])\n10×3 DataFrame\n Row │ a           b          c\n     │ Float64     Float64    Float64\n─────┼──────────────────────────────────\n   1 │ 0.236033    0.555751   0.0769509\n   2 │ 0.346517    0.437108   0.640396\n   3 │ 0.312707    0.424718   0.873544\n   4 │ 0.00790928  0.773223   0.278582\n   5 │ 0.488613    0.28119    0.751313\n   6 │ 0.210968    0.209472   0.644883\n   7 │ 0.951916    0.251379   0.0778264\n   8 │ 0.999905    0.0203749  0.848185\n   9 │ 0.251662    0.287702   0.0856352\n  10 │ 0.986666    0.859512   0.553206\n\njulia> transform(df, AsTable(:) => ByRow(argmax) => :prediction)\n10×4 DataFrame\n Row │ a           b   
       c          prediction\n     │ Float64     Float64    Float64    Symbol\n─────┼──────────────────────────────────────────────\n   1 │ 0.236033    0.555751   0.0769509  b\n   2 │ 0.346517    0.437108   0.640396   c\n   3 │ 0.312707    0.424718   0.873544   c\n   4 │ 0.00790928  0.773223   0.278582   b\n   5 │ 0.488613    0.28119    0.751313   c\n   6 │ 0.210968    0.209472   0.644883   c\n   7 │ 0.951916    0.251379   0.0778264  a\n   8 │ 0.999905    0.0203749  0.848185   a\n   9 │ 0.251662    0.287702   0.0856352  b\n  10 │ 0.986666    0.859512   0.553206   a","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the most complex example below we compute row-wise sum, number of elements, and mean, while ignoring missing values.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Statistics\n\njulia> df = DataFrame(x=[1, 2, missing], y=[1, missing, missing])\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64?\n─────┼──────────────────\n   1 │       1        1\n   2 │       2  missing\n   3 │ missing  missing\n\njulia> transform(df, AsTable(:) .=>\n                     ByRow.([sum∘skipmissing,\n                             x -> count(!ismissing, x),\n                             mean∘skipmissing]) .=>\n                     [:sum, :n, :mean])\n3×5 DataFrame\n Row │ x        y        sum    n      mean\n     │ Int64?   Int64?   
Int64  Int64  Float64\n─────┼─────────────────────────────────────────\n   1 │       1        1      2      2      1.0\n   2 │       2  missing      2      1      2.0\n   3 │ missing  missing      0      0    NaN","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"While the DataFrames.jl package provides basic data manipulation capabilities, users are encouraged to use querying frameworks for more convenient and powerful operations:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"the Query.jl package provides a LINQ-like interface to a large number of data sources\nthe DataFramesMeta.jl package provides interfaces similar to LINQ and dplyr\nthe DataFrameMacros.jl package provides macros for most standard functions from DataFrames.jl, with convenient syntax for the manipulation of multiple columns at once.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"See the Data manipulation frameworks section for more information.","category":"page"},{"location":"man/working_with_dataframes/#Summarizing-Data","page":"Working with DataFrames","title":"Summarizing Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The describe function returns a data frame summarizing the elementary statistics and information about each column:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=[\"M\", \"F\", \"F\", \"M\"])\n4×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M\n\njulia> describe(df)\n2×7 DataFrame\n Row │ 
variable  mean    min  median  max  nmissing  eltype\n     │ Symbol    Union…  Any  Union…  Any  Int64     DataType\n─────┼────────────────────────────────────────────────────────\n   1 │ A         2.5     1    2.5     4           0  Int64\n   2 │ B                 F            M           0  String","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"If you are interested in describing only a subset of columns, then the easiest way to do it is to pass a subset of an original data frame to describe like this:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> describe(df[!, [:A]])\n1×7 DataFrame\n Row │ variable  mean     min    median   max    nmissing  eltype\n     │ Symbol    Float64  Int64  Float64  Int64  Int64     DataType\n─────┼──────────────────────────────────────────────────────────────\n   1 │ A             2.5      1      2.5      4         0  Int64","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Of course, one can also compute descriptive statistics directly on individual columns:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using Statistics\n\njulia> mean(df.A)\n2.5","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"We can also apply a function to each column of a DataFrame using combine. 
For example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=4.0:-1.0:1.0)\n4×2 DataFrame\n Row │ A      B\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0\n   3 │     3      2.0\n   4 │     4      1.0\n\njulia> combine(df, All() .=> sum)\n1×2 DataFrame\n Row │ A_sum  B_sum\n     │ Int64  Float64\n─────┼────────────────\n   1 │    10     10.0\n\njulia> combine(df, All() .=> sum, All() .=> prod)\n1×4 DataFrame\n Row │ A_sum  B_sum    A_prod  B_prod\n     │ Int64  Float64  Int64   Float64\n─────┼─────────────────────────────────\n   1 │    10     10.0      24     24.0\n\njulia> combine(df, All() .=> [sum prod]) # the same using 2-dimensional broadcasting\n1×4 DataFrame\n Row │ A_sum  B_sum    A_prod  B_prod\n     │ Int64  Float64  Int64   Float64\n─────┼─────────────────────────────────\n   1 │    10     10.0      24     24.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"If you would prefer the result to have the same number of rows as the source data frame, use select instead of combine.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In the remainder of this section we will discuss more advanced topics related to the operation specification syntax, so you may decide to skip them if you want to focus on the most common usage patterns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"A DataFrame can store values of any type as its columns, for example below we show how one can store a Tuple:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = 
combine(df, All() .=> extrema)\n1×2 DataFrame\n Row │ A_extrema  B_extrema\n     │ Tuple…     Tuple…\n─────┼───────────────────────\n   1 │ (1, 4)     (1.0, 4.0)","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Later you might want to expand the tuples into separate columns storing the computed minima and maxima. This can be achieved by passing multiple columns for the output. Here is an example of how this can be done by writing the column names by-hand for a single input column:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, \"A_extrema\" => identity => [\"A_min\", \"A_max\"])\n1×2 DataFrame\n Row │ A_min  A_max\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You can extend it to handling all columns in df2 using broadcasting:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, All() .=> identity .=> [[\"A_min\", \"A_max\"], [\"B_min\", \"B_max\"]])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"This approach works, but can be improved. 
Instead of writing all the column names manually, we can use a function to specify target column names based on source column names:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df2, All() .=> identity .=> c -> first(c) .* [\"_min\", \"_max\"])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in this example we needed to pass identity explicitly since with All() => (c -> first(c) .* [\"_min\", \"_max\"]) the right-hand side part would be treated as a transformation and not as a rule for target column names generation.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"You might want to perform the transformation of the source data frame into the result we have just shown in one step. This can be achieved with the following expression:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df, All() .=> Ref∘extrema .=> c -> c .* [\"_min\", \"_max\"])\n1×4 DataFrame\n Row │ A_min  A_max  B_min    B_max\n     │ Int64  Int64  Float64  Float64\n─────┼────────────────────────────────\n   1 │     1      4      1.0      4.0","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in this case we needed to add a Ref call in the Ref∘extrema operation specification. 
Without Ref, combine iterates the contents of the value returned by the operation specification function, which in our case is a tuple of numbers, and tries to expand it assuming that each produced value represents one row, so one gets an error:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> combine(df, All() .=> extrema .=> [c -> c .* [\"_min\", \"_max\"]])\nERROR: ArgumentError: 'Tuple{Int64, Int64}' iterates 'Int64' values,\nwhich doesn't satisfy the Tables.jl `AbstractRow` interface","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that we used Ref as it is a container that is typically used in DataFrames.jl when one wants to store one row, however, in general it could be another iterator (e.g. a tuple).","category":"page"},{"location":"man/working_with_dataframes/#Handling-of-Columns-Stored-in-a-DataFrame","page":"Working with DataFrames","title":"Handling of Columns Stored in a DataFrame","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Functions that transform a DataFrame to produce a new DataFrame always perform a copy of the columns by default, for example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df = DataFrame(A=1:4, B=4.0:-1.0:1.0)\n4×2 DataFrame\n Row │ A      B\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0\n   3 │     3      2.0\n   4 │     4      1.0\n\njulia> df2 = copy(df);\n\njulia> df2.A === df.A\nfalse","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"On the other hand, in-place functions, whose names end with !, may mutate 
the column vectors of the DataFrame they take as an argument. For example:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> x = [3, 1, 2];\n\njulia> df = DataFrame(x=x)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     1\n   3 │     2\n\njulia> sort!(df)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> x\n3-element Vector{Int64}:\n 3\n 1\n 2\n\njulia> df.x[1] = 100\n100\n\njulia> df\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │   100\n   2 │     2\n   3 │     3\n\njulia> x\n3-element Vector{Int64}:\n 3\n 1\n 2","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that in the above example the original x vector is not mutated in the process, as the DataFrame(x=x) constructor makes a copy by default.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"In-place functions are safe to call, except when a view of the DataFrame (created via a view, @view or groupby) or when a DataFrame created with copycols=false are in use.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"It is possible to have a direct access to a column col of a DataFrame df using the syntaxes df.col, df[!, :col], via the eachcol function, by accessing a parent of a view of a column of a DataFrame, or simply by storing the reference to the column vector before the DataFrame was created with copycols=false.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> x = [3, 1, 2];\n\njulia> df = DataFrame(x=x)\n3×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n  
 1 │     3\n   2 │     1\n   3 │     2\n\njulia> df.x == x\ntrue\n\njulia> df[!, 1] !== x\ntrue\n\njulia> eachcol(df)[1] === df.x\ntrue","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Note that a column obtained from a DataFrame using one of these methods should not be mutated without caution.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"The exact rules of handling columns of a DataFrame are explained in The design of handling of columns of a DataFrame section of the manual.","category":"page"},{"location":"man/working_with_dataframes/#Replacing-Data","page":"Working with DataFrames","title":"Replacing Data","text":"","category":"section"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Several approaches can be used to replace some values with others in a data frame. Some apply the replacement to all values in a data frame, and others to individual columns or subset of columns.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that in-place replacement requires that the replacement value can be converted to the column's element type. In particular, this implies that replacing a value with missing requires a call to allowmissing! 
if the column did not allow for missing values.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Replacement operations affecting a single column can be performed using replace!:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> using DataFrames\n\njulia> df = DataFrame(a=[\"a\", \"None\", \"b\", \"None\"], b=1:4,\n                      c=[\"None\", \"j\", \"k\", \"h\"], d=[\"x\", \"y\", \"None\", \"z\"])\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ None        2  j       y\n   3 │ b           3  k       None\n   4 │ None        4  h       z\n\njulia> replace!(df.a, \"None\" => \"c\")\n4-element Vector{String}:\n \"a\"\n \"c\"\n \"b\"\n \"c\"\n\njulia> df\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ c           2  j       y\n   3 │ b           3  k       None\n   4 │ c           4  h       z","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"This is equivalent to df.a = replace(df.a, \"None\" => \"c\"), but operates in-place, without allocating a new column vector.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Replacement operations on multiple columns or on the whole data frame can be performed in-place using the broadcasting syntax:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"# replacement on a subset of columns [:c, :d]\njulia> df[:, [:c, :d]] .= ifelse.(df[!, [:c, :d]] .== 
\"None\", \"c\", df[!, [:c, :d]])\n4×2 SubDataFrame\n Row │ c       d\n     │ String  String\n─────┼────────────────\n   1 │ c       x\n   2 │ j       y\n   3 │ k       c\n   4 │ h       z\n\njulia> df\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  c       x\n   2 │ c           2  j       y\n   3 │ b           3  k       c\n   4 │ c           4  h       z\n\njulia> df .= ifelse.(df .== \"c\", \"None\", df) # replacement on entire data frame\n4×4 DataFrame\n Row │ a       b      c       d\n     │ String  Int64  String  String\n─────┼───────────────────────────────\n   1 │ a           1  None    x\n   2 │ None        2  j       y\n   3 │ b           3  k       None\n   4 │ None        4  h       z","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"Do note that in the above examples, changing .= to just = will allocate new column vectors instead of applying the operation in-place.","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"When replacing values with missing, if the columns do not already allow for missing values, one has to either avoid in-place operation and use = instead of .=, or call allowmissing! beforehand:","category":"page"},{"location":"man/working_with_dataframes/","page":"Working with DataFrames","title":"Working with DataFrames","text":"julia> df2 = ifelse.(df .== \"None\", missing, df) # do not operate in-place (`df = ` would also work)\n4×4 DataFrame\n Row │ a        b      c        d\n     │ String?  Int64  String?  
String?\n─────┼──────────────────────────────────\n   1 │ a            1  missing  x\n   2 │ missing      2  j        y\n   3 │ b            3  k        missing\n   4 │ missing      4  h        z\n\njulia> allowmissing!(df) # operate in-place after allowing for missing\n4×4 DataFrame\n Row │ a        b       c        d\n     │ String?  Int64?  String?  String?\n─────┼───────────────────────────────────\n   1 │ a             1  None     x\n   2 │ None          2  j        y\n   3 │ b             3  k        None\n   4 │ None          4  h        z\n\njulia> df .= ifelse.(df .== \"None\", missing, df)\n4×4 DataFrame\n Row │ a        b       c        d\n     │ String?  Int64?  String?  String?\n─────┼───────────────────────────────────\n   1 │ a             1  missing  x\n   2 │ missing       2  j        y\n   3 │ b             3  k        missing\n   4 │ missing       4  h        z","category":"page"},{"location":"man/comparisons/#Comparisons","page":"Comparison with Python/R/Stata","title":"Comparisons","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"This section compares DataFrames.jl with other data manipulation frameworks in Python, R, and Stata.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"A sample data set can be created using the following code:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"using DataFrames\nusing Statistics\n\ndf = DataFrame(grp=repeat(1:2, 3), x=6:-1:1, y=4:9, z=[3:7; missing], id='a':'f')\ndf2 = DataFrame(grp=[1, 3], w=[10, 11])","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"note: Note\nSome of the operations mutate the tables so every operation assumes that it 
is done on the original data frame.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that in the comparisons presented below predicates like x -> x >= 1 can be more compactly written as >=(1). The latter form has an additional benefit that it is compiled only once per Julia session (as opposed to x -> x >= 1 which defines a new anonymous function every time it is introduced).","category":"page"},{"location":"man/comparisons/#Comparison-with-the-Python-package-pandas","page":"Comparison with Python/R/Stata","title":"Comparison with the Python package pandas","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the Python package pandas (version 1.1.0):","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"import pandas as pd\nimport numpy as np\n\ndf = pd.DataFrame({'grp': [1, 2, 1, 2, 1, 2],\n                   'x': range(6, 0, -1),\n                   'y': range(4, 10),\n                   'z': [3, 4, 5, 6, 7, None]},\n                   index = list('abcdef'))\ndf2 = pd.DataFrame({'grp': [1, 3], 'w': [10, 11]})","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Because pandas supports multi-index, this example data frame is set up with a to f as row indices rather than a separate id column.","category":"page"},{"location":"man/comparisons/#Accessing-data","page":"Comparison with Python/R/Stata","title":"Accessing data","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nCell indexing by location 
df.iloc[1, 1] df[2, 2]\nRow slicing by location df.iloc[1:3] df[2:3, :]\nColumn slicing by location df.iloc[:, 1:] df[:, 2:end]\nRow indexing by label df.loc['c'] df[findfirst(==('c'), df.id), :]\nColumn indexing by label df.loc[:, 'x'] df[:, :x]\nColumn slicing by label df.loc[:, ['x', 'z']] df[:, [:x, :z]]\n df.loc[:, 'x':'z'] df[:, Between(:x, :z)]\nMixed indexing df.loc['c'][1] df[findfirst(==('c'), df.id), 2]","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that Julia uses 1-based indexing, inclusive on both ends. A special keyword end can be used to indicate the last index. Likewise, the begin keyword can be used to indicate the first index.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In addition, when indexing a data frame with the findfirst function, a single DataFrameRow object is returned. In the case that id is not unique, you can use the findall function or boolean indexing instead. It would then return a DataFrame object containing all matched rows. The following two lines of code are functionally equivalent:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"df[findall(==('c'), df.id), :]\ndf[df.id .== 'c', :]","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl's indexing always produces a consistent and predictable return type. 
By contrast, pandas' loc function returns a Series object when there is exactly one 'c' value in the index, and it returns a DataFrame object when there are multiple rows having the index value of 'c'.","category":"page"},{"location":"man/comparisons/#Common-operations","page":"Comparison with Python/R/Stata","title":"Common operations","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nReduce multiple values df['z'].mean(skipna = False) mean(df.z)\n df['z'].mean() mean(skipmissing(df.z))\n df[['z']].agg(['mean']) combine(df, :z => mean ∘ skipmissing)\nAdd new columns df.assign(z1 = df['z'] + 1) transform(df, :z => (v -> v .+ 1) => :z1)\nRename columns df.rename(columns = {'x': 'x_new'}) rename(df, :x => :x_new)\nPick & transform columns df.assign(x_mean = df['x'].mean())[['x_mean', 'y']] select(df, :x => mean, :y)\nSort rows df.sort_values(by = 'x') sort(df, :x)\n df.sort_values(by = ['grp', 'x'], ascending = [True, False]) sort(df, [:grp, order(:x, rev = true)])\nDrop missing rows df.dropna() dropmissing(df)\nSelect unique rows df.drop_duplicates() unique(df)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas skips NaN values in its analytic functions by default. By contrast, Julia functions do not skip NaN's. If necessary, you can filter out the NaN's before processing, for example, mean(Iterators.filter(!isnan, x)).","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Pandas uses NaN for representing both missing data and the floating point \"not a number\" value. Julia defines a special value missing for representing missing data. DataFrames.jl respects general rules in Julia in propagating missing values by default. 
If necessary, the skipmissing function can be used to remove missing data. See the Missing Data section for more information.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In addition, pandas keeps the original column name after applying a function. DataFrames.jl appends a suffix to the column name by default. To keep it simple, the examples above do not synchronize the column names between pandas and DataFrames.jl (you can pass renamecols=false keyword argument to select, transform and combine functions to retain old column names).","category":"page"},{"location":"man/comparisons/#Mutating-operations","page":"Comparison with Python/R/Stata","title":"Mutating operations","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nAdd new columns df['z1'] = df['z'] + 1 df.z1 = df.z .+ 1\n  transform!(df, :z => (x -> x .+ 1) => :z1)\n df.insert(1, 'const', 10) insertcols!(df, 2, :const => 10)\nRename columns df.rename(columns = {'x': 'x_new'}, inplace = True) rename!(df, :x => :x_new)\nSort rows df.sort_values(by = 'x', inplace = True) sort!(df, :x)\nDrop missing rows df.dropna(inplace = True) dropmissing!(df)\nSelect unique rows df.drop_duplicates(inplace = True) unique!(df)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Generally speaking, DataFrames.jl follows the Julia convention of using ! 
in the function name to indicate mutation behavior.","category":"page"},{"location":"man/comparisons/#Grouping-data-and-aggregation","page":"Comparison with Python/R/Stata","title":"Grouping data and aggregation","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl provides a groupby function to apply operations over each group independently. The result of groupby is a GroupedDataFrame object which may be processed using the combine, transform, or select functions. The following table illustrates some common grouping and aggregation usages.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nAggregate by groups df.groupby('grp')['x'].mean() combine(groupby(df, :grp), :x => mean)\nRename column after aggregation df.groupby('grp')['x'].mean().rename(\"my_mean\") combine(groupby(df, :grp), :x => mean => :my_mean)\nAdd aggregated data as column df.join(df.groupby('grp')['x'].mean(), on='grp', rsuffix='_mean') transform(groupby(df, :grp), :x => mean)\n...and select output columns df.join(df.groupby('grp')['x'].mean(), on='grp', rsuffix='_mean')[['grp', 'x_mean']] select(groupby(df, :grp), :id, :x => mean)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas returns a Series object for 1-dimensional result unless reset_index is called afterwards. The corresponding DataFrames.jl examples return an equivalent DataFrame object. 
Consider the first example:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":">>> df.groupby('grp')['x'].mean()\ngrp\n1    4.0\n2    3.0\nName: x, dtype: float64","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"For DataFrames.jl, it looks like this:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"julia> combine(groupby(df, :grp), :x => mean)\n2×2 DataFrame\n Row │ grp    x_mean\n     │ Int64  Float64\n─────┼────────────────\n   1 │     1      4.0\n   2 │     2      3.0","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In DataFrames.jl, the GroupedDataFrame object supports an efficient key lookup. Hence, it performs well when you need to perform lookups repeatedly.","category":"page"},{"location":"man/comparisons/#More-advanced-commands","page":"Comparison with Python/R/Stata","title":"More advanced commands","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"This section includes more complex examples.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nComplex Function df[['z']].agg(lambda v: np.mean(np.cos(v))) combine(df, :z => v -> mean(cos, skipmissing(v)))\nAggregate multiple columns df.agg({'x': max, 'y': min}) combine(df, :x => maximum, :y => minimum)\n df[['x', 'y']].mean() combine(df, [:x, :y] .=> mean)\n df.filter(regex=(\"^x\")).mean() combine(df, names(df, r\"^x\") .=> mean)\nApply function over multiple variables df.assign(x_y_cor = np.corrcoef(df.x, df.y)[0, 1]) transform(df, [:x, 
:y] => cor)\nRow-wise operation df.assign(x_y_min = df.apply(lambda v: min(v.x, v.y), axis=1)) transform(df, [:x, :y] => ByRow(min))\n df.assign(x_y_argmax = df.apply(lambda v: df.columns[v.argmax()], axis=1)) transform(df, AsTable([:x, :y]) => ByRow(argmax))\nDataFrame as input df.groupby('grp').head(2) combine(d -> first(d, 2), groupby(df, :grp))\nDataFrame as output df[['x']].agg(lambda x: [min(x), max(x)]) combine(df, :x => (x -> (x=[minimum(x), maximum(x)],)) => AsTable)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that pandas preserves the same row order after groupby whereas DataFrames.jl shows them grouped by the provided keys after the combine operation, but select and transform retain an original row ordering.","category":"page"},{"location":"man/comparisons/#Joining-data-frames","page":"Comparison with Python/R/Stata","title":"Joining data frames","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"DataFrames.jl supports join operations similar to a relational database.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation pandas DataFrames.jl\nInner join pd.merge(df, df2, how = 'inner', on = 'grp') innerjoin(df, df2, on = :grp)\nOuter join pd.merge(df, df2, how = 'outer', on = 'grp') outerjoin(df, df2, on = :grp)\nLeft join pd.merge(df, df2, how = 'left', on = 'grp') leftjoin(df, df2, on = :grp)\nRight join pd.merge(df, df2, how = 'right', on = 'grp') rightjoin(df, df2, on = :grp)\nSemi join (filtering) df[df.grp.isin(df2.grp)] semijoin(df, df2, on = :grp)\nAnti join (filtering) df[~df.grp.isin(df2.grp)] antijoin(df, df2, on = :grp)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with 
Python/R/Stata","text":"For multi-column joins, both pandas and DataFrames.jl accept an array for the on keyword argument.","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"In the cases of semi joins and anti joins, the isin function in pandas can still be used as long as the join keys are combined in a tuple. In DataFrames.jl, it just works normally with an array of join keys specified in the on keyword argument.","category":"page"},{"location":"man/comparisons/#Comparison-with-the-R-package-dplyr","page":"Comparison with Python/R/Stata","title":"Comparison with the R package dplyr","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the R package dplyr (version 1):","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"df <- tibble(grp = rep(1:2, 3), x = 6:1, y = 4:9,\n             z = c(3:7, NA), id = letters[1:6])","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nReduce multiple values summarize(df, mean(x)) combine(df, :x => mean)\nAdd new columns mutate(df, x_mean = mean(x)) transform(df, :x => mean => :x_mean)\nRename columns rename(df, x_new = x) rename(df, :x => :x_new)\nPick columns select(df, x, y) select(df, :x, :y)\nPick & transform columns transmute(df, mean(x), y) select(df, :x => mean, :y)\nPick rows filter(df, x >= 1) subset(df, :x => ByRow(x -> x >= 1))\nSort rows arrange(df, x) sort(df, :x)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"As in dplyr, some of these functions can be applied to grouped 
data frames, in which case they operate by group:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nReduce multiple values summarize(group_by(df, grp), mean(x)) combine(groupby(df, :grp), :x => mean)\nAdd new columns mutate(group_by(df, grp), mean(x)) transform(groupby(df, :grp), :x => mean)\nPick & transform columns transmute(group_by(df, grp), mean(x), y) select(groupby(df, :grp), :x => mean, :y)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The table below compares more advanced commands:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation dplyr DataFrames.jl\nComplex Function summarize(df, mean(x, na.rm = T)) combine(df, :x => x -> mean(skipmissing(x)))\nTransform several columns summarize(df, max(x), min(y)) combine(df, :x => maximum,  :y => minimum)\n summarize(df, across(c(x, y), mean)) combine(df, [:x, :y] .=> mean)\n summarize(df, across(starts_with(\"x\"), mean)) combine(df, names(df, r\"^x\") .=> mean)\n summarize(df, across(c(x, y), list(max, min))) combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function mutate(df, cor(x, y)) transform(df, [:x, :y] => cor)\nRow-wise mutate(rowwise(df), min(x, y)) transform(df, [:x, :y] => ByRow(min))\n mutate(rowwise(df), which.max(c_across(matches(\"^x\")))) transform(df, AsTable(r\"^x\") => ByRow(argmax))\nDataFrame as input summarize(df, head(across(), 2)) combine(d -> first(d, 2), df)\nDataFrame as output summarize(df, tibble(value = c(min(x), max(x)))) combine(df, :x => (x -> (value = [minimum(x), maximum(x)],)) => AsTable)","category":"page"},{"location":"man/comparisons/#Comparison-with-the-R-package-data.table","page":"Comparison with Python/R/Stata","title":"Comparison with the R package 
data.table","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of DataFrames.jl with the R package data.table (version 1.14.1).","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"library(data.table)\ndf  <- data.table(grp = rep(1:2, 3), x = 6:1, y = 4:9,\n                  z = c(3:7, NA), id = letters[1:6])\ndf2 <- data.table(grp=c(1,3), w = c(10,11))","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nReduce multiple values df[, .(mean(x))] combine(df, :x => mean)\nAdd new columns df[, x_mean:=mean(x) ] transform!(df, :x => mean => :x_mean)\nRename column (in place) setnames(df, \"x\", \"x_new\") rename!(df, :x => :x_new)\nRename multiple columns (in place) setnames(df, c(\"x\", \"y\"), c(\"x_new\", \"y_new\")) rename!(df, [:x, :y] .=> [:x_new, :y_new])\nPick columns as dataframe df[, .(x, y)] select(df, :x, :y)\nPick column as a vector df[, x] df[!, :x]\nRemove columns df[, -\"x\"] select(df, Not(:x))\nRemove columns (in place) df[, x:=NULL] select!(df, Not(:x))\nRemove columns (in place) df[, c(\"x\", \"y\"):=NULL] select!(df, Not([:x, :y]))\nPick & transform columns df[, .(mean(x), y)] select(df, :x => mean, :y)\nPick rows df[ x >= 1 ] filter(:x => >=(1), df)\nSort rows (in place) setorder(df, x) sort!(df, :x)\nSort rows df[ order(x) ] sort(df, :x)","category":"page"},{"location":"man/comparisons/#Grouping-data-and-aggregation-2","page":"Comparison with Python/R/Stata","title":"Grouping data and aggregation","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nReduce 
multiple values df[, mean(x), by=id ] combine(groupby(df, :id), :x => mean)\nAdd new columns (in place) df[, x_mean:=mean(x), by=id] transform!(groupby(df, :id), :x => mean)\nPick & transform columns df[, .(x_mean = mean(x), y), by=id] select(groupby(df, :id), :x => mean, :y)","category":"page"},{"location":"man/comparisons/#More-advanced-commands-2","page":"Comparison with Python/R/Stata","title":"More advanced commands","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nComplex Function df[, .(mean(x, na.rm=TRUE)) ] combine(df, :x => x -> mean(skipmissing(x)))\nTransform certain rows (in place) df[x<=0, x:=0] df.x[df.x .<= 0] .= 0\nTransform several columns df[, .(max(x), min(y)) ] combine(df, :x => maximum, :y => minimum)\n df[, lapply(.SD, mean), .SDcols = c(\"x\", \"y\") ] combine(df, [:x, :y] .=> mean)\n df[, lapply(.SD, mean), .SDcols = patterns(\"*x\") ] combine(df, names(df, r\"^x\") .=> mean)\n dcast(df, . 
~ ., list(max,min), value.var = c(\"x\",\"y\")) combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function df[, .(cor(x,y)) ] transform(df, [:x, :y] => cor)\nRow-wise df[, min_xy := min(x, y), by = 1:nrow(df)] transform!(df, [:x, :y] => ByRow(min))\n df[, argmax_xy := which.max(.SD) , .SDcols = patterns(\"*x\"), by = 1:nrow(df) ] transform!(df, AsTable(r\"^x\") => ByRow(argmax))\nDataFrame as output df[, .SD[1], by=grp] combine(groupby(df, :grp), first)\nDataFrame as output df[, .SD[which.max(x)], by=grp] combine(groupby(df, :grp), sdf -> sdf[argmax(sdf.x), :])\nReshape longer longdf = melt(df, measure.vars=c(\"x\",\"y\"), id.vars=\"id\") longdf = stack(df, [:x, :y], :id)\nReshape wider dcast(longdf, id ~ variable, value.var=\"value\") unstack(longdf, :id, :variable, :value)","category":"page"},{"location":"man/comparisons/#Joining-data-frames-2","page":"Comparison with Python/R/Stata","title":"Joining data frames","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation data.table DataFrames.jl\nInner join merge(df, df2, on = \"grp\") innerjoin(df, df2, on = :grp)\nOuter join merge(df, df2, all = TRUE, on = \"grp\") outerjoin(df, df2, on = :grp)\nLeft join merge(df, df2, all.x = TRUE, on = \"grp\") leftjoin(df, df2, on = :grp)\nRight join merge(df, df2, all.y = TRUE, on = \"grp\") rightjoin(df, df2, on = :grp)\nAnti join (filtering) df[!df2, on = \"grp\" ] antijoin(df, df2, on = :grp)\nSemi join (filtering) merge(df1, df2[, .(grp)]) semijoin(df, df2, on = :grp)","category":"page"},{"location":"man/comparisons/#Comparison-with-Stata-(version-8-and-above)","page":"Comparison with Python/R/Stata","title":"Comparison with Stata (version 8 and above)","text":"","category":"section"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The following table compares the main functions of 
DataFrames.jl with Stata:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nReduce multiple values collapse (mean) x combine(df, :x => mean)\nAdd new columns egen x_mean = mean(x) transform!(df, :x => mean => :x_mean)\nRename columns rename x x_new rename!(df, :x => :x_new)\nPick columns keep x y select!(df, :x, :y)\nPick rows keep if x >= 1 subset!(df, :x => ByRow(x -> x >= 1))\nSort rows sort x sort!(df, :x)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Note that the suffix ! (i.e. transform!, select!, etc) ensures that the operation transforms the dataframe in place, as in Stata","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Some of these functions can be applied to grouped data frames, in which case they operate by group:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nAdd new columns egen x_mean = mean(x), by(grp) transform!(groupby(df, :grp), :x => mean)\nReduce multiple values collapse (mean) x, by(grp) combine(groupby(df, :grp), :x => mean)","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"The table below compares more advanced commands:","category":"page"},{"location":"man/comparisons/","page":"Comparison with Python/R/Stata","title":"Comparison with Python/R/Stata","text":"Operation Stata DataFrames.jl\nTransform certain rows replace x = 0 if x <= 0 transform(df, :x => (x -> ifelse.(x .<= 0, 0, x)) => :x)\nTransform several columns collapse (max) x (min) y combine(df, :x => maximum,  :y => minimum)\n collapse (mean) x y combine(df, [:x, :y] .=> 
mean)\n collapse (mean) x* combine(df, names(df, r\"^x\") .=> mean)\n collapse (max) x y (min) x y combine(df, ([:x, :y] .=> [maximum minimum])...)\nMultivariate function egen z = corr(x y) transform!(df, [:x, :y] => cor => :z)\nRow-wise egen z = rowmin(x y) transform!(df, [:x, :y] => ByRow(min) => :z)","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/types/#Types","page":"Types","title":"Types","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"Pages = [\"types.md\"]","category":"page"},{"location":"lib/types/#Type-hierarchy-design","page":"Types","title":"Type hierarchy design","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"AbstractDataFrame is an abstract type that provides an interface for data frame types. It is not intended as a fully generic interface for working with tabular data, which is the role of interfaces defined by Tables.jl instead.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"DataFrame is the most fundamental subtype of AbstractDataFrame, which stores a set of columns as AbstractVector objects. Indexing of all stored columns must be 1-based. Also, all functions exposed by DataFrames.jl API make sure to collect passed AbstractRange source columns before storing them in a DataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"SubDataFrame is an AbstractDataFrame subtype representing a view into a DataFrame. It stores only a reference to the parent DataFrame and information about which rows and columns from the parent are selected (both as integer indices referring to the parent). 
Typically it is created using the view function or is returned by indexing into a GroupedDataFrame object.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"GroupedDataFrame is a type that stores the result of a  grouping operation performed on an AbstractDataFrame. It is intended to be created as a result of a call to the groupby function.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"DataFrameRow is a view into a single row of an AbstractDataFrame. It stores only a reference to a parent DataFrame and information about which row and columns from the parent are selected (both as integer indices referring to the parent). The DataFrameRow type supports iteration over columns of the row and is similar in functionality to the NamedTuple type, but allows for modification of data stored in the parent DataFrame and reflects changes done to the parent after the creation of the view. Typically objects of the DataFrameRow type are encountered when returned by the eachrow function, or when accessing a single row of a DataFrame or SubDataFrame via getindex or view.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The eachrow function returns a value of the DataFrameRows type, which serves as an iterator over rows of an AbstractDataFrame, returning DataFrameRow objects. The DataFrameRows is a subtype of AbstractVector and supports its interface with the exception that it is read-only.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Similarly, the eachcol function returns a value of the DataFrameColumns type, which is not an AbstractVector, but supports most of its API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Note that DataFrameRows and DataFrameColumns are not exported and should not be constructed directly, but using the eachrow and eachcol functions.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The RepeatedVector and StackedVector types are subtypes of AbstractVector and support its interface with the exception that they are read only. Note that they are not exported and should not be constructed directly, but they are columns of a DataFrame returned by stack with view=true.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The ByRow type is a special type used for selection operations to signal that the wrapped function should be applied to each element (row) of the selection.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The AsTable type is a special type used for selection operations to signal that the columns selected by a wrapped selector should be passed as a NamedTuple to the function or to signal that it is requested to expand the return value of a transformation into multiple columns.","category":"page"},{"location":"lib/types/#man-columnhandling","page":"Types","title":"The design of handling of columns of a DataFrame","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"When a DataFrame is constructed columns are copied by default. You can disable this behavior by setting copycols keyword argument to false. 
The exception is if an AbstractRange is passed as a column, then it is always collected to a Vector.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"Also functions that transform a DataFrame to produce a new DataFrame perform a copy of the columns, unless they are passed copycols=false (available only for functions that could perform a transformation without copying the columns). Examples of such functions are vcat, hcat, filter, dropmissing, getindex, copy or the DataFrame constructor mentioned above.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"The generic single-argument constructor DataFrame(table) has copycols=nothing by default, meaning that columns are copied unless table signals that a copy of columns doesn't need to be made (this is done by wrapping the source table in Tables.CopiedColumns). CSV.jl does this when CSV.read(file, DataFrame) is called, since columns are built only for the purpose of use in a DataFrame constructor. Another example is Arrow.Table, where arrow data is inherently immutable so columns can't be accidentally mutated anyway. To be able to mutate arrow data, columns must be materialized, which can be accomplished via DataFrame(arrow_table, copycols=true).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"On the contrary, functions that create a view of a DataFrame do not by definition make copies of the columns, and therefore require particular caution. This includes view, which returns a SubDataFrame or a DataFrameRow, and groupby, which returns a GroupedDataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"A partial exception to this rule is the stack function with view=true which creates a DataFrame that contains views of the columns from the source DataFrame.","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"In-place functions whose names end with ! 
(like sort! or dropmissing!, setindex!, push!, append!) may mutate the column vectors of the DataFrame they take as an argument. These functions are safe to call due to the rules described above, except when a view of the DataFrame is in use (via a SubDataFrame, a DataFrameRow or a GroupedDataFrame). In the latter case, calling such a function on the parent might corrupt the view, which may trigger errors, silently return invalid data or even cause Julia to crash. The same caution applies when a DataFrame was created using columns of another DataFrame without copying (for instance when copycols=false in functions such as DataFrame or hcat).","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"It is possible to have direct access to a column col of a DataFrame df (e.g. this can be useful in performance critical code to avoid copying), using one of the following methods:","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"via the getproperty function using the syntax df.col;\nvia the getindex function using the syntax df[!, :col] (note this is in contrast to df[:, :col] which copies);\nby creating a DataFrameColumns object using the eachcol function;\nby calling the parent function on a view of a column of the DataFrame, e.g. parent(@view df[:, :col]);\nby storing the reference to the column before creating a DataFrame with copycols=false;","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"A column obtained from a DataFrame using one of the above methods should not be mutated without caution because:","category":"page"},{"location":"lib/types/","page":"Types","title":"Types","text":"resizing a column vector will corrupt its parent DataFrame and any associated views as methods only check the length of the column when it is added to the DataFrame and later assume that all columns have the same length;\nreordering values in a column vector (e.g. using sort!) 
will break the consistency of rows with other columns, which will also affect views (if any);\nchanging values contained in a column vector is acceptable as long as it is not used as a grouping column in a GroupedDataFrame created based on the DataFrame.","category":"page"},{"location":"lib/types/#Types-specification","page":"Types","title":"Types specification","text":"","category":"section"},{"location":"lib/types/","page":"Types","title":"Types","text":"AbstractDataFrame\nAsTable\nDataFrame\nDataFrameRow\nGroupedDataFrame\nGroupKey\nGroupKeys\nSubDataFrame\nDataFrameRows\nDataFrameColumns\nRepeatedVector\nStackedVector","category":"page"},{"location":"lib/types/#DataFrames.AbstractDataFrame","page":"Types","title":"DataFrames.AbstractDataFrame","text":"AbstractDataFrame\n\nAn abstract type for which all concrete types expose an interface for working with tabular data.\n\nAn AbstractDataFrame is a two-dimensional table with Symbols or strings for column names.\n\nDataFrames.jl defines two types that are subtypes of AbstractDataFrame: DataFrame and SubDataFrame.\n\nIndexing and broadcasting\n\nAbstractDataFrame can be indexed by passing two indices specifying row and column selectors. The allowed indices are a superset of indices that can be used for standard arrays. You can also access a single column of an AbstractDataFrame using getproperty and setproperty! functions. Columns can be selected using integers, Symbols, or strings. 
In broadcasting AbstractDataFrame behavior is similar to a Matrix.\n\nA detailed description of getindex, setindex!, getproperty, setproperty!, broadcasting and broadcasting assignment for data frames is given in the \"Indexing\" section of the manual.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.AsTable","page":"Types","title":"DataFrames.AsTable","text":"AsTable(cols)\n\nA type having a special meaning in source => transformation => destination selection operations supported by combine, select, select!, transform, transform!, subset, and subset!.\n\nIf AsTable(cols) is used in source position it signals that the columns selected by the wrapped selector cols should be passed as a NamedTuple to the function.\n\nIf AsTable is used in destination position it means that the result of the transformation operation is a vector of containers (or a single container if ByRow(transformation) is used) that should be expanded  into multiple columns using keys to get column names.\n\nExamples\n\njulia> df1 = DataFrame(a=1:3, b=11:13)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n\njulia> df2 = select(df1, AsTable([:a, :b]) => ByRow(identity))\n3×1 DataFrame\n Row │ a_b_identity\n     │ NamedTuple…\n─────┼─────────────────\n   1 │ (a = 1, b = 11)\n   2 │ (a = 2, b = 12)\n   3 │ (a = 3, b = 13)\n\njulia> select(df2, :a_b_identity => AsTable)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n\njulia> select(df1, AsTable([:a, :b]) => ByRow(nt -> map(x -> x^2, nt)) => AsTable)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrame","page":"Types","title":"DataFrames.DataFrame","text":"DataFrame <: AbstractDataFrame\n\nAn 
AbstractDataFrame that stores a set of named columns.\n\nThe columns are normally AbstractVectors stored in memory, particularly a Vector, PooledVector or CategoricalVector.\n\nConstructors\n\nDataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true)\nDataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, copycols::Bool=true)\nDataFrame(ds::AbstractDict; copycols::Bool=true)\nDataFrame(; kwargs..., copycols::Bool=true)\n\nDataFrame(table; copycols::Union{Bool, Nothing}=nothing)\nDataFrame(table, names::AbstractVector;\n          makeunique::Bool=false, copycols::Union{Bool, Nothing}=nothing)\nDataFrame(columns::AbstractVecOrMat, names::AbstractVector;\n          makeunique::Bool=false, copycols::Bool=true)\n\nDataFrame(::DataFrameRow; copycols::Bool=true)\nDataFrame(::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true)\n\nKeyword arguments\n\ncopycols : whether vectors passed as columns should be copied; by default set to true and the vectors are copied; if set to false then the constructor will still copy the passed columns if it is not possible to construct a DataFrame without materializing new columns. Note the copycols=nothing default in the Tables.jl compatible constructor; it is provided as certain input table types may have already made a copy of columns or the columns may otherwise be immutable, in which case columns are not copied by default. To force a copy in such cases, or to get mutable columns from an immutable input table (like Arrow.Table), pass copycols=true explicitly.\nmakeunique : if false (the default), an error will be raised if duplicate column names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate)\n\n(note that not all constructors support these keyword arguments)\n\nDetails on behavior of different constructors\n\nIt is allowed to pass a vector of Pairs, a list of Pairs as positional arguments, or a list of keyword arguments. In this case each pair is considered to represent a column name to column value mapping and column name must be a Symbol or string. 
Alternatively a dictionary can be passed to the constructor in which case its entries are considered to define the column name and column value pairs. If the dictionary is a Dict then column names will be sorted in the returned DataFrame.\n\nIn all the constructors described above column value can be a vector which is consumed as is or an object of any other type (except AbstractArray). In the latter case the passed value is automatically repeated to fill a new vector of the appropriate length. As a particular rule values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way.\n\nIt is also allowed to pass a vector of vectors or a matrix as the first argument. In this case the second argument must be a vector of Symbols or strings specifying column names, or the symbol :auto to generate column names x1, x2, ... automatically. Note that in this case if the first argument is a matrix and copycols=false the columns of the created DataFrame will be views of columns of the source matrix.\n\nIf a single positional argument is passed to a DataFrame constructor then it is assumed to be of type that implements the Tables.jl interface using which the returned DataFrame is materialized.\n\nIf two positional arguments are passed, where the second argument is an AbstractVector, then the first argument is taken to be a table as described in the previous paragraph, and column names of the resulting data frame are taken from the vector passed as the second positional argument.\n\nFinally it is allowed to construct a DataFrame from a DataFrameRow or a GroupedDataFrame. In the latter case the keepkeys keyword argument specifies whether the resulting DataFrame should contain the grouping columns of the passed GroupedDataFrame and the order of rows in the result follows the order of groups in the GroupedDataFrame passed.\n\nNotes\n\nThe DataFrame constructor by default copies all column vectors passed to it. 
Pass the copycols=false keyword argument (where supported) to reuse vectors without copying them.\n\nBy default an error will be raised if duplicates in column names are found. Pass makeunique=true keyword argument (where supported) to accept duplicate names, in which case they will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf an AbstractRange is passed to a DataFrame constructor as a column it is always collected to a Vector (even if copycols=false). As a general rule AbstractRange values are always materialized to a Vector by all functions in DataFrames.jl before being stored in a DataFrame.\n\nDataFrame can store only columns that use 1-based indexing. Attempting to store a vector using non-standard indexing raises an error.\n\nThe DataFrame type is designed to allow column types to vary and to be dynamically changed also after it is constructed. Therefore DataFrames are not type stable. For performance-critical code that requires type-stability either use the functionality provided by select/transform/combine functions, use Tables.columntable and Tables.namedtupleiterator functions, use barrier functions, or provide type assertions to the variables that hold columns extracted from a DataFrame.\n\nMetadata: this function preserves all table and column-level metadata. 
As a special case if a GroupedDataFrame is passed then only :note-style metadata from parent of the GroupedDataFrame is preserved.\n\nExamples\n\njulia> DataFrame((a=[1, 2], b=[3, 4])) # Tables.jl table constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)]) # Tables.jl table constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(\"a\" => 1:2, \"b\" => 0) # Pair constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([:a => 1:2, :b => 0]) # vector of Pairs constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(Dict(:a => 1:2, :b => 0)) # dictionary constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(a=1:2, b=0) # keyword argument constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([[1, 2], [0, 0]], [:a, :b]) # vector of vectors constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([1 0; 2 0], :auto) # matrix constructor\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameRow","page":"Types","title":"DataFrames.DataFrameRow","text":"DataFrameRow{<:AbstractDataFrame, <:AbstractIndex}\n\nA view of one row of an AbstractDataFrame.\n\nA DataFrameRow is returned by getindex or view functions when one row and a selection of columns are requested, or when 
iterating the result of the call to the eachrow function.\n\nThe DataFrameRow constructor can also be called directly:\n\nDataFrameRow(parent::AbstractDataFrame, row::Integer, cols=:)\n\nA DataFrameRow supports the iteration interface and can therefore be passed to functions that expect a collection as an argument. Its element type is always Any.\n\nIndexing is one-dimensional like specifying a column of a DataFrame. You can also access the data in a DataFrameRow using the getproperty and setproperty! functions and convert it to a Tuple, NamedTuple, or Vector using the corresponding functions.\n\nIf the selection of columns in a parent data frame is passed as : (a colon) then DataFrameRow will always have all columns from the parent, even if they are added or removed after its creation.\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2], outer=[2]),\n                      b=repeat([\"a\", \"b\"], inner=[2]),\n                      c=1:4)\n4×3 DataFrame\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n   2 │     2  a           2\n   3 │     1  b           3\n   4 │     2  b           4\n\njulia> df[1, :]\nDataFrameRow\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n\njulia> @view df[end, [:a]]\nDataFrameRow\n Row │ a\n     │ Int64\n─────┼───────\n   4 │     2\n\njulia> eachrow(df)[1]\nDataFrameRow\n Row │ a      b       c\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  a           1\n\njulia> Tuple(df[1, :])\n(1, \"a\", 1)\n\njulia> NamedTuple(df[1, :])\n(a = 1, b = \"a\", c = 1)\n\njulia> Vector(df[1, :])\n3-element Vector{Any}:\n 1\n  \"a\"\n 1\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupedDataFrame","page":"Types","title":"DataFrames.GroupedDataFrame","text":"GroupedDataFrame\n\nThe result of a groupby operation on an AbstractDataFrame; a view into the AbstractDataFrame grouped 
by rows.\n\nNot meant to be constructed directly, see groupby.\n\nOne can get the names of columns used to create GroupedDataFrame using the groupcols function. Similarly the groupindices function returns a vector of group indices for each row of the parent data frame.\n\nAfter its creation, a GroupedDataFrame reflects the grouping of rows that was valid at its creation time. Therefore grouping columns of its parent data frame must not be mutated, and rows must not be added nor removed from it. To safeguard the user against such cases, if the number of rows in the parent data frame changes then trying to use GroupedDataFrame will throw an error. However, one can add or remove columns to the parent data frame without invalidating the GroupedDataFrame provided that columns used for grouping are not changed.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupKey","page":"Types","title":"DataFrames.GroupKey","text":"GroupKey{T<:GroupedDataFrame}\n\nKey for one of the groups of a GroupedDataFrame. Contains the values of the corresponding grouping columns and behaves similarly to a NamedTuple, but using it to index its GroupedDataFrame is more efficient than using the equivalent Tuple and NamedTuple, and much more efficient than using the equivalent AbstractDict.\n\nInstances of this type are returned by keys(::GroupedDataFrame) and are not meant to be constructed directly.\n\nIndexing fields of GroupKey is allowed using an integer, a Symbol, or a string. It is also possible to access the data in a GroupKey using the getproperty function. A GroupKey can be converted to a Tuple, NamedTuple, a Vector, or a Dict. 
When converted to a Dict, the keys of the Dict are Symbols.\n\nSee keys(::GroupedDataFrame) for more information.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.GroupKeys","page":"Types","title":"DataFrames.GroupKeys","text":"GroupKeys{T<:GroupedDataFrame} <: AbstractVector{GroupKey{T}}\n\nA vector containing all GroupKey objects for a given GroupedDataFrame.\n\nSee keys(::GroupedDataFrame) for more information.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.SubDataFrame","page":"Types","title":"DataFrames.SubDataFrame","text":"SubDataFrame{<:AbstractDataFrame, <:AbstractIndex, <:AbstractVector{Int}} <: AbstractDataFrame\n\nA view of an AbstractDataFrame. It is returned by a call to the view function on an AbstractDataFrame if a collections of rows and columns are specified.\n\nA SubDataFrame is an AbstractDataFrame, so expect that most DataFrame functions should work. Such methods include describe, summary, nrow, size, by, stack, and join.\n\nIf the selection of columns in a parent data frame is passed as : (a colon) then SubDataFrame will always have all columns from the parent, even if they are added or removed after its creation.\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8)\n8×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     2      1      2\n   3 │     3      2      3\n   4 │     4      1      4\n   5 │     1      2      5\n   6 │     2      1      6\n   7 │     3      2      7\n   8 │     4      1      8\n\njulia> sdf1 = view(df, :, 2:3) # column subsetting\n8×2 SubDataFrame\n Row │ b      c\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      1\n   2 │     1      2\n   3 │     2      3\n   4 │     1      4\n   5 │     2      5\n   6 │     1      6\n   7 │     2      7\n   8 │     1      8\n\njulia> sdf2 = @view 
df[end:-1:1, [1, 3]]  # row and column subsetting\n8×2 SubDataFrame\n Row │ a      c\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      8\n   2 │     3      7\n   3 │     2      6\n   4 │     1      5\n   5 │     4      4\n   6 │     3      3\n   7 │     2      2\n   8 │     1      1\n\njulia> sdf3 = groupby(df, :a)[1]  # indexing a GroupedDataFrame returns a SubDataFrame\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameRows","page":"Types","title":"DataFrames.DataFrameRows","text":"DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}\n\nIterator over rows of an AbstractDataFrame, with each row represented as a DataFrameRow.\n\nA value of this type is returned by the eachrow function.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.DataFrameColumns","page":"Types","title":"DataFrames.DataFrameColumns","text":"DataFrameColumns{<:AbstractDataFrame}\n\nA vector-like object that allows iteration over columns of an AbstractDataFrame.\n\nIndexing into DataFrameColumns objects using integer, Symbol or string returns the corresponding column (without copying). Indexing into DataFrameColumns objects using a multiple column selector returns a subsetted DataFrameColumns object with a new parent containing only the selected columns (without copying).\n\nDataFrameColumns supports most of the AbstractVector API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).\n\nIn particular findnext, findprev, findfirst, findlast, and findall functions are supported, and in findnext and findprev functions it is allowed to pass an integer, string, or Symbol as a reference index.\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.RepeatedVector","page":"Types","title":"DataFrames.RepeatedVector","text":"RepeatedVector{T} <: AbstractVector{T}\n\nAn AbstractVector that is a view into another AbstractVector with repeated elements\n\nNOTE: Not exported.\n\nConstructor\n\nRepeatedVector(parent::AbstractVector, inner::Int, outer::Int)\n\nArguments\n\nparent : the AbstractVector that's repeated\ninner : the number of times each element is repeated\nouter : the number of times the whole vector is repeated after expanded by inner\n\ninner and outer have the same meaning as similarly named arguments to repeat.\n\nExamples\n\nRepeatedVector([1, 2], 3, 1)   # [1, 1, 1, 2, 2, 2]\nRepeatedVector([1, 2], 1, 3)   # [1, 2, 1, 2, 1, 2]\nRepeatedVector([1, 2], 2, 2)   # [1, 1, 2, 2, 1, 1, 2, 2]\n\n\n\n\n\n","category":"type"},{"location":"lib/types/#DataFrames.StackedVector","page":"Types","title":"DataFrames.StackedVector","text":"StackedVector <: AbstractVector\n\nAn AbstractVector that is a linear, concatenated view into another set of AbstractVectors\n\nNOTE: Not exported.\n\nConstructor\n\nStackedVector(d::AbstractVector)\n\nArguments\n\nd... 
: one or more AbstractVectors\n\nExamples\n\nStackedVector(Any[[1, 2], [9, 10], [11, 12]])  # [1, 2, 9, 10, 11, 12]\n\n\n\n\n\n","category":"type"},{"location":"man/joins/#Database-Style-Joins","page":"Joins","title":"Database-Style Joins","text":"","category":"section"},{"location":"man/joins/#Introduction-to-joins","page":"Joins","title":"Introduction to joins","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"We often need to combine two or more data sets together to provide a complete picture of the topic we are studying. For example, suppose that we have the following two data sets:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> using DataFrames\n\njulia> people = DataFrame(ID=[20, 40], Name=[\"John Doe\", \"Jane Doe\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n   2 │    40  Jane Doe\n\njulia> jobs = DataFrame(ID=[20, 40], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    40  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"We might want to work with a larger data set that contains both the names and jobs for each ID. We can do this using the innerjoin function:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In relational database theory, this operation is generally referred to as a join. 
The columns used to determine which rows should be combined during a join are called keys.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"The following functions are provided to perform seven kinds of joins:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"innerjoin: the output contains rows for values of the key that exist in all passed data frames.\nleftjoin: the output contains rows for values of the key that exist in the first (left) argument, whether or not that value exists in the second (right) argument.\nrightjoin: the output contains rows for values of the key that exist in the second (right) argument, whether or not that value exists in the first (left) argument.\nouterjoin: the output contains rows for values of the key that exist in any of the passed data frames.\nsemijoin: Like an inner join, but output is restricted to columns from the first (left) argument.\nantijoin: The output contains rows for values of the key that exist in the first (left) but not the second (right) argument. 
As with semijoin, output is restricted to columns from the first (left) argument.\ncrossjoin: The output is the cartesian product of rows from all passed data frames.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"See the Wikipedia page on SQL joins for more information.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Here are examples of different kinds of join:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> jobs = DataFrame(ID=[20, 60], Job=[\"Lawyer\", \"Astronaut\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼──────────────────\n   1 │    20  Lawyer\n   2 │    60  Astronaut\n\njulia> innerjoin(people, jobs, on = :ID)\n1×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n\njulia> leftjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String?\n─────┼──────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  missing\n\njulia> rightjoin(people, jobs, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String?   String\n─────┼────────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    60  missing   Astronaut\n\njulia> outerjoin(people, jobs, on = :ID)\n3×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String?   
String?\n─────┼────────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  missing\n   3 │    60  missing   Astronaut\n\njulia> semijoin(people, jobs, on = :ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n\njulia> antijoin(people, jobs, on = :ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    40  Jane Doe","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Cross joins are the only kind of join that does not use a on key:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> crossjoin(people, jobs, makeunique = true)\n4×4 DataFrame\n Row │ ID     Name      ID_1   Job\n     │ Int64  String    Int64  String\n─────┼───────────────────────────────────\n   1 │    20  John Doe     20  Lawyer\n   2 │    20  John Doe     60  Astronaut\n   3 │    40  Jane Doe     20  Lawyer\n   4 │    40  Jane Doe     60  Astronaut","category":"page"},{"location":"man/joins/#Key-value-comparisons-and-floating-point-values","page":"Joins","title":"Key value comparisons and floating point values","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Key values from the two or more data frames are compared using the isequal function. This is consistent with the Set and Dict types in Julia Base.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"It is not recommended to use floating point numbers as keys: floating point comparisons can be surprising and unpredictable. If you do use floating point keys, note that by default an error is raised when keys include -0.0 (negative zero) or NaN values. 
Here is an example:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(DataFrame(id=[-0.0]), DataFrame(id=[0.0]), on=:id)\nERROR: ArgumentError: Currently for numeric values `NaN` and `-0.0` in their real or imaginary components are not allowed. Such value was found in column :id in left data frame. Use CategoricalArrays.jl to wrap these values in a CategoricalVector to perform the requested join.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"This can be overridden by wrapping the key values in a categorical vector.","category":"page"},{"location":"man/joins/#Joining-on-key-columns-with-different-names","page":"Joins","title":"Joining on key columns with different names","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In order to join data frames on keys which have different names in the left and right tables, you may pass left => right pairs as on argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(ID=[20, 40], Name=[\"John Doe\", \"Jane Doe\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │    20  John Doe\n   2 │    40  Jane Doe\n\njulia> b = DataFrame(IDNew=[20, 40], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ IDNew  Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    40  Doctor\n\njulia> innerjoin(a, b, on = :ID => :IDNew)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │    20  John Doe  Lawyer\n   2 │    40  Jane Doe  Doctor","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Here is another example with multiple columns:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(City=[\"Amsterdam\", \"London\", \"London\", \"New York\", 
\"New York\"],\n                     Job=[\"Lawyer\", \"Lawyer\", \"Lawyer\", \"Doctor\", \"Doctor\"],\n                     Category=[1, 2, 3, 4, 5])\n5×3 DataFrame\n Row │ City       Job     Category\n     │ String     String  Int64\n─────┼─────────────────────────────\n   1 │ Amsterdam  Lawyer         1\n   2 │ London     Lawyer         2\n   3 │ London     Lawyer         3\n   4 │ New York   Doctor         4\n   5 │ New York   Doctor         5\n\njulia> b = DataFrame(Location=[\"Amsterdam\", \"London\", \"London\", \"New York\", \"New York\"],\n                     Work=[\"Lawyer\", \"Lawyer\", \"Lawyer\", \"Doctor\", \"Doctor\"],\n                     Name=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ Location   Work    Name\n     │ String     String  String\n─────┼───────────────────────────\n   1 │ Amsterdam  Lawyer  a\n   2 │ London     Lawyer  b\n   3 │ London     Lawyer  c\n   4 │ New York   Doctor  d\n   5 │ New York   Doctor  e\n\njulia> innerjoin(a, b, on = [:City => :Location, :Job => :Work])\n9×4 DataFrame\n Row │ City       Job     Category  Name\n     │ String     String  Int64     String\n─────┼─────────────────────────────────────\n   1 │ Amsterdam  Lawyer         1  a\n   2 │ London     Lawyer         2  b\n   3 │ London     Lawyer         3  b\n   4 │ London     Lawyer         2  c\n   5 │ London     Lawyer         3  c\n   6 │ New York   Doctor         4  d\n   7 │ New York   Doctor         5  d\n   8 │ New York   Doctor         4  e\n   9 │ New York   Doctor         5  e","category":"page"},{"location":"man/joins/#Handling-of-duplicate-keys-and-tracking-source-data-frame","page":"Joins","title":"Handling of duplicate keys and tracking source data frame","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Additionally, notice that in the last join rows 2 and 3 had the same values on on variables in both joined DataFrames. 
In such a situation innerjoin, outerjoin, leftjoin and rightjoin will produce all combinations of matching rows. In our example rows from 2 to 5 were created as a result. The same behavior can be observed for rows 4 and 5 in both joined DataFrames.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In order to check that columns passed as the on argument define unique keys (according to isequal) in each input data frame you can set the validate keyword argument to a two-element tuple or a pair of Bool values, with each element indicating whether to run the check for the corresponding data frame. Here is an example for the join operation described above:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on = [(:City => :Location), (:Job => :Work)], validate=(true, true))\nERROR: ArgumentError: Merge key(s) are not unique in both df1 and df2. df1 contains 2 duplicate keys: (City = \"London\", Job = \"Lawyer\") and (City = \"New York\", Job = \"Doctor\"). df2 contains 2 duplicate keys: (Location = \"London\", Work = \"Lawyer\") and (Location = \"New York\", Work = \"Doctor\").","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Finally, using the source keyword argument you can add a column to the resulting data frame indicating whether the given row appeared only in the left, the right or both data frames. 
Here is an example:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> a = DataFrame(ID=[20, 40], Name=[\"John\", \"Jane\"])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼───────────────\n   1 │    20  John\n   2 │    40  Jane\n\njulia> b = DataFrame(ID=[20, 60], Job=[\"Lawyer\", \"Doctor\"])\n2×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │    20  Lawyer\n   2 │    60  Doctor\n\njulia> outerjoin(a, b, on=:ID, validate=(true, true), source=:source)\n3×4 DataFrame\n Row │ ID     Name     Job      source\n     │ Int64  String?  String?  String\n─────┼─────────────────────────────────────\n   1 │    20  John     Lawyer   both\n   2 │    40  Jane     missing  left_only\n   3 │    60  missing  Doctor   right_only","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that this time we also used the validate keyword argument and it did not produce errors as the keys defined in both source data frames were unique.","category":"page"},{"location":"man/joins/#Renaming-joined-columns","page":"Joins","title":"Renaming joined columns","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Often you want to keep track of the source data frame. 
This feature is supported with the renamecols keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on=:ID, renamecols = \"_left\" => \"_right\")\n1×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String\n─────┼─────────────────────────────\n   1 │    20  John       Lawyer","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"In the above example we added the \"_left\" suffix to the non-key columns from the left table and the \"_right\" suffix to the non-key columns from the right table.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Alternatively it is allowed to pass a function transforming column names:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(a, b, on=:ID, renamecols = lowercase => uppercase)\n1×3 DataFrame\n Row │ ID     name    JOB\n     │ Int64  String  String\n─────┼───────────────────────\n   1 │    20  John    Lawyer\n","category":"page"},{"location":"man/joins/#Matching-missing-values-in-joins","page":"Joins","title":"Matching missing values in joins","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"By default when you try to perform a join on a key that has missing values you get an error:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> df1 = DataFrame(id=[1, missing, 3], a=1:3)\n3×2 DataFrame\n Row │ id       a\n     │ Int64?   Int64\n─────┼────────────────\n   1 │       1      1\n   2 │ missing      2\n   3 │       3      3\n\njulia> df2 = DataFrame(id=[1, 2, missing], b=1:3)\n3×2 DataFrame\n Row │ id       b\n     │ Int64?   
Int64\n─────┼────────────────\n   1 │       1      1\n   2 │       2      2\n   3 │ missing      3\n\njulia> innerjoin(df1, df2, on=:id)\nERROR: ArgumentError: Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :id in left data frame.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"If you would prefer missing values to be treated as equal pass the matchmissing=:equal keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(df1, df2, on=:id, matchmissing=:equal)\n2×3 DataFrame\n Row │ id       a      b\n     │ Int64?   Int64  Int64\n─────┼───────────────────────\n   1 │       1      1      1\n   2 │ missing      2      3","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Alternatively you might want to drop all rows with missing values. In this case pass matchmissing=:notequal:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> innerjoin(df1, df2, on=:id, matchmissing=:notequal)\n1×3 DataFrame\n Row │ id      a      b\n     │ Int64?  
Int64  Int64\n─────┼──────────────────────\n   1 │      1      1      1","category":"page"},{"location":"man/joins/#Specifying-row-order-in-the-join-result","page":"Joins","title":"Specifying row order in the join result","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"By default the order of rows produced by the join operation is undefined:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> df_left = DataFrame(id=[1, 2, 4, 5], left=1:4)\n4×2 DataFrame\n Row │ id     left\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     4      3\n   4 │     5      4\n\njulia> df_right = DataFrame(id=[2, 1, 3, 6, 7], right=1:5)\n5×2 DataFrame\n Row │ id     right\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      1\n   2 │     1      2\n   3 │     3      3\n   4 │     6      4\n   5 │     7      5\n\njulia> outerjoin(df_left, df_right, on=:id)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     2        2        1\n   2 │     1        1        2\n   3 │     4        3  missing\n   4 │     5        4  missing\n   5 │     3  missing        3\n   6 │     6  missing        4\n   7 │     7  missing        5","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"If you would like the result to keep the row order of the left table pass the order=:left keyword argument:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> outerjoin(df_left, df_right, on=:id, order=:left)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   
Int64?\n─────┼─────────────────────────\n   1 │     1        1        2\n   2 │     2        2        1\n   3 │     4        3  missing\n   4 │     5        4  missing\n   5 │     3  missing        3\n   6 │     6  missing        4\n   7 │     7  missing        5","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that in this case keys missing from the left table are put after the keys present in it.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Similarly order=:right keeps the order of the right table (and puts keys not present in it at the end):","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> outerjoin(df_left, df_right, on=:id, order=:right)\n7×3 DataFrame\n Row │ id     left     right\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     2        2        1\n   2 │     1        1        2\n   3 │     3  missing        3\n   4 │     6  missing        4\n   5 │     7  missing        5\n   6 │     4        3  missing\n   7 │     5        4  missing","category":"page"},{"location":"man/joins/#In-place-left-join","page":"Joins","title":"In-place left join","text":"","category":"section"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"A common operation is adding data from a reference table to some main table. It is possible to perform such an in-place update using the leftjoin! function. 
In this case the left table is updated in place with matching rows from the right table.","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> main = DataFrame(id=1:4, main=1:4)\n4×2 DataFrame\n Row │ id     main\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n\njulia> leftjoin!(main, DataFrame(id=[2, 4], info=[\"a\", \"b\"]), on=:id);\n\njulia> main\n4×3 DataFrame\n Row │ id     main   info\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     1      1  missing\n   2 │     2      2  a\n   3 │     3      3  missing\n   4 │     4      4  b","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"Note that in this case the order and number of rows in the left table are not changed. Therefore, in particular, it is not allowed to have duplicate keys in the right table:","category":"page"},{"location":"man/joins/","page":"Joins","title":"Joins","text":"julia> leftjoin!(main, DataFrame(id=[2, 2], info_bad=[\"a\", \"b\"]), on=:id)\nERROR: ArgumentError: duplicate rows found in right table","category":"page"},{"location":"lib/metadata/#Metadata","page":"Metadata","title":"Metadata","text":"","category":"section"},{"location":"lib/metadata/#Design-of-metadata-support","page":"Metadata","title":"Design of metadata support","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl allows you to store and retrieve metadata on table and column level. 
This is supported using the functions defined by the DataAPI.jl interface:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"for table-level metadata: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!;\nfor column-level metadata: colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Additionally you might find the TableMetadataTools.jl package useful. This package defines several convenience functions for performing typical metadata operations.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Assume that we work with a data frame-like object df that has a column col (referred to either via a Symbol, a string or an integer index).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Table-level metadata are key-value pairs that are attached to df. Column-level metadata are key-value pairs that are attached to a specific column col of df data frame.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"To check whether some key key is present in table-level metadata of data frame df you can write key in metadatakeys(df). Similarly to check whether key key is present in column-level metadata of data frame df for column col write key in colmetadatakeys(df, col).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Additionally each metadata key-value pair has a style information attached to it. In DataFrames.jl the metadata style influences how metadata is propagated when df is transformed. The following metadata styles are supported:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":":default: Metadata having this style is considered to be attached to a concrete state of df. 
This means that any operation on this data frame invalidates such metadata and it is dropped in the result of such operation. Note that this happens even if the operation eventually does not change the data frame: the rule is that calling a function that might alter a data frame drops such metadata; in this way it is possible to statically determine whether metadata of styles other than :note is dropped after a function call. Only two functions are exceptions that keep non-:note-style metadata, as these operations are specifically designed to create an identical copy of the source data frame:\nDataFrame constructor;\ncopy of a data frame;\n:note: Metadata having this style is considered to be an annotation of a table or a column that should be propagated under transformations (exact propagation rules of such metadata are described below).\nAll other metadata styles are allowed but they are currently treated as having :default-style (this might change in the future if other standard metadata styles are defined).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"All DataAPI.jl metadata functions work with DataFrame, SubDataFrame, DataFrameRow objects, and objects returned by eachrow and eachcol functions. In this section collectively these objects will be called data frame-like, and follow the rules:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"objects returned by eachrow and eachcol functions have the same metadata as their parent AbstractDataFrame;\nSubDataFrame and DataFrameRow only expose metadata from their parent DataFrame whose style is :note.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Notably, metadata is not supported for GroupedDataFrame and you can't add, modify, nor view metadata through the GroupedDataFrame itself. 
It is possible only through its parent.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nDataFrames.jl allows users to extract out columns of a data frame and perform operations on them. Such operations will not affect metadata. Therefore, even if some metadata has :default style it might no longer correctly describe the column's contents if the user mutates columns directly.","category":"page"},{"location":"lib/metadata/#DataFrames.jl-specific-design-principles-for-use-of-metadata","page":"Metadata","title":"DataFrames.jl-specific design principles for use of metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl supports storing any object as metadata values. However, it is recommended to use strings as values of the metadata, as some storage formats, like for example Apache Arrow, only support strings.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For all functions that operate on column-level metadata, an ArgumentError is thrown if passed column is not present in a data frame.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"If metadata! or colmetadata! is used to add metadata to a SubDataFrame or a DataFrameRow then:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"using metadata that has style other than :note throws an error;\ntrying to add key-value pair for which a mapping for key already exists with style other than :note in the parent data frame throws an error.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"DataFrames.jl is designed so that there is no performance overhead due to metadata support when there is no metadata in a data frame. Therefore if you need maximum performance of operations that do not rely on metadata call emptymetadata! 
and emptycolmetadata! before running these operations.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Processing metadata for SubDataFrame and DataFrameRow has more overhead than for other types defined in DataFrames.jl that support metadata, because they have a more complex logic of handling it (they support only :note-style metadata, which means that other metadata needs to be filtered-out).","category":"page"},{"location":"lib/metadata/#Examples","page":"Metadata","title":"Examples","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Here is a simple example how you can work with metadata in DataFrames.jl:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"julia> using DataFrames\n\njulia> df = DataFrame(name=[\"Jan Krzysztof Duda\", \"Jan Krzysztof Duda\",\n                            \"Radosław Wojtaszek\", \"Radosław Wojtaszek\"],\n                      date=[\"2022-Jun\", \"2021-Jun\", \"2022-Jun\", \"2021-Jun\"],\n                      rating=[2750, 2729, 2708, 2687])\n4×3 DataFrame\n Row │ name                date      rating\n     │ String              String    Int64\n─────┼──────────────────────────────────────\n   1 │ Jan Krzysztof Duda  2022-Jun    2750\n   2 │ Jan Krzysztof Duda  2021-Jun    2729\n   3 │ Radosław Wojtaszek  2022-Jun    2708\n   4 │ Radosław Wojtaszek  2021-Jun    2687\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"caption\", \"ELO ratings of chess players\", style=:note);\n\njulia> collect(metadatakeys(df))\n1-element Vector{String}:\n \"caption\"\n\njulia> \"caption\" in metadatakeys(df)\ntrue\n\njulia> metadata(df, \"caption\")\n\"ELO ratings of chess players\"\n\njulia> metadata(df, \"caption\", style=true)\n(\"ELO ratings of chess players\", :note)\n\njulia> emptymetadata!(df);\n\njulia> metadatakeys(df)\n()\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :name, 
\"label\", \"First and last name of a player\", style=:note);\n\njulia> colmetadata!(df, :date, \"label\", \"Rating date in yyyy-u format\", style=:note);\n\njulia> colmetadata!(df, :rating, \"label\", \"ELO rating in classical time control\", style=:note);\n\njulia> \"label\" in colmetadatakeys(df, :rating)\ntrue\n\njulia> colmetadata(df, :rating, \"label\")\n\"ELO rating in classical time control\"\n\njulia> colmetadata(df, :rating, \"label\", style=true)\n(\"ELO rating in classical time control\", :note)\n\njulia> collect(colmetadatakeys(df))\n3-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n   :date => [\"label\"]\n :rating => [\"label\"]\n   :name => [\"label\"]\n\njulia> [only(names(df, col)) =>\n        [key => colmetadata(df, col, key) for key in metakeys] for\n        (col, metakeys) in colmetadatakeys(df)]\n3-element Vector{Pair{String, Vector{Pair{String, String}}}}:\n   \"date\" => [\"label\" => \"Rating date in yyyy-u format\"]\n \"rating\" => [\"label\" => \"ELO rating in classical time control\"]\n   \"name\" => [\"label\" => \"First and last name of a player\"]\n\njulia> emptycolmetadata!(df);\n\njulia> colmetadatakeys(df)\n()","category":"page"},{"location":"lib/metadata/#Propagation-of-:note-style-metadata","page":"Metadata","title":"Propagation of :note-style metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"An important design feature of :note-style metadata is how it is handled when data frames are transformed.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nThe provided rules might slightly change in the future. Any change to :note-style metadata propagation rules will not be considered as breaking and can be done in any minor release of DataFrames.jl. 
Such changes might be made based on users' feedback about what metadata propagation rules are most convenient in practice.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"The general design rules for propagation of :note-style metadata are as follows.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For operations that take a single data frame as an input:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Table level metadata is propagated to the returned data frame object.\nFor column-level metadata:\nin all cases when a single column is transformed to a single column and the name of the column does not change (or is automatically changed e.g. to de-duplicate column names or via column renaming in joins) column-level metadata is preserved (example operations of this kind are getindex, subset, joins, mapcols).\nin all cases when a single column is transformed with identity or copy to a single column, column-level metadata is preserved even if column name is changed (example operations of this kind are rename, or the :x => :y or :x => copy => :y operation specification in select).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"For operations that take multiple data frames as their input two cases are distinguished:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"When there is a natural main table in the operation (append!, prepend!, leftjoin, leftjoin!, rightjoin, semijoin, antijoin, setindex!):\ntable-level metadata is taken from the main table;\ncolumn-level metadata for columns from the main table is taken from main table;\ncolumn-level metadata for columns from the non-main table is taken only for columns not present in the main table.\nWhen all tables are equivalent (hcat, vcat, innerjoin, outerjoin):\ntable-level metadata is preserved only 
for keys which are defined in all passed tables and have the same value;\ncolumn-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value.","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"In all these operations when metadata is preserved the values in the key-value pairs are not copied (this is relevant in case of mutable values).","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"note: Note\nThe rules for column-level :note-style metadata propagation are designed to make the right decision in common cases. In particular, they assume that if source and target column name is the same then the metadata for the column is not changed. While this is valid for many operations, it is not always true in general. For example the :x => ByRow(log) => :x transformation might invalidate metadata if it contained unit of measure of the variable. In such cases user must either use a different name for the output column, set metadata style to :default before the operation, or manually drop or update such metadata from the :x column after the transformation.","category":"page"},{"location":"lib/metadata/#Operations-that-preserve-:note-style-metadata","page":"Metadata","title":"Operations that preserve :note-style metadata","text":"","category":"section"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"Most of the functions in DataFrames.jl only preserve table and column metadata whose style is :note. Some functions use a more complex logic, even if they follow the general rules described above (in particular under any transformation all non-:note-style metadata is always dropped). 
These are:","category":"page"},{"location":"lib/metadata/","page":"Metadata","title":"Metadata","text":"describe drops all metadata.\nhcat: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved.\nvcat: propagates table-level metadata only for keys which are defined in all passed tables and have the same value; column-level metadata is preserved only for keys which are defined in all passed tables that contain this column and have the same value;\nstack: propagates table-level metadata and column-level metadata for identifier columns.\nunstack: propagates table-level metadata and column-level metadata for row keys columns.\npermutedims: propagates table-level metadata and drops column-level  metadata.\nbroadcasted assignment does not change target metadata; under Julia earlier than 1.7 operation of kind df.a .= s does not drop non-:note-style metadata; under Julia 1.7 or later this operation preserves only :note-style metadata\nbroadcasting propagates table-level metadata if some key is present in all passed data frames and value associated with it is identical in all passed data frames; column-level metadata is propagated for columns if some key for a given column is present in all passed data frames and value associated with it is identical in all passed data frames.\ngetindex preserves table-level metadata and column-level metadata for selected columns\nsetindex! does not affect table-level and column-level metadata\npush!, pushfirst!, insert! do not affect table-level nor column-level metadata (even if they add new columns and pushed row is a DataFrameRow or other value supporting metadata interface)\nappend! and prepend! 
do not change table and column-level metadata of the destination data frame, except that if new columns are added and these columns have metadata in the appended/prepended table then this metadata is preserved.\nleftjoin!, leftjoin: table and column-level metadata is taken from the left table except for non-key columns from right table for which metadata is taken from right table;\nrightjoin: table and column-level metadata is taken from the right table except for non-key columns from left table for which metadata is taken from left table;\ninnerjoin, outerjoin: propagates table-level metadata only for keys that are defined in all passed data frames and have the same value; column-level metadata is propagated for all columns except for key columns, for which it is propagated only for keys that are defined in all passed data frames and have the same value.\nsemijoin, antijoin: table and column-level metadata is taken from the left table.\ncrossjoin: propagates table-level metadata only for keys that are defined in both passed data frames and have the same value; propagates column-level metadata from both passed data frames.\nselect, select!, transform, transform!, combine: propagate table-level metadata; column-level metadata is propagated if: a) a single column is transformed to a single column and the name of the column does not change    (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"CurrentModule = DataFrames","category":"page"},{"location":"lib/functions/#Functions","page":"Functions","title":"Functions","text":"","category":"section"},{"location":"lib/functions/#Multithreading-support","page":"Functions","title":"Multithreading 
support","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"By default, selected operations in DataFrames.jl automatically use multiple threads when available. Multi-threading is task-based and implemented using the @spawn macro from Julia Base. Tasks are therefore scheduled on the :default threadpool. Functions that take user-defined functions and may run it in parallel accept a threads keyword argument which allows disabling multithreading when the provided function requires serial execution or is not thread-safe.","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"This is a list of operations that currently make use of multi-threading:","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"DataFrame constructor with copycols=true; also recursively all functions that call this constructor, e.g. copy.\ngetindex when multiple columns are selected.\ngroupby (both when hashing is required and when fast path using DataAPI.refpool is used).\n*join functions for composing output data frame (but currently not for finding matching rows in joined data frames).\ncombine, select[!], and transform[!] on GroupedDataFrame when either of the conditions below is met:\nmultiple transformations are performed (each transformation is spawned in a separate task)\na transformation produces one row per group and the passed transformation is a custom function (i.e. not for standard reductions, which use optimized single-threaded methods).\ndropmissing when the provided data frame has more than 1 column and view=false  (subsetting of individual columns is spawned in separate tasks).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"In general at least Julia 1.4 is required to ensure that multi-threading is used and the Julia process must be started with more than one thread. 
Some operations turn on multi-threading only if enough rows in the processed data frame are present (the exact threshold when multi-threading is enabled is considered to be undefined and might change in the future).","category":"page"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"Except for the list above, where multi-threading is used automatically, all functions provided by DataFrames.jl that update a data frame are not thread safe. This means that while they can be called from any thread, the caller is responsible for ensuring that a given DataFrame object is never modified by one thread while others are using it (either for reading or writing). Using the same DataFrame at the same time from different threads is safe as long as it is not modified.","category":"page"},{"location":"lib/functions/#Index","page":"Functions","title":"Index","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"Pages = [\"functions.md\"]","category":"page"},{"location":"lib/functions/#Constructing-data-frames","page":"Functions","title":"Constructing data frames","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allcombinations\ncopy\nsimilar","category":"page"},{"location":"lib/functions/#DataAPI.allcombinations","page":"Functions","title":"DataAPI.allcombinations","text":"allcombinations(DataFrame, pairs::Pair...)\nallcombinations(DataFrame; kwargs...)\n\nCreate a DataFrame from all combinations of values in passed arguments. The first passed values vary fastest.\n\nArguments associating a column name with values to expand can be specified either as Pairs passed as positional arguments, or as keyword arguments. Column names must be Symbols or strings and must be unique.\n\nColumn value can be a vector which is consumed as is or an object of any other type (except AbstractArray). 
In the latter case the passed value is treated as having length one for expansion. As a particular rule values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated as having length one.\n\nSee also: crossjoin can be used to get the cartesian product of rows from passed data frames.\n\nExamples\n\njulia> allcombinations(DataFrame, a=1:2, b='a':'c')\n6×2 DataFrame\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n   2 │     2  a\n   3 │     1  b\n   4 │     2  b\n   5 │     1  c\n   6 │     2  c\n\njulia> allcombinations(DataFrame, \"a\" => 1:2, \"b\" => 'a':'c', \"c\" => \"const\")\n6×3 DataFrame\n Row │ a      b     c\n     │ Int64  Char  String\n─────┼─────────────────────\n   1 │     1  a     const\n   2 │     2  a     const\n   3 │     1  b     const\n   4 │     2  b     const\n   5 │     1  c     const\n   6 │     2  c     const\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.copy","page":"Functions","title":"Base.copy","text":"copy(df::DataFrame; copycols::Bool=true)\n\nCopy data frame df. If copycols=true (the default), return a new  DataFrame holding copies of column vectors in df. If copycols=false, return a new DataFrame sharing column vectors with df.\n\nMetadata: this function preserves all table-level and column-level metadata.\n\n\n\n\n\ncopy(dfr::DataFrameRow)\n\nConstruct a NamedTuple with the same contents as the DataFrameRow. This method returns a NamedTuple so that the returned object is not affected by changes to the parent data frame of which dfr is a view.\n\n\n\n\n\ncopy(key::GroupKey)\n\nConstruct a NamedTuple with the same contents as the GroupKey.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.similar","page":"Functions","title":"Base.similar","text":"similar(df::AbstractDataFrame, rows::Integer=nrow(df))\n\nCreate a new DataFrame with the same column names and column element types as df. 
An optional second argument can be provided to request a number of rows that is different than the number of rows present in df.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Summary-information","page":"Functions","title":"Summary information","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"describe\nisempty\nlength\nncol\nndims\nnrow\nrownumber\nshow\nsize","category":"page"},{"location":"lib/functions/#DataAPI.describe","page":"Functions","title":"DataAPI.describe","text":"describe(df::AbstractDataFrame; cols=:)\ndescribe(df::AbstractDataFrame, stats::Union{Symbol, Pair}...; cols=:)\n\nReturn descriptive statistics for a data frame as a new DataFrame where each row represents a variable and each column a summary statistic.\n\nArguments\n\ndf : the AbstractDataFrame\nstats::Union{Symbol, Pair}... : the summary statistics to report. Arguments can be:\nA symbol from the list :mean, :std, :min, :q25, :median, :q75, :max, :sum, :eltype, :nunique, :nuniqueall, :first, :last, :nnonmissing, and :nmissing. The default statistics used are :mean, :min, :median, :max, :nmissing, and :eltype.\n:detailed as the only Symbol argument to return all statistics except :first, :last, :sum, :nuniqueall, and :nnonmissing.\n:all as the only Symbol argument to return all statistics.\nA function => name pair where name is a Symbol or string. This will create a column of summary statistics with the provided name.\ncols : a keyword argument allowing to select only a subset or transformation of columns from df to describe. Can be any column selector or transformation accepted by select.\n\nDetails\n\nFor Real columns, compute the mean, standard deviation, minimum, first quantile, median, third quantile, and maximum. 
If a column does not derive from Real, describe will attempt to calculate all statistics, using nothing as a fall-back in the case of an error.\n\nWhen stats contains :nunique, describe will report the number of unique values in a column. If a column's base type derives from Real, :nunique will return nothings. Use :nuniqueall to report the number of unique values in all columns.\n\nMissing values are filtered in the calculation of all statistics, however the column :nmissing will report the number of missing values of that variable and :nnonmissing the number of non-missing values.\n\nIf custom functions are provided, they are called repeatedly with the vector corresponding to each column as the only argument. For columns allowing for missing values, the vector is wrapped in a call to skipmissing: custom functions must therefore support such objects (and not only vectors), and cannot access missing values.\n\nMetadata: this function drops all metadata.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=0.1:0.1:1.0, y='a':'j');\n\njulia> describe(df)\n3×7 DataFrame\n Row │ variable  mean    min  median  max  nmissing  eltype\n     │ Symbol    Union…  Any  Union…  Any  Int64     DataType\n─────┼────────────────────────────────────────────────────────\n   1 │ i         5.5     1    5.5     10          0  Int64\n   2 │ x         0.55    0.1  0.55    1.0         0  Float64\n   3 │ y                 a            j           0  Char\n\njulia> describe(df, :min, :max)\n3×3 DataFrame\n Row │ variable  min  max\n     │ Symbol    Any  Any\n─────┼────────────────────\n   1 │ i         1    10\n   2 │ x         0.1  1.0\n   3 │ y         a    j\n\njulia> describe(df, :min, sum => :sum)\n3×3 DataFrame\n Row │ variable  min  sum\n     │ Symbol    Any  Union…\n─────┼───────────────────────\n   1 │ i         1    55\n   2 │ x         0.1  5.5\n   3 │ y         a\n\njulia> describe(df, :min, sum => :sum, cols=:x)\n1×3 DataFrame\n Row │ variable  min      sum\n     │ Symbol    Float64  
Float64\n─────┼────────────────────────────\n   1 │ x             0.1      5.5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.isempty","page":"Functions","title":"Base.isempty","text":"isempty(df::AbstractDataFrame)\n\nReturn true if data frame df has zero rows, and false otherwise.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.length","page":"Functions","title":"Base.length","text":"length(dfr::DataFrameRow)\n\nReturn the number of elements of dfr.\n\nSee also: size\n\nExamples\n\njulia> dfr = DataFrame(a=1:3, b='a':'c')[1, :]\nDataFrameRow\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> length(dfr)\n2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.ncol","page":"Functions","title":"DataAPI.ncol","text":"ncol(df::AbstractDataFrame)\n\nReturn the number of columns in an AbstractDataFrame df.\n\nSee also nrow, size.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=rand(10), y=rand([\"a\", \"b\", \"c\"], 10));\n\njulia> ncol(df)\n3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.ndims","page":"Functions","title":"Base.ndims","text":"ndims(::AbstractDataFrame)\nndims(::Type{<:AbstractDataFrame})\n\nReturn the number of dimensions of a data frame, which is always 2.\n\n\n\n\n\nndims(::DataFrameRow)\nndims(::Type{<:DataFrameRow})\n\nReturn the number of dimensions of a data frame row, which is always 1.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.nrow","page":"Functions","title":"DataAPI.nrow","text":"nrow(df::AbstractDataFrame)\n\nReturn the number of rows in an AbstractDataFrame df.\n\nSee also: ncol, size.\n\nExamples\n\njulia> df = DataFrame(i=1:10, x=rand(10), y=rand([\"a\", \"b\", \"c\"], 10));\n\njulia> nrow(df)\n10\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.rownumber","page":"Functions","title":"DataAPI.rownumber","text":"rownumber(dfr::DataFrameRow)\n\nReturn a row number in the 
AbstractDataFrame that dfr was created from.\n\nNote that this differs from the first element in the tuple returned by parentindices. The latter gives the row number in the parent(dfr), which is the source DataFrame where data that dfr gives access to is stored.\n\nExamples\n\njulia> df = DataFrame(reshape(1:12, 3, 4), :auto)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\njulia> dfr = df[2, :]\nDataFrameRow\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   2 │     2      5      8     11\n\njulia> rownumber(dfr)\n2\n\njulia> parentindices(dfr)\n(2, Base.OneTo(4))\n\njulia> parent(dfr)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\njulia> dfv = @view df[2:3, 1:3]\n2×3 SubDataFrame\n Row │ x1     x2     x3\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      5      8\n   2 │     3      6      9\n\njulia> dfrv = dfv[2, :]\nDataFrameRow\n Row │ x1     x2     x3\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   3 │     3      6      9\n\njulia> rownumber(dfrv)\n2\n\njulia> parentindices(dfrv)\n(3, 1:3)\n\njulia> parent(dfrv)\n3×4 DataFrame\n Row │ x1     x2     x3     x4\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      4      7     10\n   2 │     2      5      8     11\n   3 │     3      6      9     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.show","page":"Functions","title":"Base.show","text":"show([io::IO, ]df::AbstractDataFrame;\n     allrows::Bool = !get(io, :limit, false),\n     allcols::Bool = !get(io, :limit, false),\n     allgroups::Bool = !get(io, 
:limit, false),\n     rowlabel::Symbol = :Row,\n     summary::Bool = true,\n     eltypes::Bool = true,\n     truncate::Int = 32,\n     kwargs...)\n\nRender a data frame to an I/O stream. The specific visual representation chosen depends on the width of the display.\n\nIf io is omitted, the result is printed to stdout, and allrows, allcols and allgroups default to false.\n\nArguments\n\nio::IO: The I/O stream to which df will be printed.\ndf::AbstractDataFrame: The data frame to print.\nallrows::Bool: Whether to print all rows, rather than a subset that fits the device height. By default this is the case only if io does not have the IOContext property limit set.\nallcols::Bool: Whether to print all columns, rather than a subset that fits the device width. By default this is the case only if io does not have the IOContext property limit set.\nallgroups::Bool: Whether to print all groups rather than the first and last, when df is a GroupedDataFrame. By default this is the case only if io does not have the IOContext property limit set.\nrowlabel::Symbol = :Row: The label to use for the column containing row numbers.\nsummary::Bool = true: Whether to print a brief string summary of the data frame.\neltypes::Bool = true: Whether to print the column types under column names.\ntruncate::Int = 32: the maximal display width the output can use before being truncated (in the textwidth sense, excluding …). 
If truncate is 0 or less, no truncation is applied.\nkwargs...: Any keyword argument supported by the function pretty_table of PrettyTables.jl can be passed here to customize the output.\n\nExamples\n\njulia> using DataFrames\n\njulia> df = DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]);\n\njulia> show(df, show_row_number=false)\n3×2 DataFrame\n A      B\n Int64  String\n───────────────\n     1  x\n     2  y\n     3  z\n\n\n\n\n\nshow(io::IO, mime::MIME, df::AbstractDataFrame)\n\nRender a data frame to an I/O stream in MIME type mime.\n\nArguments\n\nio::IO: The I/O stream to which df will be printed.\nmime::MIME: supported MIME types are: \"text/plain\", \"text/html\", \"text/latex\", \"text/csv\", \"text/tab-separated-values\" (the last two MIME types do not support  showing #undef values)\ndf::AbstractDataFrame: The data frame to print.\n\nAdditionally selected MIME types support passing the following keyword arguments:\n\nMIME type \"text/plain\" accepts all listed keyword arguments and their behavior is identical as for show(::IO, ::AbstractDataFrame)\nMIME type \"text/html\" accepts the following keyword arguments:\neltypes::Bool = true: Whether to print the column types under column names.\nsummary::Bool = true: Whether to print a brief string summary of the data frame.\nmax_column_width::AbstractString = \"\": The maximum column width. It must     be a string containing a valid CSS length. For example, passing     \"100px\" will limit the width of all columns to 100 pixels. 
If empty,     the columns will be rendered without limits.\nkwargs...: Any keyword argument supported by the function pretty_table of PrettyTables.jl can be passed here to customize the output.\n\nExamples\n\njulia> show(stdout, MIME(\"text/latex\"), DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]))\n\\begin{tabular}{r|cc}\n\t& A & B\\\\\n\t\\hline\n\t& Int64 & String\\\\\n\t\\hline\n\t1 & 1 & x \\\\\n\t2 & 2 & y \\\\\n\t3 & 3 & z \\\\\n\\end{tabular}\n14\n\njulia> show(stdout, MIME(\"text/csv\"), DataFrame(A=1:3, B=[\"x\", \"y\", \"z\"]))\n\"A\",\"B\"\n1,\"x\"\n2,\"y\"\n3,\"z\"\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.size","page":"Functions","title":"Base.size","text":"size(df::AbstractDataFrame[, dim])\n\nReturn a tuple containing the number of rows and columns of df. Optionally a dimension dim can be specified, where 1 corresponds to rows and 2 corresponds to columns.\n\nSee also: nrow, ncol\n\nExamples\n\njulia> df = DataFrame(a=1:3, b='a':'c');\n\njulia> size(df)\n(3, 2)\n\njulia> size(df, 1)\n3\n\n\n\n\n\nsize(dfr::DataFrameRow[, dim])\n\nReturn a 1-tuple containing the number of elements of dfr. 
If an optional dimension dim is specified, it must be 1, and the number of elements is returned directly as a number.\n\nSee also: length\n\nExamples\n\njulia> dfr = DataFrame(a=1:3, b='a':'c')[1, :]\nDataFrameRow\n Row │ a      b\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> size(dfr)\n(2,)\n\njulia> size(dfr, 1)\n2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Working-with-column-names","page":"Functions","title":"Working with column names","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"names\npropertynames\nrename\nrename!","category":"page"},{"location":"lib/functions/#Base.names","page":"Functions","title":"Base.names","text":"names(df::AbstractDataFrame, cols=:)\nnames(df::DataFrameRow, cols=:)\nnames(df::GroupedDataFrame, cols=:)\nnames(df::DataFrameRows, cols=:)\nnames(df::DataFrameColumns, cols=:)\nnames(df::GroupKey)\n\nReturn a freshly allocated Vector{String} of names of columns contained in df.\n\nIf cols is passed then restrict returned column names to those matching the selector (this is useful in particular with regular expressions, Cols, Not, and Between). cols can be:\n\nany column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers); these column selectors are documented in the General rules section of the Indexing part of the DataFrames.jl manual\na Type, in which case names of columns whose eltype is a subtype of T are returned\na Function predicate taking the column name as a string and returning true for columns that should be kept\n\nSee also propertynames which returns a Vector{Symbol} (except for GroupedDataFrame in which case use Symbol.(names(df))).\n\nExamples\n\njulia> df = DataFrame(x1=[1, missing, missing], x2=[3, 2, 4], x3=[3, missing, 2], x4=Union{Int, Missing}[2, 4, 4])\n3×4 DataFrame\n Row │ x1       x2     x3       x4\n     │ Int64?   Int64  Int64?   
Int64?\n─────┼─────────────────────────────────\n   1 │       1      3        3       2\n   2 │ missing      2  missing       4\n   3 │ missing      4        2       4\n\njulia> names(df)\n4-element Vector{String}:\n \"x1\"\n \"x2\"\n \"x3\"\n \"x4\"\n\njulia> names(df, Int) # pick columns whose element type is Int\n1-element Vector{String}:\n \"x2\"\n\njulia> names(df, x -> x[end] == '2') # pick columns for which last character in their name is '2'\n1-element Vector{String}:\n \"x2\"\n\njulia> fun(col) = sum(skipmissing(col)) >= 10\nfun (generic function with 1 method)\n\njulia> names(df, fun.(eachcol(df))) # pick columns for which sum of their elements is at least 10\n1-element Vector{String}:\n \"x4\"\n\njulia> names(df, eltype.(eachcol(df)) .>: Missing) # pick columns that allow missing values\n3-element Vector{String}:\n \"x1\"\n \"x3\"\n \"x4\"\n\njulia> names(df, any.(ismissing, eachcol(df))) # pick columns that contain missing values\n2-element Vector{String}:\n \"x1\"\n \"x3\"\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.propertynames","page":"Functions","title":"Base.propertynames","text":"propertynames(df::AbstractDataFrame)\n\nReturn a freshly allocated Vector{Symbol} of names of columns contained in df.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.rename","page":"Functions","title":"DataFrames.rename","text":"rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};\n       makeunique::Bool=false)\nrename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};\n       makeunique::Bool=false)\nrename(df::AbstractDataFrame, (from => to)::Pair...)\nrename(df::AbstractDataFrame, d::AbstractDict)\nrename(df::AbstractDataFrame, d::AbstractVector{<:Pair})\nrename(f::Function, df::AbstractDataFrame; cols=All())\n\nCreate a new data frame that is a copy of df with changed column names. Each name is changed at most once. 
Permutation of names is allowed.\n\nArguments\n\ndf : the AbstractDataFrame; if it is a SubDataFrame then renaming is only allowed if it was created using : as a column selector.\nd : an AbstractDict or an AbstractVector of Pairs that maps the original names or column numbers to new names\nf : a function which for each column selected by the cols keyword argument takes the old name as a String and returns the new name that gets converted to a Symbol; the cols column selector can be any value accepted as column selector by the names function\nvals : new column names as a vector of Symbols or AbstractStrings of the same length as the number of columns in df\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf pairs are passed to rename (as positional arguments or in a dictionary or a vector) then:\n\nfrom value can be a Symbol, an AbstractString or an Integer;\nto value can be a Symbol or an AbstractString.\n\nMixing symbols and strings in to and from is not allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nColumn-level :note-style metadata is considered to be attached to column number: when a column is renamed, its :note-style metadata becomes associated to its new name.\n\nSee also: rename!\n\nExamples\n\njulia> df = DataFrame(i=1, x=2, y=3)\n1×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, [:a, :b, :c])\n1×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, :i => \"A\", :x => \"X\")\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, :x => :y, :y => :x)\n1×3 DataFrame\n Row │ i      y      x\n     │ Int64  
Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, [1 => :A, 2 => :X])\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(df, Dict(\"i\" => \"A\", \"x\" => \"X\"))\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(uppercase, df)\n1×3 DataFrame\n Row │ I      X      Y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename(uppercase, df, cols=contains('x'))\n1×3 DataFrame\n Row │ i      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.rename!","page":"Functions","title":"DataFrames.rename!","text":"rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};\n        makeunique::Bool=false)\nrename!(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};\n        makeunique::Bool=false)\nrename!(df::AbstractDataFrame, (from => to)::Pair...)\nrename!(df::AbstractDataFrame, d::AbstractDict)\nrename!(df::AbstractDataFrame, d::AbstractVector{<:Pair})\nrename!(f::Function, df::AbstractDataFrame; cols=All())\n\nRename columns of df in-place. Each name is changed at most once. 
Permutation of names is allowed.\n\nArguments\n\ndf : the AbstractDataFrame\nd : an AbstractDict or an AbstractVector of Pairs that maps the original names or column numbers to new names\nf : a function which for each column selected by the cols keyword argument takes the old name as a String and returns the new name that gets converted to a Symbol; the cols column selector can be any value accepted as column selector by the names function\nvals : new column names as a vector of Symbols or AbstractStrings of the same length as the number of columns in df\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf pairs are passed to rename! (as positional arguments or in a dictionary or a vector) then:\n\nfrom value can be a Symbol, an AbstractString or an Integer;\nto value can be a Symbol or an AbstractString.\n\nMixing symbols and strings in to and from is not allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame). Column-level :note-style metadata is considered to be attached to column number: when a column is renamed, its :note-style metadata becomes associated to its new name.\n\nSee also: rename\n\nExamples\n\njulia> df = DataFrame(i=1, x=2, y=3)\n1×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, Dict(:i => \"A\", :x => \"X\"))\n1×3 DataFrame\n Row │ A      X      y\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, [:a, :b, :c])\n1×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(df, [:a, :b, :a])\nERROR: ArgumentError: Duplicate variable names: :a. 
Pass makeunique=true to make them unique using a suffix automatically.\n\njulia> rename!(df, [:a, :b, :a], makeunique=true)\n1×3 DataFrame\n Row │ a      b      a_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(uppercase, df)\n1×3 DataFrame\n Row │ A      B      A_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\njulia> rename!(lowercase, df, cols=contains('A'))\n1×3 DataFrame\n Row │ a      B      a_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Mutating-and-transforming-data-frames-and-grouped-data-frames","page":"Functions","title":"Mutating and transforming data frames and grouped data frames","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"append!\ncombine\nfillcombinations\nflatten\nhcat\ninsert!\ninsertcols\ninsertcols!\ninvpermute!\nmapcols\nmapcols!\npermute!\nprepend!\npush!\npushfirst!\nreduce\nrepeat\nrepeat!\nreverse\nreverse!\nselect\nselect!\nshuffle\nshuffle!\ntable_transformation\ntransform\ntransform!\nvcat","category":"page"},{"location":"lib/functions/#Base.append!","page":"Functions","title":"Base.append!","text":"append!(df::DataFrame, tables...; cols::Symbol=:setequal,\n        promote::Bool=(cols in [:union, :subset]))\n\nAdd the rows of tables passed as tables to the end of df. If the table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being appended.\n\nThe exact behavior of append! 
depends on the cols argument:\n\nIf cols == :setequal (this is the default) then df2 must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then df2 must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if df2 is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then df2 may contain more columns than df, but all column names that are present in df must be present in df2 and only these are used.\nIf cols == :subset then append! behaves like for :intersect but if some column is missing in df2 then a missing value is pushed to df.\nIf cols == :union then append! adds columns missing in df that are present in df2, for columns present in df but missing in df2 a missing value is pushed.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. If promote=false an error is thrown.\n\nThe above rule has the following exceptions:\n\nIf df has no columns then copies of columns from df2 are added to it.\nIf df2 has no columns then calling append! leaves df unchanged.\n\nPlease note that append! must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for columns present in df are preserved. If new columns are added their :note-style metadata is copied from the appended table. Other metadata is dropped.\n\nSee also: use push! to add individual rows to a data frame, prepend! 
to add a table at the beginning, and vcat to vertically concatenate data frames.\n\nExamples\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4.0:6.0, B=4:6)\n3×2 DataFrame\n Row │ A        B\n     │ Float64  Int64\n─────┼────────────────\n   1 │     4.0      4\n   2 │     5.0      5\n   3 │     6.0      6\n\njulia> append!(df1, df2);\n\njulia> df1\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> append!(df2, DataFrame(A=1), (; C=1:2), cols=:union)\n6×3 DataFrame\n Row │ A          B        C\n     │ Float64?   Int64?   Int64?\n─────┼─────────────────────────────\n   1 │       4.0        4  missing\n   2 │       5.0        5  missing\n   3 │       6.0        6  missing\n   4 │       1.0  missing  missing\n   5 │ missing    missing        1\n   6 │ missing    missing        2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.combine","page":"Functions","title":"DataFrames.combine","text":"combine(df::AbstractDataFrame, args...;\n        renamecols::Bool=true, threads::Bool=true)\ncombine(f::Callable, df::AbstractDataFrame;\n        renamecols::Bool=true, threads::Bool=true)\ncombine(gd::GroupedDataFrame, args...;\n        keepkeys::Bool=true, ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\ncombine(f::Base.Callable, gd::GroupedDataFrame;\n        keepkeys::Bool=true, ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd specified by args and return it. 
The result can have any number of rows that is determined by the values returned by passed transformations.\n\nThe common rules for all transformation functions supported by DataFrames.jl are explained and compared in detail below.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target column names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be a single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be a single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If an expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If function returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case a single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if an empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in the target data frame is accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> combine(df, :a => sum, nrow, renamecols=false)\n1×2 DataFrame\n Row │ a      nrow\n     │ Int64  Int64\n─────┼──────────────\n   1 │     6      3\n\njulia> combine(df, :a => ByRow(sin) => :c, :b)\n3×2 DataFrame\n Row │ c         b\n     │ Float64   Int64\n─────┼─────────────────\n   1 │ 0.841471      4\n   2 │ 0.909297      5\n   3 │ 0.14112       6\n\njulia> combine(df, :, [:a, :b] => (a, b) -> a .+ b .- sum(b)/length(b))\n3×3 DataFrame\n Row │ a      b      a_b_function\n     │ Int64  Int64  Float64\n─────┼────────────────────────────\n   1 │     1      4           0.0\n   2 │     2      5           2.0\n   3 │     3      6           4.0\n\njulia> combine(df, All() .=> [minimum maximum])\n1×4 DataFrame\n Row │ a_minimum  b_minimum  a_maximum  b_maximum\n     │ Int64      Int64      Int64      Int64\n─────┼────────────────────────────────────────────\n   1 │         1          4          3          6\n\njulia> using Statistics\n\njulia> combine(df, AsTable(:) => ByRow(mean), renamecols=false)\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> 
combine(df, AsTable(:) => ByRow(mean) => x -> join(x, \"_\"))\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> combine(first, df)\n1×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n\njulia> df = DataFrame(a=1:3, b=4:6, c=7:9)\n3×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9\n\njulia> combine(df, AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => :stats,\n               AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => AsTable)\n3×3 DataFrame\n Row │ stats                    mean     std\n     │ NamedTup…                Float64  Float64\n─────┼───────────────────────────────────────────\n   1 │ (mean = 4.0, std = 3.0)      4.0      3.0\n   2 │ (mean = 5.0, std = 3.0)      5.0      3.0\n   3 │ (mean = 6.0, std = 3.0)      6.0      3.0\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :c => sum, nrow)\n4×3 DataFrame\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6      2\n   2 │     2      8      2\n   3 │     3     10      2\n   4 │     4     12      2\n\njulia> combine(gd, :c => sum, nrow, ungroup=false)\nGroupedDataFrame with 4 groups based on key: a\nFirst Group (1 row): a = 1\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6      2\n⋮\nLast Group (1 row): a = 4\n Row │ a      c_sum  nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4     12      2\n\njulia> combine(gd) do d # do syntax for the slower variant\n           sum(d.c)\n       end\n4×2 DataFrame\n Row │ a      x1\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      
6\n   2 │     2      8\n   3 │     3     10\n   4 │     4     12\n\njulia> combine(gd, :c => (x -> sum(log, x)) => :sum_log_c) # specifying a name for target column\n4×2 DataFrame\n Row │ a      sum_log_c\n     │ Int64  Float64\n─────┼──────────────────\n   1 │     1    1.60944\n   2 │     2    2.48491\n   3 │     3    3.04452\n   4 │     4    3.46574\n\njulia> combine(gd, [:b, :c] .=> sum) # passing a vector of pairs\n4×3 DataFrame\n Row │ a      b_sum  c_sum\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      4      6\n   2 │     2      2      8\n   3 │     3      4     10\n   4 │     4      2     12\n\njulia> combine(gd) do sdf # dropping group when DataFrame() is returned\n          sdf.c[1] != 1 ? sdf : DataFrame()\n       end\n6×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      2\n   2 │     2      1      6\n   3 │     3      2      3\n   4 │     3      2      7\n   5 │     4      1      4\n   6 │     4      1      8\n\nauto-splatting, renaming and keepkeys\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :b => :b1, :c => :c1, [:b, :c] => +, keepkeys=false)\n8×3 DataFrame\n Row │ b1     c1     b_c_+\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      3\n   2 │     2      5      7\n   3 │     1      2      3\n   4 │     1      6      7\n   5 │     2      3      5\n   6 │     2      7      9\n   7 │     1      4      5\n   8 │     1      8      9\n\nbroadcasting and column expansion\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> combine(gd, :b, AsTable([:b, :c]) => ByRow(extrema) => [:min, :max])\n8×4 DataFrame\n Row │ a      b      min    max\n     │ Int64  
Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      2\n   2 │     1      2      2      5\n   3 │     2      1      1      2\n   4 │     2      1      1      6\n   5 │     3      2      2      3\n   6 │     3      2      2      7\n   7 │     4      1      1      4\n   8 │     4      1      1      8\n\njulia> combine(gd, [:b, :c] .=> Ref) # preventing vector from being spread across multiple rows\n4×3 DataFrame\n Row │ a      b_Ref      c_Ref\n     │ Int64  SubArray…  SubArray…\n─────┼─────────────────────────────\n   1 │     1  [2, 2]     [1, 5]\n   2 │     2  [1, 1]     [2, 6]\n   3 │     3  [2, 2]     [3, 7]\n   4 │     4  [1, 1]     [4, 8]\n\njulia> combine(gd, AsTable(Not(:a)) => Ref) # protecting result\n4×2 DataFrame\n Row │ a      b_c_Ref\n     │ Int64  NamedTup…\n─────┼─────────────────────────────────\n   1 │     1  (b = [2, 2], c = [1, 5])\n   2 │     2  (b = [1, 1], c = [2, 6])\n   3 │     3  (b = [2, 2], c = [3, 7])\n   4 │     4  (b = [1, 1], c = [4, 8])\n\njulia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false)\n8×4 DataFrame\n Row │ a      b      c      b_c\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      3\n   2 │     1      2      5      7\n   3 │     2      1      2      3\n   4 │     2      1      6      7\n   5 │     3      2      3      5\n   6 │     3      2      7      9\n   7 │     4      1      4      5\n   8 │     4      1      8      9\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.fillcombinations","page":"Functions","title":"DataFrames.fillcombinations","text":"fillcombinations(df::AbstractDataFrame, indexcols;\n                     allowduplicates::Bool=false,\n                     fill=missing)\n\nGenerate all combinations of levels of column(s) indexcols in data frame df. Levels and their order are determined by the levels function (i.e. 
unique values sorted lexicographically by default, or a custom set of levels for e.g. CategoricalArray columns), in addition to missing if present.\n\nFor combinations of indexcols not present in df these columns are filled with the fill value (missing by default).\n\nIf allowduplicates=false (the default) indexcols may only contain unique combinations of indexcols values. If allowduplicates=true duplicates are allowed.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:2, y='a':'b', z=[\"x\", \"y\"])\n2×3 DataFrame\n Row │ x      y     z\n     │ Int64  Char  String\n─────┼─────────────────────\n   1 │     1  a     x\n   2 │     2  b     y\n\njulia> fillcombinations(df, [:x, :y])\n4×3 DataFrame\n Row │ x      y     z\n     │ Int64  Char  String?\n─────┼──────────────────────\n   1 │     1  a     x\n   2 │     2  a     missing\n   3 │     1  b     missing\n   4 │     2  b     y\n\njulia> fillcombinations(df, [:y, :z], fill=0)\n4×3 DataFrame\n Row │ x       y     z\n     │ Int64?  Char  String\n─────┼──────────────────────\n   1 │      1  a     x\n   2 │      0  b     x\n   3 │      0  a     y\n   4 │      2  b     y\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.flatten","page":"Functions","title":"DataFrames.flatten","text":"flatten(df::AbstractDataFrame, cols; scalar::Type=Union{})\n\nWhen columns cols of data frame df have iterable elements that define length (for example a Vector of Vectors), return a DataFrame where each element of each col in cols is flattened, meaning the column corresponding to col becomes a longer vector where the original entries are concatenated. Elements of row i of df in columns other than cols will be repeated according to the length of df[i, col]. These lengths must therefore be the same for each col in cols, or else an error is raised. 
Note that these elements are not copied, and thus if they are mutable changing them in the returned DataFrame will affect df.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf scalar is passed then values that have this type in flattened columns are treated as scalars and broadcasted as many times as is needed to match lengths of values stored in other columns. If all values in a row are scalars, a single row is produced.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df1 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2×3 DataFrame\n Row │ a      b       c\n     │ Int64  Array…  Array…\n─────┼───────────────────────\n   1 │     1  [1, 2]  [5, 6]\n   2 │     2  [3, 4]  [7, 8]\n\njulia> flatten(df1, :b)\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Array…\n─────┼──────────────────────\n   1 │     1      1  [5, 6]\n   2 │     1      2  [5, 6]\n   3 │     2      3  [7, 8]\n   4 │     2      4  [7, 8]\n\njulia> flatten(df1, [:b, :c])\n4×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      1      5\n   2 │     1      2      6\n   3 │     2      3      7\n   4 │     2      4      8\n\njulia> df2 = DataFrame(a=[1, 2], b=[(\"p\", \"q\"), (\"r\", \"s\")])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Tuple…\n─────┼───────────────────\n   1 │     1  (\"p\", \"q\")\n   2 │     2  (\"r\", \"s\")\n\njulia> flatten(df2, :b)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  String\n─────┼───────────────\n   1 │     1  p\n   2 │     1  q\n   3 │     2  r\n   4 │     2  s\n\njulia> df3 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7]])\n2×3 DataFrame\n Row │ a      b       c\n     │ Int64  Array…  Array…\n─────┼───────────────────────\n   1 │     1  [1, 2]  [5, 6]\n   2 │     2  [3, 4]  [7]\n\njulia> flatten(df3, 
[:b, :c])\nERROR: ArgumentError: Lengths of iterables stored in columns :b and :c are not the same in row 2\n\njulia> df4 = DataFrame(a=[1, 2, 3],\n                       b=[[1, 2], missing, missing],\n                       c=[[5, 6], missing, [7, 8]])\n3×3 DataFrame\n Row │ a      b        c\n     │ Int64  Array…?  Array…?\n─────┼─────────────────────────\n   1 │     1  [1, 2]   [5, 6]\n   2 │     2  missing  missing\n   3 │     3  missing  [7, 8]\n\njulia> flatten(df4, [:b, :c], scalar=Missing)\n5×3 DataFrame\n Row │ a      b        c\n     │ Int64  Int64?   Int64?\n─────┼─────────────────────────\n   1 │     1        1        5\n   2 │     1        2        6\n   3 │     2  missing  missing\n   4 │     3  missing        7\n   5 │     3  missing        8\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.hcat","page":"Functions","title":"Base.hcat","text":"hcat(df::AbstractDataFrame...;\n     makeunique::Bool=false, copycols::Bool=true)\n\nHorizontally concatenate data frames.\n\nIf makeunique=false (the default) column names of passed objects must be unique. If makeunique=true then duplicate column names will be suffixed with _i (i starting at 1 for the first duplicate).\n\nIf copycols=true (the default) then the DataFrame returned by hcat will contain copied columns from the source data frames. If copycols=false then it will contain columns as they are stored in the source (without copying). 
This option should be used with caution as mutating either the columns in sources or in the returned DataFrame might lead to the corruption of the other object.\n\nMetadata: hcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value; it propagates column-level :note-style metadata.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = hcat(df1, df2, makeunique=true)\n3×4 DataFrame\n Row │ A      B      A_1    B_1\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      1      4      4\n   2 │     2      2      5      5\n   3 │     3      3      6      6\n\njulia> df3.A === df1.A\nfalse\n\njulia> df3 = hcat(df1, df2, makeunique=true, copycols=false);\n\njulia> df3.A === df1.A\ntrue\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.insert!","page":"Functions","title":"Base.insert!","text":"insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};\n        cols::Symbol=:setequal, promote::Bool=false)\ninsert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple,\n                                                  AbstractDict, Tables.AbstractRow};\n        cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row to df at position index in-place, taking the values from row. index must be an integer between 1 and nrow(df)+1.\n\nColumn types of df are preserved, and new values are converted if necessary. 
An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: push!, pushfirst!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insert!(df, 2, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ true      0\n   3 │ b         2\n   4 │ c         3\n\njulia> insert!(df, 5, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ true      0\n   3 │ b         2\n   4 │ c         3\n   5 │ a         1\n\njulia> insert!(df, 1, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ 11       12\n   2 │ a         1\n   3 │ true      0\n   4 │ b         2\n   5 │ c         3\n   6 │ a         1\n\njulia> insert!(df, 7, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ 11         12  missing\n   2 │ a           1  missing\n   3 │ true        0  missing\n   4 │ b           2  missing\n   5 │ c           3  missing\n   6 │ a           1  missing\n   7 │ 1.0   missing        1.0\n\njulia> insert!(df, 3, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ 11            12  missing\n   2 │ a              1  missing\n   3 │ missing  missing  missing\n   4 │ true           0  missing\n   5 │ b              2  missing\n   6 │ c              3  missing\n   7 │ a              1  missing\n   8 │ 1.0      missing        1.0\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.insertcols","page":"Functions","title":"DataFrames.insertcols","text":"insertcols(df::AbstractDataFrame[, col], (name=>val)::Pair...;\n           after::Bool=false, makeunique::Bool=false, copycols::Bool=true)\n\nInsert a column into a copy of the df data frame using the insertcols! function and return the newly created data frame.\n\nIf col is omitted it is set to ncol(df)+1 (the column is inserted as the last column).\n\nArguments\n\ndf : the data frame to which we want to add columns\ncol : a position at which we want to insert a column, passed as an integer or a column name (a string or a Symbol); the column selected with col and columns following it are shifted to the right in df after the operation\nname : the name of the new column\nval : an AbstractVector giving the contents of the new column or a value of any type other than AbstractArray which will be repeated to fill a new vector; as a special case, values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way\nafter : if true columns are inserted after col\nmakeunique : defines what to do if name already exists in df; if it is false an error will be thrown; if it is true a new unique name will be generated by adding a suffix\ncopycols : whether vectors passed as columns should be copied\n\nIf val is an AbstractRange then the result of collect(val) is inserted.\n\nIf df is a SubDataFrame then it must have been created with : as column selector (otherwise an error is thrown). In this case the copycols keyword argument is ignored (i.e. 
the added column is always copied) and the parent data frame's column is filled with missing in rows that are filtered out by df.\n\nIf df isa DataFrame that has no columns and only values other than AbstractVector are passed then they are used to create one-element columns. If df isa DataFrame that has no columns and at least one AbstractVector is passed then its length is used to determine the number of elements in all created columns. In all other cases the number of rows in all created columns must match nrow(df).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also insertcols!.\n\nExamples\n\njulia> df = DataFrame(a=1:3)\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> insertcols(df, 1, :b => 'a':'c')\n3×2 DataFrame\n Row │ b     a\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insertcols(df, :c => 2:4, :c => 3:5, makeunique=true)\n3×3 DataFrame\n Row │ a      c      c_1\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      3\n   2 │     2      3      4\n   3 │     3      4      5\n\njulia> insertcols(df, :a, :d => 7:9, after=true)\n3×2 DataFrame\n Row │ a      d\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      7\n   2 │     2      8\n   3 │     3      9\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.insertcols!","page":"Functions","title":"DataFrames.insertcols!","text":"insertcols!(df::AbstractDataFrame[, col], (name=>val)::Pair...;\n            after::Bool=false, makeunique::Bool=false, copycols::Bool=true)\n\nInsert a column into a data frame in place. 
Return the updated data frame.\n\nIf col is omitted it is set to ncol(df)+1 (the column is inserted as the last column).\n\nArguments\n\ndf : the data frame to which we want to add columns\ncol : a position at which we want to insert a column, passed as an integer or a column name (a string or a Symbol); the column selected with col and columns following it are shifted to the right in df after the operation\nname : the name of the new column\nval : an AbstractVector giving the contents of the new column or a value of any type other than AbstractArray which will be repeated to fill a new vector; as a special case, values stored in a Ref or a 0-dimensional AbstractArray are unwrapped and treated in the same way\nafter : if true columns are inserted after col\nmakeunique : defines what to do if name already exists in df; if it is false an error will be thrown; if it is true a new unique name will be generated by adding a suffix\ncopycols : whether vectors passed as columns should be copied\n\nIf val is an AbstractRange then the result of collect(val) is inserted.\n\nIf df is a SubDataFrame then it must have been created with : as column selector (otherwise an error is thrown). In this case the copycols keyword argument is ignored (i.e. the added column is always copied) and the parent data frame's column is filled with missing in rows that are filtered out by df.\n\nIf df isa DataFrame that has no columns and only values other than AbstractVector are passed then they are used to create one-element columns. If df isa DataFrame that has no columns and at least one AbstractVector is passed then its length is used to determine the number of elements in all created columns. 
In all other cases the number of rows in all created columns must match nrow(df).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nSee also insertcols.\n\nExamples\n\njulia> df = DataFrame(a=1:3)\n3×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> insertcols!(df, 1, :b => 'a':'c')\n3×2 DataFrame\n Row │ b     a\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> insertcols!(df, 2, :c => 2:4, :c => 3:5, makeunique=true)\n3×4 DataFrame\n Row │ b     c      c_1    a\n     │ Char  Int64  Int64  Int64\n─────┼───────────────────────────\n   1 │ a         2      3      1\n   2 │ b         3      4      2\n   3 │ c         4      5      3\n\njulia> insertcols!(df, :b, :d => 7:9, after=true)\n3×5 DataFrame\n Row │ b     d      c      c_1    a\n     │ Char  Int64  Int64  Int64  Int64\n─────┼──────────────────────────────────\n   1 │ a         7      2      3      1\n   2 │ b         8      3      4      2\n   3 │ c         9      4      5      3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.invpermute!","page":"Functions","title":"Base.invpermute!","text":"invpermute!(df::AbstractDataFrame, p)\n\nLike permute!, but the inverse of the given permutation is applied.\n\ninvpermute! will produce a correct result even if some columns of passed data frame or permutation p are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then invpermute! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> permute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     1      6     11\n   4 │     2      7     12\n   5 │     4      9     14\n\njulia> invpermute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.mapcols","page":"Functions","title":"DataFrames.mapcols","text":"mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())\n\nReturn a DataFrame where each column of df selected by cols (by default, all columns) is transformed using function f. Columns not selected by cols are copied.\n\nf must return AbstractVector objects all with the same length or scalars (all values other than AbstractVector are considered to be a scalar).\n\nThe cols column selector can be any value accepted as column selector by the names function.\n\nNote that mapcols guarantees not to reuse the columns from df in the returned DataFrame. 
If f returns its argument then it gets copied before being stored.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> mapcols(x -> x.^2, df)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n   4 │    16    196\n\njulia> mapcols(x -> x.^2, df, cols=r\"y\")\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     2    144\n   3 │     3    169\n   4 │     4    196\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.mapcols!","page":"Functions","title":"DataFrames.mapcols!","text":"mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())\n\nUpdate a DataFrame in-place where each column of df selected by cols (by default, all columns) is transformed using function f. Columns not selected by cols are left unchanged.\n\nf must return AbstractVector objects all with the same length or scalars (all values other than AbstractVector are considered to be a scalar).\n\nNote that mapcols! 
reuses the columns from df if they are returned by f.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> mapcols!(x -> x.^2, df);\n\njulia> df\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1    121\n   2 │     4    144\n   3 │     9    169\n   4 │    16    196\n\njulia> mapcols!(x -> 2 * x, df, cols=r\"x\");\n\njulia> df\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2    121\n   2 │     8    144\n   3 │    18    169\n   4 │    32    196\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.permute!","page":"Functions","title":"Base.permute!","text":"permute!(df::AbstractDataFrame, p)\n\nPermute data frame df in-place, according to permutation p. Throws ArgumentError if p is not a permutation.\n\nTo return a new data frame instead of permuting df in-place, use df[p, :].\n\npermute! will produce a correct result even if some columns of passed data frame or permutation p are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then permute! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> permute!(df, [5, 3, 1, 2, 4])\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     1      6     11\n   4 │     2      7     12\n   5 │     4      9     14\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.prepend!","page":"Functions","title":"Base.prepend!","text":"prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,\n         promote::Bool=(cols in [:union, :subset]))\n\nAdd the rows of tables passed as tables to the beginning of df. If the table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being prepended.\n\nAdd the rows of df2 to the beginning of df. If the second argument table is not an AbstractDataFrame then it is converted using DataFrame(table, copycols=false) before being prepended.\n\nThe exact behavior of prepend! 
depends on the cols argument:\n\nIf cols == :setequal (this is the default) then df2 must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then df2 must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if df2 is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then df2 may contain more columns than df, but all column names that are present in df must be present in df2 and only these are used.\nIf cols == :subset then prepend! behaves like for :intersect but if some column is missing in df2 then a missing value is pushed to df.\nIf cols == :union then prepend! adds columns missing in df that are present in df2, for columns present in df but missing in df2 a missing value is pushed.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. If promote=false an error is thrown.\n\nThe above rule has the following exceptions:\n\nIf df has no columns then copies of columns from df2 are added to it.\nIf df2 has no columns then calling prepend! leaves df unchanged.\n\nPlease note that prepend! must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for columns present in df are preserved. If new columns are added their :note-style metadata is copied from the prepended table. Other metadata is dropped.\n\nSee also: use pushfirst! to add individual rows at the beginning of a data frame, append! 
to add a table at the end, and vcat to vertically concatenate data frames.\n\nExamples\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4.0:6.0, B=4:6)\n3×2 DataFrame\n Row │ A        B\n     │ Float64  Int64\n─────┼────────────────\n   1 │     4.0      4\n   2 │     5.0      5\n   3 │     6.0      6\n\njulia> prepend!(df1, df2);\n\njulia> df1\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n   4 │     1      1\n   5 │     2      2\n   6 │     3      3\n\njulia> prepend!(df2, DataFrame(A=1), (; C=1:2), cols=:union)\n6×3 DataFrame\n Row │ A          B        C\n     │ Float64?   Int64?   Int64?\n─────┼─────────────────────────────\n   1 │       1.0  missing  missing\n   2 │ missing    missing        1\n   3 │ missing    missing        2\n   4 │       4.0        4  missing\n   5 │       5.0        5  missing\n   6 │       6.0        6  missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.push!","page":"Functions","title":"Base.push!","text":"push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;\n      cols::Symbol=:setequal, promote::Bool=false)\npush!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,\n                                Tables.AbstractRow}...;\n      cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row at the end of df in-place, taking the values from row. Several rows can be added by passing them as separate arguments.\n\nColumn types of df are preserved, and new values are converted if necessary. An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. 
In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: pushfirst!, insert!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> push!(df, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n\njulia> push!(df, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n   5 │ a         1\n\njulia> push!(df, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n   4 │ true      0\n   5 │ a         1\n   6 │ 11       12\n\njulia> push!(df, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ a           1  missing\n   2 │ b           2  missing\n   3 │ c           3  missing\n   4 │ true        0  missing\n   5 │ a           1  missing\n   6 │ 11         12  missing\n   7 │ 1.0   missing        1.0\n\njulia> push!(df, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ a              1  missing\n   2 │ b              2  missing\n   3 │ c              3  missing\n   4 │ true           0  missing\n   5 │ a              1  missing\n   6 │ 11            12  missing\n   7 │ 1.0      missing        1.0\n   8 │ missing  missing  missing\n\njulia> push!(DataFrame(a=1, b=2), (3, 4), (5, 6))\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n   3 │     5      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pushfirst!","page":"Functions","title":"Base.pushfirst!","text":"pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;\n           cols::Symbol=:setequal, promote::Bool=false)\npushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,\n                                     Tables.AbstractRow}...;\n           cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))\n\nAdd one row at the beginning of df in-place, taking the values from row. Several rows can be added by passing them as separate arguments.\n\nColumn types of df are preserved, and new values are converted if necessary. An error is thrown if conversion fails.\n\nIf row is neither a DataFrameRow, NamedTuple nor AbstractDict then it must be a Tuple or an AbstractArray and columns are matched by order of appearance. In this case row must contain the same number of elements as the number of columns in df.\n\nIf row is a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow then values in row are matched to columns in df based on names. 
The exact behavior depends on the cols argument value in the following way:\n\nIf cols == :setequal (this is the default) then row must contain exactly the same columns as df (but possibly in a different order).\nIf cols == :orderequal then row must contain the same columns in the same order (for AbstractDict this option requires that keys(row) matches propertynames(df) to allow for support of ordered dicts; however, if row is a Dict an error is thrown as it is an unordered collection).\nIf cols == :intersect then row may contain more columns than df, but all column names that are present in df must be present in row and only they are used to populate a new row in df.\nIf cols == :subset then the behavior is like for :intersect but if some column is missing in row then a missing value is pushed to df.\nIf cols == :union then columns missing in df that are present in row are added to df (using missing for existing rows) and a missing value is pushed to columns missing in row that are present in df.\n\nIf row is not a DataFrameRow, NamedTuple, AbstractDict, or Tables.AbstractRow the cols keyword argument must be :setequal (the default), because such rows do not provide column name information.\n\nIf promote=true and element type of a column present in df does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in df. 
If promote=false an error is thrown.\n\nAs a special case, if df has no columns and row is a NamedTuple, DataFrameRow, or Tables.AbstractRow, columns are created for all values in row, using their names and order.\n\nPlease note that this function must not be used on a DataFrame that contains columns that are aliases (equal when compared with ===).\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: push!, insert!\n\nExamples\n\njulia> df = DataFrame(A='a':'c', B=1:3)\n3×2 DataFrame\n Row │ A     B\n     │ Char  Int64\n─────┼─────────────\n   1 │ a         1\n   2 │ b         2\n   3 │ c         3\n\njulia> pushfirst!(df, (true, false), promote=true)\n4×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ true      0\n   2 │ a         1\n   3 │ b         2\n   4 │ c         3\n\njulia> pushfirst!(df, df[1, :])\n5×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ true      0\n   2 │ true      0\n   3 │ a         1\n   4 │ b         2\n   5 │ c         3\n\njulia> pushfirst!(df, (C=\"something\", A=11, B=12), cols=:intersect)\n6×2 DataFrame\n Row │ A     B\n     │ Any   Int64\n─────┼─────────────\n   1 │ 11       12\n   2 │ true      0\n   3 │ true      0\n   4 │ a         1\n   5 │ b         2\n   6 │ c         3\n\njulia> pushfirst!(df, Dict(:A=>1.0, :C=>1.0), cols=:union)\n7×3 DataFrame\n Row │ A     B        C\n     │ Any   Int64?   Float64?\n─────┼──────────────────────────\n   1 │ 1.0   missing        1.0\n   2 │ 11         12  missing\n   3 │ true        0  missing\n   4 │ true        0  missing\n   5 │ a           1  missing\n   6 │ b           2  missing\n   7 │ c           3  missing\n\njulia> pushfirst!(df, NamedTuple(), cols=:subset)\n8×3 DataFrame\n Row │ A        B        C\n     │ Any      Int64?   
Float64?\n─────┼─────────────────────────────\n   1 │ missing  missing  missing\n   2 │ 1.0      missing        1.0\n   3 │ 11            12  missing\n   4 │ true           0  missing\n   5 │ true           0  missing\n   6 │ a              1  missing\n   7 │ b              2  missing\n   8 │ c              3  missing\n\njulia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (5, 6))\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     3      4\n   2 │     5      6\n   3 │     1      2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reduce","page":"Functions","title":"Base.reduce","text":"reduce(::typeof(vcat),\n       dfs::Union{AbstractVector{<:AbstractDataFrame},\n                  Tuple{AbstractDataFrame, Vararg{AbstractDataFrame}}};\n       cols::Union{Symbol, AbstractVector{Symbol},\n                   AbstractVector{<:AbstractString}}=:setequal,\n       source::Union{Nothing, Symbol, AbstractString,\n                     Pair{<:Union{Symbol, AbstractString}, <:AbstractVector}}=nothing,\n       init::AbstractDataFrame=DataFrame())\n\nEfficiently reduce the given vector or tuple of AbstractDataFrames with vcat.\n\nSee the vcat docstring for a description of keyword arguments cols and source.\n\nThe keyword argument init is the initial value to use in the reductions. It must be a data frame that has zero rows. It is not taken into account when computing the value of the source column nor when determining metadata of the produced data frame.\n\nThe column order, names, and types of the resulting DataFrame, and the behavior of cols and source keyword arguments follow the rules specified for vcat of AbstractDataFrames.\n\nMetadata: vcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value. 
vcat propagates column-level :note-style metadata for keys that are present in all passed data frames that contain this column and have the same value.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = DataFrame(A=7:9, C=7:9)\n3×2 DataFrame\n Row │ A      C\n     │ Int64  Int64\n─────┼──────────────\n   1 │     7      7\n   2 │     8      8\n   3 │     9      9\n\njulia> reduce(vcat, (df1, df2))\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> reduce(vcat, [df1, df2, df3], cols=:union, source=:source)\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   Int64\n─────┼─────────────────────────────────\n   1 │     1        1  missing       1\n   2 │     2        2  missing       1\n   3 │     3        3  missing       1\n   4 │     4        4  missing       2\n   5 │     5        5  missing       2\n   6 │     6        6  missing       2\n   7 │     7  missing        7       3\n   8 │     8  missing        8       3\n   9 │     9  missing        9       3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.repeat","page":"Functions","title":"Base.repeat","text":"repeat(df::AbstractDataFrame; inner::Integer = 1, outer::Integer = 1)\n\nConstruct a data frame by repeating rows in df. 
inner specifies how many times each row is repeated, and outer specifies how many times the full set of rows is repeated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat(df, inner=2, outer=3)\n12×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     1      3\n   3 │     2      4\n   4 │     2      4\n   5 │     1      3\n   6 │     1      3\n   7 │     2      4\n   8 │     2      4\n   9 │     1      3\n  10 │     1      3\n  11 │     2      4\n  12 │     2      4\n\n\n\n\n\nrepeat(df::AbstractDataFrame, count::Integer)\n\nConstruct a data frame by repeating the full set of rows in df the number of times specified by count.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat(df, 2)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n   3 │     1      3\n   4 │     2      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.repeat!","page":"Functions","title":"DataFrames.repeat!","text":"repeat!(df::DataFrame; inner::Integer=1, outer::Integer=1)\n\nUpdate a data frame df in-place by repeating its rows. inner specifies how many times each row is repeated, and outer specifies how many times the full set of rows is repeated. 
Columns of df are freshly allocated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat!(df, inner=2, outer=3);\n\njulia> df\n12×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     1      3\n   3 │     2      4\n   4 │     2      4\n   5 │     1      3\n   6 │     1      3\n   7 │     2      4\n   8 │     2      4\n   9 │     1      3\n  10 │     1      3\n  11 │     2      4\n  12 │     2      4\n\n\n\n\n\nrepeat!(df::DataFrame, count::Integer)\n\nUpdate a data frame df in-place by repeating its rows the number of times specified by count. Columns of df are freshly allocated.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExample\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> repeat!(df, 2)\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n   3 │     1      3\n   4 │     2      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reverse","page":"Functions","title":"Base.reverse","text":"reverse(df::AbstractDataFrame, start=1, stop=nrow(df))\n\nReturn a data frame containing the rows in df in reversed order. 
If start and stop are provided, only rows in the start:stop range are affected.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> reverse(df)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     4      9     14\n   3 │     3      8     13\n   4 │     2      7     12\n   5 │     1      6     11\n\njulia> reverse(df, 2, 3)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     3      8     13\n   3 │     2      7     12\n   4 │     4      9     14\n   5 │     5     10     15\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.reverse!","page":"Functions","title":"Base.reverse!","text":"reverse!(df::AbstractDataFrame, start=1, stop=nrow(df))\n\nMutate data frame in-place to reverse its row order. If start and stop are provided, only rows in the start:stop range are affected.\n\nreverse! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then reverse! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(a=1:5, b=6:10, c=11:15)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      6     11\n   2 │     2      7     12\n   3 │     3      8     13\n   4 │     4      9     14\n   5 │     5     10     15\n\njulia> reverse!(df)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     4      9     14\n   3 │     3      8     13\n   4 │     2      7     12\n   5 │     1      6     11\n\njulia> reverse!(df, 2, 3)\n5×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     5     10     15\n   2 │     3      8     13\n   3 │     4      9     14\n   4 │     2      7     12\n   5 │     1      6     11\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.select","page":"Functions","title":"DataFrames.select","text":"select(df::AbstractDataFrame, args...;\n       copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)\nselect(args::Callable, df::DataFrame;\n       renamecols::Bool=true, threads::Bool=true)\nselect(gd::GroupedDataFrame, args...;\n       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n       renamecols::Bool=true, threads::Bool=true)\nselect(f::Base.Callable, gd::GroupedDataFrame;\n       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n       renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd specified by args and return it. 
The result is guaranteed to have the same number of rows as df, except when no columns are selected (in which case the result has zero rows).\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If function returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case a single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if the specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if the empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in the target data frame is accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. 
In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\ncopycols::Bool=true : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    
As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> select(df, 2)\n3×1 DataFrame\n Row │ b\n     │ Int64\n─────┼───────\n   1 │     4\n   2 │     5\n   3 │     6\n\njulia> select(df, :a => ByRow(sin) => :c, :b)\n3×2 DataFrame\n Row │ c         b\n     │ Float64   Int64\n─────┼─────────────────\n   1 │ 0.841471      4\n   2 │ 0.909297      5\n   3 │ 0.14112       6\n\njulia> select(df, :, [:a, :b] => (a, b) -> a .+ b .- sum(b)/length(b))\n3×3 DataFrame\n Row │ a      b      a_b_function\n     │ Int64  Int64  Float64\n─────┼────────────────────────────\n   1 │     1      4           0.0\n   2 │     2      5           2.0\n   3 │     3      6           4.0\n\njulia> select(df, All() .=> [minimum maximum])\n3×4 DataFrame\n Row │ a_minimum  b_minimum  a_maximum  b_maximum\n     │ Int64      Int64      Int64      Int64\n─────┼────────────────────────────────────────────\n   1 │         1          4          3          6\n   2 │         1          4          3          6\n   3 │         1          4          3          6\n\njulia> using Statistics\n\njulia> select(df, AsTable(:) => ByRow(mean), renamecols=false)\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> select(df, AsTable(:) => ByRow(mean) => x -> join(x, \"_\"))\n3×1 DataFrame\n Row │ a_b\n     │ Float64\n─────┼─────────\n   1 │     2.5\n   2 │     3.5\n   3 │     4.5\n\njulia> select(first, df)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     1      4\n   3 │     1      4\n\njulia> df = DataFrame(a=1:3, b=4:6, c=7:9)\n3×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  
Int64\n─────┼─────────────────────\n   1 │     1      4      7\n   2 │     2      5      8\n   3 │     3      6      9\n\njulia> select(df, AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => :stats,\n              AsTable(:) => ByRow(x -> (mean=mean(x), std=std(x))) => AsTable)\n3×3 DataFrame\n Row │ stats                    mean     std\n     │ NamedTup…                Float64  Float64\n─────┼───────────────────────────────────────────\n   1 │ (mean = 4.0, std = 3.0)      4.0      3.0\n   2 │ (mean = 5.0, std = 3.0)      5.0      3.0\n   3 │ (mean = 6.0, std = 3.0)      6.0      3.0\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8)\n8×3 DataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      1      2\n   3 │     1      2      3\n   4 │     2      1      4\n   5 │     2      2      5\n   6 │     1      1      6\n   7 │     1      2      7\n   8 │     2      1      8\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (5 rows): a = 1\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      1      2\n   3 │     1      2      3\n   4 │     1      1      6\n   5 │     1      2      7\n⋮\nLast Group (3 rows): a = 2\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      4\n   2 │     2      2      5\n   3 │     2      1      8\n\nspecifying a name for target column\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :c => (x -> sum(log, x)) => :sum_log_c)\n8×2 DataFrame\n Row │ a      sum_log_c\n     │ Int64  Float64\n─────┼──────────────────\n   1 │     1    5.52943\n   2 │     1    5.52943\n   3 │     1    
5.52943\n   4 │     2    5.07517\n   5 │     2    5.07517\n   6 │     1    5.52943\n   7 │     1    5.52943\n   8 │     2    5.07517\n\njulia> select(gd, [:b, :c] .=> sum) # passing a vector of pairs\n8×3 DataFrame\n Row │ a      b_sum  c_sum\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      8     19\n   2 │     1      8     19\n   3 │     1      8     19\n   4 │     2      4     17\n   5 │     2      4     17\n   6 │     1      8     19\n   7 │     1      8     19\n   8 │     2      4     17\n\nmultiple arguments, renaming and keepkeys\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :b => :b1, :c => :c1, [:b, :c] => +, keepkeys=false)\n8×3 DataFrame\n Row │ b1     c1     b_c_+\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      3\n   2 │     1      2      3\n   3 │     2      3      5\n   4 │     1      4      5\n   5 │     2      5      7\n   6 │     1      6      7\n   7 │     2      7      9\n   8 │     1      8      9\n\nbroadcasting and column expansion\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, :b, AsTable([:b, :c]) => ByRow(extrema) => [:min, :max])\n8×4 DataFrame\n Row │ a      b      min    max\n     │ Int64  Int64  Int64  Int64\n─────┼────────────────────────────\n   1 │     1      2      1      2\n   2 │     1      1      1      2\n   3 │     1      2      2      3\n   4 │     2      1      1      4\n   5 │     2      2      2      5\n   6 │     1      1      1      6\n   7 │     1      2      2      7\n   8 │     2      1      1      8\n\njulia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false)\n8×4 DataFrame\n Row │ a      b      c      b_c\n     │ Int64  Int64  Int64  
Int64\n─────┼────────────────────────────\n   1 │     1      2      1      3\n   2 │     1      1      2      3\n   3 │     1      2      3      5\n   4 │     2      1      4      5\n   5 │     2      2      5      7\n   6 │     1      1      6      7\n   7 │     1      2      7      9\n   8 │     2      1      8      9\n\ncolumn-independent operations\n\njulia> df = DataFrame(a=[1, 1, 1, 2, 2, 1, 1, 2],\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a);\n\njulia> select(gd, nrow, proprow, groupindices, eachindex)\n8×5 DataFrame\n Row │ a      nrow   proprow  groupindices  eachindex\n     │ Int64  Int64  Float64  Int64         Int64\n─────┼────────────────────────────────────────────────\n   1 │     1      5    0.625             1          1\n   2 │     1      5    0.625             1          2\n   3 │     1      5    0.625             1          3\n   4 │     2      3    0.375             2          1\n   5 │     2      3    0.375             2          2\n   6 │     1      5    0.625             1          4\n   7 │     1      5    0.625             1          5\n   8 │     2      3    0.375             2          3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.select!","page":"Functions","title":"DataFrames.select!","text":"select!(df::AbstractDataFrame, args...;\n        renamecols::Bool=true, threads::Bool=true)\nselect!(args::Base.Callable, df::DataFrame;\n        renamecols::Bool=true, threads::Bool=true)\nselect!(gd::GroupedDataFrame, args...; ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\nselect!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true,\n        renamecols::Bool=true, threads::Bool=true)\n\nMutate df or gd in place to retain only columns or transformations specified by args... and return it. 
The result is guaranteed to have the same number of rows as df or parent of gd, except when no columns are selected (in which case the result has zero rows).\n\nIf a SubDataFrame or GroupedDataFrame{SubDataFrame} is passed, the parent data frame is updated using columns generated by args..., following the same rules as indexing:\n\nfor existing columns filtered-out rows are filled with values present in the old columns\nfor new columns (which are only allowed if SubDataFrame was created with : as column selector) filtered-out rows are filled with missing\ndropped columns (which are only allowed if SubDataFrame was created with : as column selector) are removed\nif SubDataFrame was not created with : as column selector then select! is only allowed if the transformations keep exactly the same sequence of column names as is in the passed df\n\nIf a GroupedDataFrame is passed then it is updated to reflect the new rows of its updated parent. If there are independent GroupedDataFrame objects constructed using the same parent data frame they might become corrupted.\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. 
This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). 
Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. 
The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. 
The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. 
In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. 
If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. 
Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nSee select for examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Random.shuffle","page":"Functions","title":"Random.shuffle","text":"shuffle([rng=GLOBAL_RNG,] df::AbstractDataFrame)\n\nReturn a copy of df with randomly permuted rows. The optional rng argument specifies a random number generator.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> using Random, StableRNGs\n\njulia> rng = StableRNG(1234);\n\njulia> shuffle(rng, DataFrame(a=1:5, b=1:5))\n5×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      2\n   2 │     1      1\n   3 │     3      3\n   4 │     5      5\n   5 │     4      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Random.shuffle!","page":"Functions","title":"Random.shuffle!","text":"shuffle!([rng=GLOBAL_RNG,] df::AbstractDataFrame)\n\nRandomly permute rows of df in-place. The optional rng argument specifies a random number generator.\n\nshuffle! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then shuffle! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> using Random, StableRNGs\n\njulia> rng = StableRNG(1234);\n\njulia> shuffle!(rng, DataFrame(a=1:5, b=1:5))\n5×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      2\n   2 │     1      1\n   3 │     3      3\n   4 │     5      5\n   5 │     4      4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.table_transformation","page":"Functions","title":"DataFrames.table_transformation","text":"table_transformation(df_sel::AbstractDataFrame, fun)\n\nThis is the function called when AsTable(...) => fun is requested. The df_sel argument is a data frame storing columns selected by the AsTable(...) selector.\n\nBy default it calls default_table_transformation. However, it is allowed to add special methods for specific types of fun, as long as the result matches what would be produced by default_table_transformation, except that it is allowed to perform eltype conversion of the resulting vectors or value type promotions that are consistent with promote_type.\n\nIt is guaranteed that df_sel has at least one column.\n\nThe main use of special table_transformation methods is to provide implementations of the requested fun transformation that are more efficient than the default one.\n\nThis function might become a part of the public API of DataFrames.jl in the future; currently it should be considered experimental.\n\nFast paths are implemented within DataFrames.jl for the following functions fun:\n\nsum, ByRow(sum), ByRow(sum∘skipmissing)\nlength, ByRow(length), ByRow(length∘skipmissing)\nmean, ByRow(mean), ByRow(mean∘skipmissing)\nByRow(var), ByRow(var∘skipmissing)\nByRow(std), ByRow(std∘skipmissing)\nByRow(median), ByRow(median∘skipmissing)\nminimum, ByRow(minimum), ByRow(minimum∘skipmissing)\nmaximum, 
ByRow(maximum), ByRow(maximum∘skipmissing)\nfun∘collect and ByRow(fun∘collect) where fun is any function\n\nNote that in order to improve the performance ByRow(sum), ByRow(sum∘skipmissing), ByRow(mean), and ByRow(mean∘skipmissing) perform all operations in the target element type. In some very rare cases (like mixing very large Int64 values and Float64 values) it can lead to a result different from the one that would be obtained by calling the function outside of DataFrames.jl. The way to avoid this precision loss is to use an anonymous function, e.g. instead of ByRow(sum) use ByRow(x -> sum(x)). However, in general for such scenarios even standard aggregation functions should not be considered to provide reliable output, and users are recommended to switch to higher precision calculations. An example of a case when standard sum is affected by the situation discussed is:\n\njulia> sum(Any[typemax(Int), typemax(Int), 1.0])\n-1.0\n\njulia> sum(Any[1.0, typemax(Int), typemax(Int)])\n1.8446744073709552e19\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.transform","page":"Functions","title":"DataFrames.transform","text":"transform(df::AbstractDataFrame, args...;\n          copycols::Bool=true, renamecols::Bool=true, threads::Bool=true)\ntransform(f::Callable, df::DataFrame;\n          renamecols::Bool=true, threads::Bool=true)\ntransform(gd::GroupedDataFrame, args...;\n          copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n          renamecols::Bool=true, threads::Bool=true)\ntransform(f::Base.Callable, gd::GroupedDataFrame;\n          copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true,\n          renamecols::Bool=true, threads::Bool=true)\n\nCreate a new data frame that contains columns from df or gd plus columns specified by args and return it. The result is guaranteed to have the same number of rows as df. Equivalent to select(df, :, args...) 
or select(gd, :, args...).\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! 
is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). 
This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. 
If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. 
cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if an empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in the target data frame is accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). In contrast, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! 
assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. 
In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementations; see DataFrames.table_transformation for details.\n\nKeyword arguments\n\ncopycols::Bool=true : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nkeepkeys::Bool=true : whether grouping columns of gd should be kept in the returned data frame.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether tasks are actually spawned, and how many, is determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nNote that when the first argument is a GroupedDataFrame, keepkeys=false is needed to be able to return a different value for the grouping column (see the examples below).\n\nMetadata: this function propagates table-level :note-style metadata. Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column even if column name is changed (this includes column renaming).    
As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nExamples\n\njulia> gdf = groupby(DataFrame(x=1:2), :x)\nGroupedDataFrame with 2 groups based on key: x\nFirst Group (1 row): x = 1\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): x = 2\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> transform(gdf, x -> (x=10,), keepkeys=false)\n2×1 DataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │    10\n   2 │    10\n\njulia> transform(gdf, x -> (x=10,), keepkeys=true)\nERROR: ArgumentError: column :x in returned data frame is not equal to grouping key :x\n\nSee select for more examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.transform!","page":"Functions","title":"DataFrames.transform!","text":"transform!(df::AbstractDataFrame, args...;\n           renamecols::Bool=true, threads::Bool=true)\ntransform!(args::Callable, df::AbstractDataFrame;\n           renamecols::Bool=true, threads::Bool=true)\ntransform!(gd::GroupedDataFrame, args...;\n           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)\ntransform!(f::Base.Callable, gd::GroupedDataFrame;\n           ungroup::Bool=true, renamecols::Bool=true, threads::Bool=true)\n\nMutate df or gd in place to add columns specified by args... and return it. The result is guaranteed to have the same number of rows as df. Equivalent to select!(df, :, args...) or select!(gd, :, args...), except that column renaming performs a copy.\n\nBelow detailed common rules for all transformation functions supported by DataFrames.jl are explained and compared.\n\nAll these operations are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). 
The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.\n\nIn order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.\n\nOperations can then be applied on each group using one of the following functions:\n\ncombine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! is an in-place version of select; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;\n\nAs a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.\n\nAll these functions take a specification of one or more functions to apply to each subset of the DataFrame. 
This specification can be of the following forms:\n\nstandard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). 
Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)\n\nNote! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.\n\nNote! If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.\n\nAll functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).\n\nAs a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function. 
The documentation of DataFrames.table_transformation provides more information about this functionality, in particular covering performance considerations.\n\nWhat is allowed for function to return is determined by the target_cols value:\n\nIf both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If fun returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. 
The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.\n\nIn all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.\n\nselect/select! and transform/transform! always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).\n\nFor combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.\n\nIt is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.\n\nTo apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.\n\nIf a collection of column names is passed then requesting duplicate column names in target data frame are accepted (e.g. select!(df, [:a], :, r\"a\") is allowed) and only the first occurrence is used. 
In particular a syntax to move column :col to the first position in the data frame is select!(df, :col, :). On the contrary, output column names of renaming, transformation and single column selection operations must be unique, so e.g. select!(df, :a, :a => :a) or select!(df, :a, :a => ByRow(sin) => :a) are not allowed.\n\nIn general columns returned by transformations are stored in the target data frame without copying. An exception to this rule is when columns from the source data frame are reused in the target data frame. This can happen via expressions like: :x1, [:x1, :x2], :x1 => :x2, :x1 => identity => :x2, or :x1 => (x -> @view x[inds]) (note that in the last case the source column is reused indirectly via a view). In such cases the behavior depends on the value of the copycols keyword argument:\n\nif copycols=true then results of such transformations always perform a copy of the source column or its view;\nif copycols=false then copies are only performed to avoid storing the same column several times in the target data frame; more precisely, no copy is made the first time a column is used, but each subsequent reuse of a source column (when compared using ===, which excludes views of source columns) performs a copy;\n\nNote that performing transform! or select! assumes that copycols=false.\n\nIf df is a SubDataFrame and copycols=true then a DataFrame is returned and the same copying rules apply as for a DataFrame input: this means in particular that selected columns will be copied. 
If copycols=false, a SubDataFrame is returned without copying columns and in this case transforming or renaming columns is not allowed.\n\nIf a GroupedDataFrame is passed and threads=true (the default), a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading. In the future, parallelism may be extended to other cases, so this requirement also holds for DataFrame inputs.\n\nIn order to improve the performance of the operations some transformations invoke optimized implementation, see DataFrames.table_transformation for details.\n\nKeyword arguments\n\nrenamecols::Bool=true : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nungroup::Bool=true : whether the return value of the operation on gd should be a data frame or a GroupedDataFrame.\nthreads::Bool=true : whether transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nMetadata: this function propagates table-level :note-style metadata. 
Column-level :note-style metadata is propagated if: a) a single column is transformed to a single column and the name of the column   does not change (this includes all column selection operations), or b) a single column is transformed with identity or copy to a single column    even if column name is changed (this includes column renaming).    As a special case for GroupedDataFrame if the output has the same name    as a grouping column and keepkeys=true, metadata is taken from    original grouping column.\n\nSee select for examples.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.vcat","page":"Functions","title":"Base.vcat","text":"vcat(dfs::AbstractDataFrame...;\n     cols::Union{Symbol, AbstractVector{Symbol},\n                 AbstractVector{<:AbstractString}}=:setequal,\n     source::Union{Nothing, Symbol, AbstractString,\n                   Pair{<:Union{Symbol, AbstractString}, <:AbstractVector}}=nothing)\n\nVertically concatenate AbstractDataFrames.\n\nThe cols keyword argument determines the columns of the returned data frame:\n\n:setequal: require all data frames to have the same column names disregarding order. If they appear in different orders, the order of the first provided data frame is used.\n:orderequal: require all data frames to have the same column names and in the same order.\n:intersect: only the columns present in all provided data frames are kept. If the intersection is empty, an empty data frame is returned.\n:union: columns present in at least one of the provided data frames are kept. Columns not present in some data frames are filled with missing where necessary.\nA vector of Symbols or strings: only listed columns are kept. Columns not present in some data frames are filled with missing where necessary.\n\nThe source keyword argument, if not nothing (the default), specifies the additional column to be added in the last position in the resulting data frame that will identify the source data frame. 
It can be a Symbol or an AbstractString, in which case the identifier will be the number of the passed source data frame, or a Pair consisting of a Symbol or an AbstractString and of a vector specifying the data frame identifiers (which do not have to be unique). The name of the source column is not allowed to be present in any source data frame.\n\nThe order of columns is determined by the order they appear in the included data frames, searching through the header of the first data frame, then the second, etc.\n\nThe element types of columns are determined using promote_type, as with vcat for AbstractVectors.\n\nvcat ignores empty data frames when composing the result (except for metadata), making it possible to initialize an empty data frame at the beginning of a loop and vcat onto it.\n\nMetadata: vcat propagates table-level :note-style metadata for keys that are present in all passed data frames and have the same value. vcat propagates column-level :note-style metadata for keys that are present in all passed data frames that contain this column and have the same value.\n\nExample\n\njulia> df1 = DataFrame(A=1:3, B=1:3)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> df2 = DataFrame(A=4:6, B=4:6)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      4\n   2 │     5      5\n   3 │     6      6\n\njulia> df3 = DataFrame(A=7:9, C=7:9)\n3×2 DataFrame\n Row │ A      C\n     │ Int64  Int64\n─────┼──────────────\n   1 │     7      7\n   2 │     8      8\n   3 │     9      9\n\njulia> df4 = DataFrame()\n0×0 DataFrame\n\njulia> vcat(df1, df2)\n6×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n   4 │     4      4\n   5 │     5      5\n   6 │     6      6\n\njulia> vcat(df1, df3, cols=:union)\n6×3 DataFrame\n Row │ A      B        C\n     │ Int64  
Int64?   Int64?\n─────┼─────────────────────────\n   1 │     1        1  missing\n   2 │     2        2  missing\n   3 │     3        3  missing\n   4 │     7  missing        7\n   5 │     8  missing        8\n   6 │     9  missing        9\n\njulia> vcat(df1, df3, cols=:intersect)\n6×1 DataFrame\n Row │ A\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n   4 │     7\n   5 │     8\n   6 │     9\n\njulia> vcat(df4, df1)\n3×2 DataFrame\n Row │ A      B\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      3\n\njulia> vcat(df1, df2, df3, df4, cols=:union, source=\"source\")\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   Int64\n─────┼─────────────────────────────────\n   1 │     1        1  missing       1\n   2 │     2        2  missing       1\n   3 │     3        3  missing       1\n   4 │     4        4  missing       2\n   5 │     5        5  missing       2\n   6 │     6        6  missing       2\n   7 │     7  missing        7       3\n   8 │     8  missing        8       3\n   9 │     9  missing        9       3\n\njulia> vcat(df1, df2, df4, df3, cols=:union, source=:source => 'a':'d')\n9×4 DataFrame\n Row │ A      B        C        source\n     │ Int64  Int64?   Int64?   
Char\n─────┼─────────────────────────────────\n   1 │     1        1  missing  a\n   2 │     2        2  missing  a\n   3 │     3        3  missing  a\n   4 │     4        4  missing  b\n   5 │     5        5  missing  b\n   6 │     6        6  missing  b\n   7 │     7  missing        7  d\n   8 │     8  missing        8  d\n   9 │     9  missing        9  d\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Reshaping-data-frames-between-tall-and-wide-formats","page":"Functions","title":"Reshaping data frames between tall and wide formats","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"stack\nunstack\npermutedims","category":"page"},{"location":"lib/functions/#Base.stack","page":"Functions","title":"Base.stack","text":"stack(df::AbstractDataFrame[, measure_vars[, id_vars] ];\n      variable_name=:variable, value_name=:value,\n      view::Bool=false, variable_eltype::Type=String)\n\nStack a data frame df, i.e. convert it from wide to long format.\n\nReturn the long-format DataFrame with: columns for each of the id_vars, column value_name (:value by default) holding the values of the stacked columns (measure_vars), and column variable_name (:variable by default) a vector holding the name of the corresponding measure_vars variable.\n\nIf view=true then return a stacked view of a data frame (long format). The result is a view because the columns are special AbstractVectors that return views into the original data frame.\n\nArguments\n\ndf : the AbstractDataFrame to be stacked\nmeasure_vars : the columns to be stacked (the measurement variables), as a column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). 
If neither measure_vars nor id_vars is given, measure_vars defaults to all floating point columns.\nid_vars : the identifier columns that are repeated during stacking, as a column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). Defaults to all variables that are not measure_vars.\nvariable_name : the name (Symbol or string) of the new stacked column that shall hold the names of each of measure_vars\nvalue_name : the name (Symbol or string) of the new stacked column containing the values from each of measure_vars\nview : whether the stacked data frame should be a view rather than contain freshly allocated vectors.\nvariable_eltype : determines the element type of column variable_name. By default a PooledArray{String} is created. If variable_eltype=Symbol a PooledVector{Symbol} is created, and if variable_eltype=CategoricalValue{String} a CategoricalArray{String} is produced (call using CategoricalArrays first if needed). Passing any other type T will produce a PooledVector{T} column as long as it supports conversion from String. 
When view=true, a RepeatedVector{T} is produced.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for identifier columns are preserved.\n\nExamples\n\njulia> df = DataFrame(a=repeat(1:3, inner=2),\n                      b=repeat(1:2, inner=3),\n                      c=repeat(1:1, inner=6),\n                      d=repeat(1:6, inner=1),\n                      e=string.('a':'f'))\n6×5 DataFrame\n Row │ a      b      c      d      e\n     │ Int64  Int64  Int64  Int64  String\n─────┼────────────────────────────────────\n   1 │     1      1      1      1  a\n   2 │     1      1      1      2  b\n   3 │     2      1      1      3  c\n   4 │     2      2      1      4  d\n   5 │     3      2      1      5  e\n   6 │     3      2      1      6  f\n\njulia> stack(df, [:c, :d])\n12×5 DataFrame\n Row │ a      b      e       variable  value\n     │ Int64  Int64  String  String    Int64\n─────┼───────────────────────────────────────\n   1 │     1      1  a       c             1\n   2 │     1      1  b       c             1\n   3 │     2      1  c       c             1\n   4 │     2      2  d       c             1\n   5 │     3      2  e       c             1\n   6 │     3      2  f       c             1\n   7 │     1      1  a       d             1\n   8 │     1      1  b       d             2\n   9 │     2      1  c       d             3\n  10 │     2      2  d       d             4\n  11 │     3      2  e       d             5\n  12 │     3      2  f       d             6\n\njulia> stack(df, [:c, :d], [:a])\n12×3 DataFrame\n Row │ a      variable  value\n     │ Int64  String    Int64\n─────┼────────────────────────\n   1 │     1  c             1\n   2 │     1  c             1\n   3 │     2  c             1\n   4 │     2  c             1\n   5 │     3  c             1\n   6 │     3  c             1\n   7 │     1  d             1\n   8 │     1  d             2\n   9 │     2  d             3\n  10 │     2  d             4\n  11 │     3  d       
      5\n  12 │     3  d             6\n\njulia> stack(df, Not([:a, :b, :e]))\n12×5 DataFrame\n Row │ a      b      e       variable  value\n     │ Int64  Int64  String  String    Int64\n─────┼───────────────────────────────────────\n   1 │     1      1  a       c             1\n   2 │     1      1  b       c             1\n   3 │     2      1  c       c             1\n   4 │     2      2  d       c             1\n   5 │     3      2  e       c             1\n   6 │     3      2  f       c             1\n   7 │     1      1  a       d             1\n   8 │     1      1  b       d             2\n   9 │     2      1  c       d             3\n  10 │     2      2  d       d             4\n  11 │     3      2  e       d             5\n  12 │     3      2  f       d             6\n\njulia> stack(df, Not([:a, :b, :e]), variable_name=:somemeasure)\n12×5 DataFrame\n Row │ a      b      e       somemeasure  value\n     │ Int64  Int64  String  String       Int64\n─────┼──────────────────────────────────────────\n   1 │     1      1  a       c                1\n   2 │     1      1  b       c                1\n   3 │     2      1  c       c                1\n   4 │     2      2  d       c                1\n   5 │     3      2  e       c                1\n   6 │     3      2  f       c                1\n   7 │     1      1  a       d                1\n   8 │     1      1  b       d                2\n   9 │     2      1  c       d                3\n  10 │     2      2  d       d                4\n  11 │     3      2  e       d                5\n  12 │     3      2  f       d                6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.unstack","page":"Functions","title":"DataFrames.unstack","text":"unstack(df::AbstractDataFrame, rowkeys, colkey, value;\n        renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\nunstack(df::AbstractDataFrame, colkey, value;\n        
renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\nunstack(df::AbstractDataFrame;\n        renamecols::Function=identity, allowmissing::Bool=false,\n        combine=only, fill=missing, threads::Bool=true)\n\nUnstack data frame df, i.e. convert it from long to wide format.\n\nRow and column keys are ordered in the order of their first appearance.\n\nPositional arguments\n\ndf : the AbstractDataFrame to be unstacked\nrowkeys : the columns with a unique key for each row, if not given, find a key by grouping on anything not a colkey or value. Can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If rowkeys contains no columns all rows are assumed to have the same key.\ncolkey : the column (Symbol, string or integer) holding the column names in wide format, defaults to :variable\nvalues : the column storing values (Symbol, string or integer), defaults to :value\n\nKeyword arguments\n\nrenamecols: a function called on each unique value in colkey; it must return the name of the column to be created (typically as a string or a Symbol). Duplicates in resulting names when converted to Symbol are not allowed. By default no transformation is performed.\nallowmissing: if false (the default) then an error is thrown if colkey contains missing values; if true then a column referring to missing value is created.\ncombine: if only (the default) then an error is thrown if combination of rowkeys and colkey contains duplicate entries. Otherwise the passed value must be a function that is called on a vector view containing all elements for each combination of rowkeys and colkey present in the data.\nfill: missing row/column combinations are filled with this value. The default is missing. 
If the value column is a CategoricalVector and fill is not missing then in order to keep unstacked value columns also CategoricalVector the fill must be passed as CategoricalValue\nthreads: whether combine function may be run in separate tasks which can execute in parallel (possibly being applied to multiple groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if combine requires serial execution or is not thread-safe.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for row keys columns are preserved.\n\nDeprecations\n\nallowduplicates keyword argument is deprecated; instead use combine keyword argument; an equivalent to allowduplicates=true is combine=last and to allowduplicates=false is combine=only (the default);\n\nExamples\n\njulia> wide = DataFrame(id=1:6,\n                        a=repeat(1:3, inner=2),\n                        b=repeat(1.0:2.0, inner=3),\n                        c=repeat(1.0:1.0, inner=6),\n                        d=repeat(1.0:3.0, inner=2))\n6×5 DataFrame\n Row │ id     a      b        c        d\n     │ Int64  Int64  Float64  Float64  Float64\n─────┼─────────────────────────────────────────\n   1 │     1      1      1.0      1.0      1.0\n   2 │     2      1      1.0      1.0      1.0\n   3 │     3      2      1.0      1.0      2.0\n   4 │     4      2      2.0      1.0      2.0\n   5 │     5      3      2.0      1.0      3.0\n   6 │     6      3      2.0      1.0      3.0\n\njulia> long = stack(wide)\n18×4 DataFrame\n Row │ id     a      variable  value\n     │ Int64  Int64  String    Float64\n─────┼─────────────────────────────────\n   1 │     1      1  b             1.0\n   2 │     2      1  b             1.0\n   3 │     3      2  b             1.0\n   4 │     4      2  b             2.0\n   5 │     5      3  b             2.0\n   6 │     6      3  b             2.0\n   7 │     1      1  c             1.0\n   8 │     2      1  
c             1.0\n  ⋮  │   ⋮      ⋮       ⋮         ⋮\n  12 │     6      3  c             1.0\n  13 │     1      1  d             1.0\n  14 │     2      1  d             1.0\n  15 │     3      2  d             2.0\n  16 │     4      2  d             2.0\n  17 │     5      3  d             3.0\n  18 │     6      3  d             3.0\n                         3 rows omitted\n\njulia> unstack(long)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :variable, :value)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :id, :variable, :value)\n6×4 DataFrame\n Row │ id     b         c         d\n     │ Int64  Float64?  Float64?  Float64?\n─────┼─────────────────────────────────────\n   1 │     1       1.0       1.0       1.0\n   2 │     2       1.0       1.0       1.0\n   3 │     3       1.0       1.0       2.0\n   4 │     4       2.0       1.0       2.0\n   5 │     5       2.0       1.0       3.0\n   6 │     6       2.0       1.0       3.0\n\njulia> unstack(long, [:id, :a], :variable, :value)\n6×5 DataFrame\n Row │ id     a      b         c         d\n     │ Int64  Int64  Float64?  Float64?  
Float64?\n─────┼────────────────────────────────────────────\n   1 │     1      1       1.0       1.0       1.0\n   2 │     2      1       1.0       1.0       1.0\n   3 │     3      2       1.0       1.0       2.0\n   4 │     4      2       2.0       1.0       2.0\n   5 │     5      3       2.0       1.0       3.0\n   6 │     6      3       2.0       1.0       3.0\n\njulia> unstack(long, :id, :variable, :value, renamecols=x->Symbol(:_, x))\n6×4 DataFrame\n Row │ id     _b        _c        _d\n     │ Int64  Float64?  Float64?  Float64?\n─────┼─────────────────────────────────────\n   1 │     1       1.0       1.0       1.0\n   2 │     2       1.0       1.0       1.0\n   3 │     3       1.0       1.0       2.0\n   4 │     4       2.0       1.0       2.0\n   5 │     5       2.0       1.0       3.0\n   6 │     6       2.0       1.0       3.0\n\nNote that there are some differences between the widened results above.\n\njulia> df = DataFrame(id=[\"1\", \"1\", \"2\"],\n                      variable=[\"Var1\", \"Var2\", \"Var1\"],\n                      value=[1, 2, 3])\n3×3 DataFrame\n Row │ id      variable  value\n     │ String  String    Int64\n─────┼─────────────────────────\n   1 │ 1       Var1          1\n   2 │ 1       Var2          2\n   3 │ 2       Var1          3\n\njulia> unstack(df, :variable, :value, fill=0)\n2×3 DataFrame\n Row │ id      Var1   Var2\n     │ String  Int64  Int64\n─────┼──────────────────────\n   1 │ 1           1      2\n   2 │ 2           3      0\n\njulia> df = DataFrame(cols=[\"a\", \"a\", \"b\"], values=[1, 2, 4])\n3×2 DataFrame\n Row │ cols    values\n     │ String  Int64\n─────┼────────────────\n   1 │ a            1\n   2 │ a            2\n   3 │ b            4\n\njulia> unstack(df, :cols, :values, combine=copy)\n1×2 DataFrame\n Row │ a        b\n     │ Array…?  Array…?\n─────┼──────────────────\n   1 │ [1, 2]   [4]\n\njulia> unstack(df, :cols, :values, combine=sum)\n1×2 DataFrame\n Row │ a       b\n     │ Int64?  
Int64?\n─────┼────────────────\n   1 │      3       4\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.permutedims","page":"Functions","title":"Base.permutedims","text":"permutedims(df::AbstractDataFrame,\n            [src_namescol::Union{Int, Symbol, AbstractString}],\n            [dest_namescol::Union{Symbol, AbstractString}];\n            makeunique::Bool=false, strict::Bool=true)\n\nTurn df on its side such that rows become columns and values in the column indexed by src_namescol become the names of new columns. In the resulting DataFrame, column names of df will become the first column with name specified by dest_namescol.\n\nArguments\n\ndf : the AbstractDataFrame\nsrc_namescol : the column that will become the new header.  If omitted then column names :x1, :x2, ... are generated automatically.\ndest_namescol : the name of the first column in the returned DataFrame. Defaults to the same name as src_namescol. Not supported when src_namescol is a vector or is omitted.\nmakeunique : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate). Not supported when src_namescol is omitted.\nstrict : if true (the default), an error will be raised if the values contained in the src_namescol are not all Symbol or all AbstractString, or can all be converted to String using convert. If false then any values are accepted and they will be changed to strings using the string function. Not supported when src_namescol is a vector or is omitted.\n\nNote: The element types of columns in resulting DataFrame (other than the first column if it is created from df column names, which always has element type String) will depend on the element types of all input columns based on the result of promote_type. That is, if the source data frame contains Int and Float64 columns, resulting columns will have element type Float64. 
If the source has Int and String columns, resulting columns will have element type Any.\n\nMetadata: table-level :note-style metadata is preserved and column-level metadata is dropped.\n\nExamples\n\njulia> df = DataFrame(a=1:2, b=3:4)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> permutedims(df)\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n\njulia> permutedims(df, [:p, :q])\n2×2 DataFrame\n Row │ p      q\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4\n\njulia> df1 = DataFrame(a=[\"x\", \"y\"], b=[1.0, 2.0], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b        c      d\n     │ String  Float64  Int64  Bool\n─────┼───────────────────────────────\n   1 │ x           1.0      3   true\n   2 │ y           2.0      4  false\n\njulia> permutedims(df1, 1) # note the column types\n3×3 DataFrame\n Row │ a       x        y\n     │ String  Float64  Float64\n─────┼──────────────────────────\n   1 │ b           1.0      2.0\n   2 │ c           3.0      4.0\n   3 │ d           1.0      0.0\n\njulia> df2 = DataFrame(a=[\"x\", \"y\"], b=[1, \"two\"], c=[3, 4], d=[true, false])\n2×4 DataFrame\n Row │ a       b    c      d\n     │ String  Any  Int64  Bool\n─────┼───────────────────────────\n   1 │ x       1        3   true\n   2 │ y       two      4  false\n\njulia> permutedims(df2, 1, \"different_name\")\n3×3 DataFrame\n Row │ different_name  x     y\n     │ String          Any   Any\n─────┼─────────────────────────────\n   1 │ b               1     two\n   2 │ c               3     4\n   3 │ d               true  
false\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Sorting","page":"Functions","title":"Sorting","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"issorted\norder\nsort\nsort!\nsortperm","category":"page"},{"location":"lib/functions/#Base.issorted","page":"Functions","title":"Base.issorted","text":"issorted(df::AbstractDataFrame, cols=All();\n         lt::Union{Function, AbstractVector{<:Function}}=isless,\n         by::Union{Function, AbstractVector{<:Function}}=identity,\n         rev::Union{Bool, AbstractVector{Bool}}=false,\n         order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n         checkunique::Bool=false)\n\nTest whether data frame df is sorted by column(s) cols. Checking against multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, check whether df is sorted on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. 
If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nExamples\n\njulia> df = DataFrame(a=[1, 2, 3, 4], b=[4, 3, 2, 1])\n4×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      3\n   3 │     3      2\n   4 │     4      1\n\njulia> issorted(df)\ntrue\n\njulia> issorted(df, :a)\ntrue\n\njulia> issorted(df, :b)\nfalse\n\njulia> issorted(df, :b, rev=true)\ntrue\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.order","page":"Functions","title":"DataFrames.order","text":"order(col::ColumnIndex; kwargs...)\n\nSpecify sorting order for a column col in a data frame. 
kwargs can be lt, by, rev, and order with values following the rules defined in sort!.\n\nSee also: sort!, sort\n\nExamples\n\njulia> df = DataFrame(x=[-3, -1, 0, 2, 4], y=1:5)\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │    -3      1\n   2 │    -1      2\n   3 │     0      3\n   4 │     2      4\n   5 │     4      5\n\njulia> sort(df, order(:x, rev=true))\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     4      5\n   2 │     2      4\n   3 │     0      3\n   4 │    -1      2\n   5 │    -3      1\n\njulia> sort(df, order(:x, by=abs))\n5×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     0      3\n   2 │    -1      2\n   3 │     2      4\n   4 │    -3      1\n   5 │     4      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sort","page":"Functions","title":"Base.sort","text":"sort(df::AbstractDataFrame, cols=All();\n     alg::Union{Algorithm, Nothing}=nothing,\n     lt::Union{Function, AbstractVector{<:Function}}=isless,\n     by::Union{Function, AbstractVector{<:Function}}=identity,\n     rev::Union{Bool, AbstractVector{Bool}}=false,\n     order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n     view::Bool=false,\n     checkunique::Bool=false)\n\nReturn a data frame containing the rows in df sorted by column(s) cols. Sorting on multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, sort df on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. 
To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nIf view=false a freshly allocated DataFrame is returned. 
If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sort(df, :x)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\njulia> sort(df, [:x, :y])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  b\n   2 │     1  c\n   3 │     2  a\n   4 │     3  b\n\njulia> sort(df, [:x, :y], rev=true)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n   3 │     1  c\n   4 │     1  b\n\njulia> sort(df, [:x, order(:y, rev=true)])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sort!","page":"Functions","title":"Base.sort!","text":"sort!(df::AbstractDataFrame, cols=All();\n      alg::Union{Algorithm, Nothing}=nothing,\n      lt::Union{Function, AbstractVector{<:Function}}=isless,\n      by::Union{Function, AbstractVector{<:Function}}=identity,\n      rev::Union{Bool, AbstractVector{Bool}}=false,\n      order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n      checkunique::Bool=false)\n\nSort data frame df by column(s) cols by permuting its rows in-place. Sorting on multiple columns is done lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). 
If cols selects no columns, sort df on all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nsort! will produce a correct result even if some columns of passed data frame are identical (checked with ===). Otherwise, if two columns share some part of memory but are not identical (e.g. are different views of the same parent vector) then sort! 
result might be incorrect.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nMetadata having other styles is dropped (from parent data frame when df is a SubDataFrame).\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sort!(df, :x)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\njulia> sort!(df, [:x, :y])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  b\n   2 │     1  c\n   3 │     2  a\n   4 │     3  b\n\njulia> sort!(df, [:x, :y], rev=true)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n   3 │     1  c\n   4 │     1  b\n\njulia> sort!(df, [:x, order(:y, rev=true)])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  c\n   2 │     1  b\n   3 │     2  a\n   4 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.sortperm","page":"Functions","title":"Base.sortperm","text":"sortperm(df::AbstractDataFrame, cols=All();\n         alg::Union{Algorithm, Nothing}=nothing,\n         lt::Union{Function, AbstractVector{<:Function}}=isless,\n         by::Union{Function, AbstractVector{<:Function}}=identity,\n         rev::Union{Bool, AbstractVector{Bool}}=false,\n         order::Union{Ordering, AbstractVector{<:Ordering}}=Forward,\n         checkunique::Bool=false)\n\nReturn a permutation vector of row indices of data frame df that puts them in sorted order according to column(s) cols. 
Order on multiple columns is computed lexicographically.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). If cols selects no columns, return permutation vector based on sorting all columns (this behaviour is deprecated and will change in future versions).\n\nIf rev is true, reverse sorting is performed. To enable reverse sorting only for some columns, pass order(c, rev=true) in cols, with c the corresponding column index (see example below).\n\nSince having repeated elements makes multiple sorting orders valid, the checkunique keyword allows for the situation to be caught. If checkunique is true and duplicate elements are found an error will be thrown. The use of the checkunique keyword is only supported when neither the by nor the lt keywords are being used. Similarly, the use of order(...) clauses that specify either by or lt are not supported, but specifying rev by itself is allowed.\n\nThe by keyword allows providing a function that will be applied to each cell before comparison; the lt keyword allows providing a custom \"less than\" function. If both by and lt are specified, the lt function is applied to the result of the by function.\n\nKeyword arguments specifying sorting order (rev, lt or by) can either be a single value, or a vector of length equal to the number of columns the operation is performed on. When a single value is passed, it applies to all columns. 
When a vector is passed, each entry applies to the column in the corresponding position in cols.\n\nIf alg is nothing (the default), the most appropriate algorithm is chosen automatically among TimSort, MergeSort and RadixSort depending on the type of the sorting columns and on the number of rows in df.\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> sortperm(df, :x)\n4-element Vector{Int64}:\n 2\n 4\n 3\n 1\n\njulia> sortperm(df, [:x, :y])\n4-element Vector{Int64}:\n 4\n 2\n 3\n 1\n\njulia> sortperm(df, [:x, :y], rev=true)\n4-element Vector{Int64}:\n 1\n 3\n 2\n 4\n\njulia> sortperm(df, [:x, order(:y, rev=true)])\n4-element Vector{Int64}:\n 2\n 4\n 3\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Joining","page":"Functions","title":"Joining","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"antijoin\ncrossjoin\ninnerjoin\nleftjoin\nleftjoin!\nouterjoin\nrightjoin\nsemijoin","category":"page"},{"location":"lib/functions/#DataAPI.antijoin","page":"Functions","title":"DataAPI.antijoin","text":"antijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)\n\nPerform an anti join of two data frame objects and return a DataFrame containing the result. An anti join returns the subset of rows of df1 that do not match with the keys in df2.\n\nThe order of rows in the result is kept from df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). 
A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : ignored as no columns are added to df1 columns (it is provided for consistency with other functions).\nvalidate : whether to check that columns passed as the on argument  define unique keys in each input data frame (according to isequal).  Can be a tuple or a pair, with the first element indicating whether to  run check for df1 and the second element for df2.  By default no check is performed.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, semijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> antijoin(name, job, on = 
:ID)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> antijoin(name, job2, on = :ID => :identifier)\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\njulia> antijoin(name, job2, on = [:ID => :identifier])\n1×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     3  Joe Blogs\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.crossjoin","page":"Functions","title":"DataAPI.crossjoin","text":"crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;\n          makeunique::Bool=false, renamecols=identity => identity)\ncrossjoin(df1, df2, dfs...; makeunique = false)\n\nPerform a cross join of two or more data frame objects and return a DataFrame containing the result. A cross join returns the cartesian product of rows from all passed data frames, where the first passed data frame is assigned to the dimension that changes the slowest and the last data frame is assigned to the dimension that changes the fastest.\n\nArguments\n\ndf1, df2, dfs... : the AbstractDataFrames to be joined\n\nKeyword Arguments\n\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. 
Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity.\n\nMetadata: table-level :note-style metadata is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved from both tables.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, semijoin, antijoin.\n\nExamples\n\njulia> df1 = DataFrame(X=1:3)\n3×1 DataFrame\n Row │ X\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> df2 = DataFrame(Y=[\"a\", \"b\"])\n2×1 DataFrame\n Row │ Y\n     │ String\n─────┼────────\n   1 │ a\n   2 │ b\n\njulia> crossjoin(df1, df2)\n6×2 DataFrame\n Row │ X      Y\n     │ Int64  String\n─────┼───────────────\n   1 │     1  a\n   2 │     1  b\n   3 │     2  a\n   4 │     2  b\n   5 │     3  a\n   6 │     3  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.innerjoin","page":"Functions","title":"DataAPI.innerjoin","text":"innerjoin(df1, df2; on, makeunique=false, validate=(false, false),\n          renamecols=(identity => identity), matchmissing=:error,\n          order=:undefined)\ninnerjoin(df1, df2, dfs...; on, makeunique=false,\n          validate=(false, false), matchmissing=:error,\n          order=:undefined)\n\nPerform an inner join of two or more data frame objects and return a DataFrame containing the result. An inner join includes rows with keys that match in all passed data frames.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df1. 
This behavior may change in future releases.\n\nArguments\n\ndf1, df2, dfs...: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). When joining only two data frames, a left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df1 and df2 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained. 
If :right then the order of rows  from the right data frame is retained.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity. In this case the validate keyword argument is applied recursively with left associativity.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for key columns is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved for all other columns.\n\nSee also: leftjoin, rightjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> innerjoin(name, job, on = :ID)\n2×3 DataFrame\n Row │ ID     Name      Job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4 
 Farmer\n\njulia> innerjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n2×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n\njulia> innerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n2×3 DataFrame\n Row │ ID     NAME      job\n     │ Int64  String    String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.leftjoin","page":"Functions","title":"DataAPI.leftjoin","text":"leftjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),\n         renamecols=(identity => identity), matchmissing=:error, order=:undefined)\n\nPerform a left join of two data frame objects and return a DataFrame containing the result. A left join includes all rows from df1.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df1. This behavior may change in future releases.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. 
If a Symbol or string, adds indicator column with the given name, for whether a row appeared in only df1 (\"left_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed, in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained. If :right then the order of rows  from the right data frame is retained (non-matching rows are put at the end).\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata is taken from df1 (including key columns), except for columns added to it from df2, whose column-level :note-style metadata is taken from df2.\n\nSee also: innerjoin, rightjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> leftjoin(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> leftjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n3×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String     String?\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> leftjoin(name, job2, on = [:ID => :identifier], 
renamecols = uppercase => lowercase)\n3×3 DataFrame\n Row │ ID     NAME       job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.leftjoin!","page":"Functions","title":"DataFrames.leftjoin!","text":"leftjoin!(df1, df2; on, makeunique=false, source=nothing,\n          matchmissing=:error)\n\nPerform a left join of two data frame objects by updating the df1 with the joined columns from df2.\n\nA left join includes all rows from df1 and leaves all rows and columns from df1 untouched. Note that each row in df1 must have at most one match in df2. Otherwise, this function would not be able to execute the join in-place since new rows would need to be added to df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. If a Symbol or string, adds indicator column with the given name, for whether a row appeared in only df1 (\"left_only\") or in both (\"both\"). 
If the name is already in use, the column name will be modified if makeunique=true.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nThe columns added to df1 from df2 will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1 (including key columns), except for columns added to it from df2, whose column-level :note-style metadata is taken from df2.\n\nSee also: leftjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> leftjoin!(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String     String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> leftjoin!(name, job2, on = :ID => :identifier, makeunique=true, source=:source)\n3×5 DataFrame\n Row │ ID     Name       Job      
Job_1    source\n     │ Int64  String     String?  String?  String\n─────┼───────────────────────────────────────────────\n   1 │     1  John Doe   Lawyer   Lawyer   both\n   2 │     2  Jane Doe   Doctor   Doctor   both\n   3 │     3  Joe Blogs  missing  missing  left_only\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.outerjoin","page":"Functions","title":"DataAPI.outerjoin","text":"outerjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),\n          renamecols=(identity => identity), matchmissing=:error, order=:undefined)\nouterjoin(df1, df2, dfs...; on, makeunique = false,\n          validate = (false, false), matchmissing=:error, order=:undefined)\n\nPerform an outer join of two or more data frame objects and return a DataFrame containing the result. An outer join includes rows with keys that appear in any of the passed data frames.\n\nThe order of rows in the result is undefined and may change in future releases.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the element type of these columns in both df1 and df2. This behavior may change in future releases.\n\nArguments\n\ndf1, df2, dfs... : the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). When joining only two data frames, a left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. 
If a Symbol or string, adds indicator column with the given name for whether a row appeared in only df1 (\"left_only\"), only df2 (\"right_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true. This argument is only supported when joining exactly two data frames.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed in which case it is applied to each column name, which is passed to it as a String. Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If :right then the order of rows from the right data frame is retained  (non-matching rows are put at the end).\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nIf more than two data frames are passed, the join is performed recursively with left associativity. In this case the indicator keyword argument is not supported and validate keyword argument is applied recursively with left associativity.\n\nMetadata: table-level :note-style metadata and column-level :note-style metadata for key columns is preserved only for keys which are defined in all passed tables and have the same value. Column-level :note-style metadata is preserved for all other columns.\n\nSee also: innerjoin, leftjoin, rightjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> outerjoin(name, job, on = :ID)\n4×3 DataFrame\n Row │ ID     Name       Job\n     │ Int64  String?    
String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> outerjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n4×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String?    String?\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\njulia> outerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n4×3 DataFrame\n Row │ ID     NAME       job\n     │ Int64  String?    String?\n─────┼───────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     3  Joe Blogs  missing\n   4 │     4  missing    Farmer\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.rightjoin","page":"Functions","title":"DataAPI.rightjoin","text":"rightjoin(df1, df2; on, makeunique=false, source=nothing,\n          validate=(false, false), renamecols=(identity => identity),\n          matchmissing=:error, order=:undefined)\n\nPerform a right join on two data frame objects and return a DataFrame containing the result. A right join includes all rows from df2.\n\nThe order of rows in the result is undefined and may change in future releases.\n\nIn the returned data frame the type of the columns on which the data frames are joined is determined by the type of these columns in df2. 
This behavior may change in future releases.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. on is a required argument.\nmakeunique : if false (the default), an error will be raised if duplicate names are found in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).\nsource : Default: nothing. If a Symbol or string, adds indicator column with the given name for whether a row appeared in only df2 (\"right_only\") or in both (\"both\"). If the name is already in use, the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument define unique keys in each input data frame (according to isequal). Can be a tuple or a pair, with the first element indicating whether to run check for df1 and the second element for df2. By default no check is performed.\nrenamecols : a Pair specifying how columns of left and right data frames should be renamed in the resulting data frame. Each element of the pair can be a string or a Symbol, in which case it is appended to the original column name; alternatively a function can be passed in which case it is applied to each column name, which is passed to it as a String. 
Note that renamecols does not affect on columns, whose names are always taken from the left data frame and left unchanged.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df1 on columns.\norder : if :undefined (the default) the order of rows in the result is  undefined and may change in future releases. If :left then the order of  rows from the left data frame is retained (non-matching rows are put at the end).  If :right then the order of rows from the right data frame is retained.\n\nAll columns of the returned data frame will support missing values.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata is taken from df2 (including key columns), except for columns added to it from df1, whose column-level :note-style metadata is taken from df1.\n\nSee also: innerjoin, leftjoin, outerjoin,           semijoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> rightjoin(name, job, on = :ID)\n3×3 DataFrame\n Row │ ID     Name 
     Job\n     │ Int64  String?   String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n   3 │     4  missing   Farmer\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> rightjoin(name, job2, on = :ID => :identifier, renamecols = \"_left\" => \"_right\")\n3×3 DataFrame\n Row │ ID     Name_left  Job_right\n     │ Int64  String?    String\n─────┼─────────────────────────────\n   1 │     1  John Doe   Lawyer\n   2 │     2  Jane Doe   Doctor\n   3 │     4  missing    Farmer\n\njulia> rightjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase => lowercase)\n3×3 DataFrame\n Row │ ID     NAME      job\n     │ Int64  String?   String\n─────┼─────────────────────────\n   1 │     1  John Doe  Lawyer\n   2 │     2  Jane Doe  Doctor\n   3 │     4  missing   Farmer\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.semijoin","page":"Functions","title":"DataAPI.semijoin","text":"semijoin(df1, df2; on, makeunique=false, validate=(false, false), matchmissing=:error)\n\nPerform a semi join of two data frame objects and return a DataFrame containing the result. A semi join returns the subset of rows of df1 that match with the keys in df2.\n\nThe order of rows in the result is kept from df1.\n\nArguments\n\ndf1, df2: the AbstractDataFrames to be joined\n\nKeyword Arguments\n\non : The names of the key columns on which to join the data frames. This can be a single name, or a vector of names (for joining on multiple columns). A left=>right pair of names can be used instead of a name, for the case where a key has different names in df1 and df2 (it is allowed to mix names and name pairs in a vector). Key values are compared using isequal. 
on is a required argument.\nmakeunique : ignored as no columns are added to df1 columns (it is provided for consistency with other functions).\nindicator : Default: nothing. If a Symbol or string, adds categorical indicator  column with the given name for whether a row appeared in only df1 (\"left_only\"),  only df2 (\"right_only\") or in both (\"both\"). If the name is already in use,  the column name will be modified if makeunique=true.\nvalidate : whether to check that columns passed as the on argument  define unique keys in each input data frame (according to isequal).  Can be a tuple or a pair, with the first element indicating whether to  run check for df1 and the second element for df2.  By default no check is performed.\nmatchmissing : if equal to :error throw an error if missing is present in on columns; if equal to :equal then missing is allowed and missings are matched; if equal to :notequal then missings are dropped in df2 on columns.\n\nIt is not allowed to join on columns that contain NaN or -0.0 in real or imaginary part of the number. 
If you need to perform a join on such values use CategoricalArrays.jl and transform a column containing such values into a CategoricalVector.\n\nWhen merging on categorical columns that differ in the ordering of their levels, the ordering of the left data frame takes precedence over the ordering of the right data frame.\n\nMetadata: table-level and column-level :note-style metadata are taken from df1.\n\nSee also: innerjoin, leftjoin, rightjoin,           outerjoin, antijoin, crossjoin.\n\nExamples\n\njulia> name = DataFrame(ID=[1, 2, 3], Name=[\"John Doe\", \"Jane Doe\", \"Joe Blogs\"])\n3×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼──────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n   3 │     3  Joe Blogs\n\njulia> job = DataFrame(ID=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ ID     Job\n     │ Int64  String\n─────┼───────────────\n   1 │     1  Lawyer\n   2 │     2  Doctor\n   3 │     4  Farmer\n\njulia> semijoin(name, job, on = :ID)\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n\njulia> job2 = DataFrame(identifier=[1, 2, 4], Job=[\"Lawyer\", \"Doctor\", \"Farmer\"])\n3×2 DataFrame\n Row │ identifier  Job\n     │ Int64       String\n─────┼────────────────────\n   1 │          1  Lawyer\n   2 │          2  Doctor\n   3 │          4  Farmer\n\njulia> semijoin(name, job2, on = :ID => :identifier)\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane Doe\n\njulia> semijoin(name, job2, on = [:ID => :identifier])\n2×2 DataFrame\n Row │ ID     Name\n     │ Int64  String\n─────┼─────────────────\n   1 │     1  John Doe\n   2 │     2  Jane 
Doe\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Grouping","page":"Functions","title":"Grouping","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"get\ngroupby\ngroupcols\ngroupindices\nkeys\nparent\nproprow\nvaluecols","category":"page"},{"location":"lib/functions/#Base.get","page":"Functions","title":"Base.get","text":"get(gd::GroupedDataFrame, key, default)\n\nGet a group based on the values of the grouping columns.\n\nkey may be a GroupKey, NamedTuple or Tuple of grouping column values (in the same order as the cols argument to groupby). It may also be an AbstractDict, in which case the order of the arguments does not matter.\n\nExamples\n\njulia> df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[2]),\n                      b=repeat([2, 1], outer=[3]),\n                      c=1:6);\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 3 groups based on key: a\nFirst Group (2 rows): a = :foo\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         1      4\n⋮\nLast Group (2 rows): a = :baz\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         2      3\n   2 │ baz         1      6\n\njulia> get(gd, (a=:bar,), nothing)\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ bar         1      2\n   2 │ bar         2      5\n\njulia> get(gd, (:baz,), nothing)\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         2      3\n   2 │ baz         1      6\n\njulia> get(gd, (:qux,), nothing)\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.groupby","page":"Functions","title":"DataAPI.groupby","text":"groupby(df::AbstractDataFrame, cols;\n        sort::Union{Bool, Nothing, NamedTuple}=nothing,\n        
skipmissing::Bool=false)\n\nReturn a GroupedDataFrame representing a view of an AbstractDataFrame split into row groups.\n\nArguments\n\ndf : an AbstractDataFrame to split\ncols : data frame columns to group by. Can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers). In particular if the selector picks no columns then a single-group GroupedDataFrame is created. As a special case, if cols is a single column or a vector of columns then it can contain columns wrapped in order that will be used to determine the order of groups if sort is true or a NamedTuple (if sort is false, then passing order is an error; if sort is nothing then it is set to true when order is passed).\nsort : if sort=true sort groups according to the values of the grouping columns cols; if sort=false groups are created in their order of appearance in df; if sort=nothing (the default) then the fastest available grouping algorithm is picked and in consequence the order of groups in the result is undefined and may change in future releases; below a description of the current implementation is provided. Additionally sort can be a NamedTuple having some or all of alg, lt, by, rev, and order fields. In this case the groups are sorted and their order follows the sortperm order.\nskipmissing : whether to skip groups with missing values in one of the grouping columns cols\n\nDetails\n\nAn iterator over a GroupedDataFrame returns a SubDataFrame view for each grouping into df. Within each group, the order of rows in df is preserved.\n\nA GroupedDataFrame also supports indexing by groups, select, transform, and combine (which applies a function to each group and combines the result into a data frame).\n\nGroupedDataFrame also supports the dictionary interface. The keys are GroupKey objects returned by keys(::GroupedDataFrame), which can also be used to get the values of the grouping columns for each group. 
Tuples and NamedTuples containing the values of the grouping columns (in the same order as the cols argument) are also accepted as indices. Finally, an AbstractDict can be used to index into a grouped data frame where the keys are column names of the data frame. The order of the keys does not matter in this case.\n\nIn the current implementation if sort=nothing groups are ordered following the order of appearance of values in the grouping columns, except when all grouping columns provide non-nothing DataAPI.refpool, in which case the order of groups follows the order of values returned by DataAPI.refpool. As a particular application of this rule if all cols are CategoricalVectors then groups are always sorted. Integer columns with a narrow range also use this optimization, so the order of groups when grouping on integer columns is undefined. A column is considered to be an integer column when deciding on the grouping algorithm choice if its eltype is a subtype of Union{Missing, Real}, all its elements are either missing or pass isinteger test, and none of them is equal to -0.0.\n\nSee also\n\ncombine, select, select!, transform, transform!\n\nExamples\n\njulia> df = DataFrame(a=repeat([1, 2, 3, 4], outer=[2]),\n                      b=repeat([2, 1], outer=[4]),\n                      c=1:8);\n\njulia> gd = groupby(df, :a)\nGroupedDataFrame with 4 groups based on key: a\nFirst Group (2 rows): a = 1\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n⋮\nLast Group (2 rows): a = 4\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\njulia> gd[1]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\njulia> last(gd)\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  
Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\njulia> gd[(a=3,)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> gd[Dict(\"a\" => 3)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> gd[(3,)]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n\njulia> k = first(keys(gd))\nGroupKey: (a = 1,)\n\njulia> gd[k]\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n\njulia> for g in gd\n           println(g)\n       end\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      1\n   2 │     1      2      5\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     2      1      2\n   2 │     2      1      6\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     3      2      3\n   2 │     3      2      7\n2×3 SubDataFrame\n Row │ a      b      c\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     4      1      4\n   2 │     4      1      8\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.groupcols","page":"Functions","title":"DataFrames.groupcols","text":"groupcols(gd::GroupedDataFrame)\n\nReturn a vector of Symbol column names in parent(gd) used for grouping.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.groupindices","page":"Functions","title":"DataFrames.groupindices","text":"groupindices(gd::GroupedDataFrame)\n\nReturn a vector of group indices 
for each row of parent(gd).\n\nRows appearing in group gd[i] are attributed index i. Rows not present in any group are attributed missing (this can happen if skipmissing=true was passed when creating gd, or if gd is a subset from a larger GroupedDataFrame).\n\nThe groupindices => target_col_name syntax (or just groupindices without specifying the target column name) is also supported in the transformation mini-language when passing a GroupedDataFrame to transformation functions (combine, select, etc.).\n\nExamples\n\njulia> df = DataFrame(id=[\"a\", \"c\", \"b\", \"b\", \"a\"])\n5×1 DataFrame\n Row │ id\n     │ String\n─────┼────────\n   1 │ a\n   2 │ c\n   3 │ b\n   4 │ b\n   5 │ a\n\njulia> gdf = groupby(df, :id);\n\njulia> combine(gdf, groupindices)\n3×2 DataFrame\n Row │ id      groupindices\n     │ String  Int64\n─────┼──────────────────────\n   1 │ a                  1\n   2 │ c                  2\n   3 │ b                  3\n\njulia> select(gdf, groupindices => :gid)\n5×2 DataFrame\n Row │ id      gid\n     │ String  Int64\n─────┼───────────────\n   1 │ a           1\n   2 │ c           2\n   3 │ b           3\n   4 │ b           3\n   5 │ a           1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.keys","page":"Functions","title":"Base.keys","text":"keys(gd::GroupedDataFrame)\n\nGet the set of keys for each group of the GroupedDataFrame gd as a GroupKeys object. Each key is a GroupKey, which behaves like a NamedTuple holding the values of the grouping columns for a given group. Unlike the equivalent Tuple, NamedTuple, and AbstractDict, these keys can be used to index into gd efficiently. 
The ordering of the keys is identical to the ordering of the groups of gd under iteration and integer indexing.\n\nExamples\n\njulia> df = DataFrame(a=repeat([:foo, :bar, :baz], outer=[4]),\n                      b=repeat([2, 1], outer=[6]),\n                      c=1:12);\n\njulia> gd = groupby(df, [:a, :b])\nGroupedDataFrame with 6 groups based on keys: a, b\nFirst Group (2 rows): a = :foo, b = 2\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         2      7\n⋮\nLast Group (2 rows): a = :baz, b = 1\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ baz         1      6\n   2 │ baz         1     12\n\njulia> keys(gd)\n6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (a = :foo, b = 2)\n GroupKey: (a = :bar, b = 1)\n GroupKey: (a = :baz, b = 2)\n GroupKey: (a = :foo, b = 1)\n GroupKey: (a = :bar, b = 2)\n GroupKey: (a = :baz, b = 1)\n\njulia> k = keys(gd)[1]\nGroupKey: (a = :foo, b = 2)\n\njulia> keys(k)\n2-element Vector{Symbol}:\n :a\n :b\n\njulia> values(k)  # Same as Tuple(k)\n(:foo, 2)\n\njulia> NamedTuple(k)\n(a = :foo, b = 2)\n\njulia> k.a\n:foo\n\njulia> k[:a]\n:foo\n\njulia> k[1]\n:foo\n\nKeys can be used as indices to retrieve the corresponding group from their GroupedDataFrame:\n\njulia> gd[k]\n2×3 SubDataFrame\n Row │ a       b      c\n     │ Symbol  Int64  Int64\n─────┼──────────────────────\n   1 │ foo         2      1\n   2 │ foo         2      7\n\njulia> gd[keys(gd)[1]] == gd[1]\ntrue\n\n\n\n\n\nkeys(dfc::DataFrameColumns)\n\nGet a vector of column names of dfc as Symbols.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.parent","page":"Functions","title":"Base.parent","text":"parent(gd::GroupedDataFrame)\n\nReturn the parent data frame of 
gd.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.proprow","page":"Functions","title":"DataFrames.proprow","text":"proprow\n\nCompute the proportion of rows which belong to each group, i.e. its number of rows divided by the total number of rows in a GroupedDataFrame.\n\nThis function can only be used in the transformation mini-language via the proprow => target_col_name syntax (or just proprow without specifying the target column name), when passing a GroupedDataFrame to transformation functions (combine, select, etc.).\n\nExamples\n\njulia> df = DataFrame(id=[\"a\", \"c\", \"b\", \"b\", \"a\", \"b\"])\n6×1 DataFrame\n Row │ id\n     │ String\n─────┼────────\n   1 │ a\n   2 │ c\n   3 │ b\n   4 │ b\n   5 │ a\n   6 │ b\n\njulia> gdf = groupby(df, :id);\n\njulia> combine(gdf, proprow)\n3×2 DataFrame\n Row │ id      proprow\n     │ String  Float64\n─────┼──────────────────\n   1 │ a       0.333333\n   2 │ c       0.166667\n   3 │ b       0.5\n\njulia> select(gdf, proprow => :frac)\n6×2 DataFrame\n Row │ id      frac\n     │ String  Float64\n─────┼──────────────────\n   1 │ a       0.333333\n   2 │ c       0.166667\n   3 │ b       0.5\n   4 │ b       0.5\n   5 │ a       0.333333\n   6 │ b       0.5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.valuecols","page":"Functions","title":"DataFrames.valuecols","text":"valuecols(gd::GroupedDataFrame)\n\nReturn a vector of Symbol column names in parent(gd) not used for grouping.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Filtering-rows","page":"Functions","title":"Filtering 
rows","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allunique\ndeleteat!\nempty\nempty!\nfilter\nfilter!\nkeepat!\nfirst\nlast\nnonunique\nonly\npop!\npopat!\npopfirst!\nresize!\nsubset\nsubset!\nunique\nunique!","category":"page"},{"location":"lib/functions/#Base.allunique","page":"Functions","title":"Base.allunique","text":"allunique(df::AbstractDataFrame, cols=:)\n\nReturn true if none of the rows of df are duplicated. Two rows are duplicates if all their columns contain equal values (according to isequal) for all columns in cols (by default, all columns).\n\nArguments\n\ndf : AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. Can be any column selector or transformation accepted by select.\n\nSee also unique and nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> allunique(df)\ntrue\n\njulia> allunique(df, :x)\nfalse\n\njulia> allunique(df, :i => ByRow(isodd))\nfalse\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.deleteat!","page":"Functions","title":"Base.deleteat!","text":"deleteat!(df::DataFrame, inds)\n\nDelete rows specified by inds from a DataFrame df in place and return it.\n\nInternally deleteat! 
is called for all columns so inds must be: a vector of sorted and unique integers, a boolean vector, an integer, or Not wrapping any valid selector.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> deleteat!(df, 2)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.empty","page":"Functions","title":"Base.empty","text":"empty(df::AbstractDataFrame)\n\nCreate a new DataFrame with the same column names and column element types as df but with zero rows.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.empty!","page":"Functions","title":"Base.empty!","text":"empty!(df::DataFrame)\n\nRemove all rows from df, making each of its columns empty.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> empty!(df)\n0×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┴──────────────\n\njulia> df.a, df.b\n(Int64[], Int64[])\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.filter","page":"Functions","title":"Base.filter","text":"filter(fun, df::AbstractDataFrame; view::Bool=false)\nfilter(cols => fun, df::AbstractDataFrame; view::Bool=false)\n\nReturn a data frame containing only rows from df for which fun returns true.\n\nIf cols is not specified then the predicate fun is passed DataFrameRows. 
Elements of a DataFrameRow may be accessed with dot syntax or column indexing inside fun.\n\nIf cols is specified then the predicate fun is passed elements of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned.\n\nPassing cols leads to a more efficient execution of the operation for large data frames.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset function instead as it is consistent with other DataFrames.jl functions (as opposed to filter).\n\nnote: Note\nDue to type stability the filter(cols => fun, df::AbstractDataFrame; view::Bool=false) call is preferred in performance critical applications.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: filter!\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> filter(row -> row.x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter(row -> row[\"x\"] > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter(:x => x -> x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter([:x, :y] => 
(x, y) -> x == 1 || y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\njulia> filter(AsTable(:) => nt -> nt.x == 1 || nt.y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\n\n\n\n\nfilter(fun, gdf::GroupedDataFrame; ungroup::Bool=false)\nfilter(cols => fun, gdf::GroupedDataFrame; ungroup::Bool=false)\n\nReturn only groups in gd for which fun returns true as a GroupedDataFrame if ungroup=false (the default), or as a data frame if ungroup=true.\n\nIf cols is not specified then the predicate fun is called with a SubDataFrame for each group.\n\nIf cols is specified then the predicate fun is called for each group with views of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset function instead as it is consistent with other DataFrames.jl functions (as opposed to filter).\n\nExamples\n\njulia> df = DataFrame(g=[1, 2], x=['a', 'b']);\n\njulia> gd = groupby(df, :g)\nGroupedDataFrame with 2 groups based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n⋮\nLast Group (1 row): g = 2\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     2  b\n\njulia> filter(x -> x.x[1] == 'a', gd)\nGroupedDataFrame with 1 group based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │  
   1  a\n\njulia> filter(:x => x -> x[1] == 'a', gd)\nGroupedDataFrame with 1 group based on key: g\nFirst Group (1 row): g = 1\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\njulia> filter(:x => x -> x[1] == 'a', gd, ungroup=true)\n1×2 DataFrame\n Row │ g      x\n     │ Int64  Char\n─────┼─────────────\n   1 │     1  a\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.filter!","page":"Functions","title":"Base.filter!","text":"filter!(fun, df::AbstractDataFrame)\nfilter!(cols => fun, df::AbstractDataFrame)\n\nRemove rows from data frame df for which fun returns false.\n\nIf cols is not specified then the predicate fun is passed DataFrameRows. Elements of a DataFrameRow may be accessed with dot syntax or column indexing inside fun.\n\nIf cols is specified then the predicate fun is passed elements of the corresponding columns as separate positional arguments, unless cols is an AsTable selector, in which case a NamedTuple of these arguments is passed. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers), and column duplicates are allowed if a vector of Symbols, strings, or integers is passed.\n\nPassing cols leads to a more efficient execution of the operation for large data frames.\n\nnote: Note\nThis method is defined so that DataFrames.jl implements the Julia API for collections, but it is generally recommended to use the subset! 
function instead as it is consistent with other DataFrames.jl functions (as opposed to filter!).\n\nnote: Note\nDue to type stability the filter!(cols => fun, df::AbstractDataFrame) call is preferred in performance critical applications.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: filter\n\nExamples\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"])\n4×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     2  a\n   4 │     1  b\n\njulia> filter!(row -> row.x > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter!(row -> row[\"x\"] > 1, df)\n2×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     2  a\n\njulia> filter!(:x => x -> x == 3, df)\n1×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"]);\n\njulia> filter!([:x, :y] => (x, y) -> x == 1 || y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\njulia> df = DataFrame(x=[3, 1, 2, 1], y=[\"b\", \"c\", \"a\", \"b\"]);\n\njulia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == \"b\", df)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  String\n─────┼───────────────\n   1 │     3  b\n   2 │     1  c\n   3 │     1  b\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.keepat!","page":"Functions","title":"Base.keepat!","text":"keepat!(df::DataFrame, inds)\n\nDelete rows at all indices not specified by inds from a DataFrame df in place and return it.\n\nInternally deleteat! 
is called for all columns so inds must be: a vector of sorted and unique integers, a boolean vector, an integer, or Not wrapping any valid selector.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> keepat!(df, [1, 3])\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.first","page":"Functions","title":"Base.first","text":"first(df::AbstractDataFrame)\n\nGet the first row of df as a DataFrameRow.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\nfirst(df::AbstractDataFrame, n::Integer; view::Bool=false)\n\nGet a data frame with the n first rows of df. Get all rows if n is greater than the number of rows in df. Error if n is negative.\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.last","page":"Functions","title":"Base.last","text":"last(df::AbstractDataFrame)\n\nGet the last row of df as a DataFrameRow.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\nlast(df::AbstractDataFrame, n::Integer; view::Bool=false)\n\nGet a data frame with the n last rows of df. Get all rows if n is greater than the number of rows in df. Error if n is negative.\n\nIf view=false a freshly allocated DataFrame is returned. 
If view=true then a SubDataFrame view into df is returned.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.nonunique","page":"Functions","title":"DataFrames.nonunique","text":"nonunique(df::AbstractDataFrame; keep::Symbol=:first)\nnonunique(df::AbstractDataFrame, cols; keep::Symbol=:first)\n\nReturn a Vector{Bool} in which true entries indicate duplicate rows.\n\nDuplicate rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is indicated with a false entry. If keep=:last, only the last occurrence of a set of duplicate rows is indicated with a false entry. If keep=:noduplicates, only rows without any duplicates are indicated with a false entry.\n\nArguments\n\ndf : AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. 
Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nSee also unique and unique!.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> nonunique(df)\n8-element Vector{Bool}:\n 0\n 0\n 0\n 0\n 1\n 1\n 1\n 1\n\njulia> nonunique(df, keep=:last)\n8-element Vector{Bool}:\n 1\n 1\n 1\n 1\n 0\n 0\n 0\n 0\n\njulia> nonunique(df, 2)\n8-element Vector{Bool}:\n 0\n 0\n 1\n 1\n 1\n 1\n 1\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.Iterators.only","page":"Functions","title":"Base.Iterators.only","text":"only(df::AbstractDataFrame)\n\nIf df has a single row return it as a DataFrameRow; otherwise throw ArgumentError.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pop!","page":"Functions","title":"Base.pop!","text":"pop!(df::DataFrame)\n\nRemove the last row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> pop!(df)\n(a = 3, b = 6)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     
2      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.popat!","page":"Functions","title":"Base.popat!","text":"popat!(df::DataFrame, i::Integer)\n\nRemove the i-th row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> popat!(df, 2)\n(a = 2, b = 5)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.popfirst!","page":"Functions","title":"Base.popfirst!","text":"popfirst!(df::DataFrame)\n\nRemove the first row from df and return a NamedTuple created from this row.\n\nnote: Note\nUsing this method for very wide data frames may lead to expensive compilation.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> popfirst!(df)\n(a = 1, b = 4)\n\njulia> df\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      5\n   2 │     3      6\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.resize!","page":"Functions","title":"Base.resize!","text":"resize!(df::DataFrame, n::Integer)\n\nResize df to have n rows by calling resize! 
on all columns of df.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=1:3, b=4:6)\n3×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> resize!(df, 2)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.subset","page":"Functions","title":"DataFrames.subset","text":"subset(df::AbstractDataFrame, args...;\n       skipmissing::Bool=false, view::Bool=false, threads::Bool=true)\nsubset(gdf::GroupedDataFrame, args...;\n       skipmissing::Bool=false, view::Bool=false,\n       ungroup::Bool=true, threads::Bool=true)\n\nReturn a copy of data frame df or parent of gdf containing only rows for which all values produced by transformation(s) args for a given row are true. All transformations must produce vectors containing true or false. When the first argument is a GroupedDataFrame, transformations are also allowed to return a single true or false value, which results in including or excluding a whole group.\n\nIf skipmissing=false (the default) args are required to produce results containing only Bool values. If skipmissing=true, additionally missing is allowed and it is treated as false (i.e. 
rows for which one of the conditions returns missing are skipped).\n\nEach argument passed in args can be any specifier following the rules described for select with the restriction that:\n\nspecifying target column name is not allowed as subset does not create new columns;\nevery passed transformation must return a scalar or a vector (returning AbstractDataFrame, NamedTuple, DataFrameRow or AbstractMatrix is not supported).\n\nIf view=true a SubDataFrame view  is returned instead of a DataFrame.\n\nIf ungroup=false the resulting data frame is re-grouped based on the same grouping columns as gdf and a GroupedDataFrame is returned (preserving the order of groups from gdf).\n\nIf threads=true (the default) transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. Set to false if some transformations require serial execution or are not thread-safe.\n\nIf a GroupedDataFrame is passed then it must include all groups present in the parent data frame, like in select!.\n\nnote: Note\nNote that as the subset function works in exactly the same way as other transformation functions defined in DataFrames.jl this is the preferred way to subset rows of a data frame or grouped data frame. In particular it uses a different set of rules for specifying transformations than filter which is implemented in DataFrames.jl to ensure support for the standard Julia API for collections.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: subset!, filter, select\n\nExamples\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false],\n                      y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12])\n4×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     1   true   true     true      1\n   2 │     2  false   true     true      2\n   3 │     3   true  false  missing     11\n   4 │     4  false  false  missing     12\n\njulia> subset(df, :x)\n2×5 DataFrame\n Row │ id     x     y      z        v\n     │ Int64  Bool  Bool   Bool?    Int64\n─────┼────────────────────────────────────\n   1 │     1  true   true     true      1\n   2 │     3  true  false  missing     11\n\njulia> subset(df, :v => x -> x .> 3)\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\njulia> subset(df, :x, :y => ByRow(!))\n1×5 DataFrame\n Row │ id     x     y      z        v\n     │ Int64  Bool  Bool   Bool?    Int64\n─────┼────────────────────────────────────\n   1 │     3  true  false  missing     11\n\njulia> subset(df, :x, :z, skipmissing=true)\n1×5 DataFrame\n Row │ id     x     y     z      v\n     │ Int64  Bool  Bool  Bool?  Int64\n─────┼─────────────────────────────────\n   1 │     1  true  true   true      1\n\njulia> subset(df, :x, :z)\nERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values\n\njulia> subset(groupby(df, :y), :v => x -> x .> minimum(x))\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     2  false   true     true      2\n   2 │     4  false  false  missing     12\n\njulia> subset(groupby(df, :y), :v => x -> minimum(x) > 5)\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.subset!","page":"Functions","title":"DataFrames.subset!","text":"subset!(df::AbstractDataFrame, args...;\n        skipmissing::Bool=false, threads::Bool=true)\nsubset!(gdf::GroupedDataFrame{DataFrame}, args...;\n        skipmissing::Bool=false, ungroup::Bool=true, threads::Bool=true)\n\nUpdate data frame df or the parent of gdf in place to contain only rows for which all values produced by transformation(s) args for a given row is true. All transformations must produce vectors containing true or false. When the first argument is a GroupedDataFrame, transformations are also allowed to return a single true or false value, which results in including or excluding a whole group.\n\nIf skipmissing=false (the default) args are required to produce results containing only Bool values. If skipmissing=true, additionally missing is allowed and it is treated as false (i.e. rows for which one of the conditions returns missing are skipped).\n\nEach argument passed in args can be any specifier following the rules described for select with the restriction that:\n\nspecifying target column name is not allowed as subset! does not create new columns;\nevery passed transformation must return a scalar or a vector (returning AbstractDataFrame, NamedTuple, DataFrameRow or AbstractMatrix is not supported).\n\nIf ungroup=false the passed GroupedDataFrame gdf is updated (preserving the order of its groups) and returned.\n\nIf threads=true (the default) transformations may be run in separate tasks which can execute in parallel (possibly being applied to multiple rows or groups at the same time). Whether or not tasks are actually spawned and their number are determined automatically. 
Set to false if some transformations require serial execution or are not thread-safe.\n\nIf GroupedDataFrame is subsetted then it must include all groups present in the parent data frame, like in select!. In this case the passed GroupedDataFrame is updated to have correct groups after its parent is updated.\n\nnote: Note\nNote that as the subset! function works in exactly the same way as other transformation functions defined in DataFrames.jl this is the preferred way to subset rows of a data frame or grouped data frame. In particular it uses a different set of rules for specifying transformations than filter! which is implemented in DataFrames.jl to ensure support for the standard Julia API for collections.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: subset, filter!, select!\n\nExamples\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false])\n4×3 DataFrame\n Row │ id     x      y\n     │ Int64  Bool   Bool\n─────┼─────────────────────\n   1 │     1   true   true\n   2 │     2  false   true\n   3 │     3   true  false\n   4 │     4  false  false\n\njulia> subset!(df, :x, :y => ByRow(!));\n\njulia> df\n1×3 DataFrame\n Row │ id     x     y\n     │ Int64  Bool  Bool\n─────┼────────────────────\n   1 │     3  true  false\n\njulia> df = DataFrame(id=1:4, y=[true, true, false, false], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));\n\njulia> df\n2×3 DataFrame\n Row │ id     y      v\n     │ Int64  Bool   Int64\n─────┼─────────────────────\n   1 │     2   true      2\n   2 │     4  false     12\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false],\n                      z=[true, true, missing, missing], v=1:4)\n4×4 DataFrame\n Row │ id     x      z        v\n     │ Int64  Bool   Bool?    
Int64\n─────┼──────────────────────────────\n   1 │     1   true     true      1\n   2 │     2  false     true      2\n   3 │     3   true  missing      3\n   4 │     4  false  missing      4\n\njulia> subset!(df, :x, :z)\nERROR: ArgumentError: missing was returned in condition number 2 but only true or false are allowed; pass skipmissing=true to skip missing values\n\njulia> subset!(df, :x, :z, skipmissing=true);\n\njulia> df\n1×4 DataFrame\n Row │ id     x     z      v\n     │ Int64  Bool  Bool?  Int64\n─────┼───────────────────────────\n   1 │     1  true   true      1\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> x .> minimum(x));\n\njulia> df\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    Int64\n─────┼─────────────────────────────────────\n   1 │     2  false   true     true      2\n   2 │     4  false  false  missing     12\n\njulia> df = DataFrame(id=1:4, x=[true, false, true, false], y=[true, true, false, false],\n                      z=[true, true, missing, missing], v=[1, 2, 11, 12]);\n\njulia> subset!(groupby(df, :y), :v => x -> minimum(x) > 5);\n\njulia> df\n2×5 DataFrame\n Row │ id     x      y      z        v\n     │ Int64  Bool   Bool   Bool?    
Int64\n─────┼─────────────────────────────────────\n   1 │     3   true  false  missing     11\n   2 │     4  false  false  missing     12\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.unique","page":"Functions","title":"Base.unique","text":"unique(df::AbstractDataFrame; view::Bool=false, keep::Symbol=:first)\nunique(df::AbstractDataFrame, cols; view::Bool=false, keep::Symbol=:first)\n\nReturn a data frame containing only unique rows in df.\n\nNon-unique (duplicate) rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is kept. If keep=:last, only the last occurrence of a set of duplicate rows is kept. If keep=:noduplicates, only rows without any duplicates are kept.\n\nIf view=false a freshly allocated DataFrame is returned, and if view=true then a SubDataFrame view into df is returned.\n\nArguments\n\ndf : the AbstractDataFrame\ncols : a selector specifying the column(s) or their transformations to compare. 
Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: unique!, nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> unique(df)   # doesn't modify df\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> unique(df, 2)\n2×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n\njulia> unique(df, keep=:noduplicates)\n0×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┴──────────────\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.unique!","page":"Functions","title":"Base.unique!","text":"unique!(df::AbstractDataFrame; keep::Symbol=:first)\nunique!(df::AbstractDataFrame, cols; keep::Symbol=:first)\n\nUpdate df in-place to contain only unique rows.\n\nNon-unique (duplicate) rows are those for which at least another row contains equal values (according to isequal) for all columns in cols (by default, all columns). If keep=:first (the default), only the first occurrence of a set of duplicate rows is kept. If keep=:last, only the last occurrence of a set of duplicate rows is kept. 
If keep=:noduplicates, only rows without any duplicates are kept.\n\nArguments\n\ndf : the AbstractDataFrame\ncols :  column indicator (Symbol, Int, Vector{Symbol}, Regex, etc.) specifying the column(s) to compare. Can be any column selector or transformation accepted by select that returns at least one column if df has at least one column.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: unique!, nonunique.\n\nExamples\n\njulia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> df = vcat(df, df)\n8×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n   5 │     1      1\n   6 │     2      2\n   7 │     3      1\n   8 │     4      2\n\njulia> unique!(copy(df))  # modifies df\n4×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     2      2\n   3 │     3      1\n   4 │     4      2\n\njulia> unique(df, keep=:noduplicates)\n0×2 DataFrame\n Row │ i      x\n     │ Int64  Int64\n─────┴──────────────\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Working-with-missing-values","page":"Functions","title":"Working with missing values","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"allowmissing\nallowmissing!\ncompletecases\ndisallowmissing\ndisallowmissing!\ndropmissing\ndropmissing!","category":"page"},{"location":"lib/functions/#Missings.allowmissing","page":"Functions","title":"Missings.allowmissing","text":"allowmissing(df::AbstractDataFrame, cols=:)\n\nReturn a copy of data frame df with columns cols converted to element type Union{T, Missing} from T to allow support for missing values.\n\ncols can be any column selector (Symbol, string 
or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=[1, 2])\n2×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> allowmissing(df)\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼────────\n   1 │      1\n   2 │      2\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.allowmissing!","page":"Functions","title":"DataFrames.allowmissing!","text":"allowmissing!(df::DataFrame, cols=:)\n\nConvert columns cols of data frame df from element type T to Union{T, Missing} to support missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.completecases","page":"Functions","title":"DataFrames.completecases","text":"completecases(df::AbstractDataFrame, cols=:)\n\nReturn a Boolean vector with true entries indicating rows without missing values (complete cases) in data frame df.\n\nIf cols is provided, only missing values in the corresponding columns are considered. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers) that returns at least one column if df has at least one column.\n\nSee also: dropmissing and dropmissing!. 
Use findall(completecases(df)) to get the indices of the rows.\n\nExamples\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> completecases(df)\n5-element BitVector:\n 0\n 0\n 0\n 1\n 1\n\njulia> completecases(df, :x)\n5-element BitVector:\n 0\n 1\n 0\n 1\n 1\n\njulia> completecases(df, [:x, :y])\n5-element BitVector:\n 0\n 0\n 0\n 1\n 1\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Missings.disallowmissing","page":"Functions","title":"Missings.disallowmissing","text":"disallowmissing(df::AbstractDataFrame, cols=:; error::Bool=true)\n\nReturn a copy of data frame df with columns cols converted from element type Union{T, Missing} to T to drop support for missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nIf error=false then columns containing a missing value will be skipped instead of throwing an error.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(a=Union{Int, Missing}[1, 2])\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼────────\n   1 │      1\n   2 │      2\n\njulia> disallowmissing(df)\n2×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> df = DataFrame(a=[1, missing])\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │ missing\n\njulia> disallowmissing(df, error=false)\n2×1 DataFrame\n Row │ a\n     │ Int64?\n─────┼─────────\n   1 │       1\n   2 │ 
missing\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.disallowmissing!","page":"Functions","title":"DataFrames.disallowmissing!","text":"disallowmissing!(df::DataFrame, cols=:; error::Bool=true)\n\nConvert columns cols of data frame df from element type Union{T, Missing} to T to drop support for missing values.\n\ncols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf cols is omitted all columns in the data frame are converted.\n\nIf error=false then columns containing a missing value will be skipped instead of throwing an error.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.dropmissing","page":"Functions","title":"DataFrames.dropmissing","text":"dropmissing(df::AbstractDataFrame, cols=:; view::Bool=false, disallowmissing::Bool=!view)\n\nReturn a data frame excluding rows with missing values in df.\n\nIf cols is provided, only missing values in the corresponding columns are considered. cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf view=false a freshly allocated DataFrame is returned. If view=true then a SubDataFrame view into df is returned. 
In this case disallowmissing must be false.\n\nIf disallowmissing is true (the default when view is false) then columns specified in cols will be converted so as not to allow for missing values using disallowmissing!.\n\nSee also: completecases and dropmissing!.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nExamples\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing(df)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\njulia> dropmissing(df, disallowmissing=false)\n2×3 DataFrame\n Row │ i      x       y\n     │ Int64  Int64?  String?\n─────┼────────────────────────\n   1 │     4       2  d\n   2 │     5       1  e\n\njulia> dropmissing(df, :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e\n\njulia> dropmissing(df, [:x, :y])\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataFrames.dropmissing!","page":"Functions","title":"DataFrames.dropmissing!","text":"dropmissing!(df::AbstractDataFrame, cols=:; disallowmissing::Bool=true)\n\nRemove rows with missing values from data frame df and return it.\n\nIf cols is provided, only missing values in the corresponding columns are considered. 
cols can be any column selector (Symbol, string or integer; :, Cols, All, Between, Not, a regular expression, or a vector of Symbols, strings or integers).\n\nIf disallowmissing is true (the default) then the cols columns will get converted using disallowmissing!.\n\nMetadata: this function preserves table-level and column-level :note-style metadata.\n\nSee also: dropmissing and completecases.\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing!(copy(df))\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\njulia> dropmissing!(copy(df), disallowmissing=false)\n2×3 DataFrame\n Row │ i      x       y\n     │ Int64  Int64?  
String?\n─────┼────────────────────────\n   1 │     4       2  d\n   2 │     5       1  e\n\njulia> dropmissing!(copy(df), :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e\n\njulia> dropmissing!(df, [:x, :y])\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Iteration","page":"Functions","title":"Iteration","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"eachcol\neachrow\nvalues\npairs\nIterators.partition","category":"page"},{"location":"lib/functions/#Base.eachcol","page":"Functions","title":"Base.eachcol","text":"eachcol(df::AbstractDataFrame)\n\nReturn a DataFrameColumns object that is a vector-like that allows iterating an AbstractDataFrame column by column.\n\nIndexing into DataFrameColumns objects using integer, Symbol or string returns the corresponding column (without copying). Indexing into DataFrameColumns objects using a multiple column selector returns a subsetted DataFrameColumns object with a new parent containing only the selected columns (without copying).\n\nDataFrameColumns supports most of the AbstractVector API. 
The key differences are that it is read-only and that the keys function returns a vector of Symbols (and not integers as for normal vectors).\n\nIn particular findnext, findprev, findfirst, findlast, and findall functions are supported, and in findnext and findprev functions it is allowed to pass an integer, string, or Symbol as a reference index.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> eachcol(df)\n4×2 DataFrameColumns\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> collect(eachcol(df))\n2-element Vector{AbstractVector}:\n [1, 2, 3, 4]\n [11, 12, 13, 14]\n\njulia> map(eachcol(df)) do col\n           maximum(col) - minimum(col)\n       end\n2-element Vector{Int64}:\n 3\n 3\n\njulia> sum.(eachcol(df))\n2-element Vector{Int64}:\n 10\n 50\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.eachrow","page":"Functions","title":"Base.eachrow","text":"eachrow(df::AbstractDataFrame)\n\nReturn a DataFrameRows that iterates a data frame row by row, with each row represented as a DataFrameRow.\n\nBecause DataFrameRows have an eltype of Any, use copy(dfr::DataFrameRow) to obtain a named tuple, which supports iteration and property access like a DataFrameRow, but also passes information on the eltypes of the columns of df.\n\nExamples\n\njulia> df = DataFrame(x=1:4, y=11:14)\n4×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> eachrow(df)\n4×2 DataFrameRows\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n\njulia> copy.(eachrow(df))\n4-element 
Vector{@NamedTuple{x::Int64, y::Int64}}:\n (x = 1, y = 11)\n (x = 2, y = 12)\n (x = 3, y = 13)\n (x = 4, y = 14)\n\njulia> eachrow(view(df, [4, 3], [2, 1]))\n2×2 DataFrameRows\n Row │ y      x\n     │ Int64  Int64\n─────┼──────────────\n   1 │    14      4\n   2 │    13      3\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.values","page":"Functions","title":"Base.values","text":"values(dfc::DataFrameColumns)\n\nGet a vector of columns from dfc.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.pairs","page":"Functions","title":"Base.pairs","text":"pairs(dfc::DataFrameColumns)\n\nReturn an iterator of pairs associating the name of each column of dfc with the corresponding column vector, i.e. name => col where name is the column name of the column col.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Base.Iterators.partition","page":"Functions","title":"Base.Iterators.partition","text":"Iterators.partition(df::AbstractDataFrame, n::Integer)\n\nIterate over df data frame n rows at a time, returning each block as a SubDataFrame.\n\nExamples\n\njulia> collect(Iterators.partition(DataFrame(x=1:5), 2))\n3-element Vector{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}:\n 2×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n 2×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     3\n   2 │     4\n 1×1 SubDataFrame\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     5\n\n\n\n\n\nIterators.partition(dfr::DataFrameRows, n::Integer)\n\nIterate over DataFrameRows dfr n rows at a time, returning each block as a DataFrameRows over a view of rows of parent of dfr.\n\nExamples\n\njulia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))\n3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:\n 2×1 DataFrameRows\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n 2×1 DataFrameRows\n Row │ x\n     │ 
Int64\n─────┼───────\n   1 │     3\n   2 │     4\n 1×1 DataFrameRows\n Row │ x\n     │ Int64\n─────┼───────\n   1 │     5\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Equality","page":"Functions","title":"Equality","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"isapprox","category":"page"},{"location":"lib/functions/#Base.isapprox","page":"Functions","title":"Base.isapprox","text":"isapprox(df1::AbstractDataFrame, df2::AbstractDataFrame;\n         rtol::Real=atol>0 ? 0 : √eps, atol::Real=0,\n         nans::Bool=false, norm::Function=norm)\n\nInexact equality comparison. df1 and df2 must have the same size and column names. Return  true if isapprox with given keyword arguments applied to all pairs of columns stored in df1 and df2 returns true.\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#Metadata","page":"Functions","title":"Metadata","text":"","category":"section"},{"location":"lib/functions/","page":"Functions","title":"Functions","text":"metadata\nmetadatakeys\nmetadata!\ndeletemetadata!\nemptymetadata!\ncolmetadata\ncolmetadatakeys\ncolmetadata!\ndeletecolmetadata!\nemptycolmetadata!","category":"page"},{"location":"lib/functions/#DataAPI.metadata","page":"Functions","title":"DataAPI.metadata","text":"metadata(df::AbstractDataFrame, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfr::DataFrameRow, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfc::DataFrameColumns, key::AbstractString, [default]; style::Bool=false)\nmetadata(dfr::DataFrameRows, key::AbstractString, [default]; style::Bool=false)\n\nReturn table-level metadata value associated with df for key key. 
If style=true return a tuple of metadata value and metadata style.\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nIf default is passed then return it if key does not exist; if style=true return (default, :default).\n\nSee also: metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.metadatakeys","page":"Functions","title":"DataAPI.metadatakeys","text":"metadatakeys(df::AbstractDataFrame)\nmetadatakeys(dfr::DataFrameRow)\nmetadatakeys(dfc::DataFrameColumns)\nmetadatakeys(dfr::DataFrameRows)\n\nReturn an iterator of table-level metadata keys which are set in the object.\n\nValues can be accessed using metadata(df, key).\n\nSubDataFrame and DataFrameRow expose only :note-style metadata keys of their parent.\n\nSee also: metadata, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.metadata!","page":"Functions","title":"DataAPI.metadata!","text":"metadata!(df::AbstractDataFrame, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfr::DataFrameRow, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfc::DataFrameColumns, key::AbstractString, value; style::Symbol=:default)\nmetadata!(dfr::DataFrameRows, key::AbstractString, value; style::Symbol=:default)\n\nSet table-level metadata for object df for key key to have value value and style style (:default by default) and return df.\n\nFor SubDataFrame and DataFrameRow only :note-style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.\n\nSee also: metadata, metadatakeys, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.deletemetadata!","page":"Functions","title":"DataAPI.deletemetadata!","text":"deletemetadata!(df::AbstractDataFrame, key::AbstractString)\ndeletemetadata!(dfr::DataFrameRow, key::AbstractString)\ndeletemetadata!(dfc::DataFrameColumns, key::AbstractString)\ndeletemetadata!(dfr::DataFrameRows, key::AbstractString)\n\nDelete table-level metadata from object df for key key and return df. If key does not exist, return df without modification.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletemetadata!(df, \"name\");\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.emptymetadata!","page":"Functions","title":"DataAPI.emptymetadata!","text":"emptymetadata!(df::AbstractDataFrame)\nemptymetadata!(dfr::DataFrameRow)\nemptymetadata!(dfc::DataFrameColumns)\nemptymetadata!(dfr::DataFrameRows)\n\nDelete all table-level metadata from object df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> metadatakeys(df)\n()\n\njulia> metadata!(df, \"name\", \"example\", style=:note);\n\njulia> metadatakeys(df)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> metadata(df, \"name\")\n\"example\"\n\njulia> metadata(df, \"name\", style=true)\n(\"example\", :note)\n\njulia> emptymetadata!(df);\n\njulia> metadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadata","page":"Functions","title":"DataAPI.colmetadata","text":"colmetadata(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\ncolmetadata(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, [default]; style::Bool=false)\n\nReturn column-level metadata value associated with df for column col and key key.\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nIf default is passed then return it if key does not exist for column col; if style=true return (default, :default). If col does not exist in df always throw an error.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadatakeys, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadatakeys","page":"Functions","title":"DataAPI.colmetadatakeys","text":"colmetadatakeys(df::AbstractDataFrame, [col::ColumnIndex])\ncolmetadatakeys(dfr::DataFrameRow, [col::ColumnIndex])\ncolmetadatakeys(dfc::DataFrameColumns, [col::ColumnIndex])\ncolmetadatakeys(dfr::DataFrameRows, [col::ColumnIndex])\n\nIf col is passed return an iterator of column-level metadata keys which are set for column col. If col is not passed return an iterator of col => colmetadatakeys(x, col) pairs for all columns that have metadata, where each col is a Symbol.\n\nValues can be accessed using colmetadata(df, col, key).\n\nSubDataFrame and DataFrameRow expose only :note-style metadata of their parent.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadata!, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.colmetadata!","page":"Functions","title":"DataAPI.colmetadata!","text":"colmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\ncolmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString, value; style::Symbol=:default)\n\nSet column-level metadata in df for column col and key key to have value value and style style (:default by default) and return df.\n\nFor SubDataFrame and DataFrameRow only :note style is allowed. Trying to set a key-value pair for which the key already exists in the parent data frame with another style throws an error.\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, deletecolmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.deletecolmetadata!","page":"Functions","title":"DataAPI.deletecolmetadata!","text":"deletecolmetadata!(df::AbstractDataFrame, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfr::DataFrameRow, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfc::DataFrameColumns, col::ColumnIndex, key::AbstractString)\ndeletecolmetadata!(dfr::DataFrameRows, col::ColumnIndex, key::AbstractString)\n\nDelete column-level metadata set in df for column col and key key and return df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, emptycolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadatakeys(df)\n()\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> deletecolmetadata!(df, :a, \"name\");\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"lib/functions/#DataAPI.emptycolmetadata!","page":"Functions","title":"DataAPI.emptycolmetadata!","text":"emptycolmetadata!(df::AbstractDataFrame, [col::ColumnIndex])\nemptycolmetadata!(dfr::DataFrameRow, [col::ColumnIndex])\nemptycolmetadata!(dfc::DataFrameColumns, [col::ColumnIndex])\nemptycolmetadata!(dfr::DataFrameRows, [col::ColumnIndex])\n\nDelete all column-level metadata set in df for column col and return df. If col is not passed, delete column-level metadata for all columns of df.\n\nFor SubDataFrame and DataFrameRow only :note-style metadata from their parent can be deleted (as other styles are not propagated to views).\n\nSee also: metadata, metadatakeys, metadata!, deletemetadata!, emptymetadata!, colmetadata, colmetadatakeys, colmetadata!, deletecolmetadata!.\n\nExamples\n\njulia> df = DataFrame(a=1, b=2);\n\njulia> colmetadata!(df, :a, \"name\", \"example\", style=:note);\n\njulia> collect(colmetadatakeys(df))\n1-element Vector{Pair{Symbol, Base.KeySet{String, Dict{String, Tuple{Any, Any}}}}}:\n :a => [\"name\"]\n\njulia> colmetadatakeys(df, :a)\nKeySet for a Dict{String, Tuple{Any, Any}} with 1 entry. 
Keys:\n  \"name\"\n\njulia> colmetadata(df, :a, \"name\")\n\"example\"\n\njulia> colmetadata(df, :a, \"name\", style=true)\n(\"example\", :note)\n\njulia> emptycolmetadata!(df, :a);\n\njulia> colmetadatakeys(df)\n()\n\n\n\n\n\n","category":"function"},{"location":"man/split_apply_combine/#The-Split-Apply-Combine-Strategy","page":"Split-apply-combine","title":"The Split-Apply-Combine Strategy","text":"","category":"section"},{"location":"man/split_apply_combine/#Design-of-the-split-apply-combine-support","page":"Split-apply-combine","title":"Design of the split-apply-combine support","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Many data analysis tasks involve three steps:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"splitting a data set into groups,\napplying some functions to each of the groups,\ncombining the results.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that steps 1 and 3 of this general procedure can both be dropped, in which case we just transform a data frame without grouping it and later combining the result.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"A standardized framework for handling this sort of computation is described in the paper \"The Split-Apply-Combine Strategy for Data Analysis\", written by Hadley Wickham.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The DataFrames package supports the split-apply-combine strategy through the groupby function that creates a GroupedDataFrame, followed by combine, select/select! 
or transform/transform!.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All operations described in this section of the manual are supported both for AbstractDataFrame (when split and combine steps are skipped) and GroupedDataFrame. Technically, AbstractDataFrame is just considered as being grouped on no columns (meaning it has a single group, or zero groups if it is empty). The only difference is that in this case the keepkeys and ungroup keyword arguments (described below) are not supported and a data frame is always returned, as there are no split and combine steps in this case.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"In order to perform operations by groups you first need to create a GroupedDataFrame object from your data frame using the groupby function that takes two arguments: (1) a data frame to be grouped, and (2) a set of columns to group by.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Operations can then be applied on each group using one of the following functions:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"combine: does not put restrictions on number of rows returned per group; the returned values are vertically concatenated following order of groups in GroupedDataFrame; it is typically used to compute summary statistics by group; for GroupedDataFrame if grouping columns are kept they are put as first columns in the result;\nselect: return a data frame with the number and order of rows exactly the same as the source data frame, including only new calculated columns; select! 
is an in-place version of select;\ntransform: return a data frame with the number and order of rows exactly the same as the source data frame, including all columns from the source and new calculated columns; transform! is an in-place version of transform; existing columns in the source data frame are put as first columns in the result;","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As a special case, if a GroupedDataFrame that has zero groups is passed then the result of the operation is determined by performing a single call to the transformation function with a 0-row argument passed to it. The output of this operation is only used to identify the number and type of produced columns, but the result has zero rows.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All these functions take a specification of one or more functions to apply to each subset of the DataFrame. This specification can be of the following forms:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"standard column selectors (integers, Symbols, strings, vectors of integers, vectors of Symbols, vectors of strings, All, Cols, :, Between, Not and regular expressions)\na cols => function pair indicating that function should be called with positional arguments holding columns cols, which can be any valid column selector; in this case target column name is automatically generated and it is assumed that function returns a single value or a vector; the generated name is created by concatenating source column name and function name by default (see examples below).\na cols => function => target_cols form additionally explicitly specifying the target column or columns, which must be a single name (as a Symbol or a string), a vector of names or AsTable. 
Additionally it can be a Function which takes a string or a vector of strings as an argument containing names of columns selected by cols, and returns the target columns names (all accepted types except AsTable are allowed).\na col => target_cols pair, which renames the column col to target_cols, which must be single name (as a Symbol or a string), a vector of names or AsTable.\ncolumn-independent operations function => target_cols or just function for specific functions where the input columns are omitted; without target_cols the new column has the same name as function, otherwise it must be single name (as a Symbol or a string). Supported functions are:\nnrow to efficiently compute the number of rows in each group.\nproprow to efficiently compute the proportion of rows in each group.\neachindex to return a vector holding the number of each row within each group.\ngroupindices to return the group number.\nvectors or matrices containing transformations specified by the Pair syntax described in points 2 to 5\na function which will be called with a SubDataFrame corresponding to each group if a GroupedDataFrame is processed, or with the data frame itself if an AbstractDataFrame is processed; this form should be avoided due to its poor performance unless the number of groups is small or a very large number of columns are processed (in which case SubDataFrame avoids excessive compilation)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note! If the expression of the form x => y is passed then except for the special convenience form nrow => target_cols it is always interpreted as cols => function. In particular the following expression function => target_cols is not a valid transformation specification.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note! 
If cols or target_cols are one of All, Cols, Between, or Not, broadcasting using .=> is supported and is equivalent to broadcasting the result of names(df, cols) or names(df, target_cols). This behaves as if broadcasting happened after replacing the selector with selected column names within the data frame scope.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All functions have two types of signatures. One of them takes a GroupedDataFrame as the first argument and an arbitrary number of transformations described above as following arguments. The second type of signature is when a Function or a Type is passed as the first argument and a GroupedDataFrame as the second argument (similar to map).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As a special rule, with the cols => function and cols => function => target_cols syntaxes, if cols is wrapped in an AsTable object then a NamedTuple containing columns selected by cols is passed to function.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"What is allowed for function to return is determined by the target_cols value:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If both cols and target_cols are omitted (so only a function is passed), then returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow or a DataFrameRow will produce multiple columns in the result. Returning any other value produces a single column.\nIf target_cols is a Symbol or a string then the function is assumed to return a single column. 
In this case returning a data frame, a matrix, a NamedTuple, a Tables.AbstractRow, or a DataFrameRow raises an error.\nIf target_cols is a vector of Symbols or strings or AsTable it is assumed that function returns multiple columns. If function returns one of AbstractDataFrame, NamedTuple, DataFrameRow, Tables.AbstractRow, AbstractMatrix then rules described in point 1 above apply. If function returns an AbstractVector then each element of this vector must support the keys function, which must return a collection of Symbols, strings or integers; the return value of keys must be identical for all elements. Then as many columns are created as there are elements in the return value of the keys function. If target_cols is AsTable then their names are set to be equal to the key names except if keys returns integers, in which case they are prefixed by x (so the column names are e.g. x1, x2, ...). If target_cols is a vector of Symbols or strings then column names produced using the rules above are ignored and replaced by target_cols (the number of columns must be the same as the length of target_cols in this case). If function returns a value of any other type then it is assumed that it is a table conforming to the Tables.jl API and the Tables.columntable function is called on it to get the resulting columns and their names. The names are retained when target_cols is AsTable and are replaced if target_cols is a vector of Symbols or strings.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"In all of these cases, function can return either a single row or multiple rows. As a particular rule, values wrapped in a Ref or a 0-dimensional AbstractArray are unwrapped and then treated as a single row.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"select/select! and transform/transform! 
always return a data frame with the same number and order of rows as the source (even if GroupedDataFrame had its groups reordered), except when selection results in zero columns in the resulting data frame (in which case the result has zero rows).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"For combine, rows in the returned object appear in the order of groups in the GroupedDataFrame. The functions can return an arbitrary number of rows for each group, but the kind of returned object and the number and names of columns must be the same for all groups, except when a DataFrame() or NamedTuple() is returned, in which case a given group is skipped.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"It is allowed to mix single values and vectors if multiple transformations are requested. In this case single value will be repeated to match the length of columns specified by returned vectors.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"By default (threads=true) a separate task is spawned for each specified transformation; each transformation then spawns as many tasks as Julia threads, and splits processing of groups across them (however, currently transformations with optimized implementations like sum and transformations that return multiple rows use a single task for all groups). This allows for parallel operation when Julia was started with more than one thread. Passed transformation functions must therefore not modify global variables (i.e. 
they must be pure), use locks to control parallel accesses, or threads=false must be passed to disable multithreading.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To apply function to each row instead of whole columns, it can be wrapped in a ByRow struct. cols can be any column indexing syntax, in which case function will be passed one argument for each of the columns specified by cols or a NamedTuple of them if specified columns are wrapped in AsTable. If ByRow is used it is allowed for cols to select an empty set of columns, in which case function is called for each row without any arguments and an empty NamedTuple is passed if empty set of columns is wrapped in AsTable.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The following keyword arguments are supported by the transformation functions (not all keyword arguments are supported in all cases; in general they are allowed in situations when they are meaningful, see the documentation of the specific functions for details):","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"keepkeys : whether grouping columns should be kept in the returned data frame.\nungroup : whether the return value of the operation should be a data frame or a GroupedDataFrame.\ncopycols : whether columns of the source data frame should be copied if no transformation is applied to them.\nrenamecols : whether in the cols => function form automatically generated column names should include the name of transformation functions or not.\nthreads : whether transformations may be run in separate tasks which can execute in parallel","category":"page"},{"location":"man/split_apply_combine/#Examples-of-the-split-apply-combine-operations","page":"Split-apply-combine","title":"Examples of the split-apply-combine 
operations","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"We show several examples of these functions applied to the iris dataset below:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> using DataFrames, CSV, Statistics\n\njulia> path = joinpath(pkgdir(DataFrames), \"docs\", \"src\", \"assets\", \"iris.csv\");\n\njulia> iris = CSV.read(path, DataFrame)\n150×5 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n 144 │         6.8         3.2          5.9         2.3  Iris-virginica\n 145 │         6.7         3.3          5.7         2.5  Iris-virginica\n 146 │         6.7         3.0          5.2         2.3  Iris-virginica\n 147 │         6.3         2.5          5.0         1.9  Iris-virginica\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                        135 rows omitted\n\njulia> iris_gdf = 
groupby(iris, :Species)\nGroupedDataFrame with 3 groups based on key: Species\nFirst Group (50 rows): Species = \"Iris-setosa\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      46 rows omitted\n⋮\nLast Group (50 rows): Species = \"Iris-virginica\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         47 rows omitted\n\njulia> combine(iris_gdf, :PetalLength => mean)\n3×2 DataFrame\n Row │ Species          PetalLength_mean\n     │ String15         Float64\n─────┼───────────────────────────────────\n   1 │ Iris-setosa                 1.464\n   2 │ Iris-versicolor             4.26\n   3 │ Iris-virginica              5.552\n\njulia> combine(iris_gdf, nrow, proprow, groupindices)\n3×4 DataFrame\n Row │ Species          nrow   proprow   groupindices\n     │ String15         Int64  Float64   Int64\n─────┼────────────────────────────────────────────────\n   1 │ Iris-setosa         50  0.333333             1\n   2 │ Iris-versicolor     50  
0.333333             2\n   3 │ Iris-virginica      50  0.333333             3\n\njulia> combine(iris_gdf, nrow, :PetalLength => mean => :mean)\n3×3 DataFrame\n Row │ Species          nrow   mean\n     │ String15         Int64  Float64\n─────┼─────────────────────────────────\n   1 │ Iris-setosa         50    1.464\n   2 │ Iris-versicolor     50    4.26\n   3 │ Iris-virginica      50    5.552\n\njulia> combine(iris_gdf,\n               [:PetalLength, :SepalLength] =>\n               ((p, s) -> (a=mean(p)/mean(s), b=sum(p))) =>\n               AsTable) # multiple columns are passed as arguments\n3×3 DataFrame\n Row │ Species          a         b\n     │ String15         Float64   Float64\n─────┼────────────────────────────────────\n   1 │ Iris-setosa      0.292449     73.2\n   2 │ Iris-versicolor  0.717655    213.0\n   3 │ Iris-virginica   0.842744    277.6\n\njulia> combine(iris_gdf,\n               AsTable([:PetalLength, :SepalLength]) =>\n               x -> std(x.PetalLength) / std(x.SepalLength)) # passing a NamedTuple\n3×2 DataFrame\n Row │ Species          PetalLength_SepalLength_function\n     │ String15         Float64\n─────┼───────────────────────────────────────────────────\n   1 │ Iris-setosa                              0.492245\n   2 │ Iris-versicolor                          0.910378\n   3 │ Iris-virginica                           0.867923\n\njulia> combine(x -> std(x.PetalLength) / std(x.SepalLength), iris_gdf) # passing a SubDataFrame\n3×2 DataFrame\n Row │ Species          x1\n     │ String15         Float64\n─────┼───────────────────────────\n   1 │ Iris-setosa      0.492245\n   2 │ Iris-versicolor  0.910378\n   3 │ Iris-virginica   0.867923\n\njulia> combine(iris_gdf, 1:2 => cor, nrow)\n3×3 DataFrame\n Row │ Species          SepalLength_SepalWidth_cor  nrow\n     │ String15         Float64                     Int64\n─────┼────────────────────────────────────────────────────\n   1 │ Iris-setosa                        0.74678      50\n   2 │ 
Iris-versicolor                    0.525911     50\n   3 │ Iris-virginica                     0.457228     50\n\njulia> combine(iris_gdf, :PetalLength => (x -> [extrema(x)]) => [:min, :max])\n3×3 DataFrame\n Row │ Species          min      max\n     │ String15         Float64  Float64\n─────┼───────────────────────────────────\n   1 │ Iris-setosa          1.0      1.9\n   2 │ Iris-versicolor      3.0      5.1\n   3 │ Iris-virginica       4.5      6.9","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To get row number for each observation within each group use the eachindex function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, eachindex)\n150×2 DataFrame\n Row │ Species         eachindex\n     │ String15        Int64\n─────┼───────────────────────────\n   1 │ Iris-setosa             1\n   2 │ Iris-setosa             2\n   3 │ Iris-setosa             3\n  ⋮  │       ⋮             ⋮\n 148 │ Iris-virginica         48\n 149 │ Iris-virginica         49\n 150 │ Iris-virginica         50\n                 144 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Contrary to combine, the select and transform functions always return a data frame with the same number and order of rows as the source. 
In the example below the return values in columns :SepalLength_SepalWidth_cor and :nrow are broadcasted to match the number of elements in each group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> select(iris_gdf, 1:2 => cor)\n150×2 DataFrame\n Row │ Species         SepalLength_SepalWidth_cor\n     │ String          Float64\n─────┼────────────────────────────────────────────\n   1 │ Iris-setosa                       0.74678\n   2 │ Iris-setosa                       0.74678\n   3 │ Iris-setosa                       0.74678\n   4 │ Iris-setosa                       0.74678\n  ⋮  │       ⋮                     ⋮\n 148 │ Iris-virginica                    0.457228\n 149 │ Iris-virginica                    0.457228\n 150 │ Iris-virginica                    0.457228\n                                  143 rows omitted\n\njulia> transform(iris_gdf, :Species => x -> chop.(x, head=5, tail=0))\n150×6 DataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species         Species_function\n     │ Float64      Float64     Float64      Float64     String          SubString…\n─────┼────────────────────────────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa     setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa     setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa     setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa     setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮                ⋮\n 148 │         6.5         3.0          5.2         2.0  Iris-virginica  virginica\n 149 │         6.2         3.4          5.4         2.3  Iris-virginica  virginica\n 150 │         5.9         3.0          5.1         1.8  Iris-virginica  virginica\n                                                                          
143 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"All functions also support the do block form. However, as noted above, this form is slow and should therefore be avoided when performance matters.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf) do df\n           (m = mean(df.PetalLength), s² = var(df.PetalLength))\n       end\n3×3 DataFrame\n Row │ Species          m        s²\n     │ String15         Float64  Float64\n─────┼─────────────────────────────────────\n   1 │ Iris-setosa        1.464  0.0301061\n   2 │ Iris-versicolor    4.26   0.220816\n   3 │ Iris-virginica     5.552  0.304588","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To apply a function to each non-grouping column of a GroupedDataFrame you can write:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, valuecols(iris_gdf) .=> mean)\n3×5 DataFrame\n Row │ Species          SepalLength_mean  SepalWidth_mean  PetalLength_mean  P ⋯\n     │ String15         Float64           Float64          Float64           F ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n   1 │ Iris-setosa                 5.006            3.418             1.464    ⋯\n   2 │ Iris-versicolor             5.936            2.77              4.26\n   3 │ Iris-virginica              6.588            2.974             5.552\n                                                                1 column omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that GroupedDataFrame is a view: therefore grouping columns of its parent data frame must not be mutated, 
and rows must not be added nor removed from it. If the number of rows of the parent changes then an error is thrown when a child GroupedDataFrame is used:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(id=1:2)\n2×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> gd = groupby(df, :id)\nGroupedDataFrame with 2 groups based on key: id\nFirst Group (1 row): id = 1\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): id = 2\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> push!(df, [3])\n3×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> gd[1]\nERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, push!, or append! functions).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Sometimes it is useful to append rows to the source data frame of a GroupedDataFrame, without affecting the rows used for grouping. 
In such a scenario you can create the grouped data frame using a view of the parent data frame to avoid the error:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(id=1:2)\n2×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n\njulia> gd = groupby(view(df, :, :), :id)\nGroupedDataFrame with 2 groups based on key: id\nFirst Group (1 row): id = 1\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n⋮\nLast Group (1 row): id = 2\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     2\n\njulia> push!(df, [3])\n3×1 DataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n\njulia> gd[1]\n1×1 SubDataFrame\n Row │ id\n     │ Int64\n─────┼───────\n   1 │     1","category":"page"},{"location":"man/split_apply_combine/#Using-GroupedDataFrame-as-an-iterable-and-indexable-object","page":"Split-apply-combine","title":"Using GroupedDataFrame as an iterable and indexable object","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you only want to split the data set into subsets, use the groupby function. 
You can then iterate SubDataFrames that constitute the identified groups:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> for subdf in iris_gdf\n           println(size(subdf, 1))\n       end\n50\n50\n50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"To also get the values of the grouping columns along with each group, use the pairs function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> for (key, subdf) in pairs(iris_gdf)\n           println(\"Number of data points for $(key.Species): $(nrow(subdf))\")\n       end\nNumber of data points for Iris-setosa: 50\nNumber of data points for Iris-versicolor: 50\nNumber of data points for Iris-virginica: 50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The value of key in the example above where we iterated pairs(iris_gdf) is a DataFrames.GroupKey object, which can be used in a similar fashion to a NamedTuple.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Grouping a data frame using the groupby function can be seen as adding a lookup key to it. Such lookups can be performed efficiently by indexing the resulting GroupedDataFrame with DataFrames.GroupKey (as it was presented above), a Tuple, a NamedTuple, or a dictionary. 
Here are some more examples of such indexing.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> iris_gdf[(Species=\"Iris-virginica\",)]  # a NamedTuple\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> iris_gdf[[(\"Iris-virginica\",), (\"Iris-setosa\",)]] # a vector of Tuples\nGroupedDataFrame with 2 groups based on key: Species\nFirst Group (50 rows): Species = \"Iris-virginica\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      
Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         46 rows omitted\n⋮\nLast Group (50 rows): Species = \"Iris-setosa\"\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      47 rows omitted\n\njulia> key = keys(iris_gdf) |> last # last key in iris_gdf\nGroupKey: (Species = String15(\"Iris-virginica\"),)\n\njulia> iris_gdf[key]\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  
Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> iris_gdf[Dict(\"Species\" => \"Iris-setosa\")] # a dictionary\n50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  44 │         5.0         3.5          1.6         0.6  Iris-setosa\n  45 │         5.1         3.8          1.9         0.4  Iris-setosa\n  46 │         4.8         3.0          1.4         0.3  Iris-setosa\n  47 │         5.1         3.8          1.6         0.2  Iris-setosa\n  48 │         4.6         3.2          1.4         0.2  
Iris-setosa\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      35 rows omitted","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that although GroupedDataFrame is iterable and indexable it is not an AbstractVector. For this reason currently it was decided that it does not support map nor broadcasting (to allow for making a decision in the future what result type they should produce). To apply a function to all groups of a data frame and get a vector of results either use a comprehension or collect GroupedDataFrame into a vector first. Here are examples of both approaches:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> sdf_vec = collect(iris_gdf)\n3-element Vector{Any}:\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────\n   1 │         5.1         3.5          1.4         0.2  Iris-setosa\n   2 │         4.9         3.0          1.4         0.2  Iris-setosa\n   3 │         4.7         3.2          1.3         0.2  Iris-setosa\n   4 │         4.6         3.1          1.5         0.2  Iris-setosa\n   5 │         5.0         3.6          1.4         0.2  Iris-setosa\n   6 │         5.4         3.9          1.7         0.4  Iris-setosa\n   7 │         4.6         3.4          1.4         0.3  Iris-setosa\n   8 │         5.0         3.4          1.5         0.2  Iris-setosa\n  ⋮  │      ⋮           ⋮            ⋮           ⋮            ⋮\n  44 │         5.0         3.5          1.6         0.6  Iris-setosa\n  45 │         5.1         3.8          1.9         0.4  Iris-setosa\n  46 │         4.8   
      3.0          1.4         0.3  Iris-setosa\n  47 │         5.1         3.8          1.6         0.2  Iris-setosa\n  48 │         4.6         3.2          1.4         0.2  Iris-setosa\n  49 │         5.3         3.7          1.5         0.2  Iris-setosa\n  50 │         5.0         3.3          1.4         0.2  Iris-setosa\n                                                      35 rows omitted\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     Float64      Float64     String15\n─────┼───────────────────────────────────────────────────────────────────\n   1 │         7.0         3.2          4.7         1.4  Iris-versicolor\n   2 │         6.4         3.2          4.5         1.5  Iris-versicolor\n   3 │         6.9         3.1          4.9         1.5  Iris-versicolor\n   4 │         5.5         2.3          4.0         1.3  Iris-versicolor\n   5 │         6.5         2.8          4.6         1.5  Iris-versicolor\n   6 │         5.7         2.8          4.5         1.3  Iris-versicolor\n   7 │         6.3         3.3          4.7         1.6  Iris-versicolor\n   8 │         4.9         2.4          3.3         1.0  Iris-versicolor\n  ⋮  │      ⋮           ⋮            ⋮           ⋮              ⋮\n  44 │         5.0         2.3          3.3         1.0  Iris-versicolor\n  45 │         5.6         2.7          4.2         1.3  Iris-versicolor\n  46 │         5.7         3.0          4.2         1.2  Iris-versicolor\n  47 │         5.7         2.9          4.2         1.3  Iris-versicolor\n  48 │         6.2         2.9          4.3         1.3  Iris-versicolor\n  49 │         5.1         2.5          3.0         1.1  Iris-versicolor\n  50 │         5.7         2.8          4.1         1.3  Iris-versicolor\n                                                          35 rows omitted\n 50×5 SubDataFrame\n Row │ SepalLength  SepalWidth  PetalLength  PetalWidth  Species\n     │ Float64      Float64     
Float64      Float64     String15\n─────┼──────────────────────────────────────────────────────────────────\n   1 │         6.3         3.3          6.0         2.5  Iris-virginica\n   2 │         5.8         2.7          5.1         1.9  Iris-virginica\n   3 │         7.1         3.0          5.9         2.1  Iris-virginica\n   4 │         6.3         2.9          5.6         1.8  Iris-virginica\n   5 │         6.5         3.0          5.8         2.2  Iris-virginica\n   6 │         7.6         3.0          6.6         2.1  Iris-virginica\n   7 │         4.9         2.5          4.5         1.7  Iris-virginica\n   8 │         7.3         2.9          6.3         1.8  Iris-virginica\n  ⋮  │      ⋮           ⋮            ⋮           ⋮             ⋮\n  44 │         6.8         3.2          5.9         2.3  Iris-virginica\n  45 │         6.7         3.3          5.7         2.5  Iris-virginica\n  46 │         6.7         3.0          5.2         2.3  Iris-virginica\n  47 │         6.3         2.5          5.0         1.9  Iris-virginica\n  48 │         6.5         3.0          5.2         2.0  Iris-virginica\n  49 │         6.2         3.4          5.4         2.3  Iris-virginica\n  50 │         5.9         3.0          5.1         1.8  Iris-virginica\n                                                         35 rows omitted\n\njulia> map(nrow, sdf_vec)\n3-element Vector{Int64}:\n 50\n 50\n 50\n\njulia> nrow.(sdf_vec)\n3-element Vector{Int64}:\n 50\n 50\n 50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Since GroupedDataFrame is iterable, you can achieve the same result with a comprehension:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> [nrow(sdf) for sdf in iris_gdf]\n3-element Vector{Int64}:\n 50\n 50\n 
50","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that using the split-apply-combine strategy with the operation specification syntax in combine, select or transform will usually be faster for large GroupedDataFrame objects than iterating them, with the difference that they produce a data frame. An operation corresponding to the example above is:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(iris_gdf, nrow)\n3×2 DataFrame\n Row │ Species          nrow\n     │ String15         Int64\n─────┼────────────────────────\n   1 │ Iris-setosa         50\n   2 │ Iris-versicolor     50\n   3 │ Iris-virginica      50","category":"page"},{"location":"man/split_apply_combine/#Simulating-the-SQL-where-clause","page":"Split-apply-combine","title":"Simulating the SQL where clause","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can conveniently work on subsets of a data frame by using SubDataFrames. Operations performed on such objects can either create a new data frame or be performed in-place. 
Here are some examples:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(a=1:5)\n5×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │     2\n   3 │     3\n   4 │     4\n   5 │     5\n\njulia> sdf = @view df[2:3, :]\n2×1 SubDataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     2\n   2 │     3\n\njulia> transform(sdf, :a => ByRow(string)) # create a new data frame\n2×2 DataFrame\n Row │ a      a_string\n     │ Int64  String\n─────┼─────────────────\n   1 │     2  2\n   2 │     3  3\n\njulia> transform!(sdf, :a => ByRow(string)) # update the source df in-place\n2×2 SubDataFrame\n Row │ a      a_string\n     │ Int64  String?\n─────┼─────────────────\n   1 │     2  2\n   2 │     3  3\n\njulia> df # new column was created filled with missing in filtered-out rows\n5×2 DataFrame\n Row │ a      a_string\n     │ Int64  String?\n─────┼─────────────────\n   1 │     1  missing\n   2 │     2  2\n   3 │     3  3\n   4 │     4  missing\n   5 │     5  missing\n\njulia> select!(sdf, :a => -, renamecols=false) # update the source df in-place\n2×1 SubDataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │    -2\n   2 │    -3\n\njulia> df # the column replaced an existing column; previously stored values are re-used in filtered-out rows\n5×1 DataFrame\n Row │ a\n     │ Int64\n─────┼───────\n   1 │     1\n   2 │    -2\n   3 │    -3\n   4 │     4\n   5 │     5","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Similar operations can be performed on GroupedDataFrame as well:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = DataFrame(a=[1, 1, 1, 2, 2, 3], b=1:6)\n6×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      1\n   2 │     1      2\n   3 │     1      3\n   
4 │     2      4\n   5 │     2      5\n   6 │     3      6\n\njulia> sdf = @view df[2:4, :]\n3×2 SubDataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     1      3\n   3 │     2      4\n\njulia> gsdf = groupby(sdf, :a)\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (2 rows): a = 1\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     1      3\n⋮\nLast Group (1 row): a = 2\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     2      4\n\njulia> transform(gsdf, nrow) # create a new data frame\n3×3 DataFrame\n Row │ a      b      nrow\n     │ Int64  Int64  Int64\n─────┼─────────────────────\n   1 │     1      2      2\n   2 │     1      3      2\n   3 │     2      4      1\n\njulia> transform!(gsdf, nrow, :b => :b_copy)\n3×4 SubDataFrame\n Row │ a      b      nrow    b_copy\n     │ Int64  Int64  Int64?  Int64?\n─────┼──────────────────────────────\n   1 │     1      2       2       2\n   2 │     1      3       2       3\n   3 │     2      4       1       4\n\njulia> df\n6×4 DataFrame\n Row │ a      b      nrow     b_copy\n     │ Int64  Int64  Int64?   Int64?\n─────┼────────────────────────────────\n   1 │     1      1  missing  missing\n   2 │     1      2        2        2\n   3 │     1      3        2        3\n   4 │     2      4        1        4\n   5 │     2      5  missing  missing\n   6 │     3      6  missing  missing\n\njulia> select!(gsdf, :b_copy, :b => sum, renamecols=false)\n3×3 SubDataFrame\n Row │ a      b_copy  b\n     │ Int64  Int64?  Int64\n─────┼──────────────────────\n   1 │     1       2      5\n   2 │     1       3      5\n   3 │     2       4      4\n\njulia> df\n6×3 DataFrame\n Row │ a      b_copy   b\n     │ Int64  Int64?   
Int64\n─────┼───────────────────────\n   1 │     1  missing      1\n   2 │     1        2      5\n   3 │     1        3      5\n   4 │     2        4      4\n   5 │     2  missing      5\n   6 │     3  missing      6","category":"page"},{"location":"man/split_apply_combine/#Column-independent-operations","page":"Split-apply-combine","title":"Column-independent operations","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The operation specification language used with combine, select and transform supports the following column-independent operations:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"getting the number of rows in a group (nrow);\ngetting the proportion of rows in a group (proprow);\ngetting the group number (groupindices);\ngetting a vector of indices within groups (eachindex).","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"These operations are column-independent, because they do not require specifying the input column name in the operation specification syntax.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"These four exceptions to the standard operation specification syntax were introduced for user convenience as these operations are often needed in practice.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Below each of them is explained by example.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"First create a data frame we will work with:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> df = 
DataFrame(customer_id=[\"a\", \"b\", \"b\", \"b\", \"c\", \"c\"],\n                      transaction_id=[12, 15, 19, 17, 13, 11],\n                      volume=[2, 3, 1, 4, 5, 9])\n6×3 DataFrame\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\n   2 │ b                        15       3\n   3 │ b                        19       1\n   4 │ b                        17       4\n   5 │ c                        13       5\n   6 │ c                        11       9\n\njulia> gdf = groupby(df, :customer_id, sort=true);\n\njulia> show(gdf, allgroups=true)\nGroupedDataFrame with 3 groups based on key: customer_id\nGroup 1 (1 row): customer_id = \"a\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\nGroup 2 (3 rows): customer_id = \"b\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ b                        15       3\n   2 │ b                        19       1\n   3 │ b                        17       4\nGroup 3 (2 rows): customer_id = \"c\"\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ c                        13       5\n   2 │ c                        11       9","category":"page"},{"location":"man/split_apply_combine/#Getting-the-number-of-rows","page":"Split-apply-combine","title":"Getting the number of rows","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can get the number of rows per group in a GroupedDataFrame by just writing nrow, in which case the generated column name with the number of rows is 
:nrow:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow)\n3×2 DataFrame\n Row │ customer_id  nrow\n     │ String       Int64\n─────┼────────────────────\n   1 │ a                1\n   2 │ b                3\n   3 │ c                2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Additionally, you are allowed to pass a target column name:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow => \"transaction_count\")\n3×2 DataFrame\n Row │ customer_id  transaction_count\n     │ String       Int64\n─────┼────────────────────────────────\n   1 │ a                            1\n   2 │ b                            3\n   3 │ c                            2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that in both cases we did not pass a source column name, as it is not needed to determine the number of rows per group. This is the reason why column-independent operations are exceptions to the standard operation specification syntax.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The nrow expression also works in the operation specification syntax applied to a data frame. 
Here is an example:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(df, nrow => \"transaction_count\")\n1×1 DataFrame\n Row │ transaction_count\n     │ Int64\n─────┼───────────────────\n   1 │                 6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Finally, recall that nrow is also a regular function that returns the number of rows in a data frame:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> nrow(df)\n6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"This dual use of nrow does not lead to ambiguities, and is meant to make it easier to remember this exception.","category":"page"},{"location":"man/split_apply_combine/#Getting-the-proportion-of-rows","page":"Split-apply-combine","title":"Getting the proportion of rows","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you want to get the proportion of rows per group in a GroupedDataFrame, you can use the proprow and proprow => [target column name] column-independent operations. 
Here are some examples:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, proprow)\n3×2 DataFrame\n Row │ customer_id  proprow\n     │ String       Float64\n─────┼───────────────────────\n   1 │ a            0.166667\n   2 │ b            0.5\n   3 │ c            0.333333\n\njulia> combine(gdf, proprow => \"transaction_fraction\")\n3×2 DataFrame\n Row │ customer_id  transaction_fraction\n     │ String       Float64\n─────┼───────────────────────────────────\n   1 │ a                        0.166667\n   2 │ b                        0.5\n   3 │ c                        0.333333","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"As opposed to nrow, proprow cannot be used outside of the operation specification syntax and is only allowed when processing a GroupedDataFrame.","category":"page"},{"location":"man/split_apply_combine/#Getting-the-group-number","page":"Split-apply-combine","title":"Getting the group number","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Another common operation is getting group number. 
Use the groupindices and groupindices => [target column name] column-independent operations to get it:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, groupindices)\n3×2 DataFrame\n Row │ customer_id  groupindices\n     │ String       Int64\n─────┼───────────────────────────\n   1 │ a                       1\n   2 │ b                       2\n   3 │ c                       3\n\njulia> transform(gdf, groupindices)\n6×4 DataFrame\n Row │ customer_id  transaction_id  volume  groupindices\n     │ String       Int64           Int64   Int64\n─────┼───────────────────────────────────────────────────\n   1 │ a                        12       2             1\n   2 │ b                        15       3             2\n   3 │ b                        19       1             2\n   4 │ b                        17       4             2\n   5 │ c                        13       5             3\n   6 │ c                        11       9             3\n\njulia> combine(gdf, groupindices => \"group_number\")\n3×2 DataFrame\n Row │ customer_id  group_number\n     │ String       Int64\n─────┼───────────────────────────\n   1 │ a                       1\n   2 │ b                       2\n   3 │ c                       3","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Outside of the operation specification syntax, groupindices is also a regular function which returns group indices for each row in the parent data frame of the passed GroupedDataFrame:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> groupindices(gdf)\n6-element Vector{Union{Missing, Int64}}:\n 1\n 2\n 2\n 2\n 3\n 3","category":"page"},{"location":"man/split_apply_combine/#Getting-a-vector-of-indices-within-groups","page":"Split-apply-combine","title":"Getting a vector 
of indices within groups","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The last column-independent operation supported by the operation specification syntax is getting the index of each row within each group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex)\n6×2 DataFrame\n Row │ customer_id  eachindex\n     │ String       Int64\n─────┼────────────────────────\n   1 │ a                    1\n   2 │ b                    1\n   3 │ b                    2\n   4 │ b                    3\n   5 │ c                    1\n   6 │ c                    2\n\njulia> select(gdf, eachindex, groupindices)\n6×3 DataFrame\n Row │ customer_id  eachindex  groupindices\n     │ String       Int64      Int64\n─────┼──────────────────────────────────────\n   1 │ a                    1             1\n   2 │ b                    1             2\n   3 │ b                    2             2\n   4 │ b                    3             2\n   5 │ c                    1             3\n   6 │ c                    2             3\n\njulia> combine(gdf, eachindex => \"transaction_number\")\n6×2 DataFrame\n Row │ customer_id  transaction_number\n     │ String       Int64\n─────┼─────────────────────────────────\n   1 │ a                             1\n   2 │ b                             1\n   3 │ b                             2\n   4 │ b                             3\n   5 │ c                             1\n   6 │ c                             2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Note that this operation also makes sense in a data frame context, where all rows are considered to be in the same 
group:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> transform(df, eachindex)\n6×4 DataFrame\n Row │ customer_id  transaction_id  volume  eachindex\n     │ String       Int64           Int64   Int64\n─────┼────────────────────────────────────────────────\n   1 │ a                        12       2          1\n   2 │ b                        15       3          2\n   3 │ b                        19       1          3\n   4 │ b                        17       4          4\n   5 │ c                        13       5          5\n   6 │ c                        11       9          6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Finally recall that eachindex is a standard function for getting all indices in an array. This similarity of functionality was the reason why this name was picked:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> collect(eachindex(df.customer_id))\n6-element Vector{Int64}:\n 1\n 2\n 3\n 4\n 5\n 6","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"This, for example, means that in the following example the two created columns have the same contents:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex, :customer_id => eachindex)\n6×3 DataFrame\n Row │ customer_id  eachindex  customer_id_eachindex\n     │ String       Int64      Int64\n─────┼───────────────────────────────────────────────\n   1 │ a                    1                      1\n   2 │ b                    1                      1\n   3 │ b                    2                      2\n   4 │ b                    3                      3\n   5 │ c                    
1                      1\n   6 │ c                    2                      2","category":"page"},{"location":"man/split_apply_combine/#Column-independent-operations-versus-functions","page":"Split-apply-combine","title":"Column-independent operations versus functions","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"When discussing column-independent operations, it is important to remember that the operation specification syntax allows you to pass a function (without source and target column names), in which case such a function gets passed a SubDataFrame that represents a group in a GroupedDataFrame. Here is an example comparing a column-independent operation and a function:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, eachindex, sdf -> axes(sdf, 1))\n6×3 DataFrame\n Row │ customer_id  eachindex  x1\n     │ String       Int64      Int64\n─────┼───────────────────────────────\n   1 │ a                    1      1\n   2 │ b                    1      1\n   3 │ b                    2      2\n   4 │ b                    3      3\n   5 │ c                    1      1\n   6 │ c                    2      2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Notice that the column-independent operation eachindex produces the same result as using the anonymous function sdf -> axes(sdf, 1) that takes a SubDataFrame as its first argument and returns indices along its first axis. 
Importantly, if it were not defined as a column-independent operation, the eachindex function would fail when applied to a SubDataFrame, as you can see here:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, sdf -> eachindex(sdf))\nERROR: MethodError: no method matching keys(::SubDataFrame{DataFrame, DataFrames.Index, Vector{Int64}})","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The reason for this error is that the eachindex function does not allow passing a SubDataFrame as its argument.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The same applies to proprow and groupindices: they would not work with a SubDataFrame as stand-alone functions.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"The nrow column-independent operation is a different case, as the nrow function accepts a SubDataFrame as an argument:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> combine(gdf, nrow, sdf -> nrow(sdf))\n3×3 DataFrame\n Row │ customer_id  nrow   x1\n     │ String       Int64  Int64\n─────┼───────────────────────────\n   1 │ a                1      1\n   2 │ b                3      3\n   3 │ c                2      2","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Notice that columns :nrow and :x1 have identical contents, but the difference is that they do not have the same names. nrow is a column-independent operation generating the :nrow column name by default with the number of rows per group. 
On the other hand, the sdf -> nrow(sdf) anonymous function gets a SubDataFrame as its argument and returns its number of rows. The :x1 column name is the default auto-generated column name when processing anonymous functions.","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Passing a function taking a SubDataFrame is a flexible functionality allowing you to perform complex operations on your data. However, you should bear in mind two aspects:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"Using the full operation specification syntax (where source and target column names are passed) or column-independent operations will lead to faster execution of your code (as the Julia compiler is able to better optimize execution of such operations) in comparison to passing a function taking a SubDataFrame.\nAlthough writing nrow, proprow, groupindices, and eachindex looks like just passing a function, they internally do not take a SubDataFrame as their argument. As we explained in this section, proprow, groupindices, and eachindex would not work with SubDataFrame as their argument, and nrow would work, but would produce a different column name. Instead, these four operations are special column-independent operations that are exceptions to the standard operation specification syntax rules. They were added for user convenience.","category":"page"},{"location":"man/split_apply_combine/#Specifying-group-order-in-groupby","page":"Split-apply-combine","title":"Specifying group order in groupby","text":"","category":"section"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"By default, the order of groups produced by groupby is undefined. 
If you want the order of groups to follow the order of first appearance in the source data frame of a grouping key then pass the sort=false keyword argument to groupby:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> push!(df, [\"a\", 100, 100]) # push row with large integer values to disable default sorting\n7×3 DataFrame\n Row │ customer_id  transaction_id  volume\n     │ String       Int64           Int64\n─────┼─────────────────────────────────────\n   1 │ a                        12       2\n   2 │ b                        15       3\n   3 │ b                        19       1\n   4 │ b                        17       4\n   5 │ c                        13       5\n   6 │ c                        11       9\n   7 │ a                       100     100\n\njulia> keys(groupby(df, :volume))\n7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (volume = 2,)\n GroupKey: (volume = 3,)\n GroupKey: (volume = 1,)\n GroupKey: (volume = 4,)\n GroupKey: (volume = 5,)\n GroupKey: (volume = 9,)\n GroupKey: (volume = 100,)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"If you want to have them sorted in ascending order pass sort=true:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> keys(groupby(df, :volume, sort=true))\n7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (volume = 1,)\n GroupKey: (volume = 2,)\n GroupKey: (volume = 3,)\n GroupKey: (volume = 4,)\n GroupKey: (volume = 5,)\n GroupKey: (volume = 9,)\n GroupKey: (volume = 100,)","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"You can also use the order wrapper when passing a column name to group by or pass a named tuple as sort keyword argument 
containing one or more of alg, lt, by, rev, and order fields that will be treated just like in sortperm:","category":"page"},{"location":"man/split_apply_combine/","page":"Split-apply-combine","title":"Split-apply-combine","text":"julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)]))\n6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (customer_id = \"a\", volume = 2)\n GroupKey: (customer_id = \"b\", volume = 4)\n GroupKey: (customer_id = \"b\", volume = 3)\n GroupKey: (customer_id = \"b\", volume = 1)\n GroupKey: (customer_id = \"c\", volume = 9)\n GroupKey: (customer_id = \"c\", volume = 5)\n\njulia> keys(groupby(df, :customer_id, sort=(rev=true,)))\n3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:\n GroupKey: (customer_id = \"c\",)\n GroupKey: (customer_id = \"b\",)\n GroupKey: (customer_id = \"a\",)","category":"page"},{"location":"man/getting_started/#Getting-Started","page":"Getting Started","title":"Getting Started","text":"","category":"section"},{"location":"man/getting_started/#Installation","page":"Getting Started","title":"Installation","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The DataFrames package is available through the Julia package system and can be installed using the following commands:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"using Pkg\nPkg.add(\"DataFrames\")","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Throughout the rest of this tutorial, we will assume that you have installed the DataFrames package and have already typed using DataFrames to bring all of the relevant variables into your current namespace.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"note: Note\nBy default DataFrames.jl limits the number 
of rows and columns when displaying a data frame in a Jupyter Notebook to 25 and 100, respectively. You can override this behavior by changing the values of the ENV[\"DATAFRAMES_COLUMNS\"] and ENV[\"DATAFRAMES_ROWS\"] variables to hold the maximum number of columns and rows of the output. All columns or rows will be printed if those numbers are equal to or lower than 0. Alternatively, you may want to set the maximum number of data frame rows to print to 100 and the maximum number of columns to print to 1000 for every Julia session using some Jupyter kernel file (numbers 100 and 1000 are only examples and can be adjusted). In such a case add a \"DATAFRAMES_COLUMNS\": \"1000\", \"DATAFRAMES_ROWS\": \"100\" entry to the \"env\" variable in this Jupyter kernel file. See here for information about location and specification of Jupyter kernels. The package PrettyTables.jl renders the DataFrame in the Jupyter notebook. Users can customize the output by passing keyword arguments kwargs... to the function show: show(stdout, MIME(\"text/html\"), df; kwargs...), where df is the DataFrame. Any argument supported by PrettyTables.jl in the HTML backend can be used here. Hence, for example, if the user wants to change the color of all numbers smaller than 0 to red in Jupyter, they can execute: show(stdout, MIME(\"text/html\"), df; highlighters = hl_lt(0, HtmlDecoration(color = \"red\"))) after using PrettyTables. For more information about the available options, check the PrettyTables.jl documentation.","category":"page"},{"location":"man/getting_started/#The-DataFrame-Type","page":"Getting Started","title":"The DataFrame Type","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Objects of the DataFrame type represent a data table as a series of vectors, each corresponding to a column or variable. 
The simplest way of constructing a DataFrame is to pass column vectors using keyword arguments or pairs:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> using DataFrames\n\njulia> DataFrame(a=1:4, b=[\"M\", \"F\", \"F\", \"M\"]) # keyword argument constructor\n4×2 DataFrame\n Row │ a      b\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Here are examples of other commonly used ways to construct a data frame:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> DataFrame((a=[1, 2], b=[3, 4])) # Tables.jl table constructor from a named tuple of vectors\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      3\n   2 │     2      4\n\njulia> DataFrame([(a=1, b=0), (a=2, b=0)]) # Tables.jl table constructor from a vector of named tuples\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(\"a\" => 1:2, \"b\" => 0) # Pair constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([:a => 1:2, :b => 0]) # vector of Pairs constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame(Dict(:a => 1:2, :b => 0)) # dictionary constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([[1, 2], [0, 0]], [:a, :b]) # vector of vectors constructor\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0\n\njulia> DataFrame([1 0; 
2 0], :auto) # matrix constructor\n2×2 DataFrame\n Row │ x1     x2\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      0\n   2 │     2      0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Columns can be directly (i.e. without copying) extracted using df.col, df.\"col\", df[!, :col] or df[!, \"col\"] (this rule applies to getting data from a data frame, not writing data to a data frame). The two latter syntaxes are more flexible as they allow passing a variable holding the name of the column, and not only a literal name. Note that column names can be either symbols (written as :col, :var\"col\" or Symbol(\"col\")) or strings (written as \"col\"). In the forms df.\"col\" and :var\"col\" variable interpolation into a string using $ does not work. Columns can also be extracted using an integer index specifying their position.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Since df[!, :col] does not make a copy, changing the elements of the column vector returned by this syntax will affect the values stored in the original df. 
To get a copy of the column use df[:, :col]: changing the vector returned by this syntax does not change df.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame(A=1:4, B=[\"M\", \"F\", \"F\", \"M\"])\n4×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  F\n   3 │     3  F\n   4 │     4  M\n\njulia> df.A\n4-element Vector{Int64}:\n 1\n 2\n 3\n 4\n\njulia> df.\"A\"\n4-element Vector{Int64}:\n 1\n 2\n 3\n 4\n\njulia> df.A === df[!, :A]\ntrue\n\njulia> df.A === df[:, :A]\nfalse\n\njulia> df.A == df[:, :A]\ntrue\n\njulia> df.A === df[!, \"A\"]\ntrue\n\njulia> df.A === df[:, \"A\"]\nfalse\n\njulia> df.A == df[:, \"A\"]\ntrue\n\njulia> df.A === df[!, 1]\ntrue\n\njulia> df.A === df[:, 1]\nfalse\n\njulia> df.A == df[:, 1]\ntrue\n\njulia> firstcolumn = :A\n:A\n\njulia> df[!, firstcolumn] === df.A\ntrue\n\njulia> df[:, firstcolumn] === df.A\nfalse\n\njulia> df[:, firstcolumn] == df.A\ntrue","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Column names can be obtained as strings using the names function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> names(df)\n2-element Vector{String}:\n \"A\"\n \"B\"","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also filter column names by passing a column selector condition as a second argument. See the names docstring for a detailed list of available conditions. 
Here we give some selected examples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> names(df, r\"A\") # a regular expression selector\n1-element Vector{String}:\n \"A\"\n\njulia> names(df, Int) # a selector using column element type\n1-element Vector{String}:\n \"A\"\n\njulia> names(df, Not(:B)) # selector keeping all columns except :B\n1-element Vector{String}:\n \"A\"","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"To get column names as Symbols use the propertynames function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> propertynames(df)\n2-element Vector{Symbol}:\n :A\n :B","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"note: Note\nDataFrames.jl allows to use Symbols (like :A) and strings (like \"A\") for all column indexing operations for convenience. 
However, using Symbols is slightly faster and should generally be preferred, if not generating them via string manipulation.","category":"page"},{"location":"man/getting_started/#Constructing-Column-by-Column","page":"Getting Started","title":"Constructing Column by Column","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"It is also possible to start with an empty DataFrame and add columns to it one by one:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame()\n0×0 DataFrame\n\njulia> df.A = 1:8\n1:8\n\njulia> df[:, :B] = [\"M\", \"F\", \"F\", \"M\", \"F\", \"M\", \"M\", \"F\"]\n8-element Vector{String}:\n \"M\"\n \"F\"\n \"F\"\n \"M\"\n \"F\"\n \"M\"\n \"M\"\n \"F\"\n\njulia> df[!, :C] .= 0\n8-element Vector{Int64}:\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n\njulia> df\n8×3 DataFrame\n Row │ A      B       C\n     │ Int64  String  Int64\n─────┼──────────────────────\n   1 │     1  M           0\n   2 │     2  F           0\n   3 │     3  F           0\n   4 │     4  M           0\n   5 │     5  F           0\n   6 │     6  M           0\n   7 │     7  M           0\n   8 │     8  F           0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The DataFrame we build in this way has 8 rows and 3 columns. 
This can be checked using the size function:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> size(df, 1)\n8\n\njulia> size(df, 2)\n3\n\njulia> size(df)\n(8, 3)","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"In the above example notice that the df[!, :C] .= 0 expression created a new column in the data frame by broadcasting a scalar.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"When setting a column of a data frame the df[!, :C] and df.C syntaxes are equivalent and they would replace (or create) the :C column in df. This is different from using df[:, :C] to set a column in a data frame, which updates the contents of column in-place if it already exists.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Here is an example showing this difference. 
Let us try changing the :B column to a binary variable.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df[:, :B] = df.B .== \"F\"\nERROR: MethodError: Cannot `convert` an object of type Bool to an object of type String\n\njulia> df[:, :B] .= df.B .== \"F\"\nERROR: MethodError: Cannot `convert` an object of type Bool to an object of type String","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"The above operations did not work because when you use : as row selector the :B column is updated in-place, and it only supports storing strings.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"On the other hand the following works:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df.B = df.B .== \"F\"\n8-element BitVector:\n 0\n 1\n 1\n 0\n 1\n 0\n 0\n 1\n\njulia> df\n8×3 DataFrame\n Row │ A      B      C\n     │ Int64  Bool   Int64\n─────┼─────────────────────\n   1 │     1  false      0\n   2 │     2   true      0\n   3 │     3   true      0\n   4 │     4  false      0\n   5 │     5   true      0\n   6 │     6  false      0\n   7 │     7  false      0\n   8 │     8   true      0","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"As you can see, because we used df.B on the left-hand side of the assignment, the :B column was replaced. 
The same effect would be achieved if we used df[!, :B] instead or if we used broadcasted assignment .=.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"In the Indexing section of the manual you can find all details about all the available indexing options.","category":"page"},{"location":"man/getting_started/#Constructing-Row-by-Row","page":"Getting Started","title":"Constructing Row by Row","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"It is also possible to fill a DataFrame row by row. Let us construct an empty data frame with two columns (note that the first column can only contain integers and the second one can only contain strings):","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> df = DataFrame(A=Int[], B=String[])\n0×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┴───────────────","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Rows can then be added as tuples or vectors, where the order of elements matches that of columns. 
To add new rows at the end of a data frame use push!:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> push!(df, (1, \"M\"))\n1×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n\njulia> push!(df, [2, \"N\"])\n2×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  N","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Rows can also be added as Dicts, where the dictionary keys match the column names:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> push!(df, Dict(:B => \"F\", :A => 3))\n3×2 DataFrame\n Row │ A      B\n     │ Int64  String\n─────┼───────────────\n   1 │     1  M\n   2 │     2  N\n   3 │     3  F","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"Note that constructing a DataFrame row by row is significantly less performant than constructing it all at once, or column by column. For many use-cases this will not matter, but for very large DataFrames  this may be a consideration.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"If you want to add rows at the beginning of a data frame use pushfirst! and to insert a row in an arbitrary location use insert!.","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also add whole tables to a data frame using the append! and prepend! 
functions.","category":"page"},{"location":"man/getting_started/#Constructing-from-another-table-type","page":"Getting Started","title":"Constructing from another table type","text":"","category":"section"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"DataFrames supports the Tables.jl interface for interacting with tabular data. This means that a DataFrame can be used as a \"source\" to any package that expects a Tables.jl interface input (file format packages, data manipulation packages, etc.). A DataFrame can also be a sink for any Tables.jl interface input. Some example uses are:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"df = DataFrame(a=[1, 2, 3], b=[:a, :b, :c])\n\n# write DataFrame out to CSV file\nCSV.write(\"dataframe.csv\", df)\n\n# store DataFrame in an SQLite database table\nSQLite.load!(df, db, \"dataframe_table\")\n\n# transform a DataFrame through Query.jl package\ndf = df |> @map({a=_.a + 1, _.b}) |> DataFrame","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"A particularly common case of a collection that supports the Tables.jl interface is a vector of NamedTuples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> v = [(a=1, b=2), (a=3, b=4)]\n2-element Vector{@NamedTuple{a::Int64, b::Int64}}:\n (a = 1, b = 2)\n (a = 3, b = 4)\n\njulia> df = DataFrame(v)\n2×2 DataFrame\n Row │ a      b\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      2\n   2 │     3      4","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"You can also easily convert a data frame back to a vector of NamedTuples:","category":"page"},{"location":"man/getting_started/","page":"Getting Started","title":"Getting Started","text":"julia> using Tables\n\njulia> 
Tables.rowtable(df)\n2-element Vector{@NamedTuple{a::Int64, b::Int64}}:\n (a = 1, b = 2)\n (a = 3, b = 4)","category":"page"},{"location":"man/missing/#Missing-Data","page":"Missing Data","title":"Missing Data","text":"","category":"section"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"In Julia, missing values in data are represented using the special object missing, which is the single instance of the type Missing.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> missing\nmissing\n\njulia> typeof(missing)\nMissing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The Missing type lets users create vectors and DataFrame columns with missing values. Here we create a vector with a missing value and the element-type of the returned vector is Union{Missing, Int64}.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> x = [1, 2, missing]\n3-element Vector{Union{Missing, Int64}}:\n 1\n 2\n  missing\n\njulia> eltype(x)\nUnion{Missing, Int64}\n\njulia> Union{Missing, Int}\nUnion{Missing, Int64}\n\njulia> eltype(x) == Union{Missing, Int}\ntrue","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"missing values can be excluded when performing operations by using skipmissing, which returns a memory-efficient iterator.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> skipmissing(x)\nskipmissing(Union{Missing, Int64}[1, 2, missing])","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The output of skipmissing can be passed directly into functions as an argument. 
For example, we can find the sum of all non-missing values or collect the non-missing values into a new missing-free vector.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> sum(skipmissing(x))\n3\n\njulia> collect(skipmissing(x))\n2-element Vector{Int64}:\n 1\n 2","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function coalesce can be used to replace missing values with another value (note the dot, indicating that the replacement should be applied to all entries in x):","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> coalesce.(x, 0)\n3-element Vector{Int64}:\n 1\n 2\n 0","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The functions dropmissing and dropmissing! can be used to remove the rows containing missing values from a data frame and either create a new DataFrame or mutate the original in-place respectively.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> using DataFrames\n\njulia> df = DataFrame(i=1:5,\n                      x=[missing, 4, missing, 2, 1],\n                      y=[missing, missing, \"c\", \"d\", \"e\"])\n5×3 DataFrame\n Row │ i      x        y\n     │ Int64  Int64?   
String?\n─────┼─────────────────────────\n   1 │     1  missing  missing\n   2 │     2        4  missing\n   3 │     3  missing  c\n   4 │     4        2  d\n   5 │     5        1  e\n\njulia> dropmissing(df)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"One can specify the column(s) in which to search for rows containing missing values to be removed.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> dropmissing(df, :x)\n3×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String?\n─────┼───────────────────────\n   1 │     2      4  missing\n   2 │     4      2  d\n   3 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"By default the dropmissing and dropmissing! functions keep the Union{T, Missing} element type in columns selected for row removal. To remove the Missing part, if present, set the disallowmissing keyword argument to true (it will become the default behavior in the future).","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> dropmissing(df, disallowmissing=true)\n2×3 DataFrame\n Row │ i      x      y\n     │ Int64  Int64  String\n─────┼──────────────────────\n   1 │     4      2  d\n   2 │     5      1  e","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Sometimes it is useful to allow or disallow support of missing values in some columns of a data frame. These operations are supported by the allowmissing, allowmissing!, disallowmissing, and disallowmissing! functions. 
Here is an example:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> df = DataFrame(x=1:3, y=4:6)\n3×2 DataFrame\n Row │ x      y\n     │ Int64  Int64\n─────┼──────────────\n   1 │     1      4\n   2 │     2      5\n   3 │     3      6\n\njulia> allowmissing!(df)\n3×2 DataFrame\n Row │ x       y\n     │ Int64?  Int64?\n─────┼────────────────\n   1 │      1       4\n   2 │      2       5\n   3 │      3       6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Now df allows missing values in all its columns. We can take advantage of this fact and set some of the values in df to missing, e.g.:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> df[1, 1] = missing\nmissing\n\njulia> df\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64?\n─────┼─────────────────\n   1 │ missing       4\n   2 │       2       5\n   3 │       3       6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Note that a column selector can be passed as the second positional argument to allowmissing and allowmissing! to restrict the change to only some columns in our data frame.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"Now let us perform the reverse operation by disallowing missing values in df. We know that column :y does not contain missing values so we can use the disallowmissing function passing a column selector as the second positional argument:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> disallowmissing(df, :y)\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   
Int64\n─────┼────────────────\n   1 │ missing      4\n   2 │       2      5\n   3 │       3      6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"This operation created a new DataFrame. If we wanted to update the df in-place the disallowmissing! function should be used.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"If we tried to disallow missings in the whole data frame using disallowmissing(df) we would get an error. However, it is often useful to disallow missings in all columns that actually do not contain them but keep the columns that have some missing values unchanged without having to list them explicitly. This can be accomplished by passing the error=false keyword argument:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> disallowmissing(df, error=false)\n3×2 DataFrame\n Row │ x        y\n     │ Int64?   Int64\n─────┼────────────────\n   1 │ missing      4\n   2 │       2      5\n   3 │       3      6","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The Missings.jl package provides a few convenience functions to work with missing values.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"One of the most commonly used is passmissing. It is a higher order function that takes some function f as its argument and returns a new function which returns missing if any of its positional arguments are missing and otherwise applies the function f to these arguments. This functionality is useful in combination with functions that do not support passing missing values as their arguments. 
For example, trying uppercase(missing) would produce an error, while the following works:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> passmissing(uppercase)(\"a\")\n\"A\"\n\njulia> passmissing(uppercase)(missing)\nmissing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function Missings.replace returns an iterator which replaces missing elements with another value:","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> using Missings\n\njulia> Missings.replace(x, 1)\nMissings.EachReplaceMissing{Vector{Union{Missing, Int64}}, Int64}(Union{Missing, Int64}[1, 2, missing], 1)\n\njulia> collect(Missings.replace(x, 1))\n3-element Vector{Int64}:\n 1\n 2\n 1\n\njulia> collect(Missings.replace(x, 1)) == coalesce.(x, 1)\ntrue","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The function nonmissingtype returns the element-type T in Union{T, Missing}.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> eltype(x)\nUnion{Missing, Int64}\n\njulia> nonmissingtype(eltype(x))\nInt64","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"The missings function constructs Vectors and Arrays supporting missing values, using the optional first argument to specify the element-type.","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing Data","text":"julia> missings(1)\n1-element Vector{Missing}:\n missing\n\njulia> missings(3)\n3-element Vector{Missing}:\n missing\n missing\n missing\n\njulia> missings(1, 3)\n1×3 Matrix{Missing}:\n missing  missing  missing\n\njulia> missings(Int, 1, 3)\n1×3 Matrix{Union{Missing, Int64}}:\n missing  missing  missing","category":"page"},{"location":"man/missing/","page":"Missing Data","title":"Missing 
Data","text":"See the Julia manual for more information about missing values.","category":"page"},{"location":"#DataFrames.jl","page":"Introduction","title":"DataFrames.jl","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Welcome to the DataFrames.jl documentation!","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"This resource aims to teach you everything you need to know to get up and running with tabular data manipulation using the DataFrames.jl package.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"For more illustrations of DataFrames.jl usage, in particular in conjunction with other packages, you can check out the following resources (they are kept up to date with the released version of DataFrames.jl):","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl: Flexible and Fast Tabular Data in Julia article published in the Journal of Statistical Software\nData Wrangling with DataFrames.jl Cheat Sheet\nDataFrames Tutorial using Jupyter Notebooks\nJulia Academy DataFrames.jl tutorial\nJuliaCon 2023, JuliaCon 2022, JuliaCon 2021, JuliaCon 2020, JuliaCon 2019, ODSC Europe 2021 tutorials, and PyData Global 2020\nDataFrames.jl showcase","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"If you prefer to learn DataFrames.jl from a book you can consider reading:","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Julia for Data Analysis;\nJulia Data Science.","category":"page"},{"location":"#What-is-DataFrames.jl?","page":"Introduction","title":"What is DataFrames.jl?","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl provides a set of tools for working with tabular data in Julia. 
Its design and functionality are similar to those of pandas (in Python) and data.frame, data.table and dplyr (in R), making it a great general purpose data science tool.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl plays a central role in the Julia Data ecosystem, and has tight integrations with a range of different libraries. DataFrames.jl isn't the only tool for working with tabular data in Julia – as noted below, there are some other great libraries for certain use-cases – but it provides great data wrangling functionality through a familiar interface.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"To understand the toolchain in more detail, have a look at the tutorials in this manual. New users can start with the First Steps with DataFrames.jl section.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"You may find the DataFramesMeta.jl package or one of the other convenience packages discussed in the Data manipulation frameworks section of this manual helpful when writing more advanced data transformations, especially if you do not have significant programming experience. These packages provide convenience syntax similar to dplyr in R.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"If you use metadata when working with DataFrames.jl you might find the TableMetadataTools.jl package useful. 
This package defines several convenience functions for performing typical metadata operations.","category":"page"},{"location":"#DataFrames.jl-and-the-Julia-Data-Ecosystem","page":"Introduction","title":"DataFrames.jl and the Julia Data Ecosystem","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"The Julia data ecosystem can be a difficult space for new users to navigate, in part because the Julia ecosystem tends to distribute functionality across different libraries more than some other languages. Because many people coming to DataFrames.jl are just starting to explore the Julia data ecosystem, below is a list of well-supported libraries that provide different data science tools, along with a few notes about what makes each library special, and how well integrated they are with DataFrames.jl.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Statistics\nStatsKit.jl: A convenience meta-package which loads a set of essential packages for statistics, including those mentioned below in this section and DataFrames.jl itself.\nStatistics: The Julia standard library comes with a wide range of statistics functionality, but to gain access to these functions you must call using Statistics.\nLinearAlgebra: Like Statistics, many linear algebra features (factorizations, inversions, etc.) live in a library you have to load to use.\nSparseArrays are also in the standard library but must be loaded to be used.\nFreqTables.jl: Create frequency tables / cross-tabulations. Tightly integrated with DataFrames.jl.\nHypothesisTests.jl: A range of hypothesis testing tools.\nGLM.jl: Tools for estimating linear and generalized linear models. Tightly integrated with DataFrames.jl.\nStatsModels.jl: For converting heterogeneous DataFrame into homogeneous matrices for use with linear algebra libraries or machine learning applications that don't directly support DataFrames. 
Will do things like convert categorical variables into indicators/one-hot-encodings, create interaction terms, etc.\nMultivariateStats.jl: linear regression, ridge regression, PCA, component analyses tools. Not well integrated with DataFrames.jl, but easily used in combination with StatsModels.\nMachine Learning\nMLJ.jl: if you're more of an applied user, there is a single package that pulls from all these different libraries and provides a single, scikit-learn inspired API: MLJ.jl. MLJ.jl provides a common interface for a wide range of machine learning algorithms.\nScikitLearn.jl: A Julia wrapper around the full Python scikit-learn machine learning library. Not well integrated with DataFrames.jl, but can be combined using StatsModels.jl.\nAutoMLPipeline: A package that makes it trivial to create complex ML pipeline structures using simple expressions. It leverages the built-in macro programming features of Julia to symbolically process and manipulate pipeline expressions, and makes it easy to discover optimal structures for machine learning regression and classification.\nDeep learning: KNet.jl and Flux.jl.\nPlotting\nPlots.jl: Powerful, modern plotting library with a syntax akin to that of matplotlib (in Python) or plot (in R). 
StatsPlots.jl provides Plots.jl with recipes for many standard statistical plots.\nGadfly.jl: High-level plotting library with a \"grammar of graphics\" syntax akin to that of ggplot (in R).\nAlgebraOfGraphics.jl: A \"grammar of graphics\" library built upon Makie.jl.\nVegaLite.jl: High-level plotting library that uses a different \"grammar of graphics\" syntax and has an emphasis on interactive graphics.\nData Wrangling:\nImpute.jl: various methods for handling missing data in vectors, matrices and tables.\nDataFramesMeta.jl: A range of convenience functions for DataFrames.jl that augment select and transform to provide a user experience similar to that provided by dplyr in R.\nDataFrameMacros.jl: Provides macro versions of the common DataFrames.jl functions similar to DataFramesMeta.jl, with convenient syntax for the manipulation of multiple columns at once.\nQuery.jl: Query.jl provides a single framework for data wrangling that works with a range of libraries, including DataFrames.jl, other tabular data libraries (more on those below), and even non-tabular data. Provides many convenience functions analogous to those in dplyr in R or LINQ.\nYou can find more information on these packages in the Data manipulation frameworks section of this manual.\nAnd More!\nGraphs.jl: A pure-Julia, high performance network analysis library. 
Edgelists in DataFrames can be easily converted into graphs using the GraphDataFrameBridge.jl package.\nIO:\nDataFrames.jl works well with a range of formats, including:\nCSV files (using CSV.jl),\nApache Arrow (using Arrow.jl),\nreading Stata, SAS and SPSS files (using ReadStatTables.jl; alternatively Queryverse users can choose StatFiles.jl),\nParquet files (using Parquet2.jl),\nreading R data files (.rda, .RData) (using RData.jl).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"While not all of these libraries are tightly integrated with DataFrames.jl, because DataFrames are essentially collections of aligned Julia vectors, it is easy to (a) pull out a vector for use with a non-DataFrames-integrated library, or (b) convert your table into a homogeneously-typed matrix using the Matrix constructor or StatsModels.jl.","category":"page"},{"location":"#Other-Julia-Tabular-Libraries","page":"Introduction","title":"Other Julia Tabular Libraries","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"DataFrames.jl is a great general purpose tool for data manipulation and wrangling, but it's not ideal for all applications. For users with more specialized needs, consider using:","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"TypedTables.jl: Type-stable heterogeneous tables. 
Useful for improved performance when the structure of your table is relatively stable and does not feature thousands of columns.\nJuliaDB.jl: For users working with data that is too large to fit in memory, we suggest JuliaDB.jl, which offers better performance for large datasets, and can handle out-of-core data manipulations (Python users can think of JuliaDB.jl as the Julia version of dask).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Note that most tabular data libraries in the Julia ecosystem (including DataFrames.jl) support a common interface (defined in the Tables.jl package). As a result, some libraries are capable of working with a range of tabular data structures, making it easy to move between tabular libraries as your needs change. A user of Query.jl, for example, can use the same code to manipulate data in a DataFrame, a Table (defined by TypedTables.jl), or a JuliaDB table.","category":"page"},{"location":"#Questions?","page":"Introduction","title":"Questions?","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"If there is something you expect DataFrames to be capable of, but cannot figure out how to do, please reach out with questions in Domains/Data on Discourse. 
Additionally you might want to listen to an introduction to DataFrames.jl on JuliaAcademy.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please report bugs by opening an issue.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"You can follow the source links throughout the documentation to jump right to the source files on GitHub to make pull requests for improving the documentation and function capabilities.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please review DataFrames contributing guidelines before submitting your first PR!","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Information on specific versions can be found on the Release page.","category":"page"},{"location":"#Package-Manual","page":"Introduction","title":"Package Manual","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"man/basics.md\",\n         \"man/getting_started.md\",\n         \"man/joins.md\",\n         \"man/split_apply_combine.md\",\n         \"man/reshaping_and_pivoting.md\",\n         \"man/sorting.md\",\n         \"man/categorical.md\",\n         \"man/missing.md\",\n         \"man/comparisons.md\",\n         \"man/querying_frameworks.md\"]\nDepth = 2","category":"page"},{"location":"#API","page":"Introduction","title":"API","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Only exported (i.e. available for use without DataFrames. qualifier after loading the DataFrames.jl package with using DataFrames) types and functions are considered a part of the public API of the DataFrames.jl package. 
In general all such objects are documented in this manual (in case some documentation is missing please kindly report an issue here).","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"note: Note\nBreaking changes to public and documented API are avoided in DataFrames.jl where possible. The following changes are not considered breaking: specific floating point values computed by operations may change at any time; users should rely only on approximate accuracy;\nin functions that use the default random number generator provided by Base Julia the specific random numbers computed may change across Julia versions;\nif the changed functionality is classified as a bug;\nif the changed behavior was not documented; two major cases are:\nin its implementation some function accepted a wider range of arguments than it was documented to handle - changes in handling of undocumented arguments are not considered as breaking;\nthe type of the value returned by a function changes, but it still follows the contract specified in the documentation; for example if a function is documented to return a vector then changing its type from Vector to PooledVector is not considered as breaking;\nerror behavior: code that threw an exception can change exception type thrown or stop throwing an exception;\nchanges in display (how objects are printed);\nchanges to the state of global objects from Base Julia whose state normally is considered volatile (e.g. state of global random number generator). All types and functions that are part of public API are guaranteed to go through a deprecation period before a breaking change is made to them or they are removed. The standard practice is that breaking changes are implemented when a major release of DataFrames.jl is made (e.g. functionalities deprecated in a 1.x release would be changed in the 2.0 release). In rare cases a breaking change might be introduced in a minor release. 
In such a case the changed behavior still goes through one minor release during which it is deprecated. The situations where such a breaking change might be allowed are (still such breaking changes will be avoided if possible):the affected functionality was previously clearly identified in the documentation as being subject to changes (for example in DataFrames.jl 1.4 release propagation rules of :note-style metadata are documented as such);\nthe change is on the border of being classified as a bug (in rare cases even if a behavior of some function was documented its consequences for certain argument combinations could be decided to be unintended and not wanted);\nthe change is needed to adjust DataFrames.jl functionality to changes in Base Julia.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Please be warned that while Julia allows you to access internal functions or types of DataFrames.jl these can change without warning between versions of DataFrames.jl. In particular it is not safe to directly access fields of types that are a part of public API of the DataFrames.jl package using e.g. the getfield function. 
Whenever some operation on fields of defined types is considered allowed an appropriate exported function should be used instead.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"lib/types.md\", \"lib/functions.md\", \"lib/indexing.md\"]\nDepth = 2","category":"page"},{"location":"#Index","page":"Introduction","title":"Index","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Pages = [\"lib/types.md\", \"lib/functions.md\"]","category":"page"},{"location":"assets/README/#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"In this folder we store the following data sets:","category":"page"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"german_credit.csv\niris.csv","category":"page"},{"location":"assets/README/#German-Credit-data-set","page":"Introduction","title":"German Credit data set","text":"","category":"section"},{"location":"assets/README/#License:","page":"Introduction","title":"License:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://opendatacommons.org/licenses/dbcl/1-0/","category":"page"},{"location":"assets/README/#Source:","page":"Introduction","title":"Source:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data) Professor Dr. 
Hans Hofmann Institut für Statistik und Ökonometrie Universität Hamburg FB Wirtschaftswissenschaften Von-Melle-Park 5 2000 Hamburg 13","category":"page"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"The original data is from UCI, and the file stored here is from Kaggle","category":"page"},{"location":"assets/README/#Iris-data-set","page":"Introduction","title":"Iris data set","text":"","category":"section"},{"location":"assets/README/#License","page":"Introduction","title":"License","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://creativecommons.org/publicdomain/zero/1.0/","category":"page"},{"location":"assets/README/#Source:-2","page":"Introduction","title":"Source:","text":"","category":"section"},{"location":"assets/README/","page":"Introduction","title":"Introduction","text":"https://archive.ics.uci.edu/ml/datasets/Iris Creator: R.A. Fisher","category":"page"}]
 }

Type	`names`	`propertynames`	`keys`	`length`	`ndims`
`AbstractDataFrame`	`Vector{String}`	`Vector{Symbol}`	undefined	undefined	`2`
`DataFrameRow`	`Vector{String}`	`Vector{Symbol}`	`Vector{Symbol}`	`Int`	`1`
`DataFrameRows`	`Vector{String}`	`Vector{Symbol}`	vector of `Int`	`Int`	`1`
`DataFrameColumns`	`Vector{String}`	`Vector{Symbol}`	`Vector{Symbol}`	`Int`	`1`
`GroupedDataFrame`	`Vector{String}`	tuple of fields	`GroupKeys`	`Int`	`1`
`GroupKeys`	undefined	tuple of fields	vector of `Int`	`Int`	`1`
`GroupKey`	`Vector{String}`	`Vector{Symbol}`	`Vector{Symbol}`	`Int`	`1`
Function	Memory Usage	Column Retention	Row Retention
`transform`	Creates a new data frame.	Retains original and resultant columns.	Retains same number of rows as original data frame.
`transform!`	Modifies an existing data frame.	Retains original and resultant columns.	Retains same number of rows as original data frame.
`select`	Creates a new data frame.	Retains only resultant columns.	Retains same number of rows as original data frame.
`select!`	Modifies an existing data frame.	Retains only resultant columns.	Retains same number of rows as original data frame.
`subset`	Creates a new data frame.	Retains original columns.	Retains only rows where condition is true.
`subset!`	Modifies an existing data frame.	Retains original columns.	Retains only rows where condition is true.
`combine`	Creates a new data frame.	Retains only resultant columns.	Retains only resultant rows.
Operation	dplyr	DataFrames.jl
Reduce multiple values	`summarize(df, mean(x))`	`combine(df, :x => mean)`
Add new columns	`mutate(df, x_mean = mean(x))`	`transform(df, :x => mean => :x_mean)`
Rename columns	`rename(df, x_new = x)`	`rename(df, :x => :x_new)`
Pick columns	`select(df, x, y)`	`select(df, :x, :y)`
Pick & transform columns	`transmute(df, mean(x), y)`	`select(df, :x => mean, :y)`
Pick rows	`filter(df, x >= 1)`	`subset(df, :x => ByRow(x -> x >= 1))`
Sort rows	`arrange(df, x)`	`sort(df, :x)`
Operation	dplyr	DataFrames.jl
Reduce multiple values	`summarize(group_by(df, grp), mean(x))`	`combine(groupby(df, :grp), :x => mean)`
Add new columns	`mutate(group_by(df, grp), mean(x))`	`transform(groupby(df, :grp), :x => mean)`
Pick & transform columns	`transmute(group_by(df, grp), mean(x), y)`	`select(groupby(df, :grp), :x => mean, :y)`
Operation	dplyr	DataFrames.jl
Complex Function	`summarize(df, mean(x, na.rm = T))`	`combine(df, :x => x -> mean(skipmissing(x)))`
Transform several columns	`summarize(df, max(x), min(y))`	`combine(df, :x => maximum, :y => minimum)`
	`summarize(df, across(c(x, y), mean))`	`combine(df, [:x, :y] .=> mean)`
	`summarize(df, across(starts_with("x"), mean))`	`combine(df, names(df, r"^x") .=> mean)`
	`summarize(df, across(c(x, y), list(max, min)))`	`combine(df, ([:x, :y] .=> [maximum minimum])...)`
Multivariate function	`mutate(df, cor(x, y))`	`transform(df, [:x, :y] => cor)`
Row-wise	`mutate(rowwise(df), min(x, y))`	`transform(df, [:x, :y] => ByRow(min))`
	`mutate(rowwise(df), which.max(c_across(matches("^x"))))`	`transform(df, AsTable(r"^x") => ByRow(argmax))`
DataFrame as input	`summarize(df, head(across(), 2))`	`combine(d -> first(d, 2), df)`
DataFrame as output	`summarize(df, tibble(value = c(min(x), max(x))))`	`combine(df, :x => (x -> (value = [minimum(x), maximum(x)],)) => AsTable)`
Operation	data.table	DataFrames.jl
Reduce multiple values	`df[, .(mean(x))]`	`combine(df, :x => mean)`
Add new columns	`df[, x_mean:=mean(x) ]`	`transform!(df, :x => mean => :x_mean)`
Rename column (in place)	`setnames(df, "x", "x_new")`	`rename!(df, :x => :x_new)`
Rename multiple columns (in place)	`setnames(df, c("x", "y"), c("x_new", "y_new"))`	`rename!(df, [:x, :y] .=> [:x_new, :y_new])`
Pick columns as dataframe	`df[, .(x, y)]`	`select(df, :x, :y)`
Pick column as a vector	`df[, x]`	`df[!, :x]`
Remove columns	`df[, -"x"]`	`select(df, Not(:x))`
Remove columns (in place)	`df[, x:=NULL]`	`select!(df, Not(:x))`
Remove columns (in place)	`df[, c("x", "y"):=NULL]`	`select!(df, Not([:x, :y]))`
Pick & transform columns	`df[, .(mean(x), y)]`	`select(df, :x => mean, :y)`
Pick rows	`df[ x >= 1 ]`	`filter(:x => >=(1), df)`
Sort rows (in place)	`setorder(df, x)`	`sort!(df, :x)`
Sort rows	`df[ order(x) ]`	`sort(df, :x)`
Operation	data.table	DataFrames.jl
Reduce multiple values	`df[, mean(x), by=id ]`	`combine(groupby(df, :id), :x => mean)`
Add new columns (in place)	`df[, x_mean:=mean(x), by=id]`	`transform!(groupby(df, :id), :x => mean)`
Pick & transform columns	`df[, .(x_mean = mean(x), y), by=id]`	`select(groupby(df, :id), :x => mean, :y)`
Operation	data.table	DataFrames.jl
Complex Function	`df[, .(mean(x, na.rm=TRUE)) ]`	`combine(df, :x => x -> mean(skipmissing(x)))`
Transform certain rows (in place)	`df[x<=0, x:=0]`	`df.x[df.x .<= 0] .= 0`
Transform several columns	`df[, .(max(x), min(y)) ]`	`combine(df, :x => maximum, :y => minimum)`
	`df[, lapply(.SD, mean), .SDcols = c("x", "y") ]`	`combine(df, [:x, :y] .=> mean)`
	`df[, lapply(.SD, mean), .SDcols = patterns("^x") ]`	`combine(df, names(df, r"^x") .=> mean)`
	`dcast(df, . ~ ., list(max,min), value.var = c("x","y"))`	`combine(df, ([:x, :y] .=> [maximum minimum])...)`
Multivariate function	`df[, .(cor(x,y)) ]`	`transform(df, [:x, :y] => cor)`
Row-wise	`df[, min_xy := min(x, y), by = 1:nrow(df)]`	`transform!(df, [:x, :y] => ByRow(min))`
	`df[, argmax_xy := which.max(.SD) , .SDcols = patterns("^x"), by = 1:nrow(df) ]`	`transform!(df, AsTable(r"^x") => ByRow(argmax))`
DataFrame as output	`df[, .SD[1], by=grp]`	`combine(groupby(df, :grp), first)`
DataFrame as output	`df[, .SD[which.max(x)], by=grp]`	`combine(groupby(df, :grp), sdf -> sdf[argmax(sdf.x), :])`
Reshape longer	`longdf = melt(df, measure.vars=c("x","y"), id.vars="id")`	`longdf = stack(df, [:x, :y], :id)`
Reshape wider	`dcast(longdf, id ~ variable, value.var="value")`	`unstack(longdf, :id, :variable, :value)`
Operation	data.table	DataFrames.jl
Inner join	`merge(df, df2, by = "grp")`	`innerjoin(df, df2, on = :grp)`
Outer join	`merge(df, df2, all = TRUE, by = "grp")`	`outerjoin(df, df2, on = :grp)`
Left join	`merge(df, df2, all.x = TRUE, by = "grp")`	`leftjoin(df, df2, on = :grp)`
Right join	`merge(df, df2, all.y = TRUE, by = "grp")`	`rightjoin(df, df2, on = :grp)`
Anti join (filtering)	`df[!df2, on = "grp" ]`	`antijoin(df, df2, on = :grp)`
Semi join (filtering)	`merge(df, df2[, .(grp)], by = "grp")`	`semijoin(df, df2, on = :grp)`
Operation	Stata	DataFrames.jl
Reduce multiple values	`collapse (mean) x`	`combine(df, :x => mean)`
Add new columns	`egen x_mean = mean(x)`	`transform!(df, :x => mean => :x_mean)`
Rename columns	`rename x x_new`	`rename!(df, :x => :x_new)`
Pick columns	`keep x y`	`select!(df, :x, :y)`
Pick rows	`keep if x >= 1`	`subset!(df, :x => ByRow(x -> x >= 1))`
Sort rows	`sort x`	`sort!(df, :x)`