Skip to content

Commit

Permalink
Issue #124: Speed up selecting from diffs table by 58.27%
Browse files Browse the repository at this point in the history
  • Loading branch information
AmrDeveloper committed Dec 22, 2024
1 parent d42fe89 commit fe66c6e
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 83 deletions.
4 changes: 2 additions & 2 deletions docs/structure/tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ show tables
| Name | Type | Description |
| ------------- | -------- | ------------------------ |
| commit_id | Text | Commit id |
| name | Text | Author name |
| email | Text | Author email |
| author_name | Text | Author name |
| author_email | Text | Author email |
| insertions | Number | Number of inserted lines |
| deletions | Number | Number of deleted lines |
| files_changed | Number | Number of files changed |
Expand Down
152 changes: 74 additions & 78 deletions src/git_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ use gitql_core::values::null::NullValue;
use gitql_core::values::text::TextValue;
use gitql_engine::data_provider::DataProvider;

use gix::diff::blob::pipeline::Mode;
use gix::object::blob::diff::init::Error;
use gix::object::tree::diff::Action;
use gix::refs::Category;

pub struct GitDataProvider {
Expand Down Expand Up @@ -132,10 +135,10 @@ fn select_commits(repo: &gix::Repository, selected_columns: &[String]) -> Result
}

let repo_path = repo.path().to_str().unwrap().to_string();
let revwalk = head_id.unwrap().ancestors().all().unwrap();
let walker = head_id.unwrap().ancestors().all().unwrap();
let mut rows: Vec<Row> = vec![];

for commit_info in revwalk {
for commit_info in walker {
let commit_info = commit_info.unwrap();
let commit = repo.find_object(commit_info.id).unwrap().into_commit();
let commit = commit.decode().unwrap();
Expand Down Expand Up @@ -258,8 +261,8 @@ fn select_branches(

if column_name == "commit_count" {
let commit_count = if let Some(id) = branch.try_id() {
if let Ok(revwalk) = id.ancestors().all() {
revwalk.count() as i64
if let Ok(walker) = id.ancestors().all() {
walker.count() as i64
} else {
0
}
Expand All @@ -275,8 +278,8 @@ fn select_branches(

if column_name == "updated" {
if let Ok(top_commit_id) = branch.peel_to_id_in_place() {
let revwalk = top_commit_id.ancestors().all().unwrap();
if let Some(commit_info) = revwalk.into_iter().next() {
let walker = top_commit_id.ancestors().all().unwrap();
if let Some(commit_info) = walker.into_iter().next() {
let commit_info = commit_info.unwrap();
if let Some(commit_timestamp) = commit_info.commit_time {
values.push(Box::new(DateTimeValue {
Expand Down Expand Up @@ -337,44 +340,76 @@ fn select_diffs(repo: &gix::Repository, selected_columns: &[String]) -> Result<V
repo
};

let revwalk = repo.head_id().unwrap().ancestors().all().unwrap();
let walker = repo.head_id().unwrap().ancestors().all().unwrap();
let repo_path = repo.path().to_str().unwrap().to_string();

let mut rewrite_cache = repo
.diff_resource_cache(gix::diff::blob::pipeline::Mode::ToGit, Default::default())
.diff_resource_cache(Mode::ToGit, Default::default())
.unwrap();

let mut diff_cache = rewrite_cache.clone();

let mut rows: Vec<Row> = vec![];

let select_insertions_or_deletions = selected_columns.contains(&"insertions".to_string())
|| selected_columns.contains(&"deletions".to_string());
|| selected_columns.contains(&"deletions".to_string())
|| selected_columns.contains(&"files_changed".to_string());

for commit_info in revwalk {
for commit_info in walker {
let commit_info = commit_info.unwrap();
let commit = commit_info.id().object().unwrap().into_commit();
let commit_ref = commit.decode().unwrap();
let mut values: Vec<Box<dyn Value>> = Vec::with_capacity(selected_columns.len());

// Calculating the diff between two commits takes time, so it should be calculated only once per commit
let (mut insertions, mut deletions, mut files_changed) = (0, 0, 0);
if select_insertions_or_deletions {
let current = commit.tree().unwrap();
let previous = commit_info
.parent_ids()
.next()
.map(|id| id.object().unwrap().into_commit().tree().unwrap())
.unwrap_or_else(|| repo.empty_tree());

rewrite_cache.clear_resource_cache();
diff_cache.clear_resource_cache();

if let Ok(mut changes) = previous.changes() {
let _ = changes.for_each_to_obtain_tree_with_cache(
&current,
&mut rewrite_cache,
|change| -> Result<_, Error> {
files_changed += usize::from(change.entry_mode().is_no_tree());
if select_insertions_or_deletions {
if let Ok(mut platform) = change.diff(&mut diff_cache) {
if let Ok(Some(counts)) = platform.line_counts() {
deletions += counts.removals;
insertions += counts.insertions;
}
}
}
Ok(Action::Continue)
},
);
}
}

for column_name in selected_columns {
if column_name == "commit_id" {
values.push(Box::new(TextValue {
value: commit_info.id.to_string(),
}));
let value = commit_info.id.to_string();
values.push(Box::new(TextValue { value }));
continue;
}

if column_name == "name" {
values.push(Box::new(TextValue {
value: commit_ref.author().name.to_string(),
}));
if column_name == "author_name" {
let value = commit_ref.author().name.to_string();
values.push(Box::new(TextValue { value }));
continue;
}

if column_name == "email" {
values.push(Box::new(TextValue {
value: commit_ref.author().email.to_string(),
}));
if column_name == "author_email" {
let value = commit_ref.author().email.to_string();
values.push(Box::new(TextValue { value }));
continue;
}

Expand All @@ -386,68 +421,29 @@ fn select_diffs(repo: &gix::Repository, selected_columns: &[String]) -> Result<V
continue;
}

if column_name == "repo" {
values.push(Box::new(TextValue {
value: repo_path.to_string(),
}));
if column_name == "insertions" {
let value = insertions as i64;
values.push(Box::new(IntValue { value }));
continue;
}

if column_name == "insertions"
|| column_name == "deletions"
|| column_name == "files_changed"
{
let current = commit.tree().unwrap();

let previous = commit_info
.parent_ids()
.next()
.map(|id| id.object().unwrap().into_commit().tree().unwrap())
.unwrap_or_else(|| repo.empty_tree());
rewrite_cache.clear_resource_cache();
diff_cache.clear_resource_cache();

let (mut insertions, mut deletions, mut files_changed) = (0, 0, 0);
let _ = previous
.changes()
.unwrap()
.for_each_to_obtain_tree_with_cache(
&current,
&mut rewrite_cache,
|change| -> Result<_, gix::object::blob::diff::init::Error> {
files_changed += usize::from(change.entry_mode().is_no_tree());
if select_insertions_or_deletions {
if let Ok(mut platform) = change.diff(&mut diff_cache) {
if let Ok(Some(counts)) = platform.line_counts() {
deletions += counts.removals;
insertions += counts.insertions;
}
}
}
Ok(gix::object::tree::diff::Action::Continue)
},
);

if column_name == "insertions" {
values.push(Box::new(IntValue {
value: insertions as i64,
}));
continue;
}
if column_name == "deletions" {
let value = deletions as i64;
values.push(Box::new(IntValue { value }));
continue;
}

if column_name == "deletions" {
values.push(Box::new(IntValue {
value: deletions as i64,
}));
continue;
}
if column_name == "files_changed" {
let value = files_changed as i64;
values.push(Box::new(IntValue { value }));
continue;
}

if column_name == "files_changed" {
values.push(Box::new(IntValue {
value: files_changed as i64,
}));
continue;
}
if column_name == "repo" {
values.push(Box::new(TextValue {
value: repo_path.to_string(),
}));
continue;
}

values.push(Box::new(NullValue));
Expand Down
5 changes: 2 additions & 3 deletions src/git_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ pub fn tables_fields_types() -> HashMap<&'static str, Box<dyn DataType>> {
map.insert("insertions", Box::new(IntType));
map.insert("deletions", Box::new(IntType));
map.insert("files_changed", Box::new(IntType));
map.insert("email", Box::new(TextType));
map.insert("type", Box::new(TextType));
map.insert("datetime", Box::new(DateTimeType));
map.insert("is_head", Box::new(BoolType));
Expand Down Expand Up @@ -68,8 +67,8 @@ pub fn tables_fields_names() -> &'static HashMap<&'static str, Vec<&'static str>
"diffs",
vec![
"commit_id",
"name",
"email",
"author_name",
"author_email",
"insertions",
"deletions",
"files_changed",
Expand Down

0 comments on commit fe66c6e

Please sign in to comment.