parse-rois.Rmd

---
title: "Parse VIA ROIs"
description: |
 This R script parses manual astrocyte, microglia, plaque, and tangle annotations created using the VGG Image Annotator tool into an ImageJ-readable format.
author:
  - first_name: "Ayush"
    last_name: "Noori"
    url: https://www.github.com/ayushnoori
    affiliation: Massachusetts General Hospital
    affiliation_url: https://www.serranopozolab.org
    orcid_id: 0000-0003-1420-1236
output:
  distill::distill_article:
    toc: true
---

```{r setup, include = FALSE}
# set eval = FALSE as the default chunk option, so chunks are not executed when the document is knit
knitr::opts_chunk$set(eval = FALSE)
```

# Dependencies

Load requisite packages and define directories. Note that this script uses my personal utilities package `brainstorm`, which can be downloaded via `devtools::install_github("ayushnoori/brainstorm")`.

```{r load-packages, message=FALSE, warning=FALSE}

# data manipulation
library(data.table)
library(purrr)
library(magrittr)

# string manipulation
library(stringi)

# utility functions
library(brainstorm)

```

Note that directories are relative to the R project path.

```{r define-directories}

# output root for ROI annotations, with the raw VIA exports in a sub-folder
dir2 = file.path("Results", "2 - ROI Annotations")
dir2.1 = file.path(dir2, "2.1 - VIA Annotations")

# input root; holds one sub-folder per cell type with its "ID Mappings.csv"
ddir = file.path("Data", "2 - Channel Extraction")

```

# Read Mappings

Read mappings between true crop labels and random alphanumeric IDs generated by the prior ImageJ script.

```{r read-mappings}

# annotated cell types; each one has its own sub-folder under ddir
celltypes = c("Astrocyte", "Microglia", "Plaque", "Tangle")

# read the ID mapping table of one cell type (astrocyte, microglia, plaque, or tangle)
# and tag every row with that cell type in a new Type column
read_map = function(celltype) {
  map_path = file.path(ddir, celltype, "ID Mappings.csv")
  mapping = fread(map_path)
  mapping[, Type := celltype] # := adds the column by reference
  mapping
}
# stack the per-cell-type mapping tables into a single table
IDmap = rbindlist(lapply(celltypes, read_map))

# drop the V1 column (presumably an unnamed row-index column from the CSV; verify against the input)
IDmap[, V1 := NULL]

# the cell type column is called Group from here on
setnames(IDmap, old = "Type", new = "Group")

# preview the first 20 mappings
show_table(head(IDmap, n = 20))

```

# Read Annotations

Read VGG Image Annotator (VIA) annotations from the output `.csv` files.

```{r read-annotations}

# VIA exports to read: every .csv file directly inside dir2.1
flist = list.files(path = dir2.1, pattern = "\\.csv$")

# function to read annotations
# reads one VIA export from dir2.1 and adds two columns, Annotator and Group, taken from
# the first two underscore-delimited fields of the file name
# (file names presumably look like "<annotator>_<group>_... .csv"; verify against dir2.1)
read_annot = function(file) {

  # first two "_"-separated fields of the file name; the second is NA if there is no underscore
  name_parts = stri_split_fixed(file, "_", simplify = TRUE)[1:2]

  annotations = fread(file.path(dir2.1, file))
  annotations[, c("Annotator", "Group") := as.list(name_parts)] # each value is recycled down its column
  annotations

}

# read annotations from every export and stack them into one table
annot = rbindlist(map(flist, ~read_annot(.x)))

# remove empty file labels and .png extension suffix, then recode annotator and group labels
# note: the dot is escaped and the pattern anchored so that only a trailing ".png" is stripped;
# an unescaped ".png" would also match any character followed by "png" inside the name
annot = annot %>%
  .[region_shape_attributes != "{}", ] %>%
  .[, Filename := sub("\\.png$", "", filename)] %>%
  .[, Annotator := factor(Annotator, levels = c("alberto", "ayush", "clara"), labels = c("ASP", "AN", "CMC"))] %>%
  .[, Group := factor(Group, levels = c("astrocyte", "microglia", "plaque", "tangle"), labels = celltypes)]

# preview a random subset of rows (at most 40, so that small tables do not raise an error)
show_table(annot[sample(nrow(annot), min(40, nrow(annot))), ])

```

# Parse Annotations

Define a function to parse VIA annotations into a format readable by ImageJ. To conform with ImageJ naming conventions, the cell type of each ROI is specified by `Group`, while the subtype (e.g., `compact` vs. `diffuse` or `soma` vs. `process`) is specified by `type`.

```{r parse-label}

# define function to parse label based on VIA output
#
# arguments:
#   label - a single VIA attribute string, e.g. '{"type":"soma","quality":"good"}'
#   grp   - which fields to return: "region" (the default) returns type and quality,
#           "region_shape" returns x, y, width, and height
#
# returns a named list with one character element per requested field; a field that is
# absent from the label (or has an empty value) is returned as NA_character_
parse_label = function(label, grp = c("region", "region_shape")) {

  # resolve grp to a single value; comparing the two-element default inside if() is an error
  grp = match.arg(grp)

  # split by comma to separate vars
  fields = strsplit(label, ",", fixed = TRUE)[[1]]

  # remove braces and quotes (the character class also strips commas and spaces)
  fields = gsub("[{, }, \"]", "", fields)

  # split each "key:value" pair
  pairs = strsplit(fields, ":")

  # extract second elements as values, then assign first elements as names
  parsed = lapply(pairs, function(p) p[2])
  names(parsed) = vapply(pairs, function(p) p[1], character(1))

  # replace tangle annotation name; only the tangle element is renamed so that
  # any other attribute in the same label keeps its own name
  if ("tangle" %in% names(parsed)) {
    names(parsed)[names(parsed) %in% "tangle"] = "type"
    parsed[["quality"]] = "none"
  }

  # select the requested fields, filling absent ones with NA so every result has the same names
  keys = if (grp == "region") c("type", "quality") else c("x", "y", "width", "height")
  out = lapply(keys, function(k) {
    val = parsed[[k]]
    if (is.null(val)) NA_character_ else val
  })
  names(out) = keys
  out

}

```

Apply `parse_label` function to VIA annotations, and join with `IDmap`.

```{r parse-annotations}

# parse annotations: expand the VIA attribute strings into Type/Quality and X/Y/Width/Height
# columns, relabel vessel annotations, and strip the .png extension so filename matches the ID
# note: the dot is escaped and the pattern anchored so that only a trailing ".png" is stripped
annot = annot %>%
  .[, c("Type", "Quality") := map_dfr(region_attributes, ~parse_label(.x, "region"))] %>%
  .[, c("X", "Y", "Width", "Height") := map_dfr(region_shape_attributes, ~parse_label(.x, "region_shape"))] %>%
  .[Type == "vessel", Group := "Vessel"] %>%
  .[, filename := sub("\\.png$", "", filename)]

# join with ID mapping information, consolidate microglia and plaque categories, order table
# Group is dropped from IDmap so that the merged table keeps the Group column of annot;
# the guard keeps this chunk re-runnable after the column has already been removed
if ("Group" %in% names(IDmap)) IDmap[, Group := NULL]
dat = merge(IDmap, annot, by.x = "ID", by.y = "filename", all.y = TRUE) %>%
  .[, .(Sample, Layer, Crop, Condition, File, Group, X, Y, Width, Height, Type, Quality, Annotator)] %>% 
  .[order(Sample, Layer, Crop, Group, as.numeric(X)), ]

# total ROI count
summary(factor(dat[, Group]))

# preview a random subset of rows (at most 40, so that small tables do not raise an error)
show_table(dat[sample(nrow(dat), min(40, nrow(dat))), ])

```

# Write VIA ROIs

Write the parsed VIA ROIs to one `.csv` file per annotated crop, along with a text file listing the annotated crops.

```{r write-rois}

# remove prior output: every .csv file directly inside dir2 (sub-folders are not searched)
rmlist = list.files(path = dir2, pattern = "\\.csv$")
invisible(file.remove(file.path(dir2, rmlist)))

# write file list
# File is NA for rows of dat without a matching ID mapping; skip those instead of
# writing a header-only "NA.csv" and listing "NA" as an annotated crop
files = unique(dat$File)
if (anyNA(files)) {
  warning("Some annotations have no matching ID mapping and were skipped.", call. = FALSE)
  files = files[!is.na(files)]
}
cat(paste0("Annotated Crops: ", length(files), "\n"))
write(c("Annotated TIFFs", files), file.path(dir2, "Annotated TIFFs.txt"))

# save VIA annotations of each crop to "<File>.csv"; rows keep the existing order of dat
walk(files, ~fwrite(dat[File == .x, ], file.path(dir2, paste0(.x, ".csv"))))

```