Merge pull request #161 from ljchang/rank

Rank Former-commit-id: 3228dc7
cosanlab · Nov 5, 2017 · a6054a8 · a6054a8
2 parents ef41f73 + 15bc4d1
commit a6054a8
Show file tree

Hide file tree

Showing 6 changed files with 213 additions and 119 deletions.
diff --git a/nltools/data.py b/nltools/data.py
@@ -22,7 +22,11 @@
 import os
 import pickle # import cPickle
 import nibabel as nib
-from nltools.utils import get_resource_path, set_algorithm, get_anatomical, make_cosine_basis, glover_hrf
+from nltools.utils import (get_resource_path,
+                            set_algorithm,
+                            get_anatomical,
+                            make_cosine_basis,
+                            glover_hrf)
 from nltools.cross_validation import set_cv
 from nltools.plotting import (dist_from_hyperplane_plot,
                               scatterplot,
@@ -36,9 +40,12 @@
                            fisher_r_to_z,
                            correlation_permutation,
                            one_sample_permutation,
-                           two_sample_permutation)
+                           two_sample_permutation,
+                           downsample,
+                           upsample,
+                           zscore,
+                           transform_pairwise)
 from nltools.mask import expand_mask, collapse_mask
-from nltools.stats import downsample, zscore, upsample
 from nltools.analysis import Roc
 from nilearn.input_data import NiftiMasker
 from nilearn.image import resample_img
@@ -190,6 +197,7 @@ def __getitem__(self, index):
         if isinstance(index, int):
             new.data = np.array(self.data[index, :]).flatten()
         else:
+            index = np.array(index).flatten()
             new.data = np.array(self.data[index, :])
         if not self.Y.empty:
             new.Y = self.Y.iloc[index]
@@ -1347,6 +1355,19 @@ def regions(self, min_region_size=1350, extract_type='local_regions',
 
         return Brain_Data(regions, mask=self.mask)
 
+    def transform_pairwise(self):
+        ''' Transform the data into pairwise comparisons for ranking.
+
+        Passes self.data and self.Y to the module-level
+        transform_pairwise function (imported from nltools.stats) and
+        stores the result on a copy of this instance; the instance itself
+        is not modified.
+
+        Args:
+
+        Returns:
+            Brain_Data: Brain_Data instance transformed into pairwise
+                        comparisons. Its data holds the transformed data
+                        and its Y is a DataFrame of the transformed labels
+                        in which every -1 has been replaced by 0.
+        '''
+        out = self.copy()
+        out.data, new_Y = transform_pairwise(self.data,self.Y)
+        out.Y = pd.DataFrame(new_Y)
+        # Recode the -1 labels as 0 on the new Y
+        out.Y.replace(-1,0,inplace=True)
+        return out
 
 class Adjacency(object):
 

diff --git a/nltools/mask.py b/nltools/mask.py
@@ -19,8 +19,6 @@
 import numpy as np
 import warnings
 from nilearn.masking import intersect_masks
-# from neurosynth.masks import Masker
-
 
 def create_sphere(coordinates, radius=5, mask=None):
     """ Generate a set of spheres in the brain mask space
@@ -35,8 +33,8 @@ def create_sphere(coordinates, radius=5, mask=None):
     from nltools.data import Brain_Data
 
     if mask is not None:
-        if not isinstance(mask,nib.Nifti1Image):
-            if type(mask) is str:
+        if not isinstance(mask, nib.Nifti1Image):
+            if isinstance(mask, six.string_types):
                 if os.path.isfile(mask):
                     data = nib.load(mask)
             else:
@@ -57,48 +55,38 @@ def sphere(r, p, mask):
 
         """
         dims = mask.shape
-        m = [dims[0]/2, dims[1]/2, dims[2]/2] # JC edit: default value for centers
-        x, y, z = np.ogrid[-m[0]:dims[0]-m[0], -m[1]:dims[1]-m[1], -m[2]:dims[2]-m[2]] #JC edit: creates sphere
-        # x, y, z = np.ogrid[-p[0]:dims[0]-p[0], -p[1]:dims[1]-p[1], -p[2]:dims[2]-p[2]]
+        m = [dims[0]/2, dims[1]/2, dims[2]/2]
+        x, y, z = np.ogrid[-m[0]:dims[0]-m[0],
+                            -m[1]:dims[1]-m[1],
+                            -m[2]:dims[2]-m[2]]
         mask_r = x*x + y*y + z*z <= r*r
 
         activation = np.zeros(dims)
         activation[mask_r] = 1
-        # JC edit shift mask to proper location
-        translation_affine= np.array([[1, 0, 0, p[0]-m[0]],
+        translation_affine = np.array([[1, 0, 0, p[0]-m[0]],
                                 [0, 1, 0, p[1]-m[1]],
                                 [0, 0, 1, p[2]-m[2]],
                                  [0, 0, 0, 1]])
 
-        # activation = np.multiply(activation, mask.get_data())
-        # activation = nib.Nifti1Image(activation, affine=np.eye(4))
-        activation = nib.Nifti1Image(activation, affine=translation_affine)
-        #return the 3D numpy matrix of zeros containing the sphere as a region of ones
-        # return activation.get_data(), translation_affine
-        return activation
-
-    # Initialize Spheres with options for multiple radii and centers of the spheres (or just an int and a 3D list)
-    # return sphere(radius,coordinates,mask)
-    if type(radius) is int:
-        radius = [radius]
-    if coordinates is None:
-        coordinates = [[dims[0]/2, dims[1]/2, dims[2]/2] * len(radius)] #default value for centers
-    elif type(coordinates) is list and type(coordinates[0]) is int and len(radius) is 1:
-        coordinates = [coordinates]
-    if (type(radius)) is list and (type(coordinates) is list) and (len(radius) == len(coordinates)):
-        A = np.zeros_like(mask.get_data())
-        A = Brain_Data(nib.Nifti1Image(A, affine=mask.affine), mask=mask)
-        for i in range(len(radius)):
-            A = A + Brain_Data(sphere(radius[i], coordinates[i], mask),
-                                mask=mask)
-        A = A.to_nifti()
-        A.get_data()[A.get_data()>0.5]=1
-        A.get_data()[A.get_data()<0.5]=0
-        return A
+        return nib.Nifti1Image(activation, affine=translation_affine)
+
+    if any(isinstance(i, list) for i in coordinates):
+        if isinstance(radius, list):
+            if len(radius) != len(coordinates):
+                raise ValueError('Make sure length of radius list matches'
+                                'length of coordinate list.')
+        elif isinstance(radius, int):
+            radius = [radius]*len(coordinates)
+        out = Brain_Data(nib.Nifti1Image(np.zeros_like(mask.get_data()),
+                        affine=mask.affine), mask=mask)
+        for r, c in zip(radius, coordinates):
+            out = out + Brain_Data(sphere(r, c, mask), mask=mask)
     else:
-        raise ValueError("Data type for sphere or radius(ii) or center(s) "
-                        "not recognized.")
-
+        out = Brain_Data(sphere(radius, coordinates, mask), mask=mask)
+    out = out.to_nifti()
+    out.get_data()[out.get_data() > 0.5]=1
+    out.get_data()[out.get_data() < 0.5]=0
+    return out
 
 def expand_mask(mask):
     """ expand a mask with multiple integers into separate binary masks
@@ -160,7 +148,8 @@ def collapse_mask(mask, auto_label=True):
 
             merge = []
             if auto_label:
-                # Combine all masks into sequential order ignoring any areas of overlap
+                # Combine all masks into sequential order
+                # ignoring any areas of overlap
                 for i in range(len(m_list)):
                     merge.append(np.multiply(
                                 Brain_Data(m_list[i]).data,

diff --git a/nltools/stats.py b/nltools/stats.py
@@ -26,11 +26,12 @@
 
 import numpy as np
 import pandas as pd
-from scipy.stats import ss, pearsonr, spearmanr, kendalltau
+from scipy.stats import pearsonr, spearmanr, kendalltau
 from copy import deepcopy
 import nibabel as nib
 from scipy.interpolate import interp1d
 import warnings
+import itertools
 
 def pearson(x, y):
     """ Correlates row vector x with each row vector in 2D array y.
@@ -39,7 +40,8 @@ def pearson(x, y):
     data = np.vstack((x, y))
     ms = data.mean(axis=1)[(slice(None, None, None), None)]
     datam = data - ms
-    datass = np.sqrt(ss(datam, axis=1))
+    datass = np.sqrt(np.sum(datam*datam, axis=1))
+    # datass = np.sqrt(ss(datam, axis=1))
     temp = np.dot(datam[1:], datam[0].T)
     rs = temp / (datass[1:] * datass[0])
     return rs
@@ -466,3 +468,50 @@ def correlation_permutation(data1, data2, n_permute=5000, metric='spearman'):
     else:
         stats['p'] = np.mean(all_p <= stats['correlation'])
     return stats
+
+def transform_pairwise(X, y):
+    '''Transforms data into pairs with balanced labels for ranking
+
+    Transforms an n-class ranking problem into a two-class classification
+    problem. Subclasses implementing particular strategies for choosing
+    pairs should override this method.
+    In this method, all pairs are chosen, except for those that have the
+    same target value or that belong to different groups. Each retained
+    pair (i, j) contributes the difference X[i] - X[j] together with the
+    sign of the target difference. The sign (and the difference) is
+    flipped where needed so that the label of a pair equals (-1) ** k,
+    with k the index of the pair among all combinations; this makes the
+    output classes approximately balanced between -1 and +1.
+
+    Reference: "Large Margin Rank Boundaries for Ordinal Regression",
+    R. Herbrich, T. Graepel, K. Obermayer.
+    Authors: Fabian Pedregosa <[email protected]>
+             Alexandre Gramfort <[email protected]>
+    Args:
+        X : array, shape (n_samples, n_features)
+            The data
+        y : array, shape (n_samples,) or (n_samples, 2)
+            Target labels. If it's a 2D array with two columns, the second
+            column represents the grouping of samples, i.e., samples with
+            different groups will not be considered. Any other shape
+            (e.g. a column vector or single-column DataFrame) is
+            flattened to 1D and all samples share one group.
+
+    Returns:
+        X_trans : array, shape (k, n_features)
+            Data as pairs
+        y_trans : array, shape (k,)
+            Output class labels, where classes have values {-1, +1}
+
+    '''
+
+    X = np.asarray(X)
+    y = np.asarray(y)
+    # Only a genuine (n_samples, 2) target/group array is kept 2D. Flattening
+    # it unconditionally would interleave targets with group ids and silently
+    # drop the grouping.
+    if y.shape != (X.shape[0], 2):
+        y = y.ravel()
+    if y.ndim == 1:
+        # No grouping supplied: place every sample in the same group
+        y = np.c_[y, np.ones(y.shape[0])]
+
+    X_new = []
+    y_new = []
+    comb = itertools.combinations(range(X.shape[0]), 2)
+    for k, (i, j) in enumerate(comb):
+        if y[i, 0] == y[j, 0] or y[i, 1] != y[j, 1]:
+            # skip if same target or different group
+            continue
+        diff = X[i] - X[j]
+        label = np.sign(y[i, 0] - y[j, 0])
+        # output balanced classes: force the label to alternate with k
+        if label != (-1) ** k:
+            label, diff = -label, -diff
+        X_new.append(diff)
+        y_new.append(label)
+    return np.asarray(X_new), np.asarray(y_new).ravel()