diff --git a/src/Makefile.inc b/src/Makefile.inc
index a2b252f..ff81afc 100644
--- a/src/Makefile.inc
+++ b/src/Makefile.inc
@@ -21,6 +21,7 @@ OPTIONS=
 #   fftw3_f03  - FFTW 3.3-beta1 or later (with Fortran 2003 interface)
 #   generic    - A general FFT algorithm (no 3rd-party library needed)
 #   mkl        - Intel Math Kernel Library
+#   ssl        - Fujistu Scientific Subroutine Library
 FFT=generic
 
 # Platform-dependent information - compiler, external library etc
@@ -94,6 +95,9 @@ LIB_FFTE=$(FFTE_PATH)/libffte.a
 # For cuFFT
 LIB_CUFFT=-L$(CUDA_PATH)/lib64 -lcudart -lcufft
 
+# For SSL
+LIB_SSL=-SSL2 --linkstl=libfjc++
+
 ifeq ($(FFT),generic)
      LIBFFT=
 else ifeq ($(FFT),acml)
@@ -110,4 +114,7 @@ else ifeq ($(FFT),mkl)
      LIBFFT=$(LIB_MKL)
 else ifeq ($(FFT),cufft)
      LIBFFT=$(LIB_CUFFT)
+else ifeq ($(FFT),ssl)
+     LD_FLAGS=-Kfast
+     LIBFFT=$(LIB_SSL)
 endif
diff --git a/src/fft_ssl.f90 b/src/fft_ssl.f90
new file mode 100644
index 0000000..98c99a9
--- /dev/null
+++ b/src/fft_ssl.f90
@@ -0,0 +1,446 @@
+!=======================================================================
+! This is part of the 2DECOMP&FFT library
+! 
+! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) 
+! decomposition. It also implements a highly scalable distributed
+! three-dimensional Fast Fourier Transform (FFT).
+!
+! Copyright (C) 2009-2021 Ning Li, the Numerical Algorithms Group (NAG)
+!
+!=======================================================================
+
+! This is the Fujitsu SSL implementation of the FFT library
+! Note that the SSL engine used only supports transform size 
+! with prime factor 2 and is suited for doing multiple repeated
+! transforms. Other SSL engines are available.
+! Other sizes are computed by the generic engine.
+
+module decomp_2d_fft
+  
+  use decomp_2d  ! 2D decomposition module
+  use glassman
+  
+  implicit none
+  
+  private        ! Make everything private unless declared public
+
+  ! engine-specific global variables
+  complex(mytype), allocatable, dimension(:) :: buf, scratch
+
+  logical, save :: x_goodsize, y_goodsize, z_goodsize
+
+  ! common code used for all engines, including global variables, 
+  ! generic interface definitions and several subroutines
+#include "fft_common.f90"
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  !  This routine performs one-time initialisations for the FFT engine
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  subroutine init_fft_engine
+
+    implicit none
+
+    integer :: cbuf_size, errorcode
+
+    if (KIND(0.0) == mytype) then
+       errorcode = 5
+       call decomp_2d_abort(errorcode, &
+            'Choosen SSL engine only suppport double-precision calculations')
+    end if
+
+    if (nrank==0) then
+       write(*,*) ' '
+       write(*,*) '***** Using the SSL engine *****'
+       write(*,*) ' '
+    end if
+
+    cbuf_size = max(ph%xsz(1), ph%ysz(2))
+    cbuf_size = max(cbuf_size, ph%zsz(3))
+    allocate(buf(cbuf_size))
+    allocate(scratch(2*cbuf_size + 70))  ! twice space for ssl library
+
+    ! check the transform sizes
+    x_goodsize = check_size(ph%xsz(1))
+    y_goodsize = check_size(ph%ysz(2))
+    z_goodsize = check_size(ph%zsz(3))
+
+    return
+  end subroutine init_fft_engine
+
+  ! factorisation routine
+#include "factor.f90"
+
+  logical function check_size(nsize)
+
+    integer, intent(IN) :: nsize
+
+    integer, allocatable, dimension(:) :: factors
+    integer :: nfact, i
+
+    i = ceiling(log(real(nsize))*1.442695)  ! log_2_N
+    allocate(factors(i))
+
+    call primefactors(nsize,factors,nfact)
+
+    check_size = .true.
+
+    do i=1,nfact
+       if (factors(i) > 2) then
+          check_size = .false.
+          exit
+       end if
+    end do
+
+    return
+  end function check_size
+
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  !  This routine performs one-time finalisations for the FFT engine
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  subroutine finalize_fft_engine
+
+    implicit none
+
+    deallocate(buf,scratch)
+
+    return
+  end subroutine finalize_fft_engine
+
+
+  ! Following routines calculate multiple one-dimensional FFTs to form 
+  ! the basis of three-dimensional FFTs.
+
+  ! c2c transform, multiple 1D FFTs in x direction
+  subroutine c2c_1m_x(inout, isign, decomp)
+
+    implicit none
+
+    complex(mytype), dimension(:,:,:), intent(INOUT) :: inout
+    integer, intent(IN) :: isign
+    TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+    integer :: i,j,k
+    integer :: isw
+    integer :: ierror
+
+    isw = 1;
+    if (x_goodsize) then
+       call dvcfm1(buf,decomp%xsz(1),isw,isign,scratch,ierror)
+    end if
+    isw=0;
+    do k=1,decomp%xsz(3)
+       do j=1,decomp%xsz(2)
+          do i=1,decomp%xsz(1)
+             buf(i) = inout(i,j,k)
+          end do
+          if (x_goodsize) then
+             call dvcfm1(buf,decomp%xsz(1),isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,decomp%xsz(1),isign,scratch)
+          end if
+          do i=1,decomp%xsz(1)
+             inout(i,j,k) = buf(i)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine c2c_1m_x
+
+  ! c2c transform, multiple 1D FFTs in y direction
+  subroutine c2c_1m_y(inout, isign, decomp)
+
+    implicit none
+
+    complex(mytype), dimension(:,:,:), intent(INOUT) :: inout
+    integer, intent(IN) :: isign
+    TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+    integer :: i,j,k
+    integer :: isw
+    integer :: ierror
+
+    isw = 1;
+    if (y_goodsize) then
+       call dvcfm1(buf,decomp%ysz(2),isw,isign,scratch,ierror)
+    end if
+    isw = 0;
+
+    do k=1,decomp%ysz(3)
+       do i=1,decomp%ysz(1)
+          do j=1,decomp%ysz(2)
+             buf(j) = inout(i,j,k)
+          end do
+          if (y_goodsize) then
+             call dvcfm1(buf,decomp%ysz(2),isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,decomp%ysz(2),isign,scratch)
+          end if
+          do j=1,decomp%ysz(2)
+             inout(i,j,k) = buf(j)
+          end do
+       end do
+    end do
+
+    if (y_goodsize .AND. isign==DECOMP_2D_FFT_BACKWARD) then
+       inout = inout * real(decomp%ysz(2), kind=mytype)
+    end if
+
+    return
+
+  end subroutine c2c_1m_y
+
+  ! c2c transform, multiple 1D FFTs in z direction
+  subroutine c2c_1m_z(inout, isign, decomp)
+
+    implicit none
+
+    complex(mytype), dimension(:,:,:), intent(INOUT) :: inout
+    integer, intent(IN) :: isign
+    TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+    integer :: i,j,k
+    integer :: isw
+    integer :: ierror
+
+    isw = 1;
+    if (z_goodsize) then
+       call dvcfm1(buf,decomp%zsz(3),isw,isign,scratch,ierror)
+    end if
+    
+    isw = 0;
+
+    do j=1,decomp%zsz(2)
+       do i=1,decomp%zsz(1)
+          do k=1,decomp%zsz(3)
+             buf(k) = inout(i,j,k)
+          end do
+          if (z_goodsize) then
+             call dvcfm1(buf,decomp%zsz(3),isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,decomp%zsz(3),isign,scratch)
+          end if
+          do k=1,decomp%zsz(3)
+             inout(i,j,k) = buf(k)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine c2c_1m_z
+
+  ! r2c transform, multiple 1D FFTs in x direction
+  subroutine r2c_1m_x(input, output)
+
+    implicit none
+
+    real(mytype), dimension(:,:,:), intent(IN)  ::  input
+    complex(mytype), dimension(:,:,:), intent(OUT) :: output
+
+    integer :: i,j,k, s1,s2,s3, d1
+    integer :: isw
+    integer :: ierror
+    integer :: isign
+
+    isw = 1;
+    isign = -1;
+
+    s1 = size(input,1)
+    s2 = size(input,2)
+    s3 = size(input,3)
+    d1 = size(output,1)
+
+    if (x_goodsize) then
+       call dvcfm1(buf,s1,isw,isign,scratch,ierror)
+    end if
+
+    isw = 0;
+
+    do k=1,s3
+       do j=1,s2
+          ! The base FFT engine is c2c only, 
+          ! needing some pre- and post-processing for r2c
+          ! pack real input in complex storage
+          do i=1,s1
+             buf(i) = cmplx(input(i,j,k),0._mytype, kind=mytype)
+          end do
+          if (x_goodsize) then
+             call dvcfm1(buf,s1,isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,s1,isign,scratch)
+          end if
+          ! note d1 ~ s1/2+1
+          ! simply drop the redundant part of the complex output
+          do i=1,d1
+             output(i,j,k) = buf(i)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine r2c_1m_x
+
+  ! r2c transform, multiple 1D FFTs in z direction
+  subroutine r2c_1m_z(input, output)
+
+    implicit none
+
+    real(mytype), dimension(:,:,:), intent(IN)  ::  input
+    complex(mytype), dimension(:,:,:), intent(OUT) :: output
+
+    integer :: i,j,k, s1,s2,s3, d3
+    integer :: isw
+    integer :: ierror
+    integer :: isign
+
+    isw = 1;
+    isign = -1;
+
+    s1 = size(input,1)
+    s2 = size(input,2)
+    s3 = size(input,3)
+    d3 = size(output,3)
+
+    if (z_goodsize) then
+       call dvcfm1(buf,s3,isw,isign,scratch,ierror)
+    end if
+
+    isw = 0;
+
+    do j=1,s2
+       do i=1,s1
+          do k=1,s3
+             buf(k) = cmplx(input(i,j,k),0._mytype, kind=mytype)
+          end do
+          if (z_goodsize) then
+             call dvcfm1(buf,s3,isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,s3,isign,scratch)
+          end if
+          do k=1,d3
+             output(i,j,k) = buf(k)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine r2c_1m_z
+
+  ! c2r transform, multiple 1D FFTs in x direction
+  subroutine c2r_1m_x(input, output)
+
+    implicit none
+
+    complex(mytype), dimension(:,:,:), intent(IN)  ::  input
+    real(mytype), dimension(:,:,:), intent(OUT) :: output
+
+    integer :: i,j,k, d1,d2,d3
+    integer :: isw
+    integer :: ierror
+    integer :: isign
+
+    isw = 1;
+    isign = 1;
+
+    d1 = size(output,1)
+    d2 = size(output,2)
+    d3 = size(output,3)
+
+    if (x_goodsize) then
+       call dvcfm1(buf,d1,isw,isign,scratch,ierror)
+    end if
+
+    isw = 0;
+
+    do k=1,d3
+       do j=1,d2
+          ! The base FFT engine is c2c only, 
+          ! needing some pre- and post-processing for c2r
+          do i=1,d1/2+1
+             buf(i) = input(i,j,k)
+          end do
+          ! expanding to a full-size complex array
+          ! For odd N, the storage is:
+          !  1, 2, ...... N/2+1   integer division rounded down
+          !     N, ...... N/2+2   => a(i) is conjugate of a(N+2-i)
+          ! For even N, the storage is:
+          !  1, 2, ...... N/2  , N/2+1
+          !     N, ...... N/2+2  again a(i) conjugate of a(N+2-i)
+          do i=d1/2+2,d1
+             buf(i) =  conjg(buf(d1+2-i))
+          end do
+          if (x_goodsize) then
+             call dvcfm1(buf,d1,isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,d1,isign,scratch)
+          end if
+          do i=1,d1
+             ! simply drop imaginary part
+             output(i,j,k) = real(buf(i), kind=mytype)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine c2r_1m_x
+
+  ! c2r transform, multiple 1D FFTs in z direction
+  subroutine c2r_1m_z(input, output)
+
+    implicit none
+
+    complex(mytype), dimension(:,:,:), intent(IN)  ::  input
+    real(mytype), dimension(:,:,:), intent(OUT) :: output
+
+    integer :: i,j,k, d1,d2,d3
+    integer :: isw
+    integer :: ierror
+    integer :: isign
+
+    isw = 1;
+    isign = 1;
+
+    d1 = size(output,1)
+    d2 = size(output,2)
+    d3 = size(output,3)
+
+    if (z_goodsize) then
+       call dvcfm1(buf,d3,isw,isign,scratch,ierror)
+    end if
+
+    isw = 0;
+
+    do j=1,d2
+       do i=1,d1
+          do k=1,d3/2+1
+             buf(k) = input(i,j,k)
+          end do
+          do k=d3/2+2,d3
+             buf(k) =  conjg(buf(d3+2-k))
+          end do
+          if (z_goodsize) then
+             call dvcfm1(buf,d2,isw,isign,scratch,ierror)
+          else
+             call spcfft(buf,d3,isign,scratch)
+          end if
+          do k=1,d3
+             output(i,j,k) = real(buf(k), kind=mytype)
+          end do
+       end do
+    end do
+
+    return
+
+  end subroutine c2r_1m_z
+
+
+#include "fft_common_3d.f90"
+
+  
+end module decomp_2d_fft