time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

saxpy.f (3414B)


      1 !>
      2 ! @license Apache-2.0
      3 !
      4 ! Copyright (c) 2018 The Stdlib Authors.
      5 !
      6 ! Licensed under the Apache License, Version 2.0 (the "License");
      7 ! you may not use this file except in compliance with the License.
      8 ! You may obtain a copy of the License at
      9 !
     10 !    http://www.apache.org/licenses/LICENSE-2.0
     11 !
     12 ! Unless required by applicable law or agreed to in writing, software
     13 ! distributed under the License is distributed on an "AS IS" BASIS,
     14 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 ! See the License for the specific language governing permissions and
     16 ! limitations under the License.
     17 !<
     18 
     19 !> Constant times a vector plus a vector.
     20 !
     21 ! ## Notes
     22 !
     23 ! * Modified version of reference BLAS level1 routine (version 3.7.0). Updated to "free form" Fortran 95.
     24 !
     25 ! ## Authors
     26 !
     27 ! * Univ. of Tennessee
     28 ! * Univ. of California Berkeley
     29 ! * Univ. of Colorado Denver
     30 ! * NAG Ltd.
     31 !
     32 ! ## History
     33 !
     34 ! * Jack Dongarra, linpack, 3/11/78.
     35 !
     36 !   - modified 12/3/93, array(1) declarations changed to array(*)
     37 !
     38 ! ## License
     39 !
     40 ! From <http://netlib.org/blas/faq.html>:
     41 !
     42 ! > The reference BLAS is a freely-available software package. It is available from netlib via anonymous ftp and the World Wide Web. Thus, it can be included in commercial software packages (and has been). We only ask that proper credit be given to the authors.
     43 ! >
     44 ! > Like all software, it is copyrighted. It is not trademarked, but we do ask the following:
     45 ! >
     46 ! > * If you modify the source for these routines we ask that you change the name of the routine and comment the changes made to the original.
     47 ! >
     48 ! > * We will gladly answer any questions regarding the software. If a modification is done, however, it is the responsibility of the person who modified the routine to provide support.
     49 !
     50 ! @param {integer} N - number of values
     51 ! @param {real} alpha - scalar
     52 ! @param {Array<real>} sx - input array
     53 ! @param {integer} strideX - `sx` stride length
     54 ! @param {Array<real>} sy - destination array
     55 ! @param {integer} strideY - `sy` stride length
     56 !<
     57 subroutine saxpy( N, alpha, sx, strideX, sy, strideY )
     58   implicit none
     59   ! ..
     60   ! Scalar arguments:
     61   real :: alpha
     62   integer :: strideX, strideY, N
     63   ! ..
     64   ! Array arguments:
     65   real :: sx(*), sy(*)
     66   ! ..
     67   ! Local scalars:
     68   integer :: mp1, ix, iy, i, m
     69   ! ..
     70   ! Intrinsic functions:
     71   intrinsic mod
     72   ! ..
     73   if ( N <= 0 ) then
     74     return
     75   end if
     76   ! ..
     77   ! If `alpha` is `0`, then `y` is unchanged...
     78   if ( alpha == 0.0 ) then
     79     return
     80   end if
     81   ! ..
     82   ! If both strides are equal to `1`, use unrolled loops...
     83   if ( strideX == 1 .AND. strideY == 1 ) then
     84     m = mod( N, 4 )
     85     ! ..
     86     ! If we have a remainder, do a clean-up loop...
     87     if ( m /= 0 ) then
     88       do i = 1, m
     89         sy( i ) = sy( i ) + alpha*sx( i )
     90       end do
     91       if ( N < 4 ) then
     92         return
     93       end if
     94     end if
     95     mp1 = m + 1
     96     do i = mp1, N, 4
     97       sy( i ) = sy( i ) + alpha*sx( i )
     98       sy( i+1 ) = sy( i+1 ) + alpha*sx( i+1 )
     99       sy( i+2 ) = sy( i+2 ) + alpha*sx( i+2 )
    100       sy( i+3 ) = sy( i+3 ) + alpha*sx( i+3 )
    101     end do
    102   else
    103     if ( strideX < 0 ) then
    104       ix = ((1-N)*strideX) + 1
    105     else
    106       ix = 1
    107     end if
    108     if ( strideY < 0 ) then
    109       iy = ((1-N)*strideY) + 1
    110     else
    111       iy = 1
    112     end if
    113     do i = 1, N
    114       sy( iy ) = sy( iy ) + alpha*sx( ix )
    115       ix = ix + strideX
    116       iy = iy + strideY
    117     end do
    118   end if
    119   return
    120 end subroutine saxpy