time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

sdsdot.f (4597B)


      1 !>
      2 ! @license Apache-2.0
      3 !
      4 ! Copyright (c) 2020 The Stdlib Authors.
      5 !
      6 ! Licensed under the Apache License, Version 2.0 (the "License");
      7 ! you may not use this file except in compliance with the License.
      8 ! You may obtain a copy of the License at
      9 !
     10 !    http://www.apache.org/licenses/LICENSE-2.0
     11 !
     12 ! Unless required by applicable law or agreed to in writing, software
     13 ! distributed under the License is distributed on an "AS IS" BASIS,
     14 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 ! See the License for the specific language governing permissions and
     16 ! limitations under the License.
     17 !<
     18 
     19 !> Computes the dot product of single-precision floating-point two vectors with extended accumulation.
     20 !
     21 ! ## Notes
     22 !
     23 ! * Modified version of reference BLAS level1 routine (version 3.7.0). Updated to "free form" Fortran 95.
     24 !
     25 ! ## Authors
     26 !
     27 ! * Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
     28 ! * Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
     29 ! * Univ. of Tennessee
     30 ! * Univ. of California Berkeley
     31 ! * Univ. of Colorado Denver
     32 ! * NAG Ltd.
     33 !
     34 ! ## History
     35 !
     36 ! | YYMMDD | DESCRIPTION |
     37 ! | ------ | ----------- |
     38 ! | 791001 | DATE WRITTEN |
     39 ! | 890531 | Changed all specific intrinsics to generic. (WRB) |
     40 ! | 890831 | Modified array declarations. (WRB) |
     41 ! | 890831 | REVISION DATE from Version 3.2 |
     42 ! | 891214 | Prologue converted to Version 4.0 format. (BAB) |
     43 ! | 920310 | Corrected definition of LX in DESCRIPTION. (WRB) |
     44 ! | 920501 | Reformatted the REFERENCES section. (WRB) |
     45 ! | 070118 | Reformat to LAPACK style (JL) |
     46 !
     47 ! ## References
     48 !
     49 ! * Lawson, Charles L., Richard J. Hanson, Fred T. Krogh, and David Ronald Kincaid. 1979. "Algorithm 539: Basic Linear Algebra Subprograms for Fortran Usage \[F1\]." _ACM Transactions on Mathematical Software_ 5 (3). New York, NY, USA: Association for Computing Machinery: 324–25. doi:[10.1145/355841.355848](https://doi.org/10.1145/355841.355848).
     50 !
     51 ! ## License
     52 !
     53 ! From <http://netlib.org/blas/faq.html>:
     54 !
     55 ! > The reference BLAS is a freely-available software package. It is available from netlib via anonymous ftp and the World Wide Web. Thus, it can be included in commercial software packages (and has been). We only ask that proper credit be given to the authors.
     56 ! >
     57 ! > Like all software, it is copyrighted. It is not trademarked, but we do ask the following:
     58 ! >
     59 ! > * If you modify the source for these routines we ask that you change the name of the routine and comment the changes made to the original.
     60 ! >
     61 ! > * We will gladly answer any questions regarding the software. If a modification is done, however, it is the responsibility of the person who modified the routine to provide support.
     62 !
     63 ! @param {integer} N - number of values over which to compute the dot product
     64 ! @param {real} sb - scalar constant added to the dot product
     65 ! @param {Array<real>} sx - first array
     66 ! @param {integer} strideX - `sx` stride length
     67 ! @param {Array<real>} sy - second array
     68 ! @param {integer} strideY - `sy` stride length
     69 ! @returns {real} the dot product of `sx` and `sy`
     70 !<
     71 real function sdsdot( N, sb, sx, strideX, sy, strideY )
     72   implicit none
     73   ! ..
     74   ! Scalar arguments:
     75   integer :: strideX, strideY, N
     76   real :: sb
     77   ! ..
     78   ! Array arguments:
     79   real, intent(in) :: sx(*), sy(*)
     80   ! ..
     81   ! Local scalars:
     82   double precision :: dtemp
     83   integer :: mp1, ix, iy, i, m
     84   ! ..
     85   ! Intrinsic functions:
     86   intrinsic mod, real, dble
     87   ! ..
     88   dtemp = dble( sb )
     89   ! ..
     90   if ( N <= 0 ) then
     91     sdsdot = real( dtemp )
     92     return
     93   end if
     94   ! ..
     95   ! If both strides are equal to `1`, use unrolled loops...
     96   if ( strideX == 1 .AND. strideY == 1 ) then
     97     m = mod( N, 5 )
     98    ! ..
     99     ! If we have a remainder, do a clean-up loop...
    100     if ( m /= 0 ) then
    101       do i = 1, m
    102         dtemp = dtemp + ( dble( sx(i) ) * dble( sy(i) ) )
    103       end do
    104     end if
    105     if ( N < M ) then
    106       sdsdot = real( dtemp )
    107       return
    108     end if
    109     mp1 = m + 1
    110     do i = mp1, N, 5
    111       dtemp = dtemp + &
    112         ( dble( sx(i) ) * dble( sy(i) ) ) + &
    113         ( dble( sx(i+1) ) * dble( sy(i+1) ) ) + &
    114         ( dble( sx(i+2) ) * dble( sy(i+2) ) ) + &
    115         ( dble( sx(i+3) ) * dble( sy(i+3) ) ) + &
    116         ( dble( sx(i+4) ) * dble( sy(i+4) ) )
    117     end do
    118   else
    119     if ( strideX < 0 ) then
    120       ix = ((1-N)*strideX) + 1
    121     else
    122       ix = 1
    123     endif
    124     if ( strideY < 0 ) then
    125       iy = ((1-N)*strideY) + 1
    126     else
    127       iy = 1
    128     endif
    129     do i = 1, N
    130       dtemp = dtemp + ( dble( sx(ix) ) * dble( sy(iy) ) )
    131       ix = ix + strideX
    132       iy = iy + strideY
    133     end do
    134   endif
    135   sdsdot = real( dtemp )
    136   return
    137 end function sdsdot