time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

dsdot.f (4306B)


      1 !>
      2 ! @license Apache-2.0
      3 !
      4 ! Copyright (c) 2020 The Stdlib Authors.
      5 !
      6 ! Licensed under the Apache License, Version 2.0 (the "License");
      7 ! you may not use this file except in compliance with the License.
      8 ! You may obtain a copy of the License at
      9 !
     10 !    http://www.apache.org/licenses/LICENSE-2.0
     11 !
     12 ! Unless required by applicable law or agreed to in writing, software
     13 ! distributed under the License is distributed on an "AS IS" BASIS,
     14 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 ! See the License for the specific language governing permissions and
     16 ! limitations under the License.
     17 !<
     18 
     19 !> Computes the dot product of two vectors with extended accumulation and result.
     20 !
     21 ! ## Notes
     22 !
     23 ! * Modified version of reference BLAS level1 routine (version 3.7.0). Updated to "free form" Fortran 95.
     24 !
     25 ! ## Authors
     26 !
     27 ! * Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
     28 ! * Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
     29 !
     30 ! ## History
     31 !
     32 ! | YYMMDD | DESCRIPTION |
     33 ! | ------ | ----------- |
     34 ! | 791001 | DATE WRITTEN |
     35 ! | 890831 | Modified array declarations. (WRB) |
     36 ! | 890831 | REVISION DATE from Version 3.2 |
     37 ! | 891214 | Prologue converted to Version 4.0 format. (BAB) |
     38 ! | 920310 | Corrected definition of LX in DESCRIPTION. (WRB) |
     39 ! | 920501 | Reformatted the REFERENCES section. (WRB) |
     40 ! | 070118 | Reformat to LAPACK style (JL) |
     41 !
     42 ! ## References
     43 !
     44 ! * Lawson, Charles L., Richard J. Hanson, Fred T. Krogh, and David Ronald Kincaid. 1979. "Algorithm 539: Basic Linear Algebra Subprograms for Fortran Usage \[F1\]." _ACM Transactions on Mathematical Software_ 5 (3). New York, NY, USA: Association for Computing Machinery: 324–25. doi:[10.1145/355841.355848](https://doi.org/10.1145/355841.355848).
     45 !
     46 ! ## License
     47 !
     48 ! From <http://netlib.org/blas/faq.html>:
     49 !
     50 ! > The reference BLAS is a freely-available software package. It is available from netlib via anonymous ftp and the World Wide Web. Thus, it can be included in commercial software packages (and has been). We only ask that proper credit be given to the authors.
     51 ! >
     52 ! > Like all software, it is copyrighted. It is not trademarked, but we do ask the following:
     53 ! >
     54 ! > * If you modify the source for these routines we ask that you change the name of the routine and comment the changes made to the original.
     55 ! >
     56 ! > * We will gladly answer any questions regarding the software. If a modification is done, however, it is the responsibility of the person who modified the routine to provide support.
     57 !
     58 ! @param {integer} N - number of values over which to compute the dot product
     59 ! @param {Array<real>} sx - first array
     60 ! @param {integer} strideX - `sx` stride length
     61 ! @param {Array<real>} sy - second array
     62 ! @param {integer} strideY - `sy` stride length
     63 ! @returns {double} the dot product of `sx` and `sy`
     64 !<
     65 double precision function dsdot( N, sx, strideX, sy, strideY )
     66   implicit none
     67   ! ..
     68   ! Scalar arguments:
     69   integer :: strideX, strideY, N
     70   ! ..
     71   ! Array arguments:
     72   real, intent(in) :: sx(*), sy(*)
     73   ! ..
     74   ! Local scalars:
     75   double precision :: dtemp
     76   integer :: mp1, ix, iy, i, m
     77   ! ..
     78   ! Intrinsic functions:
     79   intrinsic mod, dble
     80   ! ..
     81   dtemp = 0.0d0
     82   dsdot = 0.0d0
     83   ! ..
     84   if ( N <= 0 ) then
     85     return
     86   end if
     87   ! ..
     88   ! If both strides are equal to `1`, use unrolled loops...
     89   if ( strideX == 1 .AND. strideY == 1 ) then
     90     m = mod( N, 5 )
     91    ! ..
     92     ! If we have a remainder, do a clean-up loop...
     93     if ( m /= 0 ) then
     94       do i = 1, m
     95         dtemp = dtemp + ( dble( sx(i) ) * dble( sy(i) ) )
     96       end do
     97     end if
     98     if ( N < M ) then
     99       dsdot = dtemp
    100       return
    101     end if
    102     mp1 = m + 1
    103     do i = mp1, N, 5
    104       dtemp = dtemp + &
    105         ( dble( sx(i) ) * dble( sy(i) ) ) + &
    106         ( dble( sx(i+1) ) * dble( sy(i+1) ) ) + &
    107         ( dble( sx(i+2) ) * dble( sy(i+2) ) ) + &
    108         ( dble( sx(i+3) ) * dble( sy(i+3) ) ) + &
    109         ( dble( sx(i+4) ) * dble( sy(i+4) ) )
    110     end do
    111   else
    112     if ( strideX < 0 ) then
    113       ix = ((1-N)*strideX) + 1
    114     else
    115       ix = 1
    116     endif
    117     if ( strideY < 0 ) then
    118       iy = ((1-N)*strideY) + 1
    119     else
    120       iy = 1
    121     endif
    122     do i = 1, N
    123       dtemp = dtemp + ( dble( sx(ix) ) * dble( sy(iy) ) )
    124       ix = ix + strideX
    125       iy = iy + strideY
    126     end do
    127   endif
    128   dsdot = dtemp
    129   return
    130 end function dsdot