nlpodyssey/spago

View on GitHub
mat/internal/f64/asm64/addconst_amd64.s

Summary

Maintainability
Test Coverage
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func AddConst(alpha float64, x []float64)
//
// AddConst adds alpha to every element of x in place:
//
//     for i := range x { x[i] += alpha }
//
// Arguments are read from the caller's frame through FP:
//     alpha+0(FP)   float64 constant to add
//     x_base+8(FP)  address of x[0]
//     x_len+16(FP)  len(x)
//
// Frame: $0-32 = no locals, 32 bytes of arguments
// (8 for alpha + 24 for the slice header), no results.
//
// Register use:
//     SI      = &x[0]
//     AX      = i, index of the next element to process
//     CX      = number of 8-element blocks still to process
//     BX      = number of tail elements still to process (len(x) % 8)
//     X4, X5  = { alpha, alpha }
//     X0-X3   = working data
TEXT ·AddConst(SB), NOSPLIT, $0-32
    MOVQ   x_base+8(FP), SI // SI = &x[0]
    MOVQ   x_len+16(FP), CX // CX = len(x)
    TESTQ  CX, CX           // if len(x) == 0 { return }
    JZ     ac_end
    MOVSD  alpha+0(FP), X4  // X4 = { alpha, <unspecified> }
    SHUFPD $0, X4, X4       // X4 = { alpha, alpha }
    MOVUPS X4, X5           // X5 = X4
    XORQ   AX, AX           // i = 0
    MOVQ   CX, BX
    ANDQ   $7, BX           // BX = len(x) % 8
    SHRQ   $3, CX           // CX = floor( len(x) / 8 )
    JZ     ac_tail          // no full block; len(x) != 0 so BX > 0 here

ac_loop: // Loop unrolled 8x   do {
    MOVUPS (SI)(AX*8), X0   // X0..X3 = x[i:i+8], two float64 per register
    MOVUPS 16(SI)(AX*8), X1 // (unaligned loads: x has no alignment guarantee)
    MOVUPS 32(SI)(AX*8), X2
    MOVUPS 48(SI)(AX*8), X3
    ADDPD  X4, X0           // X0..X3 += { alpha, alpha }
    ADDPD  X5, X1
    ADDPD  X4, X2
    ADDPD  X5, X3
    MOVUPS X0, (SI)(AX*8)   // x[i:i+8] = X0..X3
    MOVUPS X1, 16(SI)(AX*8)
    MOVUPS X2, 32(SI)(AX*8)
    MOVUPS X3, 48(SI)(AX*8)
    ADDQ   $8, AX           // i += 8
    DECQ   CX               // DECQ/JNZ instead of the slow legacy LOOP
    JNZ    ac_loop          // } while --CX > 0
    TESTQ  BX, BX           // if len(x) % 8 == 0 { return }
    JZ     ac_end

ac_tail: // Invariant: BX > 0 on entry.   do {
    MOVSD (SI)(AX*8), X0 // X0 = x[i]
    ADDSD X4, X0         // X0 += alpha (low lane of X4)
    MOVSD X0, (SI)(AX*8) // x[i] = X0
    INCQ  AX             // ++i
    DECQ  BX
    JNZ   ac_tail        // } while --BX > 0

ac_end:
    RET