// mat/internal/matfuncs/exp_amd64.s
// Code generated by command: go run exp_asm.go -out ../../matfuncs/exp_amd64.s -stubs ../../matfuncs/exp_amd64_stubs.go -pkg matfuncs. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
// Scalar float32 constants used by ExpAVX32. Each symbol is one 4-byte
// IEEE-754 single-precision bit pattern in read-only, pointer-free data;
// ExpAVX32 replicates it into every lane of a YMM register with VBROADCASTSS.
//
// LCPI0_0: 0x42b0c0a5, about 88.3762627. Upper bound of the input clamp.
DATA LCPI0_0<>+0(SB)/4, $0x42b0c0a5
GLOBL LCPI0_0<>(SB), RODATA|NOPTR, $4
// LCPI0_1: 0xc2b0c0a5, about -88.3762627. Lower bound of the input clamp.
DATA LCPI0_1<>+0(SB)/4, $0xc2b0c0a5
GLOBL LCPI0_1<>(SB), RODATA|NOPTR, $4
// LCPI0_2: 0x3fb8aa3b, about 1.44269504 (log2(e)). Scales x before rounding.
DATA LCPI0_2<>+0(SB)/4, $0x3fb8aa3b
GLOBL LCPI0_2<>(SB), RODATA|NOPTR, $4
// LCPI0_3: 0x3f000000 = 0.5. Added before the floor step, and reused as the
// final additive term of the polynomial.
DATA LCPI0_3<>+0(SB)/4, $0x3f000000
GLOBL LCPI0_3<>(SB), RODATA|NOPTR, $4
// LCPI0_4: 0x3f800000 = 1.0. Used as the value selected by the compare mask,
// as the "+1" term of the result, and (loaded with VPBROADCASTD as the integer
// 127<<23) as the exponent bias when building 2^n.
DATA LCPI0_4<>+0(SB)/4, $0x3f800000
GLOBL LCPI0_4<>(SB), RODATA|NOPTR, $4
// LCPI0_5: 0xbf318000 = -0.693359375. Multiplied by n and added to x.
DATA LCPI0_5<>+0(SB)/4, $0xbf318000
GLOBL LCPI0_5<>(SB), RODATA|NOPTR, $4
// LCPI0_6: 0x395e8083, about 2.12194440e-4. Multiplied by n and added to x.
// LCPI0_5 + LCPI0_6 is about -0.693147181, so the two steps together subtract
// approximately n*ln(2) from x.
DATA LCPI0_6<>+0(SB)/4, $0x395e8083
GLOBL LCPI0_6<>(SB), RODATA|NOPTR, $4
// LCPI0_7 .. LCPI0_11: polynomial coefficients, applied in this order by the
// multiply/add chain in ExpAVX32 (highest-degree coefficient first).
// LCPI0_7: 0x39506967, about 1.98756915e-4.
DATA LCPI0_7<>+0(SB)/4, $0x39506967
GLOBL LCPI0_7<>(SB), RODATA|NOPTR, $4
// LCPI0_8: 0x3ab743ce, about 1.39819995e-3.
DATA LCPI0_8<>+0(SB)/4, $0x3ab743ce
GLOBL LCPI0_8<>(SB), RODATA|NOPTR, $4
// LCPI0_9: 0x3c088908, about 8.33345191e-3.
DATA LCPI0_9<>+0(SB)/4, $0x3c088908
GLOBL LCPI0_9<>(SB), RODATA|NOPTR, $4
// LCPI0_10: 0x3d2aa9c1, about 4.16657959e-2.
DATA LCPI0_10<>+0(SB)/4, $0x3d2aa9c1
GLOBL LCPI0_10<>(SB), RODATA|NOPTR, $4
// LCPI0_11: 0x3e2aaaaa, about 1.66666655e-1.
DATA LCPI0_11<>+0(SB)/4, $0x3e2aaaaa
GLOBL LCPI0_11<>(SB), RODATA|NOPTR, $4
// func ExpAVX32(x []float32, y []float32)
// Requires: AVX, AVX2
//
// ExpAVX32 computes a single-precision approximation of e^x for the eight
// float32 values starting at x's base pointer and stores the eight results
// starting at y's base pointer. Exactly one 32-byte vector is processed per
// call. The slice lengths and capacities are never read, so the caller must
// guarantee that both slices hold at least eight elements.
//
// Arguments are taken from the stack frame (two 24-byte slice headers, 48 bytes
// in total); no local stack space is used.
//
// Method:
//   1. clamp x to [LCPI0_1, LCPI0_0]
//   2. n = floor(x*log2(e) + 0.5)
//   3. r = x + n*LCPI0_5 + n*LCPI0_6          (about x - n*ln 2)
//   4. p = ((((c7*r + c8)*r + c9)*r + c10)*r + c11)*r + 0.5
//   5. result = (p*r*r + r + 1) * 2^n, where 2^n is built from integer bits
//
// Register use: Y0 = x, then r, then the result; Y1 = n; Y2 = 0.5 (live until
// step 4 finishes); Y4 = 1.0 (live until the "+1"); Y3, Y5, Y6 = scratch.
TEXT ·ExpAVX32(SB), NOSPLIT, $0-48
MOVQ x_base+0(FP), AX
MOVQ y_base+24(FP), CX
// Step 1: load eight inputs (unaligned load) and clamp them.
// NOTE(review): per the Intel MINPS/MAXPS definition a NaN lane yields the
// second source operand, which with this operand order is the broadcast bound,
// so NaN inputs appear to be clamped rather than propagated. Confirm that this
// is acceptable to callers.
VMOVUPS (AX), Y0
VBROADCASTSS LCPI0_0<>+0(SB), Y1
VMINPS Y1, Y0, Y0
VBROADCASTSS LCPI0_1<>+0(SB), Y1
VMAXPS Y1, Y0, Y0
// Step 2: Y1 = x*log2(e) + 0.5, then round toward -infinity into Y3.
VBROADCASTSS LCPI0_2<>+0(SB), Y1
VMULPS Y1, Y0, Y1
VBROADCASTSS LCPI0_3<>+0(SB), Y2
VADDPS Y2, Y1, Y1
VROUNDPS $0x01, Y1, Y3
// Lanes where the unrounded value is less than the rounded one get 1.0
// subtracted from the rounded value; Y1 ends up holding n.
VCMPPS $0x01, Y3, Y1, Y1
VBROADCASTSS LCPI0_4<>+0(SB), Y4
VANDPS Y4, Y1, Y1
VSUBPS Y1, Y3, Y1
// Step 3: r = x + n*LCPI0_5 + n*LCPI0_6, done as two separate products.
VBROADCASTSS LCPI0_5<>+0(SB), Y3
VMULPS Y3, Y1, Y3
VBROADCASTSS LCPI0_6<>+0(SB), Y5
VMULPS Y5, Y1, Y5
VADDPS Y3, Y0, Y0
VADDPS Y5, Y0, Y0
// Y3 = r*r, needed once the polynomial has been evaluated.
VMULPS Y0, Y0, Y3
// Step 4: multiply/add chain over r with coefficients LCPI0_7..LCPI0_11.
VBROADCASTSS LCPI0_7<>+0(SB), Y5
VMULPS Y5, Y0, Y5
VBROADCASTSS LCPI0_8<>+0(SB), Y6
VADDPS Y6, Y5, Y5
VMULPS Y5, Y0, Y5
VBROADCASTSS LCPI0_9<>+0(SB), Y6
VADDPS Y6, Y5, Y5
VMULPS Y5, Y0, Y5
VBROADCASTSS LCPI0_10<>+0(SB), Y6
VADDPS Y6, Y5, Y5
VMULPS Y5, Y0, Y5
VBROADCASTSS LCPI0_11<>+0(SB), Y6
VADDPS Y6, Y5, Y5
VMULPS Y5, Y0, Y5
// Final additive term is the 0.5 still held in Y2; then Y0 = p*r*r + r + 1.
VADDPS Y2, Y5, Y2
VMULPS Y2, Y3, Y2
VADDPS Y2, Y0, Y0
VADDPS Y4, Y0, Y0
// Step 5: convert n to int32, shift it into the float32 exponent field
// (bit 23 upward) and add the bit pattern of 1.0 (127<<23) to form 2^n.
VCVTTPS2DQ Y1, Y1
VPSLLD $0x17, Y1, Y1
VPBROADCASTD LCPI0_4<>+0(SB), Y2
VPADDD Y2, Y1, Y1
// Scale by 2^n and store the eight results (unaligned store).
VMULPS Y1, Y0, Y0
VMOVUPS Y0, (CX)
// Clear the upper halves of the YMM registers so that legacy-SSE code executed
// after the return does not pay AVX-to-SSE state transition penalties.
VZEROUPPER
RET
// Packed float32 constants used by ExpSSE32. Each symbol is 16 bytes holding
// the same 4-byte IEEE-754 bit pattern in all four lanes, in read-only,
// pointer-free data.
// NOTE(review): ExpSSE32 reads these with MOVAPS and as direct memory operands
// of legacy-SSE instructions, which require 16-byte aligned addresses. No
// alignment is declared explicitly here, so this presumably relies on how the
// toolchain places 16-byte symbols. Confirm.
//
// SSE_LCPI0_0: 0x42b0c0a5, about 88.3762627. Upper bound of the input clamp.
DATA SSE_LCPI0_0<>+0(SB)/4, $0x42b0c0a5
DATA SSE_LCPI0_0<>+4(SB)/4, $0x42b0c0a5
DATA SSE_LCPI0_0<>+8(SB)/4, $0x42b0c0a5
DATA SSE_LCPI0_0<>+12(SB)/4, $0x42b0c0a5
GLOBL SSE_LCPI0_0<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_1: 0xc2b0c0a5, about -88.3762627. Lower bound of the input clamp.
DATA SSE_LCPI0_1<>+0(SB)/4, $0xc2b0c0a5
DATA SSE_LCPI0_1<>+4(SB)/4, $0xc2b0c0a5
DATA SSE_LCPI0_1<>+8(SB)/4, $0xc2b0c0a5
DATA SSE_LCPI0_1<>+12(SB)/4, $0xc2b0c0a5
GLOBL SSE_LCPI0_1<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_2: 0x3fb8aa3b, about 1.44269504 (log2(e)). Scales x before rounding.
DATA SSE_LCPI0_2<>+0(SB)/4, $0x3fb8aa3b
DATA SSE_LCPI0_2<>+4(SB)/4, $0x3fb8aa3b
DATA SSE_LCPI0_2<>+8(SB)/4, $0x3fb8aa3b
DATA SSE_LCPI0_2<>+12(SB)/4, $0x3fb8aa3b
GLOBL SSE_LCPI0_2<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_3: 0x3f000000 = 0.5. Added before the truncation step, and reused
// as the final additive term of the polynomial.
DATA SSE_LCPI0_3<>+0(SB)/4, $0x3f000000
DATA SSE_LCPI0_3<>+4(SB)/4, $0x3f000000
DATA SSE_LCPI0_3<>+8(SB)/4, $0x3f000000
DATA SSE_LCPI0_3<>+12(SB)/4, $0x3f000000
GLOBL SSE_LCPI0_3<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_4: 0x3f800000 = 1.0. Used as the value selected by the compare
// mask, as the "+1" term of the result, and (added with PADDD as the integer
// 127<<23) as the exponent bias when building 2^n.
DATA SSE_LCPI0_4<>+0(SB)/4, $0x3f800000
DATA SSE_LCPI0_4<>+4(SB)/4, $0x3f800000
DATA SSE_LCPI0_4<>+8(SB)/4, $0x3f800000
DATA SSE_LCPI0_4<>+12(SB)/4, $0x3f800000
GLOBL SSE_LCPI0_4<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_5: 0xbf318000 = -0.693359375. Multiplied by n and added to x.
DATA SSE_LCPI0_5<>+0(SB)/4, $0xbf318000
DATA SSE_LCPI0_5<>+4(SB)/4, $0xbf318000
DATA SSE_LCPI0_5<>+8(SB)/4, $0xbf318000
DATA SSE_LCPI0_5<>+12(SB)/4, $0xbf318000
GLOBL SSE_LCPI0_5<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_6: 0x395e8083, about 2.12194440e-4. Multiplied by n and added to x.
// SSE_LCPI0_5 + SSE_LCPI0_6 is about -0.693147181, so the two steps together
// subtract approximately n*ln(2) from x.
DATA SSE_LCPI0_6<>+0(SB)/4, $0x395e8083
DATA SSE_LCPI0_6<>+4(SB)/4, $0x395e8083
DATA SSE_LCPI0_6<>+8(SB)/4, $0x395e8083
DATA SSE_LCPI0_6<>+12(SB)/4, $0x395e8083
GLOBL SSE_LCPI0_6<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_7 .. SSE_LCPI0_11: polynomial coefficients, applied in this order
// by the multiply/add chain in ExpSSE32 (highest-degree coefficient first).
// SSE_LCPI0_7: 0x39506967, about 1.98756915e-4.
DATA SSE_LCPI0_7<>+0(SB)/4, $0x39506967
DATA SSE_LCPI0_7<>+4(SB)/4, $0x39506967
DATA SSE_LCPI0_7<>+8(SB)/4, $0x39506967
DATA SSE_LCPI0_7<>+12(SB)/4, $0x39506967
GLOBL SSE_LCPI0_7<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_8: 0x3ab743ce, about 1.39819995e-3.
DATA SSE_LCPI0_8<>+0(SB)/4, $0x3ab743ce
DATA SSE_LCPI0_8<>+4(SB)/4, $0x3ab743ce
DATA SSE_LCPI0_8<>+8(SB)/4, $0x3ab743ce
DATA SSE_LCPI0_8<>+12(SB)/4, $0x3ab743ce
GLOBL SSE_LCPI0_8<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_9: 0x3c088908, about 8.33345191e-3.
DATA SSE_LCPI0_9<>+0(SB)/4, $0x3c088908
DATA SSE_LCPI0_9<>+4(SB)/4, $0x3c088908
DATA SSE_LCPI0_9<>+8(SB)/4, $0x3c088908
DATA SSE_LCPI0_9<>+12(SB)/4, $0x3c088908
GLOBL SSE_LCPI0_9<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_10: 0x3d2aa9c1, about 4.16657959e-2.
DATA SSE_LCPI0_10<>+0(SB)/4, $0x3d2aa9c1
DATA SSE_LCPI0_10<>+4(SB)/4, $0x3d2aa9c1
DATA SSE_LCPI0_10<>+8(SB)/4, $0x3d2aa9c1
DATA SSE_LCPI0_10<>+12(SB)/4, $0x3d2aa9c1
GLOBL SSE_LCPI0_10<>(SB), RODATA|NOPTR, $16
// SSE_LCPI0_11: 0x3e2aaaaa, about 1.66666655e-1.
DATA SSE_LCPI0_11<>+0(SB)/4, $0x3e2aaaaa
DATA SSE_LCPI0_11<>+4(SB)/4, $0x3e2aaaaa
DATA SSE_LCPI0_11<>+8(SB)/4, $0x3e2aaaaa
DATA SSE_LCPI0_11<>+12(SB)/4, $0x3e2aaaaa
GLOBL SSE_LCPI0_11<>(SB), RODATA|NOPTR, $16
// func ExpSSE32(x []float32, y []float32)
// Requires: SSE, SSE2
//
// ExpSSE32 computes a single-precision approximation of e^x for the four
// float32 values starting at x's base pointer and stores the four results
// starting at y's base pointer. Exactly one 16-byte vector is processed per
// call. The slice lengths and capacities are never read, so the caller must
// guarantee that both slices hold at least four elements.
//
// Arguments are taken from the stack frame (two 24-byte slice headers, 48 bytes
// in total); no local stack space is used.
//
// Method:
//   1. clamp x to [SSE_LCPI0_1, SSE_LCPI0_0]
//   2. n = floor(x*log2(e) + 0.5), built from truncate + compare + subtract
//   3. r = x + n*SSE_LCPI0_5 + n*SSE_LCPI0_6   (about x - n*ln 2)
//   4. p = ((((c7*r + c8)*r + c9)*r + c10)*r + c11)*r + 0.5
//   5. result = (p*r*r + r + 1) * 2^n, where 2^n is built from integer bits
//
// Register use: X0 = x, then r, then the result; X1 = n; X2 = 0.5 (live until
// step 4 finishes); X3 = 1.0 (live until the "+1"); X4, X5 = scratch.
TEXT ·ExpSSE32(SB), NOSPLIT, $0-48
MOVQ x_base+0(FP), AX
MOVQ y_base+24(FP), CX
// Step 1: load four inputs (unaligned load) and clamp them.
// NOTE(review): per the Intel MINPS/MAXPS definition a NaN lane yields the
// second source operand, which with this operand order is the constant bound,
// so NaN inputs appear to be clamped rather than propagated. Confirm that this
// is acceptable to callers.
MOVUPS (AX), X0
MINPS SSE_LCPI0_0<>+0(SB), X0
MAXPS SSE_LCPI0_1<>+0(SB), X0
// Step 2: X4 = x*log2(e) + 0.5.
MOVAPS SSE_LCPI0_2<>+0(SB), X4
MULPS X0, X4
MOVAPS SSE_LCPI0_3<>+0(SB), X2
ADDPS X2, X4
// Truncate toward zero via an int32 round trip; X1 = truncated value as float.
CVTTPS2PL X4, X1
CVTPL2PS X1, X1
// X4 = all-ones mask in lanes where the untruncated value is less than the
// truncated one (truncation rounded up, i.e. the value was negative); turn the
// mask into 1.0/0.0 and subtract it so that X1 = floor of the value = n.
CMPPS X1, X4, $0x01
MOVAPS SSE_LCPI0_4<>+0(SB), X3
ANDPS X3, X4
SUBPS X4, X1
// Step 3: r = x + n*SSE_LCPI0_5 + n*SSE_LCPI0_6, done as two separate products.
MOVAPS SSE_LCPI0_5<>+0(SB), X4
MULPS X1, X4
MOVAPS SSE_LCPI0_6<>+0(SB), X5
MULPS X1, X5
ADDPS X4, X0
ADDPS X5, X0
// X4 = copy of r; it is squared a few instructions below, interleaved with the
// start of the polynomial chain.
MOVAPS X0, X4
// Step 4: multiply/add chain over r with coefficients SSE_LCPI0_7..SSE_LCPI0_11.
MOVAPS SSE_LCPI0_7<>+0(SB), X5
MULPS X0, X5
ADDPS SSE_LCPI0_8<>+0(SB), X5
// X4 = r*r.
MULPS X0, X4
MULPS X0, X5
ADDPS SSE_LCPI0_9<>+0(SB), X5
MULPS X0, X5
ADDPS SSE_LCPI0_10<>+0(SB), X5
MULPS X0, X5
ADDPS SSE_LCPI0_11<>+0(SB), X5
MULPS X0, X5
// Final additive term is the 0.5 still held in X2; then X0 = p*r*r + r + 1.
ADDPS X2, X5
MULPS X4, X5
ADDPS X5, X0
ADDPS X3, X0
// Step 5: convert n to int32, shift it into the float32 exponent field
// (bit 23 upward) and add the bit pattern of 1.0 (127<<23) to form 2^n.
CVTTPS2PL X1, X1
PSLLL $0x17, X1
PADDD SSE_LCPI0_4<>+0(SB), X1
// Scale by 2^n and store the four results (unaligned store).
MULPS X1, X0
MOVUPS X0, (CX)
RET