# deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s
#
# NOTE(review): the lines below ("Summary", "Maintainability", "Test Coverage")
# were stray report text accidentally embedded in the assembly stream; they are
# preserved here as comments so the file assembles.
#
# Summary
# Maintainability
# Test Coverage
.text



#------------------------------------------------------------------------------
# MULADD_128x512 — private helper (custom register contract, NOT the SysV ABI).
#
# Multiply-accumulates the 512-bit operand X at (%rsi) by two consecutive
# 64-bit multiplier words, retiring the two fully-resolved low limbs to memory:
#   pass 1: acc += X * M0, where M0 is pre-loaded in %rbp by the caller;
#           the finished low limb (%r8) is stored to 0(%rcx).
#   pass 2: acc += X * M1, where M1 is loaded here from 8(%rdi);
#           the finished next limb (%r9) is stored to 8(%rcx).
#
# In:   %rsi = pointer to 8-qword operand X (little-endian limbs)
#       %rdi = pointer such that 8(%rdi) holds the second multiplier word
#       %rbp = first multiplier word (pre-loaded by caller)
#       %rcx = output pointer for the two retired low limbs
#       %r8..%r15 = 512-bit running accumulator, %r8 least significant
# Out:  accumulator rotated two limbs (new top carries land in %r8 then %r9);
#       %rax, %rbx, %rdx, %rbp clobbered.
#
# Each 7-instruction group below is one limb of the classic mulq carry chain:
# multiply, fold the product low half into the limb, fold the incoming carry
# (%rbx), and pass the new carry out through %rdx -> %rbx.
#------------------------------------------------------------------------------
.p2align    4
MULADD_128x512:
# ---- pass 1: acc += X * M0 (M0 already in %rbp) ----
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    movq    %r8,0(%rcx)                 # limb 0 is final: store it
    movq    %rdx,%rbx                   # carry into next limb

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%r8                    # top carry becomes new high limb (rotation)
# ---- pass 2: acc += X * M1 (M1 = 8(%rdi)) ----
    movq    8(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    movq    %r9,8(%rcx)                 # limb 1 is final: store it
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%r9                    # top carry -> %r9 (accumulator now rotated by 2)
    .byte    0xf3,0xc3                  # rep ret (two-byte return, AMD branch-predictor idiom)


#------------------------------------------------------------------------------
# mont_reduce — Montgomery reduction of the 1024-bit product held in the
# stack frame (private helper; custom register/stack contract).
#
# Operates entirely on the caller's frame:
#   520(%rsp)  = 1024-bit product T to reduce (low limbs at 520, high at 640)
#   32(%rsp)   = pointer into the precomputed modulus/constants area
#                (modulus limbs are read at stride-64 offsets from it below,
#                 and the Montgomery constant block lives 576 bytes in —
#                 presumably m' / folded modulus data; layout set up by
#                 _mod_exp_512)
#   144(%rsp)  = pointer to where the reduced 512-bit result is stored
#   384(%rsp)  = scratch slot for the running top carry
#   %r10..%r9  = on entry, high limbs of T (register contract with callers)
#
# Phases:
#   1. Four unrolled multiply-accumulate passes (multiplier words at
#      96/104/112/120(%rcx)) folding the low half of T upward.
#   2. Two MULADD_128x512 calls to finish the folding.
#   3. A small 2-limb cross multiply for the final correction term.
#   4. Branch-free final subtraction: the modulus is masked with %rsi
#      (all-ones / zero from sbb) and subtracted, keeping the code
#      constant-time with respect to whether T >= modulus.
#
# Clobbers essentially all GPRs; returns with rep ret.
#------------------------------------------------------------------------------
.p2align    4
mont_reduce:
    leaq    192(%rsp),%rdi              # destination for folded limbs
    movq    32(%rsp),%rsi
    addq    $576,%rsi                   # %rsi -> constant block (NOTE(review): presumed m'-related; set up by entry code)
    leaq    520(%rsp),%rcx              # %rcx -> product T

# ---- pass 1: multiplier word 96(%rcx); accumulator loaded from T itself ----
    movq    96(%rcx),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    movq    (%rcx),%r8
    addq    %rax,%r8
    adcq    $0,%rdx
    movq    %r8,0(%rdi)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    movq    8(%rcx),%r9
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    movq    16(%rcx),%r10
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    movq    24(%rcx),%r11
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    movq    32(%rcx),%r12
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    movq    40(%rcx),%r13
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    movq    48(%rcx),%r14
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    movq    56(%rcx),%r15
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%r8
# ---- pass 2: multiplier word 104(%rcx) ----
    movq    104(%rcx),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    movq    %r9,8(%rdi)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%r9
# ---- pass 3: multiplier word 112(%rcx) ----
    movq    112(%rcx),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    movq    %r10,16(%rdi)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%r10
# ---- pass 4: multiplier word 120(%rcx) ----
    movq    120(%rcx),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %r11,24(%rdi)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%r11
# ---- fold in the next four product limbs, capture top carry in %rax ----
    xorq    %rax,%rax

    addq    64(%rcx),%r8
    adcq    72(%rcx),%r9
    adcq    80(%rcx),%r10
    adcq    88(%rcx),%r11
    adcq    $0,%rax



    # retire four limbs; %r10 is kept live in %rbp — it becomes the
    # pre-loaded multiplier word for the MULADD_128x512 call below
    movq    %r8,64(%rdi)
    movq    %r9,72(%rdi)
    movq    %r10,%rbp
    movq    %r11,88(%rdi)

    movq    %rax,384(%rsp)              # stash running top carry

    movq    0(%rdi),%r8
    movq    8(%rdi),%r9
    movq    16(%rdi),%r10
    movq    24(%rdi),%r11               # reload accumulator with folded low limbs








    addq    $80,%rdi                    # advance output (so -16/-8(%rdi) index the stores above)

    addq    $64,%rsi
    leaq    296(%rsp),%rcx

    call    MULADD_128x512              # acc += X * {%rbp, 8(%rdi)}; retires 2 limbs at (%rcx)


    movq    384(%rsp),%rax              # resume the saved top carry


    addq    -16(%rdi),%r8
    adcq    -8(%rdi),%r9
    movq    %r8,64(%rcx)
    movq    %r9,72(%rcx)

    adcq    %rax,%rax                   # fold carry-out into the saved carry bit
    movq    %rax,384(%rsp)

    leaq    192(%rsp),%rdi
    addq    $64,%rsi




# ---- 2x2-limb cross multiply for the correction term ----
    movq    (%rsi),%r8
    movq    8(%rsi),%rbx

    movq    (%rcx),%rax
    mulq    %r8
    movq    %rax,%rbp                   # low limb of product -> multiplier for next call
    movq    %rdx,%r9

    movq    8(%rcx),%rax
    mulq    %r8
    addq    %rax,%r9

    movq    (%rcx),%rax
    mulq    %rbx
    addq    %rax,%r9

    movq    %r9,8(%rdi)


    subq    $192,%rsi                   # rewind operand pointer

    movq    (%rcx),%r8
    movq    8(%rcx),%r9

    call    MULADD_128x512              # final folding pass




# ---- assemble result and do the branch-free conditional subtract ----
    movq    0(%rsi),%rax
    movq    8(%rsi),%rbx
    movq    16(%rsi),%rdi
    movq    24(%rsi),%rdx               # low modulus-adjustment limbs (masked below)


    movq    384(%rsp),%rbp              # saved top carry

    addq    64(%rcx),%r8
    adcq    72(%rcx),%r9


    adcq    %rbp,%rbp                   # carry bit -> %rbp (0/1/2 encoded)



    shlq    $3,%rbp                     # scale to a qword index into the constants table
    movq    32(%rsp),%rcx
    addq    %rcx,%rbp                   # %rbp -> carry-selected row of precomputed data


    xorq    %rsi,%rsi                   # %rsi will become the 0 / ~0 subtract mask

    addq    0(%rbp),%r10                # table rows are laid out with stride 64
    adcq    64(%rbp),%r11
    adcq    128(%rbp),%r12
    adcq    192(%rbp),%r13
    adcq    256(%rbp),%r14
    adcq    320(%rbp),%r15
    adcq    384(%rbp),%r8
    adcq    448(%rbp),%r9



    sbbq    $0,%rsi                     # %rsi = CF ? -1 : 0 (constant-time select mask)


    andq    %rsi,%rax                   # mask the adjustment limbs: subtract either
    andq    %rsi,%rbx                   # the value or zero, with no data-dependent branch
    andq    %rsi,%rdi
    andq    %rsi,%rdx

    movq    $1,%rbp
    subq    %rax,%r10
    sbbq    %rbx,%r11
    sbbq    %rdi,%r12
    sbbq    %rdx,%r13




    sbbq    $0,%rbp                     # propagate the borrow across the two halves via %rbp



    addq    $512,%rcx
    movq    32(%rcx),%rax               # high adjustment limbs
    movq    40(%rcx),%rbx
    movq    48(%rcx),%rdi
    movq    56(%rcx),%rdx



    andq    %rsi,%rax                   # same mask for the high half
    andq    %rsi,%rbx
    andq    %rsi,%rdi
    andq    %rsi,%rdx



    subq    $1,%rbp                     # re-materialize the saved borrow

    sbbq    %rax,%r14
    sbbq    %rbx,%r15
    sbbq    %rdi,%r8
    sbbq    %rdx,%r9



# ---- store the reduced 512-bit result ----
    movq    144(%rsp),%rsi
    movq    %r10,0(%rsi)
    movq    %r11,8(%rsi)
    movq    %r12,16(%rsi)
    movq    %r13,24(%rsi)
    movq    %r14,32(%rsi)
    movq    %r15,40(%rsi)
    movq    %r8,48(%rsi)
    movq    %r9,56(%rsi)

    .byte    0xf3,0xc3                  # rep ret


#------------------------------------------------------------------------------
# mont_mul_a3b — 512x512-bit schoolbook multiply, then tail-jump into
# mont_reduce (private helper; custom register/stack contract).
#
# In:   %rdi        = pointer to multiplicand B (8 qwords, read at 0..56(%rdi))
#       %rsi        = pointer to multiplier A in memory (read from word 2 on)
#       %r10..%r9   = A's 8 limbs pre-loaded in registers for the first pass
#                     (order: r10,r11,r12,r13,r14,r15,r8,r9 = A[0..7])
#       520(%rsp).. = 1024-bit product buffer, filled here and consumed by
#                     mont_reduce
# Out:  never returns to its caller directly — ends with `jmp mont_reduce`,
#       which performs the Montgomery reduction and does the real `ret`.
#
# Structure: 8 unrolled passes, one per word of B. Pass k retires the final
# low limb of the product to (520+8k)(%rsp) and rotates the 8-limb register
# accumulator. The remaining high limbs are flushed at the end.
#------------------------------------------------------------------------------
.p2align    4
mont_mul_a3b:

# ---- pass 0: B[0]; A's limbs come straight from registers ----


    movq    0(%rdi),%rbp

    movq    %r10,%rax
    mulq    %rbp
    movq    %rax,520(%rsp)              # product limb 0 (final)
    movq    %rdx,%r10
    movq    %r11,%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    movq    %rdx,%r11
    movq    %r12,%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %rdx,%r12
    movq    %r13,%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    movq    %rdx,%r13
    movq    %r14,%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    movq    %rdx,%r14
    movq    %r15,%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    movq    %rdx,%r15
    movq    %r8,%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    movq    %rdx,%r8
    movq    %r9,%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    movq    %rdx,%r9
# ---- pass 1: B[1]; A re-read from memory at (%rsi) from here on ----
    movq    8(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    movq    %r10,528(%rsp)              # product limb 1 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%r10
# ---- pass 2: B[2] ----
    movq    16(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %r11,536(%rsp)              # product limb 2 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%r11
# ---- pass 3: B[3] ----
    movq    24(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    movq    %r12,544(%rsp)              # product limb 3 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%r12
# ---- pass 4: B[4] ----
    movq    32(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    movq    %r13,552(%rsp)              # product limb 4 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%r13
# ---- pass 5: B[5] ----
    movq    40(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    movq    %r14,560(%rsp)              # product limb 5 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%r14
# ---- pass 6: B[6] ----
    movq    48(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    movq    %r15,568(%rsp)              # product limb 6 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    addq    %rbx,%r8
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%r15
# ---- pass 7: B[7] ----
    movq    56(%rdi),%rbp
    movq    0(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r8
    adcq    $0,%rdx
    movq    %r8,576(%rsp)               # product limb 7 (final)
    movq    %rdx,%rbx

    movq    8(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r9
    adcq    $0,%rdx
    addq    %rbx,%r9
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    16(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r10
    adcq    $0,%rdx
    addq    %rbx,%r10
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    24(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %rbx,%r11
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    32(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %rbx,%r12
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    40(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %rbx,%r13
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    48(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %rbx,%r14
    adcq    $0,%rdx
    movq    %rdx,%rbx

    movq    56(%rsi),%rax
    mulq    %rbp
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %rbx,%r15
    adcq    $0,%rdx
    movq    %rdx,%r8
# ---- flush the 8 high product limbs (584..640(%rsp)) ----
    movq    %r9,584(%rsp)
    movq    %r10,592(%rsp)
    movq    %r11,600(%rsp)
    movq    %r12,608(%rsp)
    movq    %r13,616(%rsp)
    movq    %r14,624(%rsp)
    movq    %r15,632(%rsp)
    movq    %r8,640(%rsp)




# tail-call: reduction performs the return on our behalf
    jmp    mont_reduce




#------------------------------------------------------------------------------
# sqr_reduce — 512-bit squaring, then tail-jump into mont_reduce
# (private helper; custom register/stack contract).
#
# In:   16(%rsp)    = pointer to the operand A (8 qwords); its low words are
#                     also pre-loaded in %r10..%r9 by the caller
#       520(%rsp).. = 1024-bit product buffer, filled here and consumed by
#                     mont_reduce
# Out:  never returns directly — ends with `jmp mont_reduce`.
#
# Uses the standard squaring decomposition: compute all cross products
# A[i]*A[j] (i<j) once, double the whole partial sum with an adc chain,
# then add in the diagonal squares A[i]^2. Roughly half the mulq count of
# a full schoolbook multiply.
#------------------------------------------------------------------------------
.p2align    4
sqr_reduce:
    movq    16(%rsp),%rcx               # %rcx -> operand A in memory


# ---- cross products with A[0] (in %rbx); A[1..7] from registers ----

    movq    %r10,%rbx

    movq    %r11,%rax
    mulq    %rbx
    movq    %rax,528(%rsp)              # cross-product limb 1
    movq    %rdx,%r10
    movq    %r12,%rax
    mulq    %rbx
    addq    %rax,%r10
    adcq    $0,%rdx
    movq    %rdx,%r11
    movq    %r13,%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %rdx,%r12
    movq    %r14,%rax
    mulq    %rbx
    addq    %rax,%r12
    adcq    $0,%rdx
    movq    %rdx,%r13
    movq    %r15,%rax
    mulq    %rbx
    addq    %rax,%r13
    adcq    $0,%rdx
    movq    %rdx,%r14
    movq    %r8,%rax
    mulq    %rbx
    addq    %rax,%r14
    adcq    $0,%rdx
    movq    %rdx,%r15
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    movq    %rdx,%rsi                   # running top limb lives in %rsi for a while

    movq    %r10,536(%rsp)              # cross-product limb 2


# ---- cross products with A[1] ----



    movq    8(%rcx),%rbx

    movq    16(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %r11,544(%rsp)              # cross-product limb 3

    movq    %rdx,%r10
    movq    24(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %r10,%r12
    adcq    $0,%rdx
    movq    %r12,552(%rsp)              # cross-product limb 4

    movq    %rdx,%r10
    movq    32(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r13
    adcq    $0,%rdx
    addq    %r10,%r13
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    40(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %r10,%r14
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    %r8,%rax                    # A[6], A[7] still live in %r8/%r9
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %r10,%r15
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%rsi
    adcq    $0,%rdx
    addq    %r10,%rsi
    adcq    $0,%rdx

    movq    %rdx,%r11


# ---- cross products with A[2] ----

    movq    16(%rcx),%rbx

    movq    24(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r13
    adcq    $0,%rdx
    movq    %r13,560(%rsp)              # cross-product limb 5

    movq    %rdx,%r10
    movq    32(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r14
    adcq    $0,%rdx
    addq    %r10,%r14
    adcq    $0,%rdx
    movq    %r14,568(%rsp)              # cross-product limb 6

    movq    %rdx,%r10
    movq    40(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %r10,%r15
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    %r8,%rax
    mulq    %rbx
    addq    %rax,%rsi
    adcq    $0,%rdx
    addq    %r10,%rsi
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %r10,%r11
    adcq    $0,%rdx

    movq    %rdx,%r12


# ---- cross products with A[3] ----



    movq    24(%rcx),%rbx

    movq    32(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    movq    %r15,576(%rsp)              # cross-product limb 7

    movq    %rdx,%r10
    movq    40(%rcx),%rax
    mulq    %rbx
    addq    %rax,%rsi
    adcq    $0,%rdx
    addq    %r10,%rsi
    adcq    $0,%rdx
    movq    %rsi,584(%rsp)              # cross-product limb 8

    movq    %rdx,%r10
    movq    %r8,%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %r10,%r11
    adcq    $0,%rdx

    movq    %rdx,%r10
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %r10,%r12
    adcq    $0,%rdx

    movq    %rdx,%r15


# ---- cross products with A[4] ----

    movq    32(%rcx),%rbx

    movq    40(%rcx),%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    movq    %r11,592(%rsp)              # cross-product limb 9

    movq    %rdx,%r10
    movq    %r8,%rax
    mulq    %rbx
    addq    %rax,%r12
    adcq    $0,%rdx
    addq    %r10,%r12
    adcq    $0,%rdx
    movq    %r12,600(%rsp)              # cross-product limb 10

    movq    %rdx,%r10
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    addq    %r10,%r15
    adcq    $0,%rdx

    movq    %rdx,%r11


# ---- cross products with A[5] ----

    movq    40(%rcx),%rbx

    movq    %r8,%rax
    mulq    %rbx
    addq    %rax,%r15
    adcq    $0,%rdx
    movq    %r15,608(%rsp)              # cross-product limb 11

    movq    %rdx,%r10
    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r11
    adcq    $0,%rdx
    addq    %r10,%r11
    adcq    $0,%rdx
    movq    %r11,616(%rsp)              # cross-product limb 12

    movq    %rdx,%r12


# ---- final cross product A[6]*A[7] ----

    movq    %r8,%rbx

    movq    %r9,%rax
    mulq    %rbx
    addq    %rax,%r12
    adcq    $0,%rdx
    movq    %r12,624(%rsp)              # cross-product limb 13

    movq    %rdx,632(%rsp)              # cross-product limb 14

# ---- double the low-half cross products and add diagonal squares ----
    movq    528(%rsp),%r10
    movq    536(%rsp),%r11
    movq    544(%rsp),%r12
    movq    552(%rsp),%r13
    movq    560(%rsp),%r14
    movq    568(%rsp),%r15

    movq    24(%rcx),%rax
    mulq    %rax                        # A[3]^2, computed early; halves kept in %rdi/%r8
    movq    %rax,%rdi
    movq    %rdx,%r8

    addq    %r10,%r10                   # 2x the cross-product sum (carry chain)
    adcq    %r11,%r11
    adcq    %r12,%r12
    adcq    %r13,%r13
    adcq    %r14,%r14
    adcq    %r15,%r15
    adcq    $0,%r8                      # doubling carry folds into high half of A[3]^2

    movq    0(%rcx),%rax
    mulq    %rax                        # A[0]^2
    movq    %rax,520(%rsp)              # product limb 0 (final)
    movq    %rdx,%rbx

    movq    8(%rcx),%rax
    mulq    %rax                        # A[1]^2

    addq    %rbx,%r10
    adcq    %rax,%r11
    adcq    $0,%rdx

    movq    %rdx,%rbx
    movq    %r10,528(%rsp)
    movq    %r11,536(%rsp)

    movq    16(%rcx),%rax
    mulq    %rax                        # A[2]^2

    addq    %rbx,%r12
    adcq    %rax,%r13
    adcq    $0,%rdx

    movq    %rdx,%rbx

    movq    %r12,544(%rsp)
    movq    %r13,552(%rsp)

    xorq    %rbp,%rbp                   # %rbp carries the half-way overflow into the high half
    addq    %rbx,%r14
    adcq    %rdi,%r15                   # fold in low half of A[3]^2
    adcq    $0,%rbp

    movq    %r14,560(%rsp)
    movq    %r15,568(%rsp)


# ---- same treatment for the high half ----


    movq    576(%rsp),%r10
    movq    584(%rsp),%r11
    movq    592(%rsp),%r12
    movq    600(%rsp),%r13
    movq    608(%rsp),%r14
    movq    616(%rsp),%r15
    movq    624(%rsp),%rdi
    movq    632(%rsp),%rsi

    movq    %r9,%rax
    mulq    %rax                        # A[7]^2 (A[7] still live in %r9)
    movq    %rax,%r9
    movq    %rdx,%rbx

    addq    %r10,%r10                   # double the high-half cross products
    adcq    %r11,%r11
    adcq    %r12,%r12
    adcq    %r13,%r13
    adcq    %r14,%r14
    adcq    %r15,%r15
    adcq    %rdi,%rdi
    adcq    %rsi,%rsi
    adcq    $0,%rbx

    addq    %rbp,%r10                   # half-way carry from the low half

    movq    32(%rcx),%rax
    mulq    %rax                        # A[4]^2

    addq    %r8,%r10                    # high half of A[3]^2
    adcq    %rax,%r11
    adcq    $0,%rdx

    movq    %rdx,%rbp

    movq    %r10,576(%rsp)
    movq    %r11,584(%rsp)

    movq    40(%rcx),%rax
    mulq    %rax                        # A[5]^2

    addq    %rbp,%r12
    adcq    %rax,%r13
    adcq    $0,%rdx

    movq    %rdx,%rbp

    movq    %r12,592(%rsp)
    movq    %r13,600(%rsp)

    movq    48(%rcx),%rax
    mulq    %rax                        # A[6]^2

    addq    %rbp,%r14
    adcq    %rax,%r15
    adcq    $0,%rdx

    movq    %r14,608(%rsp)
    movq    %r15,616(%rsp)

    addq    %rdx,%rdi
    adcq    %r9,%rsi                    # low half of A[7]^2
    adcq    $0,%rbx

    movq    %rdi,624(%rsp)
    movq    %rsi,632(%rsp)
    movq    %rbx,640(%rsp)              # product limb 15 (final)

# tail-call: reduction performs the return on our behalf
    jmp    mont_reduce



#------------------------------------------------------------------------------
# void mod_exp_512(uint64_t *result, const uint64_t *g, const uint64_t *exp,
#                  const uint64_t *mod_data);
# ABI:  System V AMD64 (macOS; hence the leading underscore).
# In:   rdi = result buffer (8 qwords)
#       rsi = base g (8 qwords)
#       rdx = 512-bit exponent (8 qwords)
#       rcx = pointer to precomputed modulus data / Montgomery constants
#             (layout as expected by mont_reduce; produced by the C caller —
#              NOTE(review): exact layout not visible from this file)
# Out:  result = g^exp mod m, written through the saved rdi.
#
# Strategy visible in the code: fixed 5-bit windowed exponentiation with a
# 32-entry table of powers. Table entries are stored as 16-bit slices strided
# 64 bytes apart (the scatter in loop_0/loop_1, gather in loop_2/loop_3), so
# every table lookup touches the same cache lines regardless of the secret
# window value — a cache-timing countermeasure.
#------------------------------------------------------------------------------
.globl    _mod_exp_512

_mod_exp_512:
    pushq    %rbp                       # save all callee-saved GPRs (SysV)
    pushq    %rbx
    pushq    %r12
    pushq    %r13
    pushq    %r14
    pushq    %r15

# ---- build a 64-byte-aligned 2688-byte frame; keep original rsp at 0(%rsp) ----
    movq    %rsp,%r8
    subq    $2688,%rsp
    andq    $-64,%rsp

# frame slots: 0=saved rsp, 8=result ptr, 16=base ptr, 24=modulus-data ptr
    movq    %r8,0(%rsp)
    movq    %rdi,8(%rsp)
    movq    %rsi,16(%rsp)
    movq    %rcx,24(%rsp)
L$body:

# ---- stage base g into the 1024-bit buffer at 512(%rsp), zero-padded high ----

    pxor    %xmm4,%xmm4
    movdqu    0(%rsi),%xmm0
    movdqu    16(%rsi),%xmm1
    movdqu    32(%rsi),%xmm2
    movdqu    48(%rsi),%xmm3
    movdqa    %xmm4,512(%rsp)
    movdqa    %xmm4,528(%rsp)
    movdqa    %xmm4,608(%rsp)
    movdqa    %xmm4,624(%rsp)
    movdqa    %xmm0,544(%rsp)
    movdqa    %xmm1,560(%rsp)
    movdqa    %xmm2,576(%rsp)
    movdqa    %xmm3,592(%rsp)

# ---- keep the exponent in xmm0-3 for now (spilled to the frame later) ----
    movdqu    0(%rdx),%xmm0
    movdqu    16(%rdx),%xmm1
    movdqu    32(%rdx),%xmm2
    movdqu    48(%rdx),%xmm3

    leaq    384(%rsp),%rbx
    movq    %rbx,136(%rsp)
    call    mont_reduce                 # convert staged base into Montgomery domain

# ---- build table entry 0 at 448(%rsp) (identity in Montgomery form) ----
    leaq    448(%rsp),%rcx
    xorq    %rax,%rax
    movq    %rax,0(%rcx)
    movq    %rax,8(%rcx)
    movq    %rax,24(%rcx)
    movq    %rax,32(%rcx)
    movq    %rax,40(%rcx)
    movq    %rax,48(%rcx)
    movq    %rax,56(%rcx)
    movq    %rax,128(%rsp)
    movq    $1,16(%rcx)                 # NOTE(review): the lone set limb is at qword 2, i.e. the value 2^128 — presumably R mod m for this fixed layout; confirm against the perlasm source

    leaq    640(%rsp),%rbp              # %rbp -> 16-bit-sliced power table
    movq    %rcx,%rsi
    movq    %rbp,%rdi
    movq    $8,%rax
loop_0:
# scatter one table entry: each 64-bit limb split into four 16-bit slices,
# 64 bytes apart, so a later gather touches fixed cache lines for any index
    movq    (%rcx),%rbx
    movw    %bx,(%rdi)
    shrq    $16,%rbx
    movw    %bx,64(%rdi)
    shrq    $16,%rbx
    movw    %bx,128(%rdi)
    shrq    $16,%rbx
    movw    %bx,192(%rdi)
    leaq    8(%rcx),%rcx
    leaq    256(%rdi),%rdi
    decq    %rax
    jnz    loop_0
# 32(%rsp) = remaining table entries to fill; 40(%rsp) = scatter cursor
    movq    $31,%rax
    movq    %rax,32(%rsp)
    movq    %rbp,40(%rsp)

    movq    %rsi,136(%rsp)
    movq    0(%rsi),%r10                # pre-load multiplier limbs for mont_mul_a3b
    movq    8(%rsi),%r11
    movq    16(%rsi),%r12
    movq    24(%rsi),%r13
    movq    32(%rsi),%r14
    movq    40(%rsi),%r15
    movq    48(%rsi),%r8
    movq    56(%rsi),%r9
init_loop:
# table[i] = table[i-1] * g  (Montgomery), then scatter it
    leaq    384(%rsp),%rdi
    call    mont_mul_a3b
    leaq    448(%rsp),%rsi
    movq    40(%rsp),%rbp
    addq    $2,%rbp                     # next 16-bit column in the sliced table
    movq    %rbp,40(%rsp)
    movq    %rsi,%rcx
    movq    $8,%rax
loop_1:
    movq    (%rcx),%rbx
    movw    %bx,(%rbp)
    shrq    $16,%rbx
    movw    %bx,64(%rbp)
    shrq    $16,%rbx
    movw    %bx,128(%rbp)
    shrq    $16,%rbx
    movw    %bx,192(%rbp)
    leaq    8(%rcx),%rcx
    leaq    256(%rbp),%rbp
    decq    %rax
    jnz    loop_1
    movq    32(%rsp),%rax
    subq    $1,%rax
    movq    %rax,32(%rsp)
    jne    init_loop

# ---- spill the exponent (held in xmm0-3 across all the calls above) ----

    movdqa    %xmm0,64(%rsp)
    movdqa    %xmm1,80(%rsp)
    movdqa    %xmm2,96(%rsp)
    movdqa    %xmm3,112(%rsp)

# ---- consume the exponent's topmost window first ----



    movl    126(%rsp),%eax              # top 16 bits of the 512-bit exponent
    movq    %rax,%rdx
    shrq    $11,%rax                    # top window index
    andl    $2047,%edx
    movl    %edx,126(%rsp)              # strip consumed bits in place
    leaq    640(%rsp,%rax,2),%rsi       # gather start for that table entry
    movq    8(%rsp),%rdx
    movq    $4,%rbp
loop_2:
# gather one table entry back out of the 16-bit-sliced layout (2 limbs/iter)
    movzwq    192(%rsi),%rbx
    movzwq    448(%rsi),%rax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    128(%rsi),%bx
    movw    384(%rsi),%ax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    64(%rsi),%bx
    movw    320(%rsi),%ax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    0(%rsi),%bx
    movw    256(%rsi),%ax
    movq    %rbx,0(%rdx)
    movq    %rax,8(%rdx)
    leaq    512(%rsi),%rsi
    leaq    16(%rdx),%rdx
    subq    $1,%rbp
    jnz    loop_2
    movq    $505,48(%rsp)               # bit cursor: remaining exponent bits, stepped by 5

    movq    8(%rsp),%rcx                # accumulator = gathered entry (result buffer)
    movq    %rcx,136(%rsp)
    movq    0(%rcx),%r10
    movq    8(%rcx),%r11
    movq    16(%rcx),%r12
    movq    24(%rcx),%r13
    movq    32(%rcx),%r14
    movq    40(%rcx),%r15
    movq    48(%rcx),%r8
    movq    56(%rcx),%r9
    jmp    sqr_2                        # first iteration squares only twice (top window is 2 bits wider)

main_loop_a3b:
# square 5 times per 5-bit window...
    call    sqr_reduce
    call    sqr_reduce
    call    sqr_reduce
sqr_2:
    call    sqr_reduce
    call    sqr_reduce

# ---- extract the next 5-bit window from the spilled exponent ----

    movq    48(%rsp),%rcx
    movq    %rcx,%rax
    shrq    $4,%rax
    movl    64(%rsp,%rax,2),%edx        # 32-bit load spanning the window's bytes
    andq    $15,%rcx
    shrq    %cl,%rdx
    andq    $31,%rdx                    # window value in 0..31

    leaq    640(%rsp,%rdx,2),%rsi       # gather table[window]
    leaq    448(%rsp),%rdx
    movq    %rdx,%rdi
    movq    $4,%rbp
loop_3:
    movzwq    192(%rsi),%rbx
    movzwq    448(%rsi),%rax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    128(%rsi),%bx
    movw    384(%rsi),%ax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    64(%rsi),%bx
    movw    320(%rsi),%ax
    shlq    $16,%rbx
    shlq    $16,%rax
    movw    0(%rsi),%bx
    movw    256(%rsi),%ax
    movq    %rbx,0(%rdx)
    movq    %rax,8(%rdx)
    leaq    512(%rsi),%rsi
    leaq    16(%rdx),%rdx
    subq    $1,%rbp
    jnz    loop_3
    movq    8(%rsp),%rsi
    call    mont_mul_a3b                # acc = acc * table[window]

# ---- advance the bit cursor; loop while bits remain ----

    movq    48(%rsp),%rcx
    subq    $5,%rcx
    movq    %rcx,48(%rsp)
    jge    main_loop_a3b



end_main_loop_a3b:

# ---- convert out of the Montgomery domain: reduce(acc staged in low half) ----
    movq    8(%rsp),%rdx
    pxor    %xmm4,%xmm4
    movdqu    0(%rdx),%xmm0
    movdqu    16(%rdx),%xmm1
    movdqu    32(%rdx),%xmm2
    movdqu    48(%rdx),%xmm3
    movdqa    %xmm4,576(%rsp)
    movdqa    %xmm4,592(%rsp)
    movdqa    %xmm4,608(%rsp)
    movdqa    %xmm4,624(%rsp)
    movdqa    %xmm0,512(%rsp)
    movdqa    %xmm1,528(%rsp)
    movdqa    %xmm2,544(%rsp)
    movdqa    %xmm3,560(%rsp)
    call    mont_reduce

# ---- final conditional subtract of the modulus (branch-free via cmovnc) ----

    movq    8(%rsp),%rax
    movq    0(%rax),%r8
    movq    8(%rax),%r9
    movq    16(%rax),%r10
    movq    24(%rax),%r11
    movq    32(%rax),%r12
    movq    40(%rax),%r13
    movq    48(%rax),%r14
    movq    56(%rax),%r15

    # modulus lives 512 bytes into the caller-provided data block
    movq    24(%rsp),%rbx
    addq    $512,%rbx

    subq    0(%rbx),%r8                 # tentative result - m
    sbbq    8(%rbx),%r9
    sbbq    16(%rbx),%r10
    sbbq    24(%rbx),%r11
    sbbq    32(%rbx),%r12
    sbbq    40(%rbx),%r13
    sbbq    48(%rbx),%r14
    sbbq    56(%rbx),%r15

    # keep the subtracted value only if no borrow (result >= m)
    movq    0(%rax),%rsi
    movq    8(%rax),%rdi
    movq    16(%rax),%rcx
    movq    24(%rax),%rdx
    cmovncq    %r8,%rsi
    cmovncq    %r9,%rdi
    cmovncq    %r10,%rcx
    cmovncq    %r11,%rdx
    movq    %rsi,0(%rax)
    movq    %rdi,8(%rax)
    movq    %rcx,16(%rax)
    movq    %rdx,24(%rax)

    movq    32(%rax),%rsi
    movq    40(%rax),%rdi
    movq    48(%rax),%rcx
    movq    56(%rax),%rdx
    cmovncq    %r12,%rsi
    cmovncq    %r13,%rdi
    cmovncq    %r14,%rcx
    cmovncq    %r15,%rdx
    movq    %rsi,32(%rax)
    movq    %rdi,40(%rax)
    movq    %rcx,48(%rax)
    movq    %rdx,56(%rax)

# ---- epilogue: restore callee-saved regs from the saved pre-alignment rsp ----
    movq    0(%rsp),%rsi
    movq    0(%rsi),%r15
    movq    8(%rsi),%r14
    movq    16(%rsi),%r13
    movq    24(%rsi),%r12
    movq    32(%rsi),%rbx
    movq    40(%rsi),%rbp
    leaq    48(%rsi),%rsp
L$epilogue:
    .byte    0xf3,0xc3                  # rep ret