summaryrefslogtreecommitdiffstats
path: root/src/vcl/vcl_private.c
AgeCommit message (Expand)AuthorFilesLines
2020-08-12vcl: support multi-threads with session migrationhanlin1-0/+2
2020-04-06vcl session: enforce full dgram reads/writesFlorin Coras1-0/+9
2020-04-01session udp: support connect on listenersFlorin Coras1-1/+19
2020-03-05session: API cleanupJakub Grajciar1-1/+1
2019-12-23vcl: fix multi-thread app segment attachingFlorin Coras1-32/+27
2019-12-21session: move add/del segment msg to mqFlorin Coras1-0/+41
2019-12-12vcl: fix disconnect from binary apiFlorin Coras1-0/+8
2019-12-10api: multiple connections per processDave Barach1-1/+1
2019-08-27session: move ctrl messages from bapi to mqFlorin Coras1-0/+6
2019-08-02vcl: initialize worker thread index and heap map slotFlorin Coras1-0/+3
2019-04-16svm_fifo rework to avoid contention on cursizeSirshak Das1-5/+5
2019-03-15vcl: cleanup debug messagesFlorin Coras1-7/+7
2019-03-06session: use vpp to switch io events for ct sessionsFlorin Coras1-73/+6
2019-02-04session: cleanup part 1Florin Coras1-2/+2
2019-01-29vls: multi-process and multi-threaded apps improvementsFlorin Coras1-0/+64
2019-01-18vcl: move forking logic to vlsFlorin Coras1-121/+15
2019-01-17vcl/session: replicate events for shared sessionsFlorin Coras1-0/+11
2019-01-05vcl/session: add api for changing session app workerFlorin Coras1-7/+15
2018-12-04vcl: cleanup children that use _exit()Florin Coras1-11/+14
2018-12-03vcl: handle worker process exitFlorin Coras1-5/+8
2018-11-30vcl: wait for segments with segment handleFlorin Coras1-0/+30
2018-11-29vcl: basic support for apps that forkFlorin Coras1-13/+151
2018-09-14vcl: keep track of unexpected eventsFlorin Coras1-0/+2
2018-09-08vcl: set worker pthread stop keyFlorin Coras1-1/+5
2018-09-08vcl: register workers in orderFlorin Coras1-4/+13
2018-08-30vcl: add support for multi-worker appsFlorin Coras1-35/+182
2018-08-10vcl: support for eventfd mq signalingFlorin Coras1-0/+151
n536' href='#n536'>536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2005 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/* cdecl(s) yields the assembler-level name of symbol s: the name gets a
   leading underscore when __APPLE__ is defined and is used unchanged
   otherwise. */
#if defined(__APPLE__)
# define cdecl(s) _##s
#else
# define cdecl(s) s
#endif

#if defined(__x86_64__)
	.global cdecl(clib_setjmp)
	.align 4
#ifndef __APPLE__
	.type cdecl(clib_setjmp), @function
#endif

/*
 * clib_setjmp -- x86-64, AT&T syntax.
 *   In:  %rdi = jump buffer, %rsi = value handed back in %rax.
 *   Buffer, in 8-byte slots:
 *     0 rbx, 1 rbp, 2 r12, 3 r13, 4 r14, 5 r15,
 *     6 stack pointer as it will be once this call has returned,
 *     7 address this call returns to.
 *   Scratch: %rax, %rdx.
 */
cdecl(clib_setjmp):
	/* Slot 7: the return address, currently on top of the stack. */
	movq (%rsp), %rdx
	movq %rdx, 56(%rdi)

	/* Slot 6: stack pointer with the return address popped. */
	leaq 8(%rsp), %rdx
	movq %rdx, 48(%rdi)

	/* Slots 5..0: registers that must survive across calls. */
	movq %r15, 40(%rdi)
	movq %r14, 32(%rdi)
	movq %r13, 24(%rdi)
	movq %r12, 16(%rdi)
	movq %rbp, 8(%rdi)
	movq %rbx, (%rdi)

	/* The direct call yields the caller-supplied value. */
	movq %rsi, %rax
	ret
	
	.global cdecl(clib_longjmp)
	.align 4
#ifndef __APPLE__
	.type cdecl(clib_longjmp), @function
#endif
/*
 * clib_longjmp -- x86-64.
 *   In: %rdi = jump buffer, %rsi = value handed back in %rax.
 *   Never returns to its caller: execution continues at the address in
 *   slot 7 of the buffer, on the stack pointer taken from slot 6.
 */
cdecl(clib_longjmp):
	/* Value seen in %rax at the resume point. */
	movq %rsi, %rax

	/* Resume address (slot 7). */
	movq 56(%rdi), %rdx

	/* Saved registers (slots 5..0), then the stack pointer (slot 6). */
	movq 40(%rdi), %r15
	movq 32(%rdi), %r14
	movq 24(%rdi), %r13
	movq 16(%rdi), %r12
	movq 8(%rdi), %rbp
	movq (%rdi), %rbx
	movq 48(%rdi), %rsp

	/* Continue where the buffer was filled. */
	jmpq *%rdx
	
	.global cdecl(clib_calljmp)
	.align 4
#ifndef __APPLE__
	.type cdecl(clib_calljmp), @function
#endif
/*
 * clib_calljmp -- x86-64.
 *   In:  %rdi = function to call, %rsi = its single argument,
 *        %rdx = top of the stack the function should run on.
 *   Out: %rax is left exactly as the called function set it.
 *   While the function runs, the new stack holds (from the top down)
 *   the return address of this call and the old stack pointer.
 *   Scratch: %rax, %rcx, %rdx, flags.
 */
cdecl(clib_calljmp):
	/* Make sure stack is 16-byte aligned: round %rdx down. */
	movq %rdx, %rax
	andq $0xf, %rax
	subq %rax, %rdx

	/* Get return address; the old stack pointer now points just above it. */
	pop %rax

	/* Switch to new stack; %rdx now holds the old stack pointer. */
	xchgq %rsp, %rdx

	/* Save return address on new stack. */
	push %rax

	/* Save old stack pointer on new stack.  After these two pushes the
	   stack pointer is again a multiple of 16 before the call below. */
	push %rdx

	/* Get function. */
	movq %rdi, %rdx

	/* Move argument into place. */
	movq %rsi, %rdi

	/* Away we go. */
	callq *%rdx

	/* Switch back to old stack: 8(%rsp) is the saved return address,
	   0(%rsp) the saved stack pointer. */
	movq 8(%rsp), %rdx
	movq 0(%rsp), %rcx
	xchgq %rcx, %rsp

	/* Return to caller; the return address was already popped above,
	   so jump instead of using ret. */
	jmpq *%rdx

#elif defined(i386)
	.global cdecl(clib_setjmp)
	.align 4
	.type cdecl(clib_setjmp), @function
/*
 * clib_setjmp -- i386, arguments on the stack.
 *   4(%esp) = jump buffer, 8(%esp) = value handed back in %eax.
 *   Buffer, in 4-byte slots:
 *     0 ebp, 1 ebx, 2 edi, 3 esi,
 *     4 stack pointer as it will be once this call has returned,
 *     5 address this call returns to.
 *   Scratch: %eax, %ecx, %edx.
 */
cdecl(clib_setjmp):
	/* %ecx = jump buffer. */
	movl 4(%esp), %ecx

	/* Slot 5: return address, on top of the stack. */
	movl (%esp), %edx
	movl %edx, 20(%ecx)

	/* Slot 4: stack pointer with the return address popped. */
	leal 4(%esp), %edx
	movl %edx, 16(%ecx)

	/* Slots 3..0: registers that must survive across calls. */
	movl %esi, 12(%ecx)
	movl %edi, 8(%ecx)
	movl %ebx, 4(%ecx)
	movl %ebp, (%ecx)

	/* The direct call yields the caller-supplied value. */
	movl 8(%esp), %eax
	ret
	
	.global cdecl(clib_longjmp)
	.align 4
	.type cdecl(clib_longjmp), @function
/*
 * clib_longjmp -- i386, arguments on the stack.
 *   4(%esp) = jump buffer, 8(%esp) = value handed back in %eax.
 *   Never returns to its caller: execution continues at the address in
 *   slot 5 of the buffer, on the stack pointer taken from slot 4.
 */
cdecl(clib_longjmp):
	/* Fetch both arguments while the current stack is still in use. */
	movl 4(%esp), %ecx
	movl 8(%esp), %eax

	/* Resume address (slot 5). */
	movl 20(%ecx), %edx

	/* Saved registers (slots 3..0), then the stack pointer (slot 4). */
	movl 12(%ecx), %esi
	movl 8(%ecx), %edi
	movl 4(%ecx), %ebx
	movl (%ecx), %ebp
	movl 16(%ecx), %esp

	/* Continue where the buffer was filled. */
	jmp *%edx
	
	.global cdecl(clib_calljmp)
	.align 4
	.type cdecl(clib_calljmp), @function
/*
 * clib_calljmp -- i386, arguments on the stack.
 *   4(%esp) = function to call, 8(%esp) = its single argument,
 *   12(%esp) = top of the stack the function should run on.
 *   %eax is left exactly as the called function set it.
 *   NOTE(review): unlike the x86-64 variant, the new stack pointer is
 *   used as given and is not realigned here -- confirm callers pass an
 *   aligned value if the called code needs one.
 */
cdecl(clib_calljmp):	
	/* Get new stack pointer. */
	movl 12(%esp), %edx
	
	/* Switch stacks; %edx now holds the old stack pointer. */
	xchgl %esp, %edx
	
	/* Save old stack pointer on new stack (8 bytes reserved: argument
	   at 0, old stack pointer at 4). */
	sub $8, %esp
	movl %edx, 4(%esp)
	
	/* Put function argument in stack frame. */
	movl 8(%edx), %eax
	movl %eax, 0(%esp)
	
	/* Get function. */
	movl 4(%edx), %eax
	
	/* Away we go. */
	call *%eax
	
	/* Switch back to old stack. */
	movl 4(%esp), %edx
	xchgl %edx, %esp
	
	/* Return to caller; the return address is still on the old stack. */
	ret
	
#elif defined(__SPU__)
	
#elif defined(__powerpc64__)
	
	.text

/* _prologue(n): open function n.  Emits a 24-byte entry named n in the
   ".opd" section holding three quads (address of .n, .TOC.@tocbase, 0),
   makes both n and .n global, and leaves the assembler positioned at
   the code label .n in the previous section. */
#define _prologue(n)				\
    .align 2 ;					\
    .globl n, .##n ;				\
    .section ".opd", "aw" ;			\
    .align 3 ;					\
n:  .quad .##n, .TOC.@tocbase, 0 ;		\
    .previous ;					\
    .size n, 24 ;				\
    .type .##n, @function ;			\
.##n:

/* Expands the user-defined macro _ (reg, index) once per register
   14..31, with index running 0..17. */
#define _foreach_14_31							\
_ (14, 0)  _ (15, 1)  _ (16, 2)  _ (17, 3)  _ (18, 4)  _ (19, 5)	\
_ (20, 6)  _ (21, 7)  _ (22, 8)  _ (23, 9)  _ (24, 10) _ (25, 11)	\
_ (26, 12) _ (27, 13) _ (28, 14) _ (29, 15) _ (30, 16) _ (31, 17)

/* Same idea for registers 20..31, with index running 0..11. */
#define _foreach_20_31						\
_ (20, 0) _ (21, 1) _ (22, 2) _ (23, 3) _ (24, 4)  _ (25, 5)	\
_ (26, 6) _ (27, 7) _ (28, 8) _ (29, 9) _ (30, 10) _ (31, 11)
	
/* Number of vector registers saved in the jump buffer: 12 when the
   compiler defines __ALTIVEC__, none otherwise. */
#ifdef __ALTIVEC__
#define CLIB_POWERPC_ALTIVEC_N_REGS 12
#else
#define CLIB_POWERPC_ALTIVEC_N_REGS 0
#endif

/*
 * clib_setjmp -- powerpc64.
 *   In:  r3 = jump buffer, r4 = value handed back in r3.
 *   Buffer, in 8-byte slots:
 *     0 LR, 1 r1, 2 r2,
 *     3 CR in bytes 0-3 and SPR 256 (VRSAVE) in bytes 4-7,
 *     4..21 r14-r31, 22..39 f14-f31,
 *     from slot 40 on: v20-v31, 16 bytes each (only when
 *     CLIB_POWERPC_ALTIVEC_N_REGS > 0).
 *   Scratch: r0, r5.
 */
_prologue (cdecl(clib_setjmp))
	mflr 0
	std 0, 8*0(3)
	std 1, 8*1(3)
	std 2, 8*2(3)

	/* CR and SPR 256 are both stored as 32-bit words inside slot 3.
	   Slot 4 cannot be used for SPR 256: it belongs to r14 and is
	   written by the register loop below. */
	mfcr 0
	stw 0, 8*3(3)
	mfspr 0, 256
	stw 0, 8*3 + 4(3)
	
	/* gprs 14 - 31 */
#define _(a,b) std a, 8*((b) + 4 + 18*0)(3) ; 
	_foreach_14_31
#undef _
	
	/* fprs 14 - 31 */
#define _(a,b) stfd a, 8*((b) + 4 + 18*1)(3) ; 
	_foreach_14_31
#undef _

#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	/* vrs 20 - 31; r5 is the running byte offset into the buffer. */
	li 5, 8*(4 + 18*2)
#define _(a,b) stvx a, 5, 3 ; addi 5, 5, 16 ;
	_foreach_20_31
#undef _
#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
	
	/* Return value. */
	mr 3, 4
	
	blr
	
/*
 * clib_longjmp -- powerpc64.
 *   In: r3 = jump buffer laid out as described for clib_setjmp,
 *       r4 = value handed back in r3.
 *   Reloads LR, r1, r2, CR, SPR 256 and the saved register sets, then
 *   branches to the restored LR.
 */
_prologue (cdecl(clib_longjmp))
	ld 0, 8*0(3)
	mtlr 0
	ld 1, 8*1(3)
	ld 2, 8*2(3)

	/* Slot 3 holds two words: CR first, then SPR 256. */
	lwz 0, 8*3(3)
	mtcrf 0xff, 0
	lwz 0, 8*3 + 4(3)
	mtspr 256, 0
	
	/* gprs 14 - 31 */
#define _(a,b) ld a, 8*((b) + 4 + 18*0)(3) ; 
	_foreach_14_31
#undef _
	
	/* fprs 14 - 31 */
#define _(a,b) lfd a, 8*((b) + 4 + 18*1)(3) ; 
	_foreach_14_31
#undef _
	
#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	/* vrs 20 - 31; r5 is the running byte offset into the buffer. */
	li 5, 8*(4 + 18*2)
#define _(a,b) lvx a, 5, 3 ; addi 5, 5, 16 ;
	_foreach_20_31
#undef _
#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
	
	/* Return value. */
	mr 3, 4
	
	blr

	.globl cdecl(clib_calljmp)
	.section	".opd","aw"
	.align 3
/*
 * clib_calljmp -- powerpc64.
 *   In: r3 = pointer to three doublewords describing the function
 *            (offset 0: entry address, 8: value for r2, 16: value for r11),
 *       r4 = argument, passed on in r3,
 *       r5 = top of the stack the function should run on.
 *   r3 on return is whatever the called function left there.
 *   NOTE(review): r5 is used as given and is not realigned here --
 *   presumably callers pass a suitably aligned stack top; confirm.
 */
cdecl(clib_calljmp):
	.quad	.L.cdecl(clib_calljmp),.TOC.@tocbase,0
	.previous
	.type	cdecl(clib_calljmp), @function
.L.cdecl(clib_calljmp):
	/* Keep the function pointer in r9, save LR at 16(r1) and push a
	   112-byte frame on the current stack. */
	mflr 0
	mr 9,3
	std 0,16(1)
	stdu 1,-112(1)
#APP
	/* Remember the current r1 just below the new stack top, then move
	   256 bytes down and make that the stack pointer. */
	std 1,-8(5)
	addi 5,5,-256
	mr 1,5
#NO_APP
	/* Load entry address, r11 and r2 from the three doublewords at r9;
	   our own r2 is parked at 40(r1) on the new stack meanwhile. */
	ld 10,0(9)
	std 2,40(1)
	mr 3,4
	mtctr 10
	ld 11,16(9)
	ld 2,8(9)
	bctrl
	/* Back from the call: restore r2. */
	ld 2,40(1)
#APP
	/* Undo the 256-byte step and reload the old r1 saved at -8. */
	addi 1,1,256
	ld 1,-8(1)
#NO_APP
	/* Pop the 112-byte frame, restore LR and return. */
	addi 1,1,112
	ld 0,16(1)
	mtlr 0
	blr
	/* NOTE(review): the words below look like a compiler-emitted
	   trailer following the function body; purpose not visible here. */
	.long 0
	.byte 0,0,0,1,128,0,0,0
	.size	cdecl(clib_calljmp),.-.L.cdecl(clib_calljmp)
	
#elif defined(__powerpc__)
	
/* Expands the user-defined macro _ (reg, index) once per register
   14..31, with index running 0..17. */
#define _foreach_14_31							\
_ (14, 0)  _ (15, 1)  _ (16, 2)  _ (17, 3)  _ (18, 4)  _ (19, 5)	\
_ (20, 6)  _ (21, 7)  _ (22, 8)  _ (23, 9)  _ (24, 10) _ (25, 11)	\
_ (26, 12) _ (27, 13) _ (28, 14) _ (29, 15) _ (30, 16) _ (31, 17)

/* Same idea for registers 20..31, with index running 0..11. */
#define _foreach_20_31						\
_ (20, 0) _ (21, 1) _ (22, 2) _ (23, 3) _ (24, 4)  _ (25, 5)	\
_ (26, 6) _ (27, 7) _ (28, 8) _ (29, 9) _ (30, 10) _ (31, 11)
	
/* Number of vector registers saved in the jump buffer: 12 when the
   compiler defines __ALTIVEC__, none otherwise. */
#ifdef __ALTIVEC__
#define CLIB_POWERPC_ALTIVEC_N_REGS 12
#else
#define CLIB_POWERPC_ALTIVEC_N_REGS 0
#endif

	.global cdecl(clib_setjmp)
	.align 4
	.type cdecl(clib_setjmp), @function
/*
 * clib_setjmp -- 32-bit powerpc.
 *   In:  r3 = jump buffer, r4 = value handed back in r3.
 *   Buffer, in 4-byte words (N = CLIB_POWERPC_ALTIVEC_N_REGS):
 *     0 LR, 1 r1, 2 CR, 3 SPR 256 (see note below),
 *     4 .. 4 + 4*N - 1      v20-v31, 16 bytes each (only when N > 0),
 *     then 18 words         r14-r31,
 *     then 18 doublewords   f14-f31.
 *   Scratch: r0, r5.
 */
cdecl(clib_setjmp):
	mflr 0
	stw 0, 4*0(3)
	stw 1, 4*1(3)
	mfcr 0
	stw 0, 4*2(3)
	/* Without AltiVec r0 still holds CR here, so word 3 simply gets a
	   second copy of CR. */
#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	mfspr 0, 256
#endif
	stw 0, 4*3(3)
	
#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	/* v20 - v31; r5 is the running byte offset into the buffer. */
	li 5, 4*4
#define _(a,b) stvx a, 3, 5 ; addi 5, 5, 16 ;
	_foreach_20_31
#undef _
#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
	
	/* gp 14 - 31: one word each, after the header and vector area. */
#define _(a,b) stw a,  4*(1*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 0*18)(3) ; 
	_foreach_14_31
#undef _
	
	/* fp 14 - 31: two words each, after the 18 gp words. */
#define _(a,b) stfd a, 4*(2*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 1*18)(3) ;
	_foreach_14_31
#undef _

	/* Return value. */
	mr 3, 4
	
	blr
	
	.global cdecl(clib_longjmp)
	.align 4
	.type cdecl(clib_longjmp), @function
/*
 * clib_longjmp -- 32-bit powerpc.
 *   In: r3 = jump buffer, r4 = value handed back in r3.
 *   Reads the buffer with the same offsets the 32-bit clib_setjmp
 *   writes (words 0-3: LR, r1, CR, SPR 256; then vector, gp and fp
 *   areas), then branches to the restored LR.
 *   Scratch: r0, r5.
 */
cdecl(clib_longjmp):	
	
	lwz 0, 4*0(3)
	mtlr 0
	lwz 1, 4*1(3)
	lwz 0, 4*2(3)
	mtcr 0
	/* Word 3 is only moved to SPR 256 in AltiVec builds. */
	lwz 0, 4*3(3)
#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	mtspr 256, 0
#endif
	
#if CLIB_POWERPC_ALTIVEC_N_REGS > 0
	/* v20 - v31; r5 is the running byte offset into the buffer. */
	li 5, 4*4
#define _(a,b) lvx a, 3, 5 ; addi 5, 5, 16 ;
	_foreach_20_31
#undef _
#endif /* CLIB_POWERPC_ALTIVEC_N_REGS > 0 */
	
	/* gp 14 - 31 */
#define _(a,b) lwz a, 4*(1*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 0*18)(3) ;
	_foreach_14_31
#undef _
	
	/* fp 14 - 31 */
#define _(a,b) lfd a, 4*(2*(b) + 4 + 4*CLIB_POWERPC_ALTIVEC_N_REGS + 1*18)(3) ;
	_foreach_14_31
#undef _

	/* Return value. */
	mr 3, 4
	
	blr

	.global cdecl(clib_calljmp)
	.align 4
	.type cdecl(clib_calljmp), @function
/*
 * clib_calljmp -- 32-bit powerpc.
 *   In: r3 = function to call, r4 = its single argument,
 *       r5 = top of the stack the function should run on.
 *   r3 on return is whatever the called function left there.
 *   New-stack layout below the aligned top: a 16-byte block holding the
 *   old r1 (offset 0) and LR (offset 4), then a further 16 bytes on
 *   which the called function starts.
 *   Scratch: r0, r5, CTR, and CR0 (set by the andi. below).
 */
cdecl(clib_calljmp):	
	/* Make sure stack is 16 byte aligned: round r5 down, then reserve
	   the 16-byte save block. */
	andi. 0, 5, 0xf
	sub  5, 5, 0
	addi 5, 5, -16
	
	/* Save old stack/link pointer on new stack. */
	stw 1, 0(5)
	mflr 0
	stw 0, 4(5)
	
	/* account for (sp, lr) tuple, and keep aligned */
	addi 5, 5, -16
	
	/* Switch stacks. */
	mr 1, 5
	
	/* Move argument into place; the function address goes to CTR. */
	mtctr 3
	mr 3, 4
	
	/* Away we go. */
	bctrl
	
	/* back to our synthetic frame */
	addi 1,1,16
	
	/* Switch back to old stack: restore LR first, then r1. */
	lwz 0, 4(1)
	mtlr 0
	lwz 0, 0(1)
	mr 1, 0
	
	/* Return to caller. */
	blr
	
#elif defined(__arm__)
	
	.global cdecl(clib_setjmp)
	.align 4
	.type cdecl(clib_setjmp), %function
/*
 * clib_setjmp -- 32-bit ARM.
 *   In:  r0 = jump buffer, r1 = value handed back in r0.
 *   Buffer: r4-r11, sp, lr as ten consecutive words, followed by
 *   wr10-wr15 (8 bytes each) when __IWMMXT__ is defined.
 *   Scratch: ip.
 */
cdecl(clib_setjmp):
	/* ip walks through the jump buffer. */
	mov ip, r0

	/* r4-r11 (v1-v6, sl, fp), then sp and lr. */
	stmia ip!, {r4-r11, sp, lr}

#ifdef __IWMMXT__
	/* Store the call-preserved iWMMXt registers, advancing ip by 8
	   after each one. */
	wstrd wr10, [ip], #8
	wstrd wr11, [ip], #8
	wstrd wr12, [ip], #8
	wstrd wr13, [ip], #8
	wstrd wr14, [ip], #8
	wstrd wr15, [ip], #8
#endif

	/* The direct call yields the caller-supplied value. */
	mov r0, r1
	bx lr
	
	.global cdecl(clib_longjmp)
	.align 4
	.type cdecl(clib_longjmp), %function
/*
 * clib_longjmp -- 32-bit ARM.
 *   In: r0 = jump buffer, r1 = value handed back in r0.
 *   Reloads r4-r11, sp and lr (plus wr10-wr15 when __IWMMXT__ is
 *   defined) from the buffer and branches to the reloaded lr.
 *   Scratch: ip.
 */
cdecl(clib_longjmp):
	/* ip walks through the jump buffer. */
	mov ip, r0

	/* r4-r11 (v1-v6, sl, fp), then sp and lr. */
	ldmia ip!, {r4-r11, sp, lr}

#ifdef __IWMMXT__
	/* Restore the call-preserved iWMMXt registers, advancing ip by 8
	   after each one. */
	wldrd wr10, [ip], #8
	wldrd wr11, [ip], #8
	wldrd wr12, [ip], #8
	wldrd wr13, [ip], #8
	wldrd wr14, [ip], #8
	wldrd wr15, [ip], #8
#endif

	/* Value seen in r0 at the resume point. */
	mov r0, r1
	bx lr

	.global cdecl(clib_calljmp)
	.align 4
	.type cdecl(clib_calljmp), %function
/*
 * clib_calljmp -- 32-bit ARM.
 *   In: r0 = function to call, r1 = its single argument,
 *       r2 = top of the stack the function should run on.
 *   r0 on return is whatever the called function left there.
 *   The old sp and lr are kept in an 8-byte block at the bottom of the
 *   aligned new stack while the function runs.
 *   Scratch: r2, ip.
 */
cdecl(clib_calljmp):	
	/* Make sure stack is 8 byte aligned. */
	bic r2, r2, #7
	
	/* Allocate space for stack/link pointer on new stack. */
	sub r2, r2, #8	
	
	/* Save old stack/link pointer on new stack. */
	str sp, [r2, #0]
	str lr, [r2, #4]
	
	/* Switch stacks. */
	mov sp, r2
	
	/* Save function to call. */
	mov ip, r0
	
	/* Move argument into place. */
	mov r0, r1
	
	/* Away we go.  This must be a branch WITH link: a plain bx would
	   leave lr pointing at our own caller, so the function would
	   return there directly with sp still on the new stack and the
	   restore code below would never run. */
	blx ip
	
	/* Switch back to old stack. */
	ldr lr, [sp, #4]
	ldr ip, [sp, #0]
	mov sp, ip
	
	/* Return to caller. */
	bx lr
	
#elif defined(__xtensa__)
	
	/* FIXME implement if needed.
	   Xtensa placeholders: each of the three entry points below is a
	   single jump to itself, so calling any of them spins forever. */
	.global cdecl(clib_setjmp)
	.align 4
	.type cdecl(clib_setjmp), %function
cdecl(clib_setjmp):
1:	j 1b

	.global cdecl(clib_longjmp)
	.align 4
	.type cdecl(clib_longjmp), @function
cdecl(clib_longjmp):	
1:	j 1b
	
	.global cdecl(clib_calljmp)
	.align 4
	.type cdecl(clib_calljmp), %function
cdecl(clib_calljmp):	
1:	j 1b
	
#elif defined(__TMS320C6X__)
	
	/* FIXME implement if needed.
	   TMS320C6X placeholders: each of the three entry points below is
	   a single branch to itself, so calling any of them spins forever. */
	.global cdecl(clib_setjmp)
	.align 4
	.type cdecl(clib_setjmp), %function
cdecl(clib_setjmp):
1:	B	.S1     1b

	.global cdecl(clib_longjmp)
	.align 4
	.type cdecl(clib_longjmp), @function
cdecl(clib_longjmp):	
1:	B	.S1     1b
	
	.global cdecl(clib_calljmp)
	.align 4
	.type cdecl(clib_calljmp), %function
cdecl(clib_calljmp):	
1:	B	.S1     1b
	
#elif defined(_mips) && __mips == 64

	.global cdecl(clib_setjmp)
	.align 8
	.type cdecl(clib_setjmp), %function
/*
 * clib_setjmp -- mips64.
 *   In:  $a0 = jump buffer, $a1 = value handed back in $v0.
 *   Buffer, in 8-byte slots:
 *     0 ra, 1 sp, 2 gp, 3..10 $16-$23 (s0-s7), 11 $30 (fp).
 */
cdecl(clib_setjmp):
	/* Saved registers s0-s7 and fp. */
	sd $s0, 24($a0)
	sd $s1, 32($a0)
	sd $s2, 40($a0)
	sd $s3, 48($a0)
	sd $s4, 56($a0)
	sd $s5, 64($a0)
	sd $s6, 72($a0)
	sd $s7, 80($a0)
	sd $fp, 88($a0)

	/* Global pointer, stack pointer and return address. */
	sd $gp, 16($a0)
	sd $sp, 8($a0)
	sd $ra, 0($a0)

	/* The direct call yields the caller-supplied value. */
	move $v0, $a1
	jr $ra
	nop

	.global cdecl(clib_longjmp)
	.align 8
	.type cdecl(clib_longjmp), @function
/*
 * clib_longjmp -- mips64.
 *   In: $a0 = jump buffer, $a1 = value handed back in $v0.
 *   A requested value of 0 is replaced by 1.  Reloads ra, sp, gp,
 *   s0-s7 and fp from the buffer and jumps to the reloaded ra.
 */
cdecl(clib_longjmp):
	/* Saved registers s0-s7 and fp. */
	ld $s0, 24($a0)
	ld $s1, 32($a0)
	ld $s2, 40($a0)
	ld $s3, 48($a0)
	ld $s4, 56($a0)
	ld $s5, 64($a0)
	ld $s6, 72($a0)
	ld $s7, 80($a0)
	ld $fp, 88($a0)

	/* Global pointer, stack pointer and resume address. */
	ld $gp, 16($a0)
	ld $sp, 8($a0)
	ld $ra, 0($a0)

	/* $v0 = $a1, bumped to 1 when the caller asked for 0. */
	move $v0, $a1
	bne $v0, $0, 1f
	nop
	daddu $v0, $v0, 1
1:
	jr $ra
	nop

	.global cdecl(clib_calljmp)
	.align 8
	.type cdecl(clib_calljmp), %function
/*
 * clib_calljmp -- mips64.
 *   In: $a0 = function to call, $a1 = its single argument,
 *       $a2 = top of the stack the function should run on.
 *   $v0 on return is whatever the called function left there.
 *   Save area at the bottom of the aligned new stack (32 bytes):
 *     0 ra, 8 gp, 16 old sp, 24 padding.
 *   Scratch: $t0, $t1, $t9.
 */
cdecl(clib_calljmp):
	/* Force 16 byte alignment of the new stack */
	li $t1, -16
	and $t0, $a2, $t1
	/* Save old ra/gp/sp on new stack.  Reserve 32 bytes rather than 24
	   so that the stack pointer handed to the function is still a
	   multiple of 16. */
	daddiu $t0, $t0, (-32)
	sd $ra, 0($t0)
	sd $gp, 8($t0)
	sd $sp, 16($t0)
	/* Switch stacks */
	move $sp, $t0
	/* Away we go: function address in $t9, argument in $a0 */
	move $t9, $a0
	move $a0, $a1
	jalr $t9
	nop
	/* Switch back to old ra/gp/sp */
	move $t0, $sp
	ld $ra, 0($t0)
	ld $gp, 8($t0)
	ld $sp, 16($t0)
	/* Return to caller */
	jr $ra
	nop

#elif defined (__aarch64__)
/*
   Copyright (c) 2011, 2012 ARM Ltd
   All rights reserved.
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3. The name of the company may not be used to endorse or promote
      products derived from this software without specific prior written
      permission.
   THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* Jump buffer layout, shared by clib_setjmp and clib_longjmp.  Each user
   supplies its own REG_PAIR (reg1, reg2, byte_offset) and
   REG_ONE (reg, byte_offset) before expanding these lists.
   Byte offsets: x19-x30 at 0..95, x16 at 96, d8-d15 at 112..175. */
#define GPR_LAYOUT			\
	REG_PAIR (x19, x20,  0);	\
	REG_PAIR (x21, x22, 16);	\
	REG_PAIR (x23, x24, 32);	\
	REG_PAIR (x25, x26, 48);	\
	REG_PAIR (x27, x28, 64);	\
	REG_PAIR (x29, x30, 80);	\
	REG_ONE (x16,      96)
#define FPR_LAYOUT			\
	REG_PAIR ( d8,  d9, 112);	\
	REG_PAIR (d10, d11, 128);	\
	REG_PAIR (d12, d13, 144);	\
	REG_PAIR (d14, d15, 160);
// cdecl(clib_setjmp) (x0 = jump buffer, x1 = value handed back in x0)
// Stores x19-x30, the stack pointer (via x16) and d8-d15 into the
// buffer at the offsets given by GPR_LAYOUT / FPR_LAYOUT.
	.global	cdecl(clib_setjmp)
	.type	cdecl(clib_setjmp), %function
cdecl(clib_setjmp):
	// sp goes through x16 so that it can be stored by REG_ONE
	mov	x16, sp
#define REG_PAIR(REG1, REG2, OFFS)	stp REG1, REG2, [x0, OFFS]
#define REG_ONE(REG1, OFFS)		str REG1, [x0, OFFS]
	GPR_LAYOUT
	FPR_LAYOUT
#undef REG_PAIR
#undef REG_ONE
	// the direct call yields the caller-supplied value
	mov	x0, x1
	ret
	.size	cdecl(clib_setjmp), .-cdecl(clib_setjmp)
// void cdecl(clib_longjmp) (jmp_buf, int) __attribute__ ((noreturn))
// x0 = jump buffer, x1 = value handed back in x0.
// Reloads x19-x30, d8-d15 and the stack pointer (via x16) from the
// buffer, then branches to the reloaded x30.
	.global	cdecl(clib_longjmp)
	.type	cdecl(clib_longjmp), %function
cdecl(clib_longjmp):
#define REG_PAIR(REG1, REG2, OFFS)	ldp REG1, REG2, [x0, OFFS]
#define REG_ONE(REG1, OFFS)		ldr REG1, [x0, OFFS]
	GPR_LAYOUT
	FPR_LAYOUT
#undef REG_PAIR
#undef REG_ONE
	mov	sp, x16
	// x1 is passed through unchanged; the zero-to-one adjustment below
	// is commented out, so a requested value of 0 stays 0
	mov     x0, x1
	// cmp	w1, #0
	// cinc	w0, w1, eq
	// use br not ret, as ret is guaranteed to mispredict
	br	x30
	.size	cdecl(clib_longjmp), .-cdecl(clib_longjmp)


// void cdecl(clib_calljmp) (x0=function, x1=arg, x2=new_stack)
// Runs function(arg) on the stack whose top is new_stack and returns to
// the caller on the original stack.  x0 on return is whatever the
// called function left there.
// The old sp and x30 are kept in a 16-byte slot at the bottom of the
// aligned new stack while the function runs.
// Scratch: x2, x3, x4.
	.global	cdecl(clib_calljmp)
	.type	cdecl(clib_calljmp), %function
cdecl(clib_calljmp):
	// save fn ptr
	mov     x3, x0
	// set up fn arg
	mov     x0, x1
	// remember the caller's stack pointer
	mov     x4, sp

	// sp must be 16-byte aligned whenever it is used to access memory:
	// round the new stack top down before carving out the save slot
	and     x2, x2, #0xfffffffffffffff0
	// space for saved sp, lr on new stack
	sub     x2, x2, #16
	mov     sp, x2

	// save old sp and link register on new stack
	str     x4, [sp]
	str     x30, [sp, #8]

	// go there
	blr     x3

	// restore old sp and link register
	ldp     x3, x30, [sp]
	mov     sp, x3
	ret
	.size	cdecl(clib_calljmp), .-cdecl(clib_calljmp)
#else
#error "unknown machine"
#endif	

/* Emit an empty .note.GNU-stack section (skipped when __APPLE__ is
   defined); with GNU toolchains this marks the object as not needing
   an executable stack. */
#ifndef __APPLE__
.section .note.GNU-stack,"",%progbits
#endif