/*	$NetBSD: startprog64.S,v 1.3.50.1 2023/05/13 13:26:57 martin Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/*
 * Starts the program in protected mode / flat space with the given
 * stack frame.
 * Needs the global variables flatcodeseg and flatdataseg (gdt offsets).
 *
 * Derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

                All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>	/* ENTRY, _C_LABEL */
#include <machine/specialreg.h>	/* CR0_PG, CR4_PAE */

#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad	0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)

	.text
	.p2align 4,,15

/*
 * startprog64(loadaddr, entry, stack, kern_load, kern_start, kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32bit
	 * segment mode from x64 mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */
	cld			/* LynxOS depends on it */
	cli

	/* skip copy if same source and destination */
	cmpq	%rdi, %rsi
	jz	.Lcopy_done

	/* Copy kernel */
	mov	%rcx, %r12	/* original kernel size */
	movq	%rdi, %r11	/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif

	shrq	$3, %rcx	/* count for copy by words */
	jz	8f		/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel cpus (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11	/* destination misaligned? */
	jnz	2f
	rep	movsq
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned.
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/*
 * Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movsq'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

/*
 * Less than 8 bytes to copy, copy by bytes.
 * Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+!
 */
8:	mov	%r12, %rcx
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif
	/* nope, copy forwards. */
	rep	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
	/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep	movsb
	cld
#endif
	/* End of copy kernel */

.Lcopy_done:
	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(start32r - start)(%rdi)

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi

	.align	16
start32r:
	.long	0		/* offset of start32a, filled in at runtime */
	.long	CODE_SEGMENT	/* selector for the far jump */
	.align	16
gdt:
	.long	0, 0						/* null descriptor */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00	/* flat 32bit code (0x08) */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00	/* flat 32bit data (0x10) */
gdtr:
	.word	gdtr - gdt	/* GDT limit */
gdtrr:
	.quad	0		/* GDT base, filled in at runtime */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: