Hello,

I first want to apologize for so many patch versions.

This version of the patch correctly adjusts the offset of the TLS initimage used by __libc_setup_tls when copying.

I would love feedback on my modifications to libc-tls.c as I had to use a new global variable defined in reloc_static_pie.c containing the load address. Originally I attempted to copy how glibc pulls the load address from a memory map when initializing the TLS, however I could not find a similar structure available in __libc_setup_tls. I would love to know if there is a better method which I overlooked.



From 30b435a0b7a7f2d4323eafea2bcebb087cc62462 Mon Sep 17 00:00:00 2001
From: linted <linted@users.noreply.github.com>
Date: Sat, 23 Jul 2022 16:25:41 -0400
Subject: [PATCH] Added support for creation of Static Position-Independent
 Executables (PIE) on i386, x86_64, and arm.

This patch adds the generation of rcrt1.o which is used by gcc when compiling with the --static-pie flag.

rcrt1.o differs from crt1.o and Scrt1.o in that the executable has a dynamic section but no relocations have been performed prior to _start being called.
crt1.o assumes there to be no dynamic relocations, and Scrt1.o has all relocations performed prior to execution by ld.so.

The new reloc_static_pie function handles parsing the dynamic section and performing the relocations in an architecture-agnostic manner.
It also sets _dl_load_base, which is used when initializing TLS to ensure loading from the proper location.
This allows for easier porting of static-pie support to additional architectures as only modifications to crt1.S to find the load address are required.

Signed-off-by: linted <linted@users.noreply.github.com>
---
 Makerules                                   |  5 +++
 extra/Configs/Config.in                     |  5 +++
 libc/misc/internals/Makefile.in             |  1 +
 libc/misc/internals/reloc_static_pie.c      | 47 +++++++++++++++++++++
 libc/sysdeps/linux/arm/crt1.S               | 19 +++++++++
 libc/sysdeps/linux/i386/crt1.S              | 20 +++++++++
 libc/sysdeps/linux/x86_64/crt1.S            | 16 ++++++-
 libpthread/nptl/sysdeps/generic/Makefile.in |  4 ++
 libpthread/nptl/sysdeps/generic/libc-tls.c  |  4 ++
 9 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 libc/misc/internals/reloc_static_pie.c

diff --git a/Makerules b/Makerules
index fd40e6c7b..845d81897 100644
--- a/Makerules
+++ b/Makerules
@@ -405,8 +405,13 @@ else
 CRTS=$(top_builddir)lib/$(CRT).o
 endif
 
+ifeq ($(STATIC_PIE),y)
+CRTS+=$(top_builddir)lib/r$(CRT).o
+endif
+
 ASFLAGS-$(CRT).o := -DL_$(CRT)
 ASFLAGS-S$(CRT).o := $(PIEFLAG) -DL_S$(CRT)
+ASFLAGS-r$(CRT).o := $(PIEFLAG) -DL_r$(CRT)
 $(CRTS): $(top_srcdir)libc/sysdeps/linux/$(TARGET_ARCH)/$(CRT).S
  $(compile.S)
  $(Q)$(STRIPTOOL) -x -R .note -R .comment $@
diff --git a/extra/Configs/Config.in b/extra/Configs/Config.in
index a58ceb265..a49278b30 100644
--- a/extra/Configs/Config.in
+++ b/extra/Configs/Config.in
@@ -301,6 +301,11 @@ config DOPIC
   If you wish to build all of uClibc as PIC objects, then answer Y here.
   If you are unsure, then you should answer N.
 
+config STATIC_PIE
+ bool "Add support for Static Position Independent Executables (PIE)"
+ default n
+ depends on DOPIC && !UCLIBC_FORMAT_FDPIC_ELF && (TARGET_arm || TARGET_i386 || TARGET_x86_64)
+
 config ARCH_HAS_NO_SHARED
  bool
 
diff --git a/libc/misc/internals/Makefile.in b/libc/misc/internals/Makefile.in
index a8e4e36f9..4a6e73d2d 100644
--- a/libc/misc/internals/Makefile.in
+++ b/libc/misc/internals/Makefile.in
@@ -34,6 +34,7 @@ libc-static-$(UCLIBC_FORMAT_FLAT_SEP_DATA) += \
 libc-static-$(UCLIBC_FORMAT_SHARED_FLAT) += \
   $(MISC_INTERNALS_OUT)/shared_flat_initfini.o \
   $(MISC_INTERNALS_OUT)/shared_flat_add_library.o
+libc-static-$(STATIC_PIE) += $(MISC_INTERNALS_OUT)/reloc_static_pie.o
 libc-shared-$(UCLIBC_FORMAT_SHARED_FLAT) += \
   $(MISC_INTERNALS_OUT)/shared_flat_initfini.os \
   $(MISC_INTERNALS_OUT)/shared_flat_add_library.os
diff --git a/libc/misc/internals/reloc_static_pie.c b/libc/misc/internals/reloc_static_pie.c
new file mode 100644
index 000000000..578202d23
--- /dev/null
+++ b/libc/misc/internals/reloc_static_pie.c
@@ -0,0 +1,47 @@
+/* Support for relocating static PIE.
+   Copyright (C) 2017-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <link.h>
+#include <elf.h>
+#include <dl-elf.h>
+
+ElfW(Addr) _dl_load_base = 0; /* Run-time load address; stored by reloc_static_pie. Integer type, so 0 rather than NULL. */
+
+void
+reloc_static_pie (ElfW(Addr) load_addr);
+/* Relocate this static PIE in place for run-time base LOAD_ADDR and record it in _dl_load_base.  */
+void
+reloc_static_pie (ElfW(Addr) load_addr)
+{
+    ElfW(Word) relative_count = 0;
+    ElfW(Addr) rel_addr = 0;
+    ElfW(Dyn) * dyn_addr = NULL;
+    unsigned long dynamic_info[DYNAMIC_SIZE] = {0};
+
+    /* Locate our own dynamic section: load address plus the value from elf_machine_dynamic ().  */
+    dyn_addr = ((void *) load_addr + elf_machine_dynamic ());
+
+    /* Fill in the info array; use the underlying function to avoid TLS access before initialization.  */
+    __dl_parse_dynamic_info(dyn_addr, dynamic_info, NULL, load_addr);
+
+    /* Perform the relative relocations using the count and table address parsed above.  */
+    relative_count = dynamic_info[DT_RELCONT_IDX];
+    rel_addr = dynamic_info[DT_RELOC_TABLE_ADDR];
+    elf_machine_relative(load_addr, rel_addr, relative_count);
+    _dl_load_base = load_addr; /* Stored last, after the relocations have been applied.  */
+}
diff --git a/libc/sysdeps/linux/arm/crt1.S b/libc/sysdeps/linux/arm/crt1.S
index a1d7f0f23..fade1d25c 100644
--- a/libc/sysdeps/linux/arm/crt1.S
+++ b/libc/sysdeps/linux/arm/crt1.S
@@ -246,6 +246,22 @@ _start:
  mov lr, #0
 
 #ifdef __ARCH_USE_MMU__
+#ifdef L_rcrt1
+ /* We don't need to save a1 since no dynamic linker should have run */
+ ldr a1, .L_GOT          /* Get value at .L_GOT + 0  (offset to GOT)*/
+ adr a2, .L_GOT          /* Get address of .L_GOT */
+ ldr a3, .L_GOT+16       /* Get value of _start(GOT) stored in .L_GOT */
+ adr a4, _start          /* Get address of _start after relocation (changes to pc - ~30 or so) */
+ add a1, a1, a2          /* Calculate where the GOT is */
+ ldr a2, [a1, a3]        /* GOT + _start(GOT) = offset of _start from begin of file */
+ sub a1, a4, a2          /* Current addr of _start - offset from beginning of file = load addr */
+ bl reloc_static_pie
+ mov a1, #0              /* Clean up a1 so that a random address won't get called at the end of program */
+
+ /* Clear the frame pointer and link register again since it might be modified by previous call */
+ mov fp, #0
+ mov lr, #0
+#endif
  /* Pop argc off the stack and save a pointer to argv */
  ldr a2, [sp], #4
  mov a3, sp
@@ -309,6 +325,9 @@ _start:
  .word _fini(GOT)
  .word _init(GOT)
  .word main(GOT)
+#ifdef L_rcrt1
+ .word _start(GOT)
+#endif
 #endif
 #endif
 
diff --git a/libc/sysdeps/linux/i386/crt1.S b/libc/sysdeps/linux/i386/crt1.S
index 35a6552e8..decc68967 100644
--- a/libc/sysdeps/linux/i386/crt1.S
+++ b/libc/sysdeps/linux/i386/crt1.S
@@ -67,6 +67,9 @@
 #endif
 .type   main,%function
 .type   __uClibc_main,%function
+#ifdef L_rcrt1
+.type reloc_static_pie,%function
+#endif
 _start:
  /* Clear the frame pointer.  The ABI suggests this be done, to mark
    the outermost frame obviously.  */
@@ -100,6 +103,23 @@ _start:
  pop %ebx
  addl $_GLOBAL_OFFSET_TABLE_+[.-.L0],%ebx
 
+#ifdef L_rcrt1
+ /* We cannot rely on _DYNAMIC being usable here due to RELRO.
+   Instead we calculate the load address based off a symbol
+   that we know will exist, _start. */
+ pushl %ecx                      /* Save ecx so it won't get clobbered */
+ pushl %ebx                      /* Save ebx so it won't get clobbered */
+ xorl %ecx, %ecx                 /* Clear ecx */
+ addl _start@GOT(%ebx), %ecx     /* Get the offset of _start */
+ movl _start@GOT(%ebx), %eax     /* Get the run time address of _start */
+ subl %ecx, %eax                 /* Subtract to find the load address */
+ pushl %eax                      /* Pass the load address */
+ call reloc_static_pie@PLT
+ popl %eax                       /* Clean up from function call */
+ popl %ebx                       /* Restore the GOT address */
+ popl %ecx                       /* restore ecx */
+#endif
+
  /* Push address of our own entry points to .fini and .init.  */
  pushl _fini@GOT(%ebx)
  pushl _init@GOT(%ebx)
diff --git a/libc/sysdeps/linux/x86_64/crt1.S b/libc/sysdeps/linux/x86_64/crt1.S
index 87777dd5d..701cbf2f6 100644
--- a/libc/sysdeps/linux/x86_64/crt1.S
+++ b/libc/sysdeps/linux/x86_64/crt1.S
@@ -80,6 +80,20 @@ _start:
    the outermost frame obviously.  */
  xorl %ebp, %ebp
 
+#ifdef L_rcrt1
+ pushq %rdi                          /* save rdi (but should be 0...) */
+ pushq %rdx                          /* store rdx (rtld_fini) */
+ xorq %rcx, %rcx                     /* ensure rcx is 0 */
+ addq _start@GOTPCREL(%rip), %rcx    /* get offset of _start from beginning of file */
+ movq _start@GOTPCREL(%rip), %rax    /* get run time address of _start */
+ subq %rcx, %rax                     /* calculate run time load offset */
+ movq %rax, %rdi                     /* load offset -> param 1 */
+ call reloc_static_pie               /* relocate dynamic addrs */
+ xorq %rax, %rax                     /* cleanup */
+ popq %rdx
+ popq %rdi
+#endif
+
  /* Extract the arguments as encoded on the stack and set up
    the arguments for __libc_start_main (int (*main) (int, char **, char **),
    int argc, char *argv,
@@ -107,7 +121,7 @@ _start:
    which grow downwards).  */
  pushq %rsp
 
-#if defined(L_Scrt1)
+#if defined(L_Scrt1) || defined(L_rcrt1)
  /* Give address for main() */
  movq main@GOTPCREL(%rip), %rdi
 
diff --git a/libpthread/nptl/sysdeps/generic/Makefile.in b/libpthread/nptl/sysdeps/generic/Makefile.in
index eb656ee17..a5ba9bbe8 100644
--- a/libpthread/nptl/sysdeps/generic/Makefile.in
+++ b/libpthread/nptl/sysdeps/generic/Makefile.in
@@ -13,6 +13,10 @@ subdirs += libpthread/nptl/sysdeps/generic
 libpthread_generic_DIR := $(top_srcdir)libpthread/nptl/sysdeps/generic
 libpthread_generic_OUT := $(top_builddir)libpthread/nptl/sysdeps/generic
 
+ifeq ($(STATIC_PIE),y)
+CFLAGS-libc-tls.c := -DSTATIC_PIE
+endif
+
 libpthread_generic_libc_a_CSRC = libc-tls.c
 libpthread_generic_libc_a_COBJ = $(patsubst %.c,$(libpthread_generic_OUT)/%.o,$(libpthread_generic_libc_a_CSRC))
 libpthread_generic_libc_a_OBJS = $(libpthread_generic_libc_a_COBJ)
diff --git a/libpthread/nptl/sysdeps/generic/libc-tls.c b/libpthread/nptl/sysdeps/generic/libc-tls.c
index d746c9a38..0c8c558d0 100644
--- a/libpthread/nptl/sysdeps/generic/libc-tls.c
+++ b/libpthread/nptl/sysdeps/generic/libc-tls.c
@@ -142,6 +142,10 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
   initimage = (void *) &__tdata_start;
 #else
   initimage = (void *) phdr->p_vaddr;
+#if !defined(SHARED) && defined(STATIC_PIE)
+    extern ElfW(Addr) _dl_load_base;
+    initimage += _dl_load_base;
+#endif
 #endif
   align = phdr->p_align;
   if (phdr->p_align > max_align)
--
2.34.1