Commit b287eca3 authored by H.J. Lu's avatar H.J. Lu
Browse files

gold: Support 386 TLS code sequences without PLT

There are extensions to 386 psABI:

https://groups.google.com/forum/#!topic/ia32-abi/awsRSvJOJfs

to call ___tls_get_addr via GOT:

call *___tls_get_addr@GOT(%reg)

where EBX register isn't required as GOT base.

Since a direct call is 4 bytes long and an indirect call is 5 bytes long,
the one extra byte must be handled properly.

For general dynamic model, 7-byte lea instruction before call
instruction is replaced by 6-byte one to make room for indirect call.
For local dynamic model, we simply use 5-byte indirect call.

TLS linker optimization is updated to recognize new instruction
patterns.  For local dynamic model to local exec model transition,
we generate a 6-byte lea instruction as nop, instead of a 1-byte nop
plus a 4-byte lea instruction.

	PR gold/20308
	* i386.cc (Target_i386::Relocate::relocate): Allow
	R_386_GOT32X relocation against ___tls_get_addr.
	(Target_i386::Relocate::tls_gd_to_ie): Support indirect
	call to __tls_get_addr.
	(Target_i386::Relocate::tls_gd_to_le): Likewise.
	(Target_i386::Relocate::tls_ld_to_le): Likewise.
	* testsuite/Makefile.am (check_PROGRAMS): Add pr20308a_test,
	pr20308b_test, pr20308c_test, pr20308d_test, pr20308e_test.
	(pr20308a_test_SOURCES): New.
	(pr20308a_test_DEPENDENCIES): Likewise.
	(pr20308a_test_CFLAGS): Likewise.
	(pr20308a_test_LDFLAGS): Likewise.
	(pr20308a_test_LDADD): Likewise.
	(pr20308b_test_SOURCES): Likewise.
	(pr20308b_test_DEPENDENCIES): Likewise.
	(pr20308b_test_CFLAGS): Likewise.
	(pr20308b_test_LDFLAGS): Likewise.
	(pr20308b_test_LDADD): Likewise.
	(pr20308c_test_SOURCES): Likewise.
	(pr20308c_test_DEPENDENCIES): Likewise.
	(pr20308c_test_CFLAGS): Likewise.
	(pr20308c_test_LDFLAGS): Likewise.
	(pr20308c_test_LDADD): Likewise.
	(pr20308d_test_SOURCES): Likewise.
	(pr20308d_test_DEPENDENCIES): Likewise.
	(pr20308d_test_CFLAGS): Likewise.
	(pr20308d_test_LDFLAGS): Likewise.
	(pr20308d_test_LDADD): Likewise.
	(pr20308e_test_SOURCES): Likewise.
	(pr20308e_test_DEPENDENCIES): Likewise.
	(pr20308e_test_CFLAGS): Likewise.
	(pr20308e_test_LDFLAGS): Likewise.
	(pr20308e_test_LDADD): Likewise.
	(pr20308a.so): Likewise.
	(pr20308b.so): Likewise.
	(pr20308_gd.o): Likewise.
	(pr20308_ld.o): Likewise.
	(MOSTLYCLEANFILES): Add pr20308a.so pr20308b.so.
	* testsuite/Makefile.in: Regenerated.
	* testsuite/pr20308_def.c: New file.
	* testsuite/pr20308_gd.S: Likewise.
	* testsuite/pr20308_ld.S: Likewise.
	* testsuite/pr20308_main.c: Likewise.
parent ad961eab
2016-06-29 H.J. Lu <hongjiu.lu@intel.com>
PR gold/20308
* i386.cc (Target_i386::Relocate::relocate): Allow
R_386_GOT32X relocation against ___tls_get_addr.
(Target_i386::Relocate::tls_gd_to_ie): Support indirect
call to __tls_get_addr.
(Target_i386::Relocate::tls_gd_to_le): Likewise.
(Target_i386::Relocate::tls_ld_to_le): Likewise.
* testsuite/Makefile.am (check_PROGRAMS): Add pr20308a_test,
pr20308b_test, pr20308c_test, pr20308d_test, pr20308e_test.
(pr20308a_test_SOURCES): New.
(pr20308a_test_DEPENDENCIES): Likewise.
(pr20308a_test_CFLAGS): Likewise.
(pr20308a_test_LDFLAGS): Likewise.
(pr20308a_test_LDADD): Likewise.
(pr20308b_test_SOURCES): Likewise.
(pr20308b_test_DEPENDENCIES): Likewise.
(pr20308b_test_CFLAGS): Likewise.
(pr20308b_test_LDFLAGS): Likewise.
(pr20308b_test_LDADD): Likewise.
(pr20308c_test_SOURCES): Likewise.
(pr20308c_test_DEPENDENCIES): Likewise.
(pr20308c_test_CFLAGS): Likewise.
(pr20308c_test_LDFLAGS): Likewise.
(pr20308c_test_LDADD): Likewise.
(pr20308d_test_SOURCES): Likewise.
(pr20308d_test_DEPENDENCIES): Likewise.
(pr20308d_test_CFLAGS): Likewise.
(pr20308d_test_LDFLAGS): Likewise.
(pr20308d_test_LDADD): Likewise.
(pr20308e_test_SOURCES): Likewise.
(pr20308e_test_DEPENDENCIES): Likewise.
(pr20308e_test_CFLAGS): Likewise.
(pr20308e_test_LDFLAGS): Likewise.
(pr20308e_test_LDADD): Likewise.
(pr20308a.so): Likewise.
(pr20308b.so): Likewise.
(pr20308_gd.o): Likewise.
(pr20308_ld.o): Likewise.
(MOSTLYCLEANFILES): Add pr20308a.so pr20308b.so.
* testsuite/Makefile.in: Regenerated.
* testsuite/pr20308_def.c: New file.
* testsuite/pr20308_gd.S: Likewise.
* testsuite/pr20308_ld.S: Likewise.
* testsuite/pr20308_main.c: Likewise.
2016-06-29 H.J. Lu <hongjiu.lu@intel.com>
PR gold/20216
......
......@@ -2790,6 +2790,7 @@ Target_i386::Relocate::relocate(const Relocate_info<32, false>* relinfo,
if (this->skip_call_tls_get_addr_)
{
if ((r_type != elfcpp::R_386_PLT32
&& r_type != elfcpp::R_386_GOT32X
&& r_type != elfcpp::R_386_PC32)
|| gsym == NULL
|| strcmp(gsym->name(), "___tls_get_addr") != 0)
......@@ -3318,9 +3319,11 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo,
unsigned char* view,
section_size_type view_size)
{
// leal foo(,%reg,1),%eax; call ___tls_get_addr
// leal foo(,%ebx,1),%eax; call ___tls_get_addr@PLT
// ==> movl %gs:0,%eax; subl $foo@tpoff,%eax
// leal foo(%reg),%eax; call ___tls_get_addr
// leal foo(%ebx),%eax; call ___tls_get_addr@PLT
// ==> movl %gs:0,%eax; subl $foo@tpoff,%eax
// leal foo(%reg),%eax; call *___tls_get_addr@GOT(%reg)
// ==> movl %gs:0,%eax; subl $foo@tpoff,%eax
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2);
......@@ -3328,10 +3331,12 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo,
unsigned char op1 = view[-1];
unsigned char op2 = view[-2];
unsigned char op3 = view[4];
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
op2 == 0x8d || op2 == 0x04);
tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8);
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
op3 == 0xe8 || op3 == 0xff);
int roff = 5;
......@@ -3345,12 +3350,18 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo,
}
else
{
unsigned char reg = op1 & 7;
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
(op1 & 0xf8) == 0x80 && (op1 & 7) != 4);
if (rel.get_r_offset() + 9 < view_size
&& view[9] == 0x90)
((op1 & 0xf8) == 0x80
&& reg != 4
&& reg != 0
&& (op3 == 0xe8 || (view[5] & 0x7) == reg)));
if (op3 == 0xff
|| (rel.get_r_offset() + 9 < view_size
&& view[9] == 0x90))
{
// There is a trailing nop. Use the size byte subl.
// There is an indirect call or a trailing nop. Use the six
// byte subl.
memcpy(view - 2, "\x65\xa1\0\0\0\0\x81\xe8\0\0\0", 12);
roff = 6;
}
......@@ -3381,20 +3392,24 @@ Target_i386::Relocate::tls_gd_to_ie(const Relocate_info<32, false>* relinfo,
unsigned char* view,
section_size_type view_size)
{
// leal foo(,%ebx,1),%eax; call ___tls_get_addr
// leal foo(,%ebx,1),%eax; call ___tls_get_addr@PLT
// ==> movl %gs:0,%eax; addl foo@gotntpoff(%ebx),%eax
// leal foo(%ebx),%eax; call ___tls_get_addr; nop
// leal foo(%ebx),%eax; call ___tls_get_addr@PLT; nop
// ==> movl %gs:0,%eax; addl foo@gotntpoff(%ebx),%eax
// leal foo(%reg),%eax; call *___tls_get_addr@GOT(%reg)
// ==> movl %gs:0,%eax; addl foo@gotntpoff(%reg),%eax
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2);
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 9);
unsigned char op1 = view[-1];
unsigned char op2 = view[-2];
unsigned char op3 = view[4];
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
op2 == 0x8d || op2 == 0x04);
tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8);
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
op3 == 0xe8 || op3 == 0xff);
int roff;
......@@ -3408,10 +3423,14 @@ Target_i386::Relocate::tls_gd_to_ie(const Relocate_info<32, false>* relinfo,
}
else
{
unsigned char reg = op1 & 7;
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 10);
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
(op1 & 0xf8) == 0x80 && (op1 & 7) != 4);
tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[9] == 0x90);
((op1 & 0xf8) == 0x80
&& reg != 4
&& reg != 0
&& ((op3 == 0xe8 && view[9] == 0x90)
|| (view[5] & 0x7) == reg)));
roff = 6;
}
......@@ -3512,19 +3531,36 @@ Target_i386::Relocate::tls_ld_to_le(const Relocate_info<32, false>* relinfo,
unsigned char* view,
section_size_type view_size)
{
// leal foo(%reg), %eax; call ___tls_get_addr
// leal foo(%ebx), %eax; call ___tls_get_addr@PLT
// ==> movl %gs:0,%eax; nop; leal 0(%esi,1),%esi
// leal foo(%reg), %eax; call *___tls_get_addr@GOT(%reg)
// ==> movl %gs:0,%eax; leal (%esi),%esi
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2);
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 9);
// FIXME: Does this test really always pass?
unsigned char op1 = view[-1];
unsigned char op2 = view[-2];
unsigned char op3 = view[4];
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
view[-2] == 0x8d && view[-1] == 0x83);
op3 == 0xe8 || op3 == 0xff);
tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size,
op3 == 0xe8 ? 9 : 10);
// FIXME: Does this test really always pass?
tls::check_tls(relinfo, relnum, rel.get_r_offset(), op2 == 0x8d);
tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8);
unsigned char reg = op1 & 7;
tls::check_tls(relinfo, relnum, rel.get_r_offset(),
((op1 & 0xf8) == 0x80
&& reg != 4
&& reg != 0
&& (op3 == 0xe8 || (view[5] & 0x7) == reg)));
memcpy(view - 2, "\x65\xa1\0\0\0\0\x90\x8d\x74\x26\0", 11);
if (op3 == 0xe8)
memcpy(view - 2, "\x65\xa1\0\0\0\0\x90\x8d\x74\x26\0", 11);
else
memcpy(view - 2, "\x65\xa1\0\0\0\0\x8d\xb6\0\0\0\0", 12);
// The next reloc should be a PLT32 reloc against __tls_get_addr.
// We can skip it.
......
......@@ -1254,6 +1254,55 @@ i386_mov_to_lea7.stdout: i386_mov_to_lea7
i386_mov_to_lea8.stdout: i386_mov_to_lea8
$(TEST_OBJDUMP) -dw $< > $@
# PR gold/20308: build the same TLS test program (pr20308_main.c) in five
# link configurations.  pr20308_gd.o and pr20308_ld.o are assembled from
# the .S files by the rules at the end of this group.
# NOTE(review): the configurations presumably map to different TLS
# transitions in i386.cc (GD/LD to LE, GD to IE, no transition) -- confirm.

# a: default executable link; the TLS access objects and the definition of
# gd (pr20308_def.c) are all linked directly into the program.
check_PROGRAMS += pr20308a_test
pr20308a_test_SOURCES = pr20308_main.c pr20308_def.c
pr20308a_test_DEPENDENCIES = pr20308_gd.o pr20308_ld.o gcctestdir/ld gcctestdir/as
pr20308a_test_CFLAGS = -Bgcctestdir/ -fPIE
pr20308a_test_LDFLAGS = -Bgcctestdir/ -Wl,-R,.
pr20308a_test_LDADD = pr20308_gd.o pr20308_ld.o
# b: same inputs as a, but linked with -pie.
check_PROGRAMS += pr20308b_test
pr20308b_test_SOURCES = pr20308_main.c pr20308_def.c
pr20308b_test_DEPENDENCIES = pr20308_gd.o pr20308_ld.o gcctestdir/ld gcctestdir/as
pr20308b_test_CFLAGS = -Bgcctestdir/ -fPIE
pr20308b_test_LDFLAGS = -pie -Bgcctestdir/ -Wl,-R,.
pr20308b_test_LDADD = pr20308_gd.o pr20308_ld.o
# c: same inputs as a, but linked with -static.
check_PROGRAMS += pr20308c_test
pr20308c_test_SOURCES = pr20308_main.c pr20308_def.c
pr20308c_test_DEPENDENCIES = pr20308_gd.o pr20308_ld.o gcctestdir/ld gcctestdir/as
pr20308c_test_CFLAGS = -Bgcctestdir/ -fPIE
pr20308c_test_LDFLAGS = -static -Bgcctestdir/ -Wl,-R,.
pr20308c_test_LDADD = pr20308_gd.o pr20308_ld.o
# d: the TLS access functions come from the shared library pr20308a.so
# instead of being linked into the executable; gd is still defined in the
# program (pr20308_def.c is in SOURCES).
check_PROGRAMS += pr20308d_test
pr20308d_test_SOURCES = pr20308_main.c pr20308_def.c
pr20308d_test_DEPENDENCIES = pr20308a.so gcctestdir/ld gcctestdir/as
pr20308d_test_CFLAGS = -Bgcctestdir/ -fPIE
pr20308d_test_LDFLAGS = -Bgcctestdir/ -Wl,-R,.
pr20308d_test_LDADD = pr20308a.so
# e: the TLS access objects are linked into the executable, but
# pr20308_def.c is not in SOURCES; the definition of gd is supplied by
# the shared library pr20308b.so.
check_PROGRAMS += pr20308e_test
pr20308e_test_SOURCES = pr20308_main.c
pr20308e_test_DEPENDENCIES = pr20308_gd.o pr20308_ld.o pr20308b.so gcctestdir/ld gcctestdir/as
pr20308e_test_CFLAGS = -Bgcctestdir/ -fPIE
pr20308e_test_LDFLAGS = -Bgcctestdir/ -Wl,-R,.
pr20308e_test_LDADD = pr20308_gd.o pr20308_ld.o pr20308b.so
# The shared libraries are built by explicit rules below, so they are
# registered for cleanup by hand.
MOSTLYCLEANFILES += pr20308a.so pr20308b.so
# Shared library holding the get/set/test functions for gd and ld (used by d).
pr20308a.so: pr20308_gd.o pr20308_ld.o gcctestdir/ld
$(LINK) -Bgcctestdir/ -shared pr20308_gd.o pr20308_ld.o
# Shared library holding only the definition of gd (used by e).
pr20308b.so: pr20308_def.o gcctestdir/ld
$(LINK) -Bgcctestdir/ -shared pr20308_def.o
# Assemble the hand-written TLS code sequences.
pr20308_gd.o: pr20308_gd.S
$(COMPILE) -c -o $@ $<
pr20308_ld.o: pr20308_ld.S
$(COMPILE) -c -o $@ $<
endif DEFAULT_TARGET_I386
check_PROGRAMS += many_sections_test
......
This diff is collapsed.
/* Thread-local variable with initial value 1.  It is the object the
   assembly test functions get_gd, set_gd and test_gd reference through
   gd@tlsgd.  */
__thread int gd = 1;
.text
.p2align 4,,15
.globl get_gd
.type get_gd, @function
/* get_gd: declared by the test driver as int *get_gd (void).
   Obtains the address of the TLS variable gd with the general-dynamic
   sequence in its 7-byte SIB-form lea variant, followed by a direct call
   to ___tls_get_addr through the PLT, using %ebx as the GOT base.
   The value left in %eax by ___tls_get_addr is returned unchanged.  */
get_gd:
pushl %ebx
/* The thunk loads its return address into %ebx; adding
   _GLOBAL_OFFSET_TABLE_ then makes %ebx the GOT base.  */
call __x86.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
/* Reserve 8 bytes of stack for the duration of the call (released below).  */
subl $8, %esp
/* The lea/call pair below is the pattern the linker's TLS code matches;
   the two instructions must stay adjacent and in this exact form.  */
leal gd@tlsgd(,%ebx,1), %eax
call ___tls_get_addr@PLT
addl $8, %esp
popl %ebx
ret
.size get_gd, .-get_gd
.p2align 4,,15
.globl set_gd
.type set_gd, @function
/* set_gd: declared by the test driver as void set_gd (int).
   Stores its argument into the TLS variable gd.  Uses the general-dynamic
   sequence in its 6-byte lea form, gd@tlsgd(%ebx), followed by a direct
   PLT call to ___tls_get_addr and a one-byte nop.  */
set_gd:
pushl %ebx
/* Establish %ebx as the GOT base via the PC thunk.  */
call __x86.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
subl $8, %esp
/* lea + call + nop: the nop directly after the call is part of the
   recognized sequence and must not be moved or removed.  */
leal gd@tlsgd(%ebx), %eax
call ___tls_get_addr@PLT
nop
/* 16(%esp) is the first argument: 8 bytes reserved above, 4 for the
   saved %ebx and 4 for the return address.  */
movl 16(%esp), %edx
/* %eax holds the result of ___tls_get_addr; store the argument there.  */
movl %edx, (%eax)
addl $8, %esp
popl %ebx
ret
.size set_gd, .-set_gd
.text
.p2align 4,,15
.globl test_gd
.type test_gd, @function
/* test_gd: declared by the test driver as int test_gd (int).
   Returns 1 in %eax if the TLS variable gd equals the argument, else 0.
   This is the variant that needs no PLT: the GOT base lives in %ecx
   rather than %ebx, and ___tls_get_addr is called indirectly through its
   GOT entry.  */
test_gd:
/* Establish %ecx as the GOT base via the PC thunk.  */
call __x86.get_pc_thunk.cx
addl $_GLOBAL_OFFSET_TABLE_, %ecx
subl $12, %esp
/* 6-byte lea followed by the indirect call through the GOT; both use the
   same base register, and no nop follows the call.  */
leal gd@tlsgd(%ecx), %eax
call *___tls_get_addr@GOT(%ecx)
/* 16(%esp) is the first argument: 12 bytes reserved above plus 4 for the
   return address.  %ecx is reused here, so the GOT base is gone.  */
movl 16(%esp), %ecx
/* Compare the argument with the word at the address left in %eax.  */
cmpl %ecx, (%eax)
sete %al
addl $12, %esp
/* Widen the 0/1 flag in %al to the full int return value.  */
movzbl %al, %eax
ret
.size test_gd, .-test_gd
.section .text.unlikely
.section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat
.globl __x86.get_pc_thunk.bx
.hidden __x86.get_pc_thunk.bx
.type __x86.get_pc_thunk.bx, @function
/* PC thunk: copies the word at the top of the stack (the caller's return
   address, i.e. the address of the instruction after the call) into %ebx.  */
__x86.get_pc_thunk.bx:
movl (%esp), %ebx
ret
.section .text.__x86.get_pc_thunk.cx,"axG",@progbits,__x86.get_pc_thunk.cx,comdat
.globl __x86.get_pc_thunk.cx
.hidden __x86.get_pc_thunk.cx
.type __x86.get_pc_thunk.cx, @function
/* PC thunk: copies the word at the top of the stack (the caller's return
   address, i.e. the address of the instruction after the call) into %ecx.  */
__x86.get_pc_thunk.cx:
movl (%esp), %ecx
ret
.section .note.GNU-stack,"",@progbits
.text
.p2align 4,,15
.globl get_ld
.type get_ld, @function
/* get_ld: declared by the test driver as int *get_ld (void).
   Obtains the address of the TLS variable ld with the local-dynamic
   sequence: lea of ld@tlsldm(%ebx) followed by a direct PLT call to
   ___tls_get_addr, then ld@dtpoff added to the call's result.  */
get_ld:
pushl %ebx
/* Establish %ebx as the GOT base via the PC thunk.  */
call __x86.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
subl $8, %esp
/* The lea/call pair is the pattern the linker's TLS code matches; keep
   the two instructions adjacent and in this exact form.  */
leal ld@tlsldm(%ebx), %eax
call ___tls_get_addr@PLT
/* Add ld's dtpoff offset to the value returned in %eax; the sum is the
   return value.  */
leal ld@dtpoff(%eax), %eax
addl $8, %esp
popl %ebx
ret
.size get_ld, .-get_ld
.p2align 4,,15
.globl set_ld
.type set_ld, @function
/* set_ld: declared by the test driver as void set_ld (int).
   Stores its argument into the TLS variable ld, whose address is computed
   with the local-dynamic sequence using %ebx as the GOT base and a direct
   PLT call to ___tls_get_addr.  */
set_ld:
pushl %ebx
/* Establish %ebx as the GOT base via the PC thunk.  */
call __x86.get_pc_thunk.bx
addl $_GLOBAL_OFFSET_TABLE_, %ebx
subl $8, %esp
/* lea + call pair matched by the linker; keep adjacent.  */
leal ld@tlsldm(%ebx), %eax
call ___tls_get_addr@PLT
/* 16(%esp) is the first argument: 8 bytes reserved above, 4 for the
   saved %ebx and 4 for the return address.  */
movl 16(%esp), %edx
/* Address of ld = value returned in %eax plus ld@dtpoff.  */
leal ld@dtpoff(%eax), %eax
movl %edx, (%eax)
addl $8, %esp
popl %ebx
ret
.size set_ld, .-set_ld
.p2align 4,,15
.globl test_ld
.type test_ld, @function
/* test_ld: declared by the test driver as int test_ld (int).
   Returns 1 in %eax if the TLS variable ld equals the argument, else 0.
   This is the local-dynamic variant that needs no PLT: the GOT base lives
   in %ecx and ___tls_get_addr is called indirectly through its GOT
   entry.  */
test_ld:
/* Establish %ecx as the GOT base via the PC thunk.  */
call __x86.get_pc_thunk.cx
addl $_GLOBAL_OFFSET_TABLE_, %ecx
subl $12, %esp
/* lea followed by the indirect call through the GOT; both use the same
   base register.  */
leal ld@tlsldm(%ecx), %eax
call *___tls_get_addr@GOT(%ecx)
/* 16(%esp) is the first argument: 12 bytes reserved above plus 4 for the
   return address.  %ecx is reused here, so the GOT base is gone.  */
movl 16(%esp), %ecx
/* Address of ld = value returned in %eax plus ld@dtpoff.  */
leal ld@dtpoff(%eax), %eax
cmpl %ecx, (%eax)
sete %al
addl $12, %esp
/* Widen the 0/1 flag in %al to the full int return value.  */
movzbl %al, %eax
ret
.size test_ld, .-test_ld
/* ld: 4-byte, 4-byte-aligned, zero-filled object placed in .tbss (section
   flags "awT", @nobits).  It is the variable accessed through ld@tlsldm
   and ld@dtpoff by get_ld, set_ld and test_ld.  No .globl directive for
   ld appears in this file.  */
.section .tbss,"awT",@nobits
.align 4
.type ld, @object
.size ld, 4
ld:
.zero 4
.section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat
.globl __x86.get_pc_thunk.bx
.hidden __x86.get_pc_thunk.bx
.type __x86.get_pc_thunk.bx, @function
/* PC thunk: copies the word at the top of the stack (the caller's return
   address, i.e. the address of the instruction after the call) into %ebx.  */
__x86.get_pc_thunk.bx:
movl (%esp), %ebx
ret
.section .text.__x86.get_pc_thunk.cx,"axG",@progbits,__x86.get_pc_thunk.cx,comdat
.globl __x86.get_pc_thunk.cx
.hidden __x86.get_pc_thunk.cx
.type __x86.get_pc_thunk.cx, @function
/* PC thunk: copies the word at the top of the stack (the caller's return
   address, i.e. the address of the instruction after the call) into %ecx.  */
__x86.get_pc_thunk.cx:
movl (%esp), %ecx
ret
.section .note.GNU-stack,"",@progbits
#include <stdio.h>
#include <stdlib.h>
extern int * get_gd (void);
extern void set_gd (int);
extern int test_gd (int);
extern int * get_ld (void);
extern void set_ld (int);
extern int test_ld (int);
/* Test driver: for each of the two TLS variables, fetch its address, store
   a new value through the setter, then check that the value is visible
   both through the previously fetched pointer and through the tester.
   Aborts on the first mismatch; prints PASS and returns 0 otherwise.  */
int
main ()
{
  /* Variable reached through the gd accessors.  */
  int *gd_addr = get_gd ();
  set_gd (3);
  if (*gd_addr != 3)
    abort ();
  if (!test_gd (3))
    abort ();

  /* Variable reached through the ld accessors.  */
  int *ld_addr = get_ld ();
  set_ld (4);
  if (*ld_addr != 4)
    abort ();
  if (!test_ld (4))
    abort ();

  printf ("PASS\n");
  return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment