From 4d4e61e82cd693d2800bf409c8e3dde1ac2b75a5 Mon Sep 17 00:00:00 2001
From: lemon <lsof@mailbox.org>
Date: Sun, 7 Dec 2025 12:29:27 +0100
Subject: amd64: use XORPS for floating point negation

Previously `neg x` was being turned into `sub 0, x`. But this gives the
wrong result for zero/negative zero (-(0.0) == -0.0 but 0.0 - 0.0 == 0.0),
so it wasn't IEEE compliant or correct. Do what every other compiler
does instead and flip the sign bit with an exclusive or.

We should implement some way of deduplicating small data constants like
the ones used here, though.
---
 amd64/emit.c | 1 +
 amd64/isel.c | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/amd64/emit.c b/amd64/emit.c
index f4c95bd..95055da 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -599,6 +599,7 @@ DEFINSTR2(Xxor,
    {4|8, PGPR, PI32, "\x81", EN_RI32, .ext=6}, /* XOR r32/64, imm */
    {  8, PGPR, PMEM, "\x33", EN_RM},           /* XOR r64, m64 */
    {4|8, PFPR, PFPR, "\x0F\x57", EN_RR},       /* XORPS xmm, xmm */
+   {4|8, PFPR, PMEM, "\x0F\x57", EN_RM},       /* XORPS xmm, m128 */
 )
 DEFINSTR2(Xshl,
    {4|8, PGPR, P1,   "\xD1", EN_R, .ext=4},   /* SHL r32/64, 1 */
diff --git a/amd64/isel.c b/amd64/isel.c
index 64cda27..be2f769 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -494,9 +494,12 @@ sel(struct function *fn, struct instr *ins, struct block *blk, int *curi)
       goto ALU;
    case Oneg:
       if (kisflt(ins->cls)) {
-         ins->op = Osub;
-         ins->r = ins->l;
-         ins->l = ZEROREF;
+         /* flip sign bit with XORPS/D: XOR against a 128-bit mask with the sign bit set in every lane */
+         static const uvlong sd[2] = {0x8000000000000000,0x8000000000000000};
+         static const uint sf[4] = {0x80000000,0x80000000,0x80000000,0x80000000};
+         ins->op = Oxor;
+         ins->r = mkdatref(NULL, mktype(ins->cls == KF32 ? TYFLOAT : TYDOUBLE), /*siz*/16,
+               /*align*/16, ins->cls == KF32 ? (void *)sf : sd, /*siz*/16, /*deref*/1);
       }
       /* fallthru */
    case Onot:
-- 
cgit v1.2.3