mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2025-01-02 13:27:15 +00:00
Merge pull request #752 from lioncash/flush
vfp: Handle flush-to-zero mode.
This commit is contained in:
commit
6223496a49
|
@ -35,6 +35,7 @@
|
|||
#include <cstdio>
|
||||
#include "common/common_types.h"
|
||||
#include "core/arm/skyeye_common/armdefs.h"
|
||||
#include "core/arm/skyeye_common/vfp/asm_vfp.h"
|
||||
|
||||
// In-place division: replaces n with n / base. Expands to a braced statement, not an expression.
#define do_div(n, base) { (n) /= (base); }
|
||||
|
||||
|
@ -236,33 +237,6 @@ struct vfp_single {
|
|||
#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
|
||||
#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
|
||||
|
||||
// Unpack a single-precision float. Note that this returns the magnitude
|
||||
// of the single-precision float mantissa with the 1. if necessary,
|
||||
// aligned to bit 30.
|
||||
static inline void vfp_single_unpack(vfp_single* s, s32 val)
|
||||
{
|
||||
u32 significand;
|
||||
|
||||
s->sign = vfp_single_packed_sign(val) >> 16,
|
||||
s->exponent = vfp_single_packed_exponent(val);
|
||||
|
||||
significand = (u32) val;
|
||||
significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
|
||||
if (s->exponent && s->exponent != 255)
|
||||
significand |= 0x40000000;
|
||||
s->significand = significand;
|
||||
}
|
||||
|
||||
// Re-pack a single-precision float. This assumes that the float is
|
||||
// already normalised such that the MSB is bit 30, _not_ bit 31.
|
||||
static inline s32 vfp_single_pack(vfp_single* s)
|
||||
{
|
||||
u32 val = (s->sign << 16) +
|
||||
(s->exponent << VFP_SINGLE_MANTISSA_BITS) +
|
||||
(s->significand >> VFP_SINGLE_LOW_BITS);
|
||||
return (s32)val;
|
||||
}
|
||||
|
||||
enum : u32 {
|
||||
VFP_NUMBER = (1 << 0),
|
||||
VFP_ZERO = (1 << 1),
|
||||
|
@ -294,6 +268,39 @@ static inline int vfp_single_type(vfp_single* s)
|
|||
return type;
|
||||
}
|
||||
|
||||
// Unpack a single-precision float. Note that this returns the magnitude
|
||||
// of the single-precision float mantissa with the 1. if necessary,
|
||||
// aligned to bit 30.
|
||||
static inline void vfp_single_unpack(vfp_single* s, s32 val, u32* fpscr)
|
||||
{
|
||||
s->sign = vfp_single_packed_sign(val) >> 16,
|
||||
s->exponent = vfp_single_packed_exponent(val);
|
||||
|
||||
u32 significand = ((u32)val << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
|
||||
if (s->exponent && s->exponent != 255)
|
||||
significand |= 0x40000000;
|
||||
s->significand = significand;
|
||||
|
||||
// If flush-to-zero mode is enabled, turn the denormal into zero.
|
||||
// On a VFPv2 architecture, the sign of the zero is always positive.
|
||||
if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_single_type(s) & VFP_DENORMAL) != 0) {
|
||||
s->sign = 0;
|
||||
s->exponent = 0;
|
||||
s->significand = 0;
|
||||
*fpscr |= FPSCR_IDC;
|
||||
}
|
||||
}
|
||||
|
||||
// Re-pack a single-precision float. This assumes that the float is
|
||||
// already normalised such that the MSB is bit 30, _not_ bit 31.
|
||||
static inline s32 vfp_single_pack(vfp_single* s)
|
||||
{
|
||||
u32 val = (s->sign << 16) +
|
||||
(s->exponent << VFP_SINGLE_MANTISSA_BITS) +
|
||||
(s->significand >> VFP_SINGLE_LOW_BITS);
|
||||
return (s32)val;
|
||||
}
|
||||
|
||||
|
||||
u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func);
|
||||
|
||||
|
@ -328,33 +335,6 @@ struct vfp_double {
|
|||
#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
|
||||
#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
|
||||
|
||||
// Unpack a double-precision float. Note that this returns the magnitude
|
||||
// of the double-precision float mantissa with the 1. if necessary,
|
||||
// aligned to bit 62.
|
||||
static inline void vfp_double_unpack(vfp_double* s, s64 val)
|
||||
{
|
||||
u64 significand;
|
||||
|
||||
s->sign = vfp_double_packed_sign(val) >> 48;
|
||||
s->exponent = vfp_double_packed_exponent(val);
|
||||
|
||||
significand = (u64) val;
|
||||
significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
|
||||
if (s->exponent && s->exponent != 2047)
|
||||
significand |= (1ULL << 62);
|
||||
s->significand = significand;
|
||||
}
|
||||
|
||||
// Re-pack a double-precision float. This assumes that the float is
|
||||
// already normalised such that the MSB is bit 62, _not_ bit 63.
|
||||
static inline s64 vfp_double_pack(vfp_double* s)
{
    // Build each field separately in 64-bit unsigned arithmetic, then sum.
    const u64 sign_bits = (u64)s->sign << 48;
    const u64 exponent_bits = (u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS;
    const u64 mantissa_bits = s->significand >> VFP_DOUBLE_LOW_BITS;

    // Addition (not OR) is kept from the original form, so any overlap
    // between the shifted significand and the exponent field carries upward.
    return (s64)(sign_bits + exponent_bits + mantissa_bits);
}
|
||||
|
||||
static inline int vfp_double_type(vfp_double* s)
|
||||
{
|
||||
int type = VFP_NUMBER;
|
||||
|
@ -374,6 +354,39 @@ static inline int vfp_double_type(vfp_double* s)
|
|||
return type;
|
||||
}
|
||||
|
||||
// Unpack a double-precision float. Note that this returns the magnitude
|
||||
// of the double-precision float mantissa with the 1. if necessary,
|
||||
// aligned to bit 62.
|
||||
static inline void vfp_double_unpack(vfp_double* s, s64 val, u32* fpscr)
|
||||
{
|
||||
s->sign = vfp_double_packed_sign(val) >> 48;
|
||||
s->exponent = vfp_double_packed_exponent(val);
|
||||
|
||||
u64 significand = ((u64)val << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
|
||||
if (s->exponent && s->exponent != 2047)
|
||||
significand |= (1ULL << 62);
|
||||
s->significand = significand;
|
||||
|
||||
// If flush-to-zero mode is enabled, turn the denormal into zero.
|
||||
// On a VFPv2 architecture, the sign of the zero is always positive.
|
||||
if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_double_type(s) & VFP_DENORMAL) != 0) {
|
||||
s->sign = 0;
|
||||
s->exponent = 0;
|
||||
s->significand = 0;
|
||||
*fpscr |= FPSCR_IDC;
|
||||
}
|
||||
}
|
||||
|
||||
// Re-pack a double-precision float. This assumes that the float is
|
||||
// already normalised such that the MSB is bit 62, _not_ bit 63.
|
||||
static inline s64 vfp_double_pack(vfp_double* s)
{
    // Build each field separately in 64-bit unsigned arithmetic, then sum.
    const u64 sign_bits = (u64)s->sign << 48;
    const u64 exponent_bits = (u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS;
    const u64 mantissa_bits = s->significand >> VFP_DOUBLE_LOW_BITS;

    // Addition (not OR) is kept from the original form, so any overlap
    // between the shifted significand and the exponent field carries upward.
    return (s64)(sign_bits + exponent_bits + mantissa_bits);
}
|
||||
|
||||
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
|
||||
|
||||
// A special flag to tell the normalisation code not to normalise.
|
||||
|
|
|
@ -291,7 +291,8 @@ static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32
|
|||
vfp_double vdm, vdd, *vdp;
|
||||
int ret, tm;
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
|
||||
tm = vfp_double_type(&vdm);
|
||||
if (tm & (VFP_NAN|VFP_INFINITY)) {
|
||||
vdp = &vdd;
|
||||
|
@ -473,7 +474,7 @@ static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32
|
|||
u32 exceptions = 0;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
|
||||
tm = vfp_double_type(&vdm);
|
||||
|
||||
|
@ -543,7 +544,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32
|
|||
int tm;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
|
||||
/*
|
||||
* Do we have a denormalised number?
|
||||
|
@ -624,7 +625,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32
|
|||
int tm;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
vfp_double_dump("VDM", &vdm);
|
||||
|
||||
/*
|
||||
|
@ -896,11 +897,11 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f
|
|||
struct vfp_double vdd, vdp, vdn, vdm;
|
||||
u32 exceptions;
|
||||
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
if (vdm.exponent == 0 && vdm.significand)
|
||||
vfp_double_normalise_denormal(&vdm);
|
||||
|
||||
|
@ -908,7 +909,7 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f
|
|||
if (negate & NEG_MULTIPLY)
|
||||
vdp.sign = vfp_sign_negate(vdp.sign);
|
||||
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dd));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dd), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand != 0)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
|
@ -969,11 +970,11 @@ static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
|
|||
u32 exceptions;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
if (vdm.exponent == 0 && vdm.significand)
|
||||
vfp_double_normalise_denormal(&vdm);
|
||||
|
||||
|
@ -990,11 +991,11 @@ static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpsc
|
|||
u32 exceptions;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
if (vdm.exponent == 0 && vdm.significand)
|
||||
vfp_double_normalise_denormal(&vdm);
|
||||
|
||||
|
@ -1013,11 +1014,11 @@ static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
|
|||
u32 exceptions;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
if (vdm.exponent == 0 && vdm.significand)
|
||||
vfp_double_normalise_denormal(&vdm);
|
||||
|
||||
|
@ -1035,11 +1036,11 @@ static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
|
|||
u32 exceptions;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
if (vdn.exponent == 0 && vdn.significand)
|
||||
vfp_double_normalise_denormal(&vdn);
|
||||
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
if (vdm.exponent == 0 && vdm.significand)
|
||||
vfp_double_normalise_denormal(&vdm);
|
||||
|
||||
|
@ -1063,8 +1064,8 @@ static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
|
|||
int tm, tn;
|
||||
|
||||
LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn));
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm));
|
||||
vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
|
||||
vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
|
||||
|
||||
vdd.sign = vdn.sign ^ vdm.sign;
|
||||
|
||||
|
|
|
@ -330,7 +330,7 @@ static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 f
|
|||
struct vfp_single vsm, vsd, *vsp;
|
||||
int ret, tm;
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
tm = vfp_single_type(&vsm);
|
||||
if (tm & (VFP_NAN|VFP_INFINITY)) {
|
||||
vsp = &vsd;
|
||||
|
@ -498,7 +498,7 @@ static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 f
|
|||
int tm;
|
||||
u32 exceptions = 0;
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
|
||||
tm = vfp_single_type(&vsm);
|
||||
|
||||
|
@ -563,7 +563,7 @@ static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 f
|
|||
int rmode = fpscr & FPSCR_RMODE_MASK;
|
||||
int tm;
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
vfp_single_dump("VSM", &vsm);
|
||||
|
||||
/*
|
||||
|
@ -643,7 +643,7 @@ static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 f
|
|||
int rmode = fpscr & FPSCR_RMODE_MASK;
|
||||
int tm;
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
vfp_single_dump("VSM", &vsm);
|
||||
|
||||
/*
|
||||
|
@ -925,11 +925,11 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
|
|||
|
||||
v = vfp_get_float(state, sn);
|
||||
LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, v);
|
||||
vfp_single_unpack(&vsn, v);
|
||||
vfp_single_unpack(&vsn, v, &fpscr);
|
||||
if (vsn.exponent == 0 && vsn.significand)
|
||||
vfp_single_normalise_denormal(&vsn);
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
if (vsm.exponent == 0 && vsm.significand)
|
||||
vfp_single_normalise_denormal(&vsm);
|
||||
|
||||
|
@ -940,7 +940,7 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
|
|||
|
||||
v = vfp_get_float(state, sd);
|
||||
LOG_DEBUG(Core_ARM11, "s%u = %08x", sd, v);
|
||||
vfp_single_unpack(&vsn, v);
|
||||
vfp_single_unpack(&vsn, v, &fpscr);
|
||||
if (vsn.exponent == 0 && vsn.significand != 0)
|
||||
vfp_single_normalise_denormal(&vsn);
|
||||
|
||||
|
@ -1004,11 +1004,11 @@ static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
|
|||
|
||||
LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
|
||||
|
||||
vfp_single_unpack(&vsn, n);
|
||||
vfp_single_unpack(&vsn, n, &fpscr);
|
||||
if (vsn.exponent == 0 && vsn.significand)
|
||||
vfp_single_normalise_denormal(&vsn);
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
if (vsm.exponent == 0 && vsm.significand)
|
||||
vfp_single_normalise_denormal(&vsm);
|
||||
|
||||
|
@ -1027,11 +1027,11 @@ static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr
|
|||
|
||||
LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
|
||||
|
||||
vfp_single_unpack(&vsn, n);
|
||||
vfp_single_unpack(&vsn, n, &fpscr);
|
||||
if (vsn.exponent == 0 && vsn.significand)
|
||||
vfp_single_normalise_denormal(&vsn);
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
if (vsm.exponent == 0 && vsm.significand)
|
||||
vfp_single_normalise_denormal(&vsm);
|
||||
|
||||
|
@ -1054,11 +1054,11 @@ static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
|
|||
/*
|
||||
* Unpack and normalise denormals.
|
||||
*/
|
||||
vfp_single_unpack(&vsn, n);
|
||||
vfp_single_unpack(&vsn, n, &fpscr);
|
||||
if (vsn.exponent == 0 && vsn.significand)
|
||||
vfp_single_normalise_denormal(&vsn);
|
||||
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
if (vsm.exponent == 0 && vsm.significand)
|
||||
vfp_single_normalise_denormal(&vsm);
|
||||
|
||||
|
@ -1094,8 +1094,8 @@ static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
|
|||
|
||||
LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
|
||||
|
||||
vfp_single_unpack(&vsn, n);
|
||||
vfp_single_unpack(&vsm, m);
|
||||
vfp_single_unpack(&vsn, n, &fpscr);
|
||||
vfp_single_unpack(&vsm, m, &fpscr);
|
||||
|
||||
vsd.sign = vsn.sign ^ vsm.sign;
|
||||
|
||||
|
|
Loading…
Reference in a new issue