Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/coreclr/src/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,47 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
break;
}

case NI_Vector64_Create:
case NI_Vector128_Create:
{
// We shouldn't handle this as an intrinsic if the
// respective ISAs have been disabled by the user.

if (!compExactlyDependsOn(InstructionSet_AdvSimd))
{
break;
}

if (sig->numArgs == 1)
{
op1 = impPopStack().val;
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
}
else if (sig->numArgs == 2)
{
op2 = impPopStack().val;
op1 = impPopStack().val;
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
}
else
{
assert(sig->numArgs >= 3);

GenTreeArgList* tmp = nullptr;

for (unsigned i = 0; i < sig->numArgs; i++)
{
tmp = gtNewArgList(impPopStack().val);
tmp->gtOp2 = op1;
op1 = tmp;
}

retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
}
break;
}

case NI_Vector64_get_Count:
case NI_Vector128_get_Count:
{
Expand Down
8 changes: 6 additions & 2 deletions src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
break;

case NI_Vector64_Create:
case NI_Vector128_Create:
case NI_AdvSimd_DuplicateToVector64:
case NI_AdvSimd_DuplicateToVector128:
case NI_AdvSimd_Arm64_DuplicateToVector64:
case NI_AdvSimd_Arm64_DuplicateToVector128:
{
if (varTypeIsFloating(intrin.baseType))
Expand All @@ -596,6 +595,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
const double dataValue = intrin.op1->AsDblCon()->gtDconVal;
GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt);
}
else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64)
{
assert(intrin.baseType == TYP_DOUBLE);
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else
{
GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt);
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/src/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ HARDWARE_INTRINSIC(Vector64, AsSByte,
HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, Create, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_mov, INS_mov, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand All @@ -45,7 +45,7 @@ HARDWARE_INTRINSIC(Vector128, AsSingle, 1
HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand Down Expand Up @@ -197,6 +197,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTest, 1
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareTestScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmtst, INS_cmtst, INS_invalid, INS_cmtst}, HW_Category_SIMDScalar, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Divide, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fdiv, INS_fdiv}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateSelectedScalarToVector128, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ARM actually exposes native intrinsics for these under the same name as the other primitive types (vdup_n_s64, etc). I'm going to include this in the misc proposal for APIs that may be missing.

HARDWARE_INTRINSIC(AdvSimd_Arm64, DuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dup, INS_dup, INS_invalid, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(AdvSimd_Arm64, FusedMultiplySubtract, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmls}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics)
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2,
HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, AsVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
// The instruction generated for float/double depends on which ISAs are supported
HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
Expand Down Expand Up @@ -77,7 +77,7 @@ HARDWARE_INTRINSIC(Vector256, AsVector256,
HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4272,7 +4272,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,

if (mustExpand && (retNode == nullptr))
{
NO_WAY("JIT must expand the intrinsic!");
assert(!"Unhandled must expand intrinsic, throwing PlatformNotSupportedException");
return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously the JIT would fail fast with The JIT compiler encountered invalid IL code or an internal limitation.

Now, it will assert in debug mode but will throw PlatformNotSupportedException at runtime.

@tannergooding tannergooding May 12, 2020

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was changed to not use impUnsupportedHWIntrinsic and instead use gtNewMustThrowException directly.

impUnsupportedHWIntrinsic was renamed to impUnsupportedNamedIntrinsic and moved out of FEATURE_HW_INTRINSIC

}

// Optionally report if this intrinsic is special
Expand Down Expand Up @@ -4509,12 +4510,13 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)

result = HWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
}
else if (strcmp(methodName, "get_IsSupported") == 0)
{
return NI_IsSupported_False;
}
else

if (result == NI_Illegal)

@tannergooding tannergooding May 12, 2020

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This path accounts for mustExpand for unrecognized HWIntrinsics (and thus will never hit the above assert) in particular as we know that throw PlatformNotSupportedException is the desired behavior for anything not recognized and which is recursive under namespace System.Runtime.Intrinsics.

All of the code paths are either directly recursive (void MyMethod() => MyMethod()) and are under one of the S.R.I.X86 or S.R.I.Arm or are indirectly recursive and are directly under S.R.I. An indirectly recursive intrinsic is one like Vector64.Create which is:

if (AdvSimd.IsSupported)
{
    return Vector64.Create(...);
}

return SoftwareFallback(...);

So the throw PNSE will be generated under the dead AdvSimd.IsSupported code path under minOpts and will never actually be hit.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused about this, as I thought that we always expanded intrinsics, even under minopts.
That said, I think that this code section could use a comment describing the "indirectly recursive" case. In fact, for developers who are not intimately familiar with this code, a few words in general about how/why we reach this case would be useful.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't necessarily about minopts vs optimized, it's about platforms like ARM where HWIntrinsics aren't supported at all or cases like Vector64 on x86.

In both of those scenarios, it is recognized as intrinsic (due to the [Intrinsic]) and as mustExpand (due to the recursion), but it isn't possible to actually expand the call since it isn't recognized and so it was hitting https://github.com/dotnet/runtime/pull/36267/files#diff-b8d003a58643e5595d2920ca5993b952L4275

This was really only a problem for classes like Vector64/Vector128/Vector256 as they are shared between all architectures (where classes like x86.Sse or Arm.AdvSimd have two copies: one which is recursive and one which throws). It looks to have only been showing for minOpts because we drop the dead code entirely otherwise and so we never try to expand the call.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{
if (strcmp(methodName, "get_IsSupported") == 0)
{
return NI_IsSupported_False;
}
return gtIsRecursiveCall(method) ? NI_Throw_PlatformNotSupportedException : NI_Illegal;
}
}
Expand Down
Loading