From 6a90b4bb4cb396552619239a1c0e58dbd8c2f79b Mon Sep 17 00:00:00 2001
From: Yashagarwal9798 <yashagarwal9798@gmail.com>
Date: Mon, 12 Jan 2026 05:07:07 +0530
Subject: [PATCH 1/3] fastmath: use llvm.powi in pow_fast for all IEEEFloat types

---
 base/fastmath.jl |  5 ++---
 test/fastmath.jl | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/base/fastmath.jl b/base/fastmath.jl
index ed686fb92bf34..b3216aa248370 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -297,12 +297,11 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-@inline function pow_fast(x::Float64, y::Integer)
+@inline function pow_fast(x::T, y::Integer) where T <: IEEEFloat
     z = y % Int32
     z == y ? pow_fast(x, z) : x^y
 end
-pow_fast(x::Float32, y::Integer) = x^y
-pow_fast(x::Float64, y::Int32) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
+pow_fast(x::T, y::Int32) where T <: IEEEFloat = ccall("llvm.powi", llvmcall, T, (T, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
diff --git a/test/fastmath.jl b/test/fastmath.jl
index efca5b85c6642..d3d82f40f9f1a 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -303,6 +303,31 @@ end
     @test @fastmath (1 + 1 / n) ^ 4503599627370496 ≈ ℯ
 end
 
+# Test that x^2 is inlined to fmul for all float types (issue #60639)
+@testset "pow_fast inlining for literal powers" begin
+    for T in (Float16, Float32, Float64)
+        f(x) = @fastmath x^2
+        llvm = sprint(code_llvm, f, (T,))
+        # Should be inlined to fmul, not call power_by_squaring
+        @test occursin("fmul", llvm)
+        @test !occursin("power_by_squaring", llvm)
+    end
+end
+
+# Test correctness of pow_fast for Float32/Float16 with various exponents (issue #60639)
+@testset "pow_fast correctness" begin
+    for T in (Float16, Float32)
+        x = T(2.5)
+        # Exponents that fit in Int32
+        @test (@fastmath x^2) ≈ x^2
+        @test (@fastmath x^10) ≈ x^10
+        @test (@fastmath x^(-3)) ≈ x^(-3)
+        # Exponents that don't fit in Int32 (takes the x^y fallback; both sides overflow to Inf)
+        big_exp = Int64(2)^40
+        @test (@fastmath x^big_exp) ≈ x^big_exp
+    end
+end
+
 @testset "sincos fall-backs" begin
     struct FloatWrapper
         inner::Float64

From bd9c0e9cc7c2ce37c01a8c054deb89f77033d85f Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith@gmail.com>
Date: Mon, 12 Jan 2026 16:10:03 -0500
Subject: [PATCH 2/3] fastmath: qualify IEEEFloat as Base.IEEEFloat

---
 base/fastmath.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/fastmath.jl b/base/fastmath.jl
index b3216aa248370..1395ee1950928 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -297,11 +297,11 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-@inline function pow_fast(x::T, y::Integer) where T <: IEEEFloat
+@inline function pow_fast(x::T, y::Integer) where T <: Base.IEEEFloat
     z = y % Int32
     z == y ? pow_fast(x, z) : x^y
 end
-pow_fast(x::T, y::Int32) where T <: IEEEFloat = ccall("llvm.powi", llvmcall, T, (T, Int32), x, y)
+pow_fast(x::T, y::Int32) where T <: Base.IEEEFloat = ccall("llvm.powi", llvmcall, T, (T, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 

From 1da88f1f57e3c68356e10e2c31cc1edfe7bec962 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith@gmail.com>
Date: Tue, 13 Jan 2026 22:18:14 -0500
Subject: [PATCH 3/3] fastmath: fix llvm.powi ccall by defining one method per float type

---
 base/fastmath.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/base/fastmath.jl b/base/fastmath.jl
index 1395ee1950928..8bfbbf4e2e23b 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -301,7 +301,9 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
     z = y % Int32
     z == y ? pow_fast(x, z) : x^y
 end
-pow_fast(x::T, y::Int32) where T <: Base.IEEEFloat = ccall("llvm.powi", llvmcall, T, (T, Int32), x, y)
+pow_fast(x::Float16, y::Int32) = ccall("llvm.powi", llvmcall, Float16, (Float16, Int32), x, y)
+pow_fast(x::Float32, y::Int32) = ccall("llvm.powi", llvmcall, Float32, (Float32, Int32), x, y)
+pow_fast(x::Float64, y::Int32) = ccall("llvm.powi", llvmcall, Float64, (Float64, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)