Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions field/babybear/fft/fft.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions field/koalabear/fft/fft.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 63 additions & 2 deletions internal/generator/field/template/fft/fft.go.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,13 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E
kerDIFNP_{{$ksize}}(a, twiddles, stage-twiddlesStartStage)
return
}
{{- end }}
{{- end }}{{- if .HasASMKernel}} else if n == 512 {
kerDIFNP_512(a, twiddles, stage-twiddlesStartStage)
return
} else if n == 1024 {
kerDIFNP_1024(a, twiddles, stage-twiddlesStartStage)
return
}{{- end}}
}
m := n >> 1

Expand Down Expand Up @@ -312,7 +318,13 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E
kerDITNP_{{$ksize}}(a, twiddles, stage-twiddlesStartStage)
return
}
{{- end }}
{{- end }}{{- if .HasASMKernel}} else if n == 512 {
kerDITNP_512(a, twiddles, stage-twiddlesStartStage)
return
} else if n == 1024 {
kerDITNP_1024(a, twiddles, stage-twiddlesStartStage)
return
}{{- end}}
}

m := n >> 1
Expand Down Expand Up @@ -397,6 +409,55 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta
{{genKernel $.FF $ksize $klog2}}
{{end}}

{{- if .HasASMKernel}}
// kerDIFNP_512 is the decimation-in-frequency kernel for a 512-element slice.
// Rather than recursing, it runs the outermost butterfly layer itself
// (twiddles[stage], m=256) and then delegates each 256-element half to
// kerDIFNP_256 at the following stage.
func kerDIFNP_512(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) {
	const half = 256
	next := stage + 1

	// Outer layer first (DIF order): one butterfly pass across the whole block.
	innerDIFWithTwiddles(a, twiddles[stage], 0, half, half)

	// Then the lower and upper halves, each handled by the 256-element kernel.
	kerDIFNP_256(a[:half], twiddles, next)
	kerDIFNP_256(a[half:], twiddles, next)
}

// kerDITNP_512 is the decimation-in-time kernel for a 512-element slice.
// It mirrors kerDIFNP_512 in reverse order: both 256-element halves go through
// kerDITNP_256 at stage+1 first, and the outermost butterfly layer
// (twiddles[stage], m=256) is applied last, with no recursion involved.
func kerDITNP_512(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) {
	const half = 256
	next := stage + 1

	// Sub-transforms first (DIT order): lower half, then upper half.
	kerDITNP_256(a[:half], twiddles, next)
	kerDITNP_256(a[half:], twiddles, next)

	// Closing layer: one butterfly pass across the whole block.
	innerDITWithTwiddles(a, twiddles[stage], 0, half, half)
}

// kerDIFNP_1024 is the decimation-in-frequency kernel for a 1024-element slice.
// It unrolls the two outermost butterfly layers by hand instead of recursing:
// one pass with m=512 over the full block (twiddles[stage]), one pass with
// m=256 over each 512-element half (twiddles[stage+1]), and finally
// kerDIFNP_256 on each of the four 256-element quarters at stage+2.
func kerDIFNP_1024(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) {
	const (
		quarter = 256
		half    = 512
	)
	second, third := stage+1, stage+2

	// Layer at `stage`: butterfly across the whole block.
	innerDIFWithTwiddles(a, twiddles[stage], 0, half, half)

	// Layer at `stage+1`: the same twiddle row drives both halves.
	innerDIFWithTwiddles(a[:half], twiddles[second], 0, quarter, quarter)
	innerDIFWithTwiddles(a[half:], twiddles[second], 0, quarter, quarter)

	// Remaining layers: the 256-element kernel on each quarter, in order.
	kerDIFNP_256(a[:quarter], twiddles, third)
	kerDIFNP_256(a[quarter:half], twiddles, third)
	kerDIFNP_256(a[half:half+quarter], twiddles, third)
	kerDIFNP_256(a[half+quarter:], twiddles, third)
}

// kerDITNP_1024 is the decimation-in-time kernel for a 1024-element slice.
// It performs the steps of kerDIFNP_1024 in the opposite order and without
// recursion: kerDITNP_256 on each 256-element quarter at stage+2, then a
// butterfly pass with m=256 over each 512-element half (twiddles[stage+1]),
// and last a butterfly pass with m=512 over the full block (twiddles[stage]).
func kerDITNP_1024(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) {
	const (
		quarter = 256
		half    = 512
	)
	second, third := stage+1, stage+2

	// Innermost layers first (DIT order): the 256-element kernel per quarter.
	kerDITNP_256(a[:quarter], twiddles, third)
	kerDITNP_256(a[quarter:half], twiddles, third)
	kerDITNP_256(a[half:half+quarter], twiddles, third)
	kerDITNP_256(a[half+quarter:], twiddles, third)

	// Layer at `stage+1`: the same twiddle row drives both halves.
	innerDITWithTwiddles(a[:half], twiddles[second], 0, quarter, quarter)
	innerDITWithTwiddles(a[half:], twiddles[second], 0, quarter, quarter)

	// Layer at `stage`: closing butterfly across the whole block.
	innerDITWithTwiddles(a, twiddles[stage], 0, half, half)
}
{{- end}}

{{define "genKernel FF sizeKernel sizeKernelLog2"}}

func kerDIFNP_{{.sizeKernel}}generic(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) {
Expand Down
Loading