Five-point Winograd DFT

First, define the SFG/block diagram

from math import cos, pi, sin

import matplotlib.pyplot as plt
import networkx as nx

from b_asic.architecture import Architecture, Memory, ProcessingElement
from b_asic.core_operations import AddSub, Butterfly, ConstantMultiplication
from b_asic.schedule import Schedule
from b_asic.signal_flow_graph import SFG
from b_asic.special_operations import Input, Output

u = -2 * pi / 5
c50 = (cos(u) + cos(2 * u)) / 2 - 1
c51 = (cos(u) - cos(2 * u)) / 2
c52 = 1j * (sin(u) + sin(2 * u)) / 2
c53 = 1j * (sin(2 * u))
c54 = 1j * (sin(u) - sin(2 * u))


in0 = Input("x0")
in1 = Input("x1")
in2 = Input("x2")
in3 = Input("x3")
in4 = Input("x4")
bf0 = Butterfly(in1, in3)
bf1 = Butterfly(in4, in2)
bf2 = Butterfly(bf0.output(0), bf1.output(0))
a0 = AddSub(True, bf0.output(1), bf1.output(0))
a1 = AddSub(True, bf2.output(0), in0)
# Should overload float*OutputPort as well
m0 = ConstantMultiplication(c50, bf2.output(0))
m1 = ConstantMultiplication(c51, bf0.output(1))
m2 = c52 * a0
m3 = ConstantMultiplication(c53, bf2.output(1))
m4 = ConstantMultiplication(c54, bf1.output(1))
a2 = AddSub(True, m0, a1)
a3 = AddSub(False, m3, m2)
a4 = AddSub(True, m3, m4)
bf3 = Butterfly(a2, m1)
bf4 = Butterfly(bf3.output(0), a3)
bf5 = Butterfly(bf3.output(1), a4)

out0 = Output(a1, "X0")
out1 = Output(bf4.output(0), "X1")
out2 = Output(bf4.output(1), "X2")
out4 = Output(bf5.output(0), "X4")
out3 = Output(bf5.output(1), "X3")

sfg = SFG(
    inputs=[in0, in1, in2, in3, in4],
    outputs=[out0, out1, out2, out3, out4],
    name="5-point Winograd DFT",
)

The SFG looks like

sfg
%3 in0 x0 (in0) addsub0 addsub0 in0->addsub0 1 addsub0.0 addsub0->addsub0.0 in1 x1 (in1) bfly5 bfly5 in1->bfly5 0 bfly0 bfly0 bfly5->bfly0 0 0 bfly5.1 bfly5->bfly5.1 1 in2 x2 (in2) bfly4 bfly4 in2->bfly4 1 cmul3 cmul3 bfly4->cmul3 1 bfly4.0 bfly4->bfly4.0 0 in3 x3 (in3) in3->bfly5 1 in4 x4 (in4) in4->bfly4 0 out0 X0 (out0) addsub0.0->out0 addsub1 addsub1 addsub0.0->addsub1 1 out1 X1 (out1) bfly2 bfly2 bfly2->out1 0 out2 X2 (out2) bfly2->out2 1 out3 X3 (out3) bfly3 bfly3 bfly3->out3 1 out4 X4 (out4) bfly3->out4 0 bfly0.0 bfly0.0->addsub0 0 cmul0 cmul0 bfly0.0->cmul0 bfly0->bfly0.0 0 cmul2 cmul2 bfly0->cmul2 1 bfly1 bfly1 addsub1->bfly1 0 cmul0->addsub1 0 bfly1->bfly2 0 0 bfly1->bfly3 0 1 cmul1 cmul1 cmul1->bfly1 1 addsub2 addsub2 addsub2->bfly3 1 cmul2.0 cmul2.0->addsub2 0 addsub4 addsub4 cmul2.0->addsub4 0 cmul2->cmul2.0 cmul3->addsub2 1 bfly4.0->bfly0 1 addsub3 addsub3 bfly4.0->addsub3 1 cmul4 cmul4 addsub3->cmul4 bfly5.1->cmul1 bfly5.1->addsub3 0 cmul4->addsub4 1 addsub4->bfly2 1


Set latencies and execution times

sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2)
sfg.set_latency_of_type(AddSub.type_name(), 1)
sfg.set_latency_of_type(Butterfly.type_name(), 1)
sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1)
sfg.set_execution_time_of_type(AddSub.type_name(), 1)
sfg.set_execution_time_of_type(Butterfly.type_name(), 1)

Generate schedule

schedule = Schedule(sfg, cyclic=True)
schedule.show()
fivepointwinograddft

Reschedule to only use one AddSub, one Butterfly, and one ConstantMultiplication per time unit

schedule.set_schedule_time(10)
schedule.move_operation('out4', 12)
schedule.move_operation('out3', 11)
schedule.move_operation('out2', 10)
schedule.move_operation('out1', 9)
schedule.move_operation('out0', 12)
schedule.move_operation('bfly3', 10)
schedule.move_operation('bfly2', 9)
schedule.move_operation('bfly1', 7)
schedule.move_operation('addsub4', 5)
schedule.move_operation('addsub2', 5)
schedule.move_operation('addsub1', 5)
schedule.move_operation('cmul4', 4)
schedule.move_operation('cmul2', 4)
schedule.move_operation('cmul0', 5)
schedule.move_operation('addsub0', 6)
schedule.move_operation('cmul1', 6)
schedule.move_operation('addsub3', 4)
schedule.move_operation('bfly0', 4)
schedule.move_operation('cmul3', 6)
schedule.move_operation('bfly5', 4)
schedule.move_operation('bfly4', 4)
schedule.move_operation('in1', 1)
schedule.move_operation('in2', 2)
schedule.move_operation('in3', 3)
schedule.move_operation('in4', 4)
schedule.move_operation('bfly5', -1)
schedule.move_operation('bfly3', 1)
schedule.move_operation('cmul2', 1)
schedule.move_operation('cmul4', 1)
schedule.move_operation('bfly0', 1)
schedule.move_operation('addsub0', -1)
schedule.move_operation('cmul1', -3)
schedule.move_operation('cmul3', -2)
schedule.move_operation('cmul4', -1)
schedule.move_operation('addsub4', 1)
schedule.move_operation('addsub1', 2)
schedule.move_operation('cmul0', 1)
schedule.move_operation('bfly0', -1)
schedule.move_operation('addsub0', -1)
schedule.move_operation('bfly2', -1)
schedule.move_operation('cmul2', -1)
schedule.move_operation('cmul4', 1)
schedule.move_operation('addsub2', -1)
schedule.move_operation('addsub4', -1)
schedule.move_operation('addsub1', -1)
schedule.move_operation('bfly1', -1)
schedule.move_operation('bfly2', -2)
schedule.move_operation('bfly3', -1)
schedule.show()
fivepointwinograddft

Extract memory variables and operation executions

operations = schedule.get_operations()
adders = operations.get_by_type_name(AddSub.type_name())
adders.show(title="AddSub executions")
mults = operations.get_by_type_name('cmul')
mults.show(title="Multiplier executions")
butterflies = operations.get_by_type_name(Butterfly.type_name())
butterflies.show(title="Butterfly executions")
inputs = operations.get_by_type_name('in')
inputs.show(title="Input executions")
outputs = operations.get_by_type_name('out')
outputs.show(title="Output executions")

addsub = ProcessingElement(adders, entity_name="addsub")
butterfly = ProcessingElement(butterflies, entity_name="butterfly")
multiplier = ProcessingElement(mults, entity_name="multiplier")
pe_in = ProcessingElement(inputs, entity_name='input')
pe_out = ProcessingElement(outputs, entity_name='output')

mem_vars = schedule.get_memory_variables()
mem_vars.show(title="All memory variables")
direct, mem_vars = mem_vars.split_on_length()
mem_vars.show(title="Non-zero time memory variables")
direct.show(title="Direct interconnects")
mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)

memories = []
for i, mem in enumerate(mem_vars_set):
    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
    memories.append(memory)
    mem.show(title=f"{memory.entity_name} variables")
    memory.assign("left_edge")
    memory.show_content(title=f"Assigned {memory.entity_name}")

fig, ax = plt.subplots()
fig.suptitle('Exclusion graph based on ports')
nx.draw(mem_vars.create_exclusion_graph_from_ports(1, 1, 2), ax=ax)
  • AddSub executions
  • Multiplier executions
  • Butterfly executions
  • Input executions
  • Output executions
  • All memory variables
  • Non-zero time memory variables
  • Direct interconnects
  • memory0 variables
  • Assigned memory0
  • memory1 variables
  • Assigned memory1
  • memory2 variables
  • Assigned memory2
  • memory3 variables
  • Assigned memory3
  • memory4 variables
  • Assigned memory4
  • Exclusion graph based on ports

Create architecture

arch = Architecture(
    [addsub, butterfly, multiplier, pe_in, pe_out],
    memories,
    direct_interconnects=direct,
)

arch
%3 cluster_memories Memories cluster_pes Processing elements cluster_io IO memory0 in0 memory0: (RAM, 3 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 2 cells) out0 memory1out0_branch memory1:out0->memory1out0_branch memory2 in0 memory2: (RAM, 2 cells) out0 memory2out0_branch memory2:out0->memory2out0_branch memory3 in0 memory3: (RAM, 1 cell) out0 memory3out0_branch memory3:out0->memory3out0_branch memory4 in0 memory4: (RAM, 1 cell) out0 addsub_in1_mux in0 in1 in2 in3 in4 addsub_in1_mux out0 memory4:out0->addsub_in1_mux:in2 1 addsub in0 in1 addsub out0 addsubout0_branch addsub:out0->addsubout0_branch butterfly in0 in1 butterfly out0 out1 butterflyout0_branch butterfly:out0->butterflyout0_branch butterflyout1_branch butterfly:out1->butterflyout1_branch multiplier in0 multiplier out0 multiplierout0_branch multiplier:out0->multiplierout0_branch input input out0 inputout0_branch input:out0->inputout0_branch output in0 output addsub_in0_mux in0 in1 in2 addsub_in0_mux out0 addsub_in0_mux:out0->addsub:in0 addsub_in1_mux:out0->addsub:in1 memory3_in0_mux in0 in1 memory3_in0_mux out0 memory3_in0_mux:out0->memory3:in0 memory1_in0_mux in0 in1 in2 in3 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 memory2_in0_mux in0 in1 memory2_in0_mux out0 memory2_in0_mux:out0->memory2:in0 butterfly_in0_mux in0 in1 in2 in3 in4 butterfly_in0_mux out0 butterfly_in0_mux:out0->butterfly:in0 butterfly_in1_mux in0 in1 in2 in3 in4 butterfly_in1_mux out0 butterfly_in1_mux:out0->butterfly:in1 memory0_in0_mux in0 in1 in2 in3 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 multiplier_in0_mux in0 in1 in2 multiplier_in0_mux out0 multiplier_in0_mux:out0->multiplier:in0 output_in0_mux in0 in1 in2 output_in0_mux out0 output_in0_mux:out0->output:in0 multiplierout0_branch->memory4:in0 1 multiplierout0_branch->addsub_in0_mux:in1 2 multiplierout0_branch->addsub_in1_mux:in1 1 multiplierout0_branch->memory1_in0_mux:in2 1 multiplierout0_branch->memory0_in0_mux:in2 1 memory1out0_branch->addsub_in0_mux:in0 2 memory1out0_branch->butterfly_in0_mux:in1 1 memory1out0_branch->butterfly_in1_mux:in1 1 memory1out0_branch->multiplier_in0_mux:in1 1 butterflyout0_branch->addsub_in0_mux:in2 1 butterflyout0_branch->addsub_in1_mux:in4 1 butterflyout0_branch->memory1_in0_mux:in3 1 butterflyout0_branch->memory2_in0_mux:in1 1 butterflyout0_branch->butterfly_in0_mux:in4 1 butterflyout0_branch->butterfly_in1_mux:in4 1 butterflyout0_branch->memory0_in0_mux:in3 2 memory0out0_branch->addsub_in1_mux:in0 1 memory0out0_branch->butterfly_in0_mux:in2 2 memory0out0_branch->butterfly_in1_mux:in2 2 memory0out0_branch->output_in0_mux:in1 2 memory2out0_branch->addsub_in1_mux:in3 1 memory2out0_branch->multiplier_in0_mux:in0 1 memory2out0_branch->output_in0_mux:in0 2 addsubout0_branch->memory3_in0_mux:in0 1 addsubout0_branch->memory1_in0_mux:in0 1 addsubout0_branch->memory2_in0_mux:in0 2 addsubout0_branch->butterfly_in0_mux:in3 1 inputout0_branch->butterfly_in0_mux:in0 1 inputout0_branch->butterfly_in1_mux:in0 1 inputout0_branch->memory0_in0_mux:in0 3 memory3out0_branch->butterfly_in1_mux:in3 1 memory3out0_branch->output_in0_mux:in2 1 butterflyout1_branch->memory3_in0_mux:in1 1 butterflyout1_branch->memory1_in0_mux:in1 2 butterflyout1_branch->memory0_in0_mux:in1 1 butterflyout1_branch->multiplier_in0_mux:in2 3


Move memory variables to optimize architecture

arch.move_process('addsub2.0', 'memory3', 'memory2')
arch.move_process('bfly2.0', 'memory2', 'memory3')
arch.move_process('cmul2.0', 'memory1', 'memory0')
arch.move_process('bfly3.0', 'memory0', 'memory1')
arch.move_process('cmul3.0', 'memory4', 'memory0')

arch.assign_resources()

Memory 4 is now empty, so remove it.

  • Improved memory0
  • Improved memory1
  • Improved memory2
  • Improved memory3
%3 cluster_memories Memories cluster_pes Processing elements cluster_io IO memory0 in0 memory0: (RAM, 3 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 2 cells) out0 memory1out0_branch memory1:out0->memory1out0_branch memory2 in0 memory2: (RAM, 2 cells) out0 memory2out0_branch memory2:out0->memory2out0_branch memory3 in0 memory3: (RAM, 1 cell) out0 output_in0_mux in0 in1 in2 in3 output_in0_mux out0 memory3:out0->output_in0_mux:in2 2 addsub in0 in1 addsub out0 addsubout0_branch addsub:out0->addsubout0_branch butterfly in0 in1 butterfly out0 out1 butterflyout0_branch butterfly:out0->butterflyout0_branch butterflyout1_branch butterfly:out1->butterflyout1_branch multiplier in0 multiplier out0 multiplierout0_branch multiplier:out0->multiplierout0_branch input input out0 inputout0_branch input:out0->inputout0_branch output in0 output addsub_in0_mux in0 in1 in2 in3 addsub_in0_mux out0 addsub_in0_mux:out0->addsub:in0 addsub_in1_mux in0 in1 in2 in3 addsub_in1_mux out0 addsub_in1_mux:out0->addsub:in1 memory1_in0_mux in0 in1 in2 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 butterfly_in0_mux in0 in1 in2 in3 in4 butterfly_in0_mux out0 butterfly_in0_mux:out0->butterfly:in0 butterfly_in1_mux in0 in1 in2 in3 in4 butterfly_in1_mux out0 butterfly_in1_mux:out0->butterfly:in1 memory0_in0_mux in0 in1 in2 in3 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 memory3_in0_mux in0 in1 memory3_in0_mux out0 memory3_in0_mux:out0->memory3:in0 multiplier_in0_mux in0 in1 in2 multiplier_in0_mux out0 multiplier_in0_mux:out0->multiplier:in0 output_in0_mux:out0->output:in0 multiplierout0_branch->addsub_in0_mux:in2 2 multiplierout0_branch->addsub_in1_mux:in2 1 multiplierout0_branch->memory0_in0_mux:in2 3 memory0out0_branch->addsub_in0_mux:in0 1 memory0out0_branch->addsub_in1_mux:in1 2 memory0out0_branch->butterfly_in0_mux:in2 2 memory0out0_branch->butterfly_in1_mux:in2 2 memory0out0_branch->output_in0_mux:in3 1 butterflyout0_branch->addsub_in0_mux:in3 1 butterflyout0_branch->addsub_in1_mux:in3 1 butterflyout0_branch->memory1_in0_mux:in2 2 butterflyout0_branch->butterfly_in0_mux:in4 1 butterflyout0_branch->butterfly_in1_mux:in4 1 butterflyout0_branch->memory0_in0_mux:in3 1 butterflyout0_branch->memory3_in0_mux:in1 1 memory1out0_branch->addsub_in0_mux:in1 1 memory1out0_branch->butterfly_in0_mux:in1 1 memory1out0_branch->butterfly_in1_mux:in1 1 memory1out0_branch->multiplier_in0_mux:in1 1 memory1out0_branch->output_in0_mux:in1 1 memory2out0_branch->addsub_in1_mux:in0 1 memory2out0_branch->butterfly_in1_mux:in3 1 memory2out0_branch->multiplier_in0_mux:in0 1 memory2out0_branch->output_in0_mux:in0 1 addsubout0_branch->memory2:in0 3 addsubout0_branch->memory1_in0_mux:in0 1 addsubout0_branch->butterfly_in0_mux:in3 1 inputout0_branch->butterfly_in0_mux:in0 1 inputout0_branch->butterfly_in1_mux:in0 1 inputout0_branch->memory0_in0_mux:in0 3 butterflyout1_branch->memory1_in0_mux:in1 2 butterflyout1_branch->memory0_in0_mux:in1 1 butterflyout1_branch->memory3_in0_mux:in0 1 butterflyout1_branch->multiplier_in0_mux:in2 3


Total running time of the script: (0 minutes 4.138 seconds)

Gallery generated by Sphinx-Gallery