Three-point Winograd DFT

import math

import matplotlib.pyplot as plt
import networkx as nx

from b_asic.architecture import Architecture, Memory, ProcessingElement
from b_asic.core_operations import AddSub, Addition, ConstantMultiplication, Subtraction
from b_asic.schedule import Schedule
from b_asic.sfg import SFG
from b_asic.special_operations import Input, Output

# Winograd constants for N = 3, with u = 2*pi/3.
#
# c30 = cos(u) - 1 = -3/2.  The "- 1" is required because a2 below already
# contains the full sum x1 + x2, so that
#     a3 = a2 + c30 * a0 = x0 + (x1 + x2) - 3/2 * (x1 + x2) = x0 - (x1 + x2) / 2
# which is the real part common to X1 and X2.  Using cos(u) = -1/2 directly
# would give x0 + (x1 + x2) / 2 and the outputs would not be the DFT.
c30 = -3 / 2
# c31 = -j * sin(u) = -j * sqrt(3) / 2
c31 = -1j * math.sqrt(3) / 2

# The three input samples.
in0 = Input("x0")
in1 = Input("x1")
in2 = Input("x2")

# Pre-additions.
a0 = in1 + in2  # x1 + x2
a1 = in1 - in2  # x1 - x2
a2 = in0 + a0  # x0 + x1 + x2, i.e. X0

# The two constant multiplications.
m0 = c30 * a0
m1 = c31 * a1

# Post-additions.
a3 = a2 + m0  # x0 - (x1 + x2) / 2
a4 = a3 + m1  # X1
a5 = a3 - m1  # X2

# The three output bins.
out0 = Output(a2, "X0")
out1 = Output(a4, "X1")
out2 = Output(a5, "X2")

sfg = SFG(
    inputs=[in0, in1, in2],
    outputs=[out0, out1, out2],
    name="3-point Winograd DFT",
)

The SFG looks like

sfg
%3 in0 x0 (in0) add0 add0 in0->add0 0 add0.0 add0->add0.0 in1 x1 (in1) in1.0 in1->in1.0 add1 add1 in1.0->add1 0 sub1 sub1 in1.0->sub1 0 add1.0 add1->add1.0 cmul1 cmul1 sub1->cmul1 in2 x2 (in2) in2.0 in2->in2.0 in2.0->add1 1 in2.0->sub1 1 out0 X0 (out0) add0.0->out0 add2 add2 add0.0->add2 0 out1 X1 (out1) add3 add3 add3->out1 out2 X2 (out2) sub0 sub0 sub0->out2 add1.0->add0 1 cmul0 cmul0 add1.0->cmul0 add2.0 add2->add2.0 cmul0->add2 1 add2.0->add3 0 add2.0->sub0 0 cmul1.0 cmul1.0->add3 1 cmul1.0->sub0 1 cmul1->cmul1.0


Replace Addition and Subtraction with AddSub operations

# Collect every plain adder and subtractor in the SFG, then hand them to
# ``rewrite`` together with the AddSub operation type.
replaceable_types = (Addition, Subtraction)
targets = [
    operation
    for operation in sfg.operations
    if isinstance(operation, replaceable_types)
]
sfg = sfg.rewrite(AddSub, targets)

The new SFG looks like

sfg
%3 in0 x0 (in0) addsub0 addsub0 in0->addsub0 0 addsub0.0 addsub0->addsub0.0 in1 x1 (in1) in1.0 in1->in1.0 addsub1 addsub1 in1.0->addsub1 0 addsub5 addsub5 in1.0->addsub5 0 addsub1.0 addsub1->addsub1.0 cmul1 cmul1 addsub5->cmul1 in2 x2 (in2) in2.0 in2->in2.0 in2.0->addsub1 1 in2.0->addsub5 1 out0 X0 (out0) addsub0.0->out0 addsub2 addsub2 addsub0.0->addsub2 0 out1 X1 (out1) addsub3 addsub3 addsub3->out1 out2 X2 (out2) addsub4 addsub4 addsub4->out2 addsub1.0->addsub0 1 cmul0 cmul0 addsub1.0->cmul0 addsub2.0 addsub2->addsub2.0 cmul0->addsub2 1 addsub2.0->addsub3 0 addsub2.0->addsub4 0 cmul1.0 cmul1.0->addsub3 1 cmul1.0->addsub4 1 cmul1->cmul1.0


Set latencies and execution times

# Latencies first (multiplier: 2, AddSub: 1), in the same order as before.
for operation_type, latency in ((ConstantMultiplication, 2), (AddSub, 1)):
    sfg.set_latency_of_type(operation_type, latency)

# Both operation types get an execution time of 1.
for operation_type in (ConstantMultiplication, AddSub):
    sfg.set_execution_time_of_type(operation_type, 1)

Generate initial schedule

# Build the initial schedule from the SFG.  ``cyclic=True`` is passed through
# to the Schedule constructor (presumably allowing operations to wrap around
# the schedule period -- confirm against the B-ASIC documentation).
schedule = Schedule(sfg, cyclic=True)
# Display the schedule as generated, before any manual moves.
schedule.show()
threepointwinograddft

Reschedule to only use one AddSub and one ConstantMultiplication per time unit

# Phase 1: set the schedule time to 10 and apply the first batch of moves.
# Each entry is (graph id, value passed to ``move_operation``); the order of
# the entries is significant and matches the original call sequence.
schedule.set_schedule_time(10)
for graph_id, offset in (
    ("out0", 11),
    ("out1", 9),
    ("out2", 10),
    ("addsub4", 2),
    ("addsub3", 3),
    ("addsub2", 2),
    ("cmul1", 2),
    ("cmul0", 2),
    ("addsub0", 3),
    ("addsub5", 2),
    ("addsub1", 2),
    ("in1", 1),
    ("in2", 2),
    ("cmul1", 1),
    ("addsub5", 1),
    ("addsub3", 6),
    ("addsub4", 8),
    ("cmul1", 6),
    ("addsub2", 5),
):
    schedule.move_operation(graph_id, offset)

# Phase 2: set the schedule time to 6 and apply the remaining moves.
schedule.set_schedule_time(6)
for graph_id, offset in (
    ("addsub0", 1),
    ("addsub3", -1),
    ("cmul1", -2),
    ("addsub3", -1),
    ("addsub0", -1),
    ("addsub2", -1),
    ("addsub4", -4),
):
    schedule.move_operation(graph_id, offset)

schedule.show()
threepointwinograddft

Extract memory variables and operation executions

# Split the scheduled operation executions by operation type name.
operations = schedule.get_operations()
adders = operations.get_by_type_name(AddSub.type_name())
mults = operations.get_by_type_name("cmul")
inputs = operations.get_by_type_name("in")
outputs = operations.get_by_type_name("out")

# Plot each group, in the same order as the groups were extracted.
for executions, plot_title in (
    (adders, "AddSub executions"),
    (mults, "Multiplier executions"),
    (inputs, "Input executions"),
    (outputs, "Output executions"),
):
    executions.show(title=plot_title)

# One processing element per group of executions.
addsub = ProcessingElement(adders, entity_name="addsub")
multiplier = ProcessingElement(mults, entity_name="multiplier")
pe_in = ProcessingElement(inputs, entity_name="input")
pe_out = ProcessingElement(outputs, entity_name="output")

# Memory variables: plot all of them, then split them with
# ``split_on_length`` into ``direct`` and the remaining ``mem_vars``.
mem_vars = schedule.get_memory_variables()
mem_vars.show(title="All memory variables")
direct, mem_vars = mem_vars.split_on_length()
mem_vars.show(title="Non-zero time memory variables")

# Partition the remaining variables under a one-read/one-write port limit.
mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)
direct.show(title="Direct interconnects")

# Draw the exclusion graph for the same port limits.
figure, axes = plt.subplots()
figure.suptitle("Exclusion graph based on ports")
port_exclusion_graph = mem_vars.exclusion_graph_from_ports(1, 1, 2)
nx.draw(port_exclusion_graph, ax=axes)

# Create one RAM per partition, plot its variables, assign them with the
# "left_edge" strategy and plot the resulting content.
memories = []
for index, variables in enumerate(mem_vars_set):
    ram = Memory(variables, memory_type="RAM", entity_name=f"memory{index}")
    memories.append(ram)
    variables.show(title=f"{ram.entity_name} variables")
    ram.assign("left_edge")
    ram.show_content(title=f"Assigned {ram.entity_name}")
  • AddSub executions
  • Multiplier executions
  • Input executions
  • Output executions
  • All memory variables
  • Non-zero time memory variables
  • Direct interconnects
  • Exclusion graph based on ports
  • memory0 variables
  • Assigned memory0
  • memory1 variables
  • Assigned memory1
  • memory2 variables
  • Assigned memory2

Create architecture

# The architecture is built from the set of processing elements, the list of
# memories and the direct interconnects.
processing_elements = {addsub, multiplier, pe_in, pe_out}
arch = Architecture(processing_elements, memories, direct_interconnects=direct)

arch
%3 cluster_memories Memories cluster_pes Processing Elements cluster_io I/O memory0 in0 memory0: (RAM, 3 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 2 cells) out0 memory1out0_branch memory1:out0->memory1out0_branch memory2 in0 memory2: (RAM, 2 cells) out0 memory2out0_branch memory2:out0->memory2out0_branch multiplier in0 multiplier out0 multiplierout0_branch multiplier:out0->multiplierout0_branch addsub in0 in1 addsub out0 addsubout0_branch addsub:out0->addsubout0_branch input input out0 inputout0_branch input:out0->inputout0_branch output in0 output addsub_in1_mux in0 in1 in2 in3 addsub_in1_mux out0 addsub_in1_mux:out0->addsub:in1 memory2_in0_mux in0 in1 memory2_in0_mux out0 memory2_in0_mux:out0->memory2:in0 memory0_in0_mux in0 in1 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 memory1_in0_mux in0 in1 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 addsub_in0_mux in0 in1 in2 addsub_in0_mux out0 addsub_in0_mux:out0->addsub:in0 output_in0_mux in0 in1 in2 output_in0_mux out0 output_in0_mux:out0->output:in0 addsubout0_branch->multiplier:in0 2 addsubout0_branch->memory2_in0_mux:in0 2 addsubout0_branch->memory0_in0_mux:in0 1 addsubout0_branch->memory1_in0_mux:in0 2 addsubout0_branch->addsub_in0_mux:in1 2 multiplierout0_branch->addsub_in1_mux:in2 2 multiplierout0_branch->memory2_in0_mux:in1 1 inputout0_branch->addsub_in1_mux:in1 1 inputout0_branch->memory0_in0_mux:in1 2 inputout0_branch->memory1_in0_mux:in1 1 memory1out0_branch->addsub_in0_mux:in2 3 memory1out0_branch->output_in0_mux:in0 1 memory0out0_branch->addsub_in1_mux:in3 1 memory0out0_branch->addsub_in0_mux:in0 1 memory0out0_branch->output_in0_mux:in1 1 memory2out0_branch->addsub_in1_mux:in0 2 memory2out0_branch->output_in0_mux:in2 1


Move memory variables to reduce the size of memory1

# Short aliases for the two memories involved in the exchange.
memory1 = memories[1]
memory2 = memories[2]

# Same two moves as before, with the memories passed in the same order;
# only the second move requests assignment (assign=True), so memory1 is
# assigned explicitly afterwards.
arch.move_process("addsub1.0", memory2, memory1)
arch.move_process("addsub3.0", memory1, memory2, assign=True)
memory1.assign()

# Plot the updated content of both memories.
for plot_title, ram in (
    ("Assigned memory1", memory1),
    ("Assigned memory2", memory2),
):
    ram.show_content(title=plot_title)

arch
  • Assigned memory1
  • Assigned memory2
%3 cluster_memories Memories cluster_pes Processing Elements cluster_io I/O memory0 in0 memory0: (RAM, 3 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 1 cell) out0 memory1out0_branch memory1:out0->memory1out0_branch memory2 in0 memory2: (RAM, 2 cells) out0 memory2out0_branch memory2:out0->memory2out0_branch multiplier in0 multiplier out0 multiplierout0_branch multiplier:out0->multiplierout0_branch addsub in0 in1 addsub out0 addsubout0_branch addsub:out0->addsubout0_branch input input out0 inputout0_branch input:out0->inputout0_branch output in0 output addsub_in1_mux in0 in1 in2 in3 in4 addsub_in1_mux out0 addsub_in1_mux:out0->addsub:in1 memory2_in0_mux in0 in1 memory2_in0_mux out0 memory2_in0_mux:out0->memory2:in0 memory0_in0_mux in0 in1 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 memory1_in0_mux in0 in1 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 addsub_in0_mux in0 in1 in2 addsub_in0_mux out0 addsub_in0_mux:out0->addsub:in0 output_in0_mux in0 in1 output_in0_mux out0 output_in0_mux:out0->output:in0 addsubout0_branch->multiplier:in0 2 addsubout0_branch->memory2_in0_mux:in0 2 addsubout0_branch->memory0_in0_mux:in0 1 addsubout0_branch->memory1_in0_mux:in0 2 addsubout0_branch->addsub_in0_mux:in1 2 multiplierout0_branch->addsub_in1_mux:in4 2 multiplierout0_branch->memory2_in0_mux:in1 1 inputout0_branch->addsub_in1_mux:in2 1 inputout0_branch->memory0_in0_mux:in1 2 inputout0_branch->memory1_in0_mux:in1 1 memory1out0_branch->addsub_in1_mux:in0 1 memory1out0_branch->addsub_in0_mux:in2 3 memory0out0_branch->addsub_in1_mux:in3 1 memory0out0_branch->addsub_in0_mux:in0 1 memory0out0_branch->output_in0_mux:in0 1 memory2out0_branch->addsub_in1_mux:in1 1 memory2out0_branch->output_in0_mux:in1 2


Total running time of the script: (0 minutes 2.460 seconds)

Gallery generated by Sphinx-Gallery