Memory Constrained Scheduling

from b_asic.architecture import Architecture, Memory, ProcessingElement
from b_asic.core_operations import Butterfly, ConstantMultiplication
from b_asic.list_schedulers import HybridScheduler
from b_asic.schedule import Schedule
from b_asic.scheduler import ASAPScheduler
from b_asic.sfg_generators import radix_2_dif_fft
from b_asic.special_operations import Input, Output

sfg = radix_2_dif_fft(points=16)

The SFG is

sfg
%3 in0 in0 bfly0 bfly0 in0->bfly0 0 bfly1 bfly1 bfly0->bfly1 0 0 bfly2 bfly2 bfly0->bfly2 0 1 in1 in1 bfly15 bfly15 in1->bfly15 0 cmul8 cmul8 bfly15->cmul8 1 bfly14 bfly14 bfly15->bfly14 0 0 in2 in2 bfly22 bfly22 in2->bfly22 0 bfly21 bfly21 bfly22->bfly21 0 0 cmul16 cmul16 bfly22->cmul16 1 in3 in3 bfly29 bfly29 in3->bfly29 0 cmul5 cmul5 bfly29->cmul5 1 bfly28 bfly28 bfly29->bfly28 0 0 in4 in4 bfly31 bfly31 in4->bfly31 0 bfly31->bfly1 1 0 cmul0 cmul0 bfly31->cmul0 1 in5 in5 bfly13 bfly13 in5->bfly13 0 cmul9 cmul9 bfly13->cmul9 1 bfly13->bfly14 1 0 in6 in6 bfly23 bfly23 in6->bfly23 0 bfly23->bfly21 1 0 cmul15 cmul15 bfly23->cmul15 1 in7 in7 bfly30 bfly30 in7->bfly30 0 cmul6 cmul6 bfly30->cmul6 1 bfly30->bfly28 1 0 in8 in8 in8->bfly0 1 in9 in9 in9->bfly15 1 in10 in10 in10->bfly22 1 in11 in11 in11->bfly29 1 in12 in12 in12->bfly31 1 in13 in13 in13->bfly13 1 in14 in14 in14->bfly23 1 in15 in15 in15->bfly30 1 out0 out0 bfly25 bfly25 bfly25->out0 0 out8 out8 bfly25->out8 1 out1 out1 bfly11 bfly11 bfly11->out1 0 out9 out9 bfly11->out9 1 out2 out2 bfly18 bfly18 bfly18->out2 0 out10 out10 bfly18->out10 1 out3 out3 bfly5 bfly5 bfly5->out3 0 out11 out11 bfly5->out11 1 out4 out4 bfly26 bfly26 bfly26->out4 0 out12 out12 bfly26->out12 1 out5 out5 bfly12 bfly12 bfly12->out5 0 out13 out13 bfly12->out13 1 out6 out6 bfly19 bfly19 bfly19->out6 0 out14 out14 bfly19->out14 1 out7 out7 bfly6 bfly6 bfly6->out7 0 out15 out15 bfly6->out15 1 bfly20 bfly20 bfly1->bfly20 0 1 bfly24 bfly24 bfly1->bfly24 0 0 bfly3 bfly3 bfly2->bfly3 0 0 bfly4 bfly4 bfly2->bfly4 0 1 cmul0->bfly2 1 bfly3->bfly11 0 0 bfly3->bfly12 0 1 bfly4->bfly5 0 0 bfly4->bfly6 0 1 cmul1 cmul1 cmul1->bfly4 1 cmul2 cmul2 cmul2->bfly6 1 bfly7 bfly7 bfly7->bfly5 1 0 bfly7->cmul2 1 cmul3 cmul3 cmul3->bfly7 0 cmul4 cmul4 cmul4->bfly7 1 bfly8 bfly8 bfly8->cmul4 1 bfly9 bfly9 bfly8->bfly9 1 0 cmul5->bfly8 0 cmul6->bfly8 1 bfly9->bfly11 1 0 cmul7 cmul7 bfly9->cmul7 1 bfly10 bfly10 bfly10->cmul3 1 bfly10->bfly9 0 0 cmul7->bfly12 1 cmul8->bfly10 0 cmul9->bfly10 1 bfly16 bfly16 bfly14->bfly16 0 0 cmul10 cmul10 bfly14->cmul10 1 bfly16->bfly25 1 0 cmul14 cmul14 bfly16->cmul14 1 bfly17 bfly17 cmul10->bfly17 0 bfly17->bfly18 1 0 cmul12 cmul12 bfly17->cmul12 1 cmul11 cmul11 cmul11->bfly17 1 cmul12->bfly19 1 bfly20->bfly18 0 0 bfly20->bfly19 0 1 cmul13 cmul13 cmul13->bfly20 1 bfly21->cmul13 1 bfly21->bfly24 1 0 bfly24->bfly25 0 0 bfly24->bfly26 0 1 cmul14->bfly26 1 bfly27 bfly27 cmul15->bfly27 1 bfly27->bfly3 1 0 bfly27->cmul1 1 cmul16->bfly27 0 bfly28->bfly16 1 0 bfly28->cmul11 1


Set latencies and execution times.

sfg.set_latency_of_type(Butterfly, 3)
sfg.set_latency_of_type(ConstantMultiplication, 2)
sfg.set_execution_time_of_type(Butterfly, 1)
sfg.set_execution_time_of_type(ConstantMultiplication, 1)

# # %%
# Generate an ASAP schedule for reference
schedule1 = Schedule(sfg, scheduler=ASAPScheduler())
schedule1.show()
memory constrained scheduling

Generate a PE constrained HybridSchedule

resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1}
schedule2 = Schedule(sfg, scheduler=HybridScheduler(resources))
schedule2.show()
memory constrained scheduling
direct, mem_vars = schedule2.get_memory_variables().split_on_length()
print("Max read ports:", mem_vars.read_ports_bound())
print("Max write ports:", mem_vars.write_ports_bound())
Max read ports: 3
Max write ports: 3
operations = schedule2.get_operations()
bfs = operations.get_by_type_name(Butterfly.type_name())
bfs.show(title="Butterfly executions")
const_muls = operations.get_by_type_name(ConstantMultiplication.type_name())
const_muls.show(title="ConstMul executions")
inputs = operations.get_by_type_name(Input.type_name())
inputs.show(title="Input executions")
outputs = operations.get_by_type_name(Output.type_name())
outputs.show(title="Output executions")

bf_pe = ProcessingElement(bfs, entity_name="bf")
mul_pe = ProcessingElement(const_muls, entity_name="mul")

pe_in = ProcessingElement(inputs, entity_name='input')
pe_out = ProcessingElement(outputs, entity_name='output')

mem_vars = schedule2.get_memory_variables()
mem_vars.show(title="All memory variables")
direct, mem_vars = mem_vars.split_on_length()
mem_vars.show(title="Non-zero time memory variables")
mem_vars_set = mem_vars.split_on_ports(
    read_ports=1, write_ports=1, total_ports=2, strategy="greedy_graph_color"
)
  • Butterfly executions
  • ConstMul executions
  • Input executions
  • Output executions
  • All memory variables
  • Non-zero time memory variables
memories = []
for i, mem in enumerate(mem_vars_set):
    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
    memories.append(memory)
    mem.show(title=f"{memory.entity_name}")
    memory.assign("greedy_graph_color")
    memory.show_content(title=f"Assigned {memory.entity_name}")

direct.show(title="Direct interconnects")
  • memory0
  • Assigned memory0
  • memory1
  • Assigned memory1
  • memory2
  • Assigned memory2
  • Direct interconnects
arch = Architecture(
    {bf_pe, mul_pe, pe_in, pe_out},
    memories,
    direct_interconnects=direct,
)
arch
%3 cluster_memories Memories cluster_pes Processing elements cluster_io IO memory0 in0 memory0: (RAM, 5 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 5 cells) out0 memory1out0_branch memory1:out0->memory1out0_branch memory2 in0 memory2: (RAM, 2 cells) out0 memory2out0_branch memory2:out0->memory2out0_branch bf in0 in1 bf out0 out1 bfout0_branch bf:out0->bfout0_branch bfout1_branch bf:out1->bfout1_branch mul in0 mul out0 mulout0_branch mul:out0->mulout0_branch output in0 output input input out0 inputout0_branch input:out0->inputout0_branch bf_in0_mux in0 in1 in2 in3 in4 bf_in0_mux out0 bf_in0_mux:out0->bf:in0 bf_in1_mux in0 in1 in2 in3 bf_in1_mux out0 bf_in1_mux:out0->bf:in1 memory1_in0_mux in0 in1 in2 in3 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 output_in0_mux in0 in1 in2 in3 output_in0_mux out0 output_in0_mux:out0->output:in0 memory0_in0_mux in0 in1 in2 in3 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 memory2_in0_mux in0 in1 in2 in3 memory2_in0_mux out0 memory2_in0_mux:out0->memory2:in0 memory0out0_branch->bf_in0_mux:in4 14 memory0out0_branch->bf_in1_mux:in3 10 memory0out0_branch->output_in0_mux:in3 6 memory1out0_branch->bf_in0_mux:in2 10 memory1out0_branch->bf_in1_mux:in1 12 memory1out0_branch->output_in0_mux:in0 5 memory2out0_branch->bf_in0_mux:in3 3 memory2out0_branch->bf_in1_mux:in2 2 memory2out0_branch->output_in0_mux:in1 2 mulout0_branch->bf_in0_mux:in1 2 mulout0_branch->memory1_in0_mux:in0 4 mulout0_branch->memory0_in0_mux:in0 9 mulout0_branch->memory2_in0_mux:in0 2 bfout0_branch->bf_in0_mux:in0 3 bfout0_branch->memory1_in0_mux:in3 10 bfout0_branch->output_in0_mux:in2 3 bfout0_branch->memory0_in0_mux:in3 15 bfout0_branch->memory2_in0_mux:in3 1 inputout0_branch->bf_in1_mux:in0 8 inputout0_branch->memory1_in0_mux:in1 2 inputout0_branch->memory0_in0_mux:in1 4 inputout0_branch->memory2_in0_mux:in1 2 bfout1_branch->mul:in0 17 bfout1_branch->memory1_in0_mux:in2 11 bfout1_branch->memory0_in0_mux:in2 2 bfout1_branch->memory2_in0_mux:in2 2


Generate another HybridSchedule but this time constrain the amount of reads and writes to reduce the amount of memories

resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1}
schedule3 = Schedule(
    sfg,
    scheduler=HybridScheduler(
        resources, max_concurrent_reads=2, max_concurrent_writes=2
    ),
)
schedule3.show()
memory constrained scheduling
direct, mem_vars = schedule3.get_memory_variables().split_on_length()
print("Max read ports:", mem_vars.read_ports_bound())
print("Max write ports:", mem_vars.write_ports_bound())
Max read ports: 2
Max write ports: 2
operations = schedule3.get_operations()
bfs = operations.get_by_type_name(Butterfly.type_name())
bfs.show(title="Butterfly executions")
const_muls = operations.get_by_type_name(ConstantMultiplication.type_name())
const_muls.show(title="ConstMul executions")
inputs = operations.get_by_type_name(Input.type_name())
inputs.show(title="Input executions")
outputs = operations.get_by_type_name(Output.type_name())
outputs.show(title="Output executions")

bf_pe = ProcessingElement(bfs, entity_name="bf")
mul_pe = ProcessingElement(const_muls, entity_name="mul")

pe_in = ProcessingElement(inputs, entity_name='input')
pe_out = ProcessingElement(outputs, entity_name='output')

mem_vars.show(title="Non-zero time memory variables")
mem_vars_set = mem_vars.split_on_ports(
    strategy="greedy_graph_color", read_ports=1, write_ports=1, total_ports=2
)
  • Butterfly executions
  • ConstMul executions
  • Input executions
  • Output executions
  • Non-zero time memory variables
memories = []
for i, mem in enumerate(mem_vars_set):
    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
    memories.append(memory)
    mem.show(title=f"{memory.entity_name}")
    memory.assign("greedy_graph_color")
    memory.show_content(title=f"Assigned {memory.entity_name}")

direct.show(title="Direct interconnects")
  • memory0
  • Assigned memory0
  • memory1
  • Assigned memory1
  • Direct interconnects
arch = Architecture(
    {bf_pe, mul_pe, pe_in, pe_out},
    memories,
    direct_interconnects=direct,
)
arch
%3 cluster_memories Memories cluster_pes Processing elements cluster_io IO memory0 in0 memory0: (RAM, 7 cells) out0 memory0out0_branch memory0:out0->memory0out0_branch memory1 in0 memory1: (RAM, 7 cells) out0 memory1out0_branch memory1:out0->memory1out0_branch mul in0 mul out0 mulout0_branch mul:out0->mulout0_branch bf in0 in1 bf out0 out1 bfout0_branch bf:out0->bfout0_branch bfout1_branch bf:out1->bfout1_branch input input out0 inputout0_branch input:out0->inputout0_branch output in0 output memory0_in0_mux in0 in1 in2 in3 memory0_in0_mux out0 memory0_in0_mux:out0->memory0:in0 memory1_in0_mux in0 in1 in2 in3 memory1_in0_mux out0 memory1_in0_mux:out0->memory1:in0 bf_in1_mux in0 in1 in2 in3 bf_in1_mux out0 bf_in1_mux:out0->bf:in1 output_in0_mux in0 in1 in2 output_in0_mux out0 output_in0_mux:out0->output:in0 mul_in0_mux in0 in1 in2 mul_in0_mux out0 mul_in0_mux:out0->mul:in0 bf_in0_mux in0 in1 in2 in3 bf_in0_mux out0 bf_in0_mux:out0->bf:in0 inputout0_branch->memory0_in0_mux:in0 3 inputout0_branch->memory1_in0_mux:in0 5 inputout0_branch->bf_in1_mux:in0 8 memory0out0_branch->bf_in1_mux:in3 8 memory0out0_branch->output_in0_mux:in2 6 memory0out0_branch->mul_in0_mux:in2 10 memory0out0_branch->bf_in0_mux:in3 9 memory1out0_branch->bf_in1_mux:in2 11 memory1out0_branch->output_in0_mux:in0 5 memory1out0_branch->mul_in0_mux:in0 5 memory1out0_branch->bf_in0_mux:in1 11 bfout0_branch->memory0_in0_mux:in3 7 bfout0_branch->memory1_in0_mux:in3 10 bfout0_branch->output_in0_mux:in1 5 bfout0_branch->bf_in0_mux:in2 10 bfout1_branch->memory0_in0_mux:in2 18 bfout1_branch->memory1_in0_mux:in2 12 bfout1_branch->mul_in0_mux:in1 2 mulout0_branch->memory0_in0_mux:in1 5 mulout0_branch->memory1_in0_mux:in1 5 mulout0_branch->bf_in1_mux:in1 5 mulout0_branch->bf_in0_mux:in0 2


Total running time of the script: (0 minutes 17.667 seconds)

Gallery generated by Sphinx-Gallery