package rune

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Module Ir.Scheduled

val prod : int array -> int
val ensure3 : int array -> int array
val contiguous_strides_elems : int array -> int array
type axis_role = [
  | `Normal
  | `Reduction
]
type axis = {
  name : string;
  size : int option;
  sym : SymVar.t option;
  role : axis_role;
}
type mapping = {
  block : int list;
  thread : int list;
  vec : int list;
  serial : int list;
}
type iter_space = {
  axes : axis array;
  mapping : mapping;
  tiles : int list array;
}
type memory_scope =
  | Global
  | Shared
  | Register
type layout = {
  shape : int array;
  strides : int array;
  alignment : int;
  vector_width : int;
  contiguous_axes : int list;
}
type allocation = {
  scope : memory_scope;
  size_bytes : int;
  lifetime : int * int;
  alias_group : int option;
}
type buffer_info = {
  buf_var : Var.t;
  dtype : Dtype.any;
  layout : layout;
  alloc : allocation;
  is_input : bool;
  is_output : bool;
}
type loop_hint =
  | Vectorize of {
      axis : int;
      width : int;
    }
  | Unroll of {
      axis : int;
      factor : int;
    }
  | Prefetch of {
      var : Var.t;
      into : memory_scope;
      distance : int;
    }
  | Pipeline of {
      axis : int;
      stages : int;
      overlap : bool;
    }
type reduction_plan = {
  axes : int list;
  intra_thread : [ `Tree | `Welford | `Shfl | `None ];
  inter_thread : [ `SharedTree | `Atomic | `GridReduce ];
}
type schedule_context = {
  global_dims : int array;
  local_dims : int array;
  upcasted : int;
  device : string;
  stream : int option;
}
type scheduled_op =
  | S_Kernel of {
      kernel_id : int;
      kernel_name : string;
      ops : any_node list;
      inputs : buffer_info list;
      outputs : buffer_info list;
      iter : iter_space;
      reduce : reduction_plan option;
      hints : loop_hint list;
      context : schedule_context;
    }
  | S_Memory_Transfer of {
      transfer_id : int;
      src_var : Var.t;
      dst_var : Var.t;
      src_device : string;
      dst_device : string;
      dims : int array;
      src_strides : int array option;
      dst_strides : int array option;
      size_bytes : int;
      is_async : bool;
      stream : int option;
    }
  | S_Synchronization of {
      sync_id : int;
      sync_type : [ `Barrier | `Fence | `Event of int ];
      scope : [ `Threadgroup | `Device | `System ];
      devices : string list;
      stream : int option;
    }
  | S_Host_Callback of {
      callback_id : int;
      callback_name : string;
      input_vars : Var.t list;
      output_vars : Var.t list;
    }
type dependency = {
  dep_from : int;
  dep_to : int;
  dep_vars : Var.t list;
  kind : [ `Data | `Control ];
}
type schedule_item = {
  item_id : int;
  operation : scheduled_op;
  depends_on : int list;
  dependents : int list;
}
type fusion_opportunity = {
  kernel_a : int;
  kernel_b : int;
  fusion_type : [ `Elementwise | `Reduction | `Mixed ];
  benefit_score : float;
  memory_saved : int;
}
type item_analysis = {
  item_id : int;
  flops : int;
  bytes_read : int;
  bytes_written : int;
  regs_per_thread : int;
  smem_bytes : int;
  occupancy : float;
  est_ns : int;
}
type graph_t = {
  schedule_items : schedule_item array;
  dependencies : dependency list;
  fusion_opportunities : fusion_opportunity list;
  analysis : item_analysis array;
  critical_path : int list;
  total_memory_usage : int;
  estimated_runtime_ns : int;
  vars_metadata : (Var.t, var_metadata) Hashtbl.t;
  symbolic_vars : SymVar.t list;
}
val validate_dims3 : int array -> string -> unit
val validate_iter_space : iter_space -> unit
val size_bytes_of_layout : Dtype.any -> layout -> int
val default_layout :
  ?vector_width:int ->
  ?alignment:int ->
  int array ->
  layout
val default_alloc :
  scope:memory_scope ->
  dtype:Dtype.any ->
  layout:layout ->
  lifetime:(int * int) ->
  allocation
val compute_dependents : schedule_item array -> unit
val topological_order : schedule_item array -> int list
val find_critical_path : graph_t -> int list
val sum_estimated_runtime_ns : graph_t -> int
val estimate_peak_memory : graph_t -> int
val make_iter_space :
  axes:axis array ->
  mapping:mapping ->
  tiles:int list array ->
  iter_space
val make_buffer_info :
  buf_var:Var.t ->
  dtype:Dtype.any ->
  shape:int array ->
  scope:memory_scope ->
  is_input:bool ->
  is_output:bool ->
  lifetime:(int * int) ->
  buffer_info
val create_kernel :
  kernel_id:int ->
  kernel_name:string ->
  ops:any_node list ->
  inputs:buffer_info list ->
  outputs:buffer_info list ->
  iter:iter_space ->
  reduce:reduction_plan option ->
  hints:loop_hint list ->
  context:schedule_context ->
  scheduled_op
val create_memory_transfer :
  transfer_id:int ->
  src_var:Var.t ->
  dst_var:Var.t ->
  src_device:string ->
  dst_device:string ->
  dims:int array ->
  ?src_strides:int array ->
  ?dst_strides:int array ->
  size_bytes:int ->
  is_async:bool ->
  stream:int option ->
  unit ->
  scheduled_op
val create_synchronization :
  sync_id:int ->
  sync_type:[ `Barrier | `Event of int | `Fence ] ->
  scope:[ `Device | `System | `Threadgroup ] ->
  devices:string list ->
  stream:int option ->
  scheduled_op
val create_host_callback :
  callback_id:int ->
  callback_name:string ->
  input_vars:Var.t list ->
  output_vars:Var.t list ->
  scheduled_op
val create_schedule_item :
  item_id:int ->
  operation:scheduled_op ->
  depends_on:int list ->
  schedule_item