/*
 * Copyright (c) 2017 Lima Project
 * Copyright (c) 2013 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */

#ifndef LIMA_IR_GP_GPIR_H
#define LIMA_IR_GP_GPIR_H

#include "util/list.h"
#include "util/u_math.h"

#include "ir/lima_ir.h"

/* List of operations that a node can perform. */
typedef enum {
   gpir_op_mov,

   /* mul ops */
   gpir_op_mul,
   gpir_op_select,
   gpir_op_complex1,
   gpir_op_complex2,

   /* add ops */
   gpir_op_add,
   gpir_op_floor,
   gpir_op_sign,
   gpir_op_ge,
   gpir_op_lt,
   gpir_op_min,
   gpir_op_max,
   gpir_op_abs,
   gpir_op_not,

   /* mul/add ops */
   gpir_op_neg,

   /* passthrough ops */
   gpir_op_clamp_const,
   gpir_op_preexp2,
   gpir_op_postlog2,

   /* complex ops */
   gpir_op_exp2_impl,
   gpir_op_log2_impl,
   gpir_op_rcp_impl,
   gpir_op_rsqrt_impl,

   /* load/store ops */
   gpir_op_load_uniform,
   gpir_op_load_temp,
   gpir_op_load_attribute,
   gpir_op_load_reg,
   gpir_op_store_temp,
   gpir_op_store_reg,
   gpir_op_store_varying,
   gpir_op_store_temp_load_off0,
   gpir_op_store_temp_load_off1,
   gpir_op_store_temp_load_off2,

   /* branch */
   gpir_op_branch_cond,

   /* const (emulated) */
   gpir_op_const,

   /* emulated ops */
   gpir_op_exp2,
   gpir_op_log2,
   gpir_op_rcp,
   gpir_op_rsqrt,
   gpir_op_ceil,
   gpir_op_exp,
   gpir_op_log,
   gpir_op_sin,
   gpir_op_cos,
   gpir_op_tan,
   gpir_op_branch_uncond,
   gpir_op_eq,
   gpir_op_ne,

   /* auxiliary ops */
   gpir_op_dummy_f,
   gpir_op_dummy_m,

   gpir_op_num,
} gpir_op;

typedef enum {
   gpir_node_type_alu,
   gpir_node_type_const,
   gpir_node_type_load,
   gpir_node_type_store,
   gpir_node_type_branch,
} gpir_node_type;

typedef struct {
   char *name;
   bool dest_neg;
   bool src_neg[4];
   int *slots;
   gpir_node_type type;
   bool spillless;
   bool schedule_first;
   bool may_consume_two_slots;
} gpir_op_info;

extern const gpir_op_info gpir_op_infos[];
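
/* Illustrative usage sketch (not part of the driver interface): gpir_op_infos
 * is indexed by gpir_op, so per-op properties can be looked up directly from
 * a node, e.g.:
 *
 *    const gpir_op_info *info = &gpir_op_infos[node->op];
 *    if (info->may_consume_two_slots) {
 *       // handle ops that may occupy two instruction slots
 *    }
 *
 * where node is assumed to be an existing gpir_node pointer.
 */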

typedef struct {
   enum {
      GPIR_DEP_INPUT,     /* def is the input of use */
      GPIR_DEP_OFFSET,    /* def is the offset of use (i.e. temp store) */
      GPIR_DEP_READ_AFTER_WRITE,
      GPIR_DEP_WRITE_AFTER_READ,
   } type;

   /* the node that executes before succ */
   struct gpir_node *pred;
   /* the node that executes after pred */
   struct gpir_node *succ;

   /* link in the successor node's pred_list */
   struct list_head pred_link;
   /* link in the predecessor node's succ_list */
   struct list_head succ_link;
} gpir_dep;

struct gpir_instr;
struct gpir_store_node;

typedef struct gpir_node {
   struct list_head list;
   gpir_op op;
   gpir_node_type type;
   int index;
   char name[16];
   bool printed;
   struct gpir_block *block;

   /* node relationships */
   /* deps to the nodes that use this node (successors) */
   struct list_head succ_list;
   /* deps to the nodes this node uses (predecessors) */
   struct list_head pred_list;

   /* for scheduler and regalloc */
   int value_reg;
   union {
      struct {
         struct gpir_instr *instr;
         struct gpir_store_node *physreg_store;
         int pos;
         int dist;
         int index;
         bool ready;
         bool inserted;
         bool max_node, next_max_node;
         bool complex_allowed;
      } sched;
      struct {
         int parent_index;
         float reg_pressure;
         int est;
         bool scheduled;
      } rsched;
      struct {
         float index;
         struct gpir_node *last;
      } vreg;
      struct {
         int index;
      } preg;
   };
} gpir_node;

typedef struct {
   gpir_node node;

   gpir_node *children[3];
   bool children_negate[3];
   int num_child;

   bool dest_negate;
} gpir_alu_node;
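
/* Illustrative sketch, an assumption rather than code taken from the driver:
 * the per-child negate flags let a subtraction a - b be expressed as an add
 * whose second child is negated, without an extra node:
 *
 *    gpir_alu_node *add = gpir_node_create(block, gpir_op_add);
 *    add->children[0] = a;
 *    add->children[1] = b;
 *    add->children_negate[1] = true;
 *    add->num_child = 2;
 *    gpir_node_add_dep(&add->node, a, GPIR_DEP_INPUT);
 *    gpir_node_add_dep(&add->node, b, GPIR_DEP_INPUT);
 *
 * gpir_node_create() and gpir_node_add_dep() are declared later in this
 * header; block, a and b are assumed to exist already.
 */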

typedef struct {
   gpir_node node;
   union fi value;
} gpir_const_node;

typedef struct {
   int index;
   struct list_head list;
} gpir_reg;

typedef struct {
   gpir_node node;

   unsigned index;
   unsigned component;

   gpir_reg *reg;
   struct list_head reg_link;
} gpir_load_node;

typedef struct gpir_store_node {
   gpir_node node;

   unsigned index;
   unsigned component;
   gpir_node *child;

   gpir_reg *reg;
} gpir_store_node;

enum gpir_instr_slot {
   GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_MUL1,
   GPIR_INSTR_SLOT_ADD0,
   GPIR_INSTR_SLOT_ADD1,
   GPIR_INSTR_SLOT_PASS,
   GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_REG0_LOAD0,
   GPIR_INSTR_SLOT_REG0_LOAD1,
   GPIR_INSTR_SLOT_REG0_LOAD2,
   GPIR_INSTR_SLOT_REG0_LOAD3,
   GPIR_INSTR_SLOT_REG1_LOAD0,
   GPIR_INSTR_SLOT_REG1_LOAD1,
   GPIR_INSTR_SLOT_REG1_LOAD2,
   GPIR_INSTR_SLOT_REG1_LOAD3,
   GPIR_INSTR_SLOT_MEM_LOAD0,
   GPIR_INSTR_SLOT_MEM_LOAD1,
   GPIR_INSTR_SLOT_MEM_LOAD2,
   GPIR_INSTR_SLOT_MEM_LOAD3,
   GPIR_INSTR_SLOT_STORE0,
   GPIR_INSTR_SLOT_STORE1,
   GPIR_INSTR_SLOT_STORE2,
   GPIR_INSTR_SLOT_STORE3,
   GPIR_INSTR_SLOT_NUM,
   GPIR_INSTR_SLOT_END,
   GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
};
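
/* Illustrative sketch (assuming the *_BEGIN/*_END markers are inclusive
 * bounds, as their values suggest): walking the ALU slots of an instruction
 * could look like
 *
 *    for (int i = GPIR_INSTR_SLOT_ALU_BEGIN; i <= GPIR_INSTR_SLOT_ALU_END; i++) {
 *       gpir_node *node = instr->slots[i];
 *       if (node) {
 *          // process the node occupying this ALU slot
 *       }
 *    }
 *
 * where instr is assumed to be a pointer to the gpir_instr defined below.
 */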

typedef struct gpir_instr {
   int index;
   struct list_head list;

   gpir_node *slots[GPIR_INSTR_SLOT_NUM];

   /* The number of ALU slots free for moves. */
   int alu_num_slot_free;

   /* The number of ALU slots free for moves, except for the complex slot. */
   int alu_non_cplx_slot_free;

   /* We need to make sure that we can insert moves in the following cases:
    * (1) There was a use of a value two cycles ago.
    * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
    *     possibly satisfy (1) for the next cycle).
    * (3) There is a store instruction scheduled, but not its child.
    *
    * The complex slot cannot be used for a move in case (1), since it only
    * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
    * as the uses aren't in certain slots. It turns out that we don't have to
    * worry about nodes that can't use the complex slot for (2), since there
    * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
    * do have to worry about (3). This means tracking stores whose children
    * cannot be in the complex slot. In order to ensure that we have enough
    * space for all three, we maintain the following invariants:
    *
    * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
    *       alu_num_slot_needed_by_max +
    *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
    * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
    *       alu_num_slot_needed_by_non_cplx_store
    *
    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
    * nodes for the next instruction) but when there is a complex1 node in
    * this instruction it reduces to 4 to reserve a slot for complex2 in the
    * next instruction.
    */
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_non_cplx_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max;

   /* Used to communicate to the scheduler how many slots must be freed up
    * in order to satisfy the invariants.
    */
   int slot_difference;
   int non_cplx_slot_difference;

   int reg0_use_count;
   bool reg0_is_attr;
   int reg0_index;

   int reg1_use_count;
   int reg1_index;

   int mem_use_count;
   bool mem_is_temp;
   int mem_index;

   enum {
      GPIR_INSTR_STORE_NONE,
      GPIR_INSTR_STORE_VARYING,
      GPIR_INSTR_STORE_REG,
      GPIR_INSTR_STORE_TEMP,
   } store_content[2];
   int store_index[2];
} gpir_instr;
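
/* Worked example of invariant (1) above, with made-up numbers: if
 * alu_num_slot_needed_by_store = 1, alu_num_slot_needed_by_max = 2,
 * alu_num_unscheduled_next_max = 6 and alu_max_allowed_next_max = 5, the
 * right-hand side is 1 + 2 + max(6 - 5, 0) = 4, so the instruction only
 * stays schedulable while at least 4 ALU slots remain free for moves
 * (alu_num_slot_free >= 4).
 */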

typedef struct gpir_block {
   struct list_head list;
   struct list_head node_list;
   struct list_head instr_list;
   struct gpir_compiler *comp;

   struct gpir_block *successors[2];
   struct list_head predecessors;
   struct list_head predecessors_node;

   /* for regalloc */

   /* The set of registers live at the beginning of the block, i.e. registers
    * whose value may still be used later.
    */
   BITSET_WORD *live_in;

   /* Set of live registers at the end of the block. */
   BITSET_WORD *live_out;

   /* Set of registers that may have a value defined at the end of the
    * block.
    */
   BITSET_WORD *def_out;

   /* After register allocation, the set of live physical registers at the end
    * of the block. Needed for scheduling.
    */
   uint64_t live_out_phys;

   /* For codegen, the offset in the final program. */
   unsigned instr_offset;

   /* for scheduler */
   union {
      struct {
         int instr_index;
      } sched;
      struct {
         int node_index;
      } rsched;
   };
} gpir_block;

typedef struct {
   gpir_node node;
   gpir_block *dest;
   gpir_node *cond;
} gpir_branch_node;

struct lima_vs_compiled_shader;

#define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
#define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
#define GPIR_VECTOR_SSA_NUM 2

typedef struct gpir_compiler {
   struct list_head block_list;
   int cur_index;

   /* Find the gpir node for a given NIR SSA def. */
   gpir_node **node_for_ssa;

   /* Find the gpir node for a given NIR register. */
   gpir_node **node_for_reg;

   /* Find the gpir register for a given NIR SSA def. */
   gpir_reg **reg_for_ssa;

   /* Find the gpir register for a given NIR register. */
   gpir_reg **reg_for_reg;

   /* gpir block for NIR block. */
   gpir_block **blocks;

   /* for physical reg */
   struct list_head reg_list;
   int cur_reg;

   /* lookup table for vector SSA defs */
   struct {
      int ssa;
      gpir_node *nodes[4];
   } vector_ssa[GPIR_VECTOR_SSA_NUM];

   struct lima_vs_compiled_shader *prog;
   int constant_base;

   /* shaderdb */
   int num_instr;
   int num_loops;
   int num_spills;
   int num_fills;
} gpir_compiler;

#define GPIR_VALUE_REG_NUM 11
#define GPIR_PHYSICAL_REG_NUM 64

void *gpir_node_create(gpir_block *block, gpir_op op);
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
void gpir_node_delete(gpir_node *node);
void gpir_node_print_prog_dep(gpir_compiler *comp);
void gpir_node_print_prog_seq(gpir_compiler *comp);
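
/* Illustrative lowering sketch, assuming (from the argument names) that
 * gpir_node_replace_succ(dst, src) redirects all users of src to dst: a pass
 * that rewrites an old node into a newly created one could do
 *
 *    gpir_alu_node *mov = gpir_node_create(old_node->block, gpir_op_mov);
 *    mov->children[0] = input;
 *    mov->num_child = 1;
 *    gpir_node_add_dep(&mov->node, input, GPIR_DEP_INPUT);
 *    gpir_node_replace_succ(&mov->node, old_node);
 *    gpir_node_delete(old_node);
 *
 * where old_node and input are assumed to be existing gpir_node pointers.
 */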

#define gpir_node_foreach_succ(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_pred(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
#define gpir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
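
/* Illustrative usage sketch: the iterator variable is declared by the macro
 * itself, so counting the users of a node can be written as
 *
 *    int num_users = 0;
 *    gpir_node_foreach_succ(node, dep) {
 *       if (dep->type == GPIR_DEP_INPUT)
 *          num_users++;
 *    }
 *
 * The _safe variants are for loops that remove deps while iterating.
 */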

static inline bool gpir_node_is_root(gpir_node *node)
{
   return list_is_empty(&node->succ_list);
}

static inline bool gpir_node_is_leaf(gpir_node *node)
{
   return list_is_empty(&node->pred_list);
}

#define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
#define gpir_node_to_const(node) ((gpir_const_node *)(node))
#define gpir_node_to_load(node) ((gpir_load_node *)(node))
#define gpir_node_to_store(node) ((gpir_store_node *)(node))
#define gpir_node_to_branch(node) ((gpir_branch_node *)(node))
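
/* Illustrative sketch: these casts rely on the gpir_node being the first
 * member of every derived struct, so a typical pattern is to check
 * node->type before downcasting:
 *
 *    if (node->type == gpir_node_type_alu) {
 *       gpir_alu_node *alu = gpir_node_to_alu(node);
 *       for (int i = 0; i < alu->num_child; i++) {
 *          // inspect alu->children[i] and alu->children_negate[i]
 *       }
 *    }
 */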

gpir_instr *gpir_instr_create(gpir_block *block);
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_print_prog(gpir_compiler *comp);

bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);

bool gpir_optimize(gpir_compiler *comp);
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
bool gpir_regalloc_prog(gpir_compiler *comp);
bool gpir_schedule_prog(gpir_compiler *comp);
bool gpir_codegen_prog(gpir_compiler *comp);

gpir_reg *gpir_create_reg(gpir_compiler *comp);

#endif