Skip to content

Commit

Permalink
x64: fixed phi parallel copies (might do swaps later)
Browse files Browse the repository at this point in the history
  • Loading branch information
RealNeGate committed Jul 8, 2023
1 parent 7853f4b commit 080db36
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 33 deletions.
62 changes: 52 additions & 10 deletions tb/src/x64/generic_cg.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ typedef struct Def {
Clobbers* clobbers;
} Def;

// Book-keeping for one phi while an incoming edge is lowered (see phi_edge):
//   val - def index of the virtual register that holds the phi itself
//   tmp - def index of a temporary holding the incoming value, only set when
//         the phi takes part in a parallel copy; -1 otherwise
typedef struct {
    int val;
    int tmp;
} PhiVal;

// NL_Map from TB_Node* keys to MachineBB values (presumably one entry per
// basic-block node -- TODO confirm against the users of ctx.machine_bbs)
typedef NL_Map(TB_Node*, MachineBB) MachineBBs;
// dynamic array of DefIndex; liveness() returns one and reg_alloc() consumes it
typedef DynArray(DefIndex) RegAllocWorklist;

Expand All @@ -122,6 +127,9 @@ typedef struct {
TB_Node* fallthrough;
TB_PostorderWalk order;

// temporary but still precious
DynArray(PhiVal) phi_vals;

// machine output sequences
Inst *first, *last;
DynArray(Def) defs;
Expand Down Expand Up @@ -151,6 +159,13 @@ typedef struct {
TB_SafepointKey* safepoints;
} Ctx;

// Pseudo instruction types used by the generic code generator. The numeric
// values are fixed, only the listing order is ascending here.
enum {
    // produces no machine code (emit_code skips instructions of this type)
    INST_USE  = 1020,
    // move built by inst_move with regs = { -1, lhs, rhs }; liveness turns
    // the first move into a def into an INST_COPY
    INST_MOVE = 1021,
    // dst = COPY src
    INST_COPY = 1022,
};

#if 1
#define ASM if (ctx->emit.emit_asm)
#else
Expand Down Expand Up @@ -191,6 +206,7 @@ static bool fits_into_int32(uint64_t x) {
}

// Forward declarations of helpers that the generic code generator calls
// before their definitions are visible (inst_move is defined in x64.c; the
// others presumably come from the target backend as well -- TODO confirm).
static bool wont_spill_around(int type);
// builds an INST_MOVE instruction; phi_edge submits these for parallel copies
static Inst inst_move(TB_DataType dt, int lhs, int rhs);
static int classify_reg_class(TB_DataType dt);
static int isel(Ctx* restrict ctx, TB_Node* n);
static void finna_use_reg(Ctx* restrict ctx, int reg_class, int reg_num);
Expand Down Expand Up @@ -417,14 +433,14 @@ static RegAllocWorklist liveness(Ctx* restrict ctx, TB_Function* f) {
timeline += 2;

// convert initial move into copy
if (inst->type == X86_INST_MOVE) {
if (inst->type == INST_MOVE) {
assert(inst->regs[1] < -1);
int di = -inst->regs[1] - 2;

if (!set_get(&copy_init, di)) {
set_put(&copy_init, di);

inst->type = (int) X86_INST_COPY;
inst->type = INST_COPY;
inst->regs[0] = USE(inst->regs[1]);
inst->regs[1] = inst->regs[2];
inst->regs[2] = 0;
Expand Down Expand Up @@ -536,27 +552,52 @@ static void hint(Ctx* restrict ctx, DefIndex di, int reg) {

// Lowers the phis of region `dst` for the incoming edge number `index`:
// every phi's virtual register receives the value of its input 1 + index.
//
// All copies of one edge act as a single parallel assignment. An input that
// is itself a phi of `dst` is therefore saved into a fresh temporary before
// any phi register is written, otherwise an earlier copy could overwrite a
// value that a later copy still has to read (e.g. a = phi(b), b = phi(a)).
// Cycles are broken with temporaries only, swaps are not generated yet.
//
//   ctx    codegen context; ctx->phi_vals is reused as scratch storage
//   dst    the region node that owns the phis
//   index  which predecessor edge is being lowered
static void phi_edge(Ctx* restrict ctx, TB_Node* dst, int index) {
    TB_NodeRegion* region = TB_NODE_GET_EXTRA(dst);

    // work on a local handle of the scratch array and store it back at the
    // end (dyn_array_put presumably may reallocate the storage)
    DynArray(PhiVal) phi_vals = ctx->phi_vals;
    dyn_array_clear(phi_vals);

    // pass 1: find or create the virtual register of every phi.
    // No copy may be emitted here: writing a destination this early would
    // clobber values the other phis of this edge still need to read.
    FOREACH_N(i, 0, region->proj_count) {
        TB_Node* n = region->projs[i];
        assert(n->type == TB_PHI);

        ptrdiff_t search = nl_map_get(ctx->values, n);
        int dst_vreg = -1;
        if (search < 0) {
            dst_vreg = DEF(n, classify_reg_class(n->dt));
            nl_map_put(ctx->values, n, dst_vreg);
        } else {
            dst_vreg = ctx->values[search].v;
        }

        PhiVal p = { dst_vreg, -1 };
        dyn_array_put(phi_vals, p);
    }

    // pass 2: inputs that are phis of this same region get captured into a
    // temporary while they still hold their old value
    FOREACH_N(i, 0, region->proj_count) {
        TB_Node* n = region->projs[i];
        assert(n->type == TB_PHI);

        TB_Node* in = n->inputs[1 + index];
        if (in->type == TB_PHI && in->inputs[0] == dst) {
            int tmp = DEF(n, classify_reg_class(n->dt));
            copy_value(ctx, n, USE(tmp), in, n->dt);
            phi_vals[i].tmp = tmp;
        }
    }

    // pass 3: write the phi registers, from the temporary when one was made
    // and straight from the input otherwise
    FOREACH_N(i, 0, region->proj_count) {
        TB_Node* n = region->projs[i];

        // named dst_use so the `dst` parameter is not shadowed
        int dst_use = USE(phi_vals[i].val);
        if (phi_vals[i].tmp >= 0) {
            int src_use = USE(phi_vals[i].tmp);
            SUBMIT(inst_move(n->dt, dst_use, src_use));
        } else {
            copy_value(ctx, n, dst_use, n->inputs[1 + index], n->dt);
        }
    }

    ctx->phi_vals = phi_vals;
}

static void schedule_effect(Ctx* restrict ctx, TB_Node* parent, TB_Node* n) {
Expand Down Expand Up @@ -615,7 +656,7 @@ static void compile_function(TB_Function* restrict f, TB_FunctionOutput* restric
}
};

// ctx.emit.emit_asm = true;
ctx.emit.emit_asm = true;
/* if (ctx.emit.emit_asm) {
tb_function_print(f, tb_default_print_callback, stdout);
}*/
Expand Down Expand Up @@ -698,6 +739,7 @@ static void compile_function(TB_Function* restrict f, TB_FunctionOutput* restric
nl_map_free(ctx.emit.labels);
nl_map_free(ctx.values);
nl_map_free(ctx.machine_bbs);
dyn_array_destroy(ctx.phi_vals);

if (dyn_array_length(f->lines)) {
f->lines[0].pos = 0;
Expand Down
6 changes: 3 additions & 3 deletions tb/src/x64/reg_alloc.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// TODO(NeGate): We should switch to Efficient global register allocation, 2011
// https://arxiv.org/pdf/2011.05608.pdf
// Guard for the register allocator's debug printing:
//     REG_ALLOC_LOG { printf(...); }
// The macro is defined exactly once and is off by default; build with
// -DREG_ALLOC_DEBUG=1 to get the allocator trace on stdout.
#ifndef REG_ALLOC_DEBUG
#define REG_ALLOC_DEBUG 0
#endif
#define REG_ALLOC_LOG if (REG_ALLOC_DEBUG)

// returns true if used in the next n instructions
static bool check_if_used(Ctx* restrict ctx, Inst* inst, int def_i, int n) {
Expand Down Expand Up @@ -81,7 +81,7 @@ static int spill_register(Ctx* restrict ctx, RegAllocWorklist* worklist, Inst* s
// if it's used, refer to reload
bool skip_next = false;
FOREACH_REVERSE_N(j, 1, 4) if (inst->regs[j] == USE(split_def)) {
if (inst->type == X86_INST_MOVE && j == 1) {
if (inst->type == INST_MOVE && j == 1) {
skip_next = true;
r.old = split_def;
spill(ctx, inst, &r);
Expand Down Expand Up @@ -197,7 +197,7 @@ static void reg_alloc(Ctx* restrict ctx, TB_Function* f, RegAllocWorklist workli
int time = d->start;
REG_ALLOC_LOG {
printf(" \x1b[32m# D%zu t=[%d,%d) ", di, time, d->end);
if (d->node) printf("%p", d->node);
if (d->node) printf("%p %s", d->node, tb_node_get_name(d->node));
printf("\x1b[0m\n");
}

Expand Down
35 changes: 15 additions & 20 deletions tb/src/x64/x64.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,7 @@ enum {
REG_CLASS_XMM
};

// Pseudo instruction types of the x64 backend, listed in ascending order
// (the numeric values are unchanged).
typedef enum X86_InstType {
    X86_INST_USE  = 1020,
    X86_INST_MOVE = 1021,
    // dst = COPY src
    X86_INST_COPY = 1022,
} X86_InstType;
// instruction type values are stored as plain ints
typedef int X86_InstType;

// for memory operands imm[0] is two fields:
// top 32bits is scale, bottom 32bits is displacement
Expand Down Expand Up @@ -167,18 +162,9 @@ static Inst inst_u(int op, TB_DataType dt) {
};
}

// Builds a register-to-register move pseudo instruction.
// regs[0] is left as -1, lhs goes into regs[1] and rhs into regs[2];
// the data type is legalized before it is stored.
static Inst inst_move(TB_DataType dt, int lhs, int rhs) {
    Inst move = {
        .regs = { -1, lhs, rhs },
        .data_type = legalize(dt),
        .layout = X86_OP_RR,
        .type = (int)X86_INST_MOVE
    };
    return move;
}

static Inst inst_use(int src) {
return (Inst){
.type = (int)X86_INST_USE,
.type = INST_USE,
.layout = X86_OP_NONE,
.data_type = TB_X86_TYPE_NONE,
.regs = { src },
Expand Down Expand Up @@ -214,9 +200,18 @@ static Inst inst_g(int op, TB_DataType dt, int dst, const TB_Symbol* sym) {
};
}

// Builds an INST_MOVE pseudo instruction in register-register layout.
// regs[0] is left as -1, lhs goes into regs[1] and rhs into regs[2];
// the data type is legalized before it is stored.
static Inst inst_move(TB_DataType dt, int lhs, int rhs) {
    Inst move = {
        .regs = { -1, lhs, rhs },
        .data_type = legalize(dt),
        .layout = X86_OP_RR,
        .type = (int)INST_MOVE
    };
    return move;
}

static Inst inst_copy(TB_DataType dt, int lhs, int rhs) {
return (Inst){
.type = (int) X86_INST_COPY,
.type = INST_COPY,
.layout = X86_OP_RR,
.data_type = legalize(dt),
.regs = { lhs, rhs }
Expand Down Expand Up @@ -1516,7 +1511,7 @@ static void emit_code(Ctx* restrict ctx) {
};
dyn_array_put(f->lines, l);
continue;
} else if (inst->type == X86_INST_USE) {
} else if (inst->type == INST_USE) {
continue;
}

Expand Down Expand Up @@ -1615,11 +1610,11 @@ static void emit_code(Ctx* restrict ctx) {
// TODO(NeGate): this can potentially place the prefix too early
if (inst->prefix & INST_REP) EMIT1(&ctx->emit, 0xF3);

if (inst->type == X86_INST_MOVE) {
if (inst->type == INST_MOVE) {
if (!is_value_match(&ops[1], &ops[2])) {
inst2_print(ctx, is_fp ? FP_MOV : MOV, &ops[1], &ops[2], inst->data_type);
}
} else if (inst->type == X86_INST_COPY) {
} else if (inst->type == INST_COPY) {
if (!is_value_match(&ops[0], &ops[1])) {
inst2_print(ctx, is_fp ? FP_MOV : MOV, &ops[0], &ops[1], inst->data_type);
}
Expand Down

0 comments on commit 080db36

Please sign in to comment.