/*
 * vmpi.c
 * 
 * This file is part of the Oxford Oberon-2 compiler
 * Copyright (c) 2006--2016 J. M. Spivey
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "config.h"

#include "vm.h"
#include "vminternal.h"

/* #define USE_MOVW 1 */

// REGISTERS

/* Register numbers -- agree with binary encoding */
#define R0  0
#define R1  1
#define R2  2
#define R3  3
#define R4  4
#define R5  5
#define R6  6
#define R7  7
#define R8  8
#define R9  9
#define R10 10
#define FP  11
#define IP  12 // Scratch register: const_reg and index_reg leave results here
#define SP  13
#define LR  14
#define PC  15
#define NOREG -1 // "No base register": load/store helpers then use the offset as an address
#define R(i) (i) // Register used for argument number i (see ARG and GETARG)

/* Use only even floating-point registers, numbered to suit the 
   instruction format.  Bit 0x10 marks a register as floating point
   (tested by isfloat); the low four bits are what reg() extracts
   for the instruction's register fields. */
#define F0   0x10
#define F2   0x11
#define F4   0x12
#define F6   0x13
#define F8   0x14
#define F10  0x15
#define F12  0x16
#define F14  0x17 // Not offered in vm_freg; used as a temporary by CONVfi/CONVdi

/* vmreg structures for presentation in the interface.  Each initializer
   pairs the name shown to clients with the machine register number
   defined above; the code generator reads them back as vr_name and
   vr_reg.  (The struct itself is declared elsewhere -- presumably in
   one of the vm headers.) */
struct _vmreg
     reg_i0 = { "I0", R3 }, // I registers are caller-save
     reg_v0 = { "V0", R4 }, // V registers are callee-save
     reg_v1 = { "V1", R5 },
     reg_v2 = { "V2", R6 },
     reg_v3 = { "V3", R7 },
     reg_v4 = { "V4", R8 },
     reg_v5 = { "V5", R9 },
     reg_v6 = { "V6", R10 },
     reg_f0 = { "F0", F0 }, // F registers for floating point (caller-save)
     reg_f1 = { "F1", F2 },
     reg_f2 = { "F2", F4 },
     reg_f3 = { "F3", F6 },
     reg_f4 = { "F4", F8 },
     reg_f5 = { "F5", F10 },
     reg_f6 = { "F6", F12 },
     reg_rr = { "RET", R0 }, // Result register -- may overlap with others
     reg_sp = { "BASE", SP }; // Base register for locals

/* Number of V registers, total int registers, number of F registers.
   These counts must match the lengths of vm_ireg and vm_freg below
   (7 V registers + 1 I register = 8 integer registers). */
const int vm_nvreg = 7, vm_nireg = 8, vm_nfreg = 7;

/* The int registers, V followed by I */
const vmreg vm_ireg[] = {
     &reg_v0, &reg_v1, &reg_v2, &reg_v3, &reg_v4, &reg_v5, &reg_v6,
     &reg_i0
};

/* The F registers (F14 is deliberately absent: it is kept back as a
   temporary for the float-to-int conversions) */
const vmreg vm_freg[] = {
     &reg_f0, &reg_f1, &reg_f2, &reg_f3,
     &reg_f4, &reg_f5, &reg_f6
};

/* The RET and BASE registers */
const vmreg vm_ret = &reg_rr, vm_base = &reg_sp;

/* isfloat -- true for the F register numbers (0x10..0x17), false for
   the integer registers 0..15 */
#define isfloat(r) (((r)&0x10) != 0)

/* Modifier bits OR-ed into a load/store opcode (opcode field starts at
   bit 20) */
#define UBIT   (0x08<<20) // Add the offset
#define DBIT   (0x04<<20) // Use odd FP register

#ifdef DEBUG

/* Printable register names for debugging output.  The table starts
   with an entry for NOREG so that, after the offset applied below,
   it can be indexed directly by register number: -1 gives "none",
   0..15 the integer registers and 0x10..0x17 the F registers. */
char *_regname[] = {
     "none", 
     "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", 
     "fp", "ip", "sp", "lr", "pc",
     "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14"
};

/* regname[r] is the name of register r, valid for r = NOREG too */
char **regname = &_regname[1];

/* fmt_addr -- format an address for debugging
 *
 * rs is the base register number, imm the magnitude of the offset, and
 * op the opcode being emitted: the offset is shown as positive when op
 * has UBIT set and negative otherwise.  The result lives in a static
 * buffer that is overwritten by the next call, so it must be consumed
 * before fmt_addr is called again.
 */
static char *fmt_addr(int rs, int imm, int op) {
     static char buf[32];

     /* snprintf keeps the write inside buf whatever the arguments */
     if (imm == 0)
          snprintf(buf, sizeof buf, "[%s]", regname[rs]);
     else
          snprintf(buf, sizeof buf, "[%s, #%d]", regname[rs],
                   (op&UBIT ? imm : -imm));

     return buf;
}

/* fmt_shift -- format a scaled register as part of an address
 *
 * Returns the plain register name when the shift s is zero; otherwise
 * formats "reg, LSL s" into a static buffer that is overwritten by the
 * next call.
 */
static char *fmt_shift(int r, int s) {
     static char buf[16];

     if (s == 0)
          return regname[r];

     /* The buffer is small: bound the write so that an unexpectedly
        large shift value is truncated instead of overrunning it. */
     snprintf(buf, sizeof buf, "%s, LSL %d", regname[r], s);
     return buf;
}

// Macros that allow each opcode to be passed around with the 
// corresponding mnemonic attached to it.  In this (DEBUG) variant an
// "opcode" such as opADD expands to TWO comma-separated values,
// a mnemonic string and the numeric opcode, which fill two
// consecutive parameters of the receiving function.

#define MNEM(mnem, op) mnem, op

/* Parameter declarations and matching argument lists for functions
   that receive or forward one (OP) or a second (OP2) opcode */
#define OPDECL const char *mnem, int op
#define OPDECL2 const char *mnem2, int op2
#define OP mnem, op
#define OP2 mnem2, op2
#define NULLOP NULL, 0

/* GETOP(op) extracts just the numeric opcode from a 'mnem, opcode'
   pair; the extra macro level lets the pair be expanded into two
   arguments before _GETOP is applied. */
#define _GETOP(mnem, op) op
#define GETOP(op) _GETOP(op)

/* In a call SETBIT(op, bit), the argument op will have the form 
   'mnem, opcode'.  This trick (exploiting details of the CPP macro
   expansion process) lets us separate the mnemonic and opcode
   as arguments of _SETBIT. */
#define _SETBIT(mnem, op, bit) mnem, (op|bit) 
#define SETBIT(op, bit) _SETBIT(op, bit)

#else

/* Non-debugging variants: the mnemonic is dropped, so an opcode is a
   single int and each of these macros deals with one value only. */
#define MNEM(mnem, op) op

#define OPDECL int op
#define OPDECL2 int op2
#define OP op
#define OP2 op2
#define NULLOP 0
#define GETOP(op) op
#define SETBIT(op, bit) (op|bit)

#endif


// OPCODES

/* ALU operations.  These values go into the opcode field that the
   opcode() macro places at bit 20; the immediate forms are obtained by
   OR-ing in IMMED.
   NOTE(review): aluCMP and aluCMN are odd and annotated as setting the
   condition codes, while aluTST and aluTEQ are even -- they are not
   used in the code visible here, so confirm their values before
   relying on them. */
#define aluAND 0
#define aluEOR 2
#define aluSUB 4
#define aluRSB 6
#define aluADD 8
#define aluADC 10
#define aluSBC 12
#define aluRSC 14
#define aluTST 16
#define aluTEQ 18
#define aluCMP 21 // Sets condition codes
#define aluCMN 23 // Ditto
#define aluORR 24
#define aluMOV 26
#define aluBIC 28
#define aluMVN 30

/* Condition codes, placed by opcode() in bits [31..28] */
#define condEQ 0
#define condNE 1
#define condHS 2
#define condLO 3
#define condMI 4
#define condPL 5
#define condVS 6
#define condVC 7
#define condHI 8
#define condLS 9
#define condGE 10
#define condLT 11
#define condGT 12
#define condLE 13
#define condAL 14

/* Coprocessor codes, placed by opcode() in bits [11..8]; they select
   the single- or double-precision form of a floating point opcode */
#define cpSGL 10
#define cpDBL 11


// INSTRUCTIONS

/*
Rough instruction layout:
     [31..28] Condition
     [27..20] Opcode
     [19..16] Rn or third opcode
     [15..12] Rd
     [11..8]  Rs or coprocessor number
     [7..4]   Second opcode
     [3..0]   Rm
*/

/* opcode -- assemble the fixed part of an instruction word from the
   condition, the three opcode fields and the coprocessor number,
   following the layout above */
#define opcode(cond, op, op2, op3, cp) \
     ((cond)<<28 | (op)<<20 | (op2)<<4 | (op3)<<16 | (cp)<<8)

// Ordinary operations with 1, 2, 3 opcode fields
#define opn(x)             opcode(condAL, x, 0, 0, 0)
#define opn2(x, y)         opcode(condAL, x, y, 0, 0)
#define opn3(x, y, z)      opcode(condAL, x, y, z, 0)

// Conditional version of opn
#define opnc(cond, x)      opcode(cond,   x, 0, 0, 0)

// Floating point operations: cp is cpSGL or cpDBL
#define opf(x, cp)         opcode(condAL, x, 0, 0, cp)
#define opf2(x, y, cp)     opcode(condAL, x, y, 0, cp)
#define opf3(x, y, z, cp)  opcode(condAL, x, y, z, cp)

/* fmt_instr -- merge an opcode with the Rd and Rn fields and whatever
   occupies the low-order bits (Rm, shift, or an immediate) */
#define fmt_instr(op, rd, rn, imm) \
     ((op) | (rn)<<16 | (rd)<<12 | (imm))

/* instr -- format an instruction and hand it to word() for output */
#define instr(op, rd, rn, imm) \
     word(fmt_instr(op, rd, rn, imm))

/* instr4 -- as instr, with Rm in bits [3..0] and Rs in bits [11..8] */
#define instr4(op, rd, rn, rm, rs) \
     instr(op, rd, rn, (rm) | (rs)<<8)

/* reg -- the four-bit field value for a register; strips the 0x10
   marker from F register numbers */
#define reg(r) ((r)&0xf)

/* Table of the instructions used by the code generator.  Each entry
   pairs a mnemonic (used only for debugging output) with the fixed
   bits of the instruction word, built by opn/opnc/opf and friends. */
#define opADD    MNEM("add",    opn(aluADD))
#define opAND    MNEM("and",    opn(aluAND))
#define opBIC    MNEM("bic",    opn(aluBIC))
#define opASR    MNEM("asr",    opn2(aluMOV, 0x4))
#define opB      MNEM("b",      opnc(condAL, 0xa0))
#define opBEQ    MNEM("beq",    opnc(condEQ, 0xa0))
#define opBGE    MNEM("bge",    opnc(condGE, 0xa0))
#define opBGT    MNEM("bgt",    opnc(condGT, 0xa0))
#define opBHI    MNEM("bhi",    opnc(condHI, 0xa0))
#define opBHS    MNEM("bhs",    opnc(condHS, 0xa0))
#define opBLE    MNEM("ble",    opnc(condLE, 0xa0))
#define opBLO    MNEM("blo",    opnc(condLO, 0xa0))
#define opBLS    MNEM("bls",    opnc(condLS, 0xa0))
#define opBLT    MNEM("blt",    opnc(condLT, 0xa0))
#define opBNE    MNEM("bne",    opnc(condNE, 0xa0))
#define opBLX    MNEM("blx",    opn2(0x12, 0x3))
#define opBX     MNEM("bx",     opn2(0x12, 0x1))
#define opCMN    MNEM("cmn",    opn(aluCMN))
#define opCMP    MNEM("cmp",    opn(aluCMP))
#define opEOR    MNEM("eor",    opn(aluEOR))
#define opFADDD  MNEM("faddd",  opf2(0xe3, 0x0, cpDBL))
#define opFADDS  MNEM("fadds",  opf2(0xe3, 0x0, cpSGL))
#define opFCMPD  MNEM("fcmpd",  opf3(0xeb, 0x4, 0x4, cpDBL))
#define opFCMPS  MNEM("fcmps",  opf3(0xeb, 0x4, 0x4, cpSGL))
#define opFCVTDS MNEM("fcvtds", opf3(0xeb, 0xc, 0x7, cpSGL))
#define opFCVTSD MNEM("fcvtsd", opf3(0xeb, 0xc, 0x7, cpDBL))
#define opFDIVD  MNEM("fdivd",  opf(0xe8, cpDBL))
#define opFDIVS  MNEM("fdivs",  opf(0xe8, cpSGL))
#define opFLDD   MNEM("fldd",   opf(0xd1, cpDBL))
#define opFLDS   MNEM("flds",   opf(0xd1, cpSGL))
#define opFMOVD  MNEM("fmovd",  opf3(0xeb, 0x4, 0, cpDBL))
#define opFMOVS  MNEM("fmovs",  opf3(0xeb, 0x4, 0, cpSGL))
#define opFMRS   MNEM("fmrs",   opf2(0xe1, 0x1, cpSGL))
#define opFMSR   MNEM("fmsr",   opf2(0xe0, 0x1, cpSGL))
#define opFMSTAT MNEM("fmstat", opf3(0xef, 0x1, 0x1, cpSGL))
#define opFMULD  MNEM("fmuld",  opf(0xe2, cpDBL))
#define opFMULS  MNEM("fmuls",  opf(0xe2, cpSGL))
#define opFNEGD  MNEM("fnegd",  opf3(0xeb, 0x4, 0x1, cpDBL))
#define opFNEGS  MNEM("fnegs",  opf3(0xeb, 0x4, 0x1, cpSGL))
#define opFSITOD MNEM("fsitod", opf3(0xeb, 0xc, 0x8, cpDBL))
#define opFSITOS MNEM("fsitos", opf3(0xeb, 0xc, 0x8, cpSGL))
#define opFTOSIZD MNEM("ftosizd", opf3(0xeb, 0xc, 0xd, cpDBL))
#define opFTOSIZS MNEM("ftosizs", opf3(0xeb, 0xc, 0xd, cpSGL))
#define opFSTD   MNEM("fstd",   opf(0xd0, cpDBL))
#define opFSTS   MNEM("fsts",   opf(0xd0, cpSGL))
#define opFSUBD  MNEM("fsubd",  opf2(0xe3, 0x4, cpDBL))
#define opFSUBS  MNEM("fsubs",  opf2(0xe3, 0x4, cpSGL))
#define opLDMFD  MNEM("ldmfd",  opn(0x89))
#define opLDR    MNEM("ldr",    opn(0x51))
#define opLDRB   MNEM("ldrb",   opn(0x55))
#define opLDRH   MNEM("ldrh",   opn2(0x11, 0xb))
#define opLDSB   MNEM("ldsb",   opn2(0x11, 0xd))
#define opLDSH   MNEM("ldsh",   opn2(0x11, 0xf))
#define opLSL    MNEM("lsl",    opn2(aluMOV, 0x0))
#define opLSR    MNEM("lsr",    opn2(aluMOV, 0x2))
#define opMOV    MNEM("mov",    opn(aluMOV))
#define opMOVEQ  MNEM("moveq",  opnc(condEQ, aluMOV))
#define opMOVGE  MNEM("movge",  opnc(condGE, aluMOV))
#define opMOVGT  MNEM("movgt",  opnc(condGT, aluMOV))
#define opMOVHI  MNEM("movhi",  opnc(condHI, aluMOV))
#define opMOVHS  MNEM("movhs",  opnc(condHS, aluMOV))
#define opMOVLE  MNEM("movle",  opnc(condLE, aluMOV))
#define opMOVLO  MNEM("movlo",  opnc(condLO, aluMOV))
#define opMOVLS  MNEM("movls",  opnc(condLS, aluMOV))
#define opMOVLT  MNEM("movlt",  opnc(condLT, aluMOV))
#define opMOVNE  MNEM("movne",  opnc(condNE, aluMOV))
#define opMOVT   MNEM("movt",   opn(0x34))
#define opMOVW   MNEM("movw",   opn(0x30))
#define opMUL    MNEM("mul",    opn2(0x00, 0x9))
#define opMVN    MNEM("mvn",    opn(aluMVN))
#define opORR    MNEM("orr",    opn(aluORR))
#define opROR    MNEM("ror",    opn2(aluMOV, 0x6))
#define opRSB    MNEM("rsb",    opn(aluRSB))
#define opSTMFDw MNEM("stmfd!", opn(0x92))
#define opSTRB   MNEM("strb",   opn(0x54))
#define opSTRH   MNEM("strh",   opn2(0x10, 0xb))
#define opSTR    MNEM("str",    opn(0x50))
#define opSUB    MNEM("sub",    opn(aluSUB))
#define opSXTH   MNEM("sxth",   opn3(0x6b, 0x7, 0xf))


// INSTRUCTION FORMATTING

/* Modifier bits OR-ed into an opcode */
#define IMMED (0x20<<20) // Operand is an immediate field, not a register
#define RSHIFT (1<<4) // Shift amount in Rs

/* Field extractors for the low-order bits of an instruction */
#define immed(imm) ((imm)&0xff)         // 8-bit immediate
#define shift_imm(c) (((c)&0x1f)<<7)    // 5-bit shift count in bits [11..7]
#define imm12(imm) ((imm)&0xfff)        // 12-bit immediate or offset

#ifdef DEBUG
/* decode -- recover the value denoted by a rotated immediate field:
   the low eight bits rotated right by twice the count held in the
   bits above them */
static unsigned decode(unsigned imm) {
     unsigned byte = imm & 0xff;
     int rot = 2 * (imm >> 8);

     /* A zero rotation is treated separately so that we never shift
        by the full word width */
     if (rot == 0)
          return byte;

     return (byte << (32-rot)) | (byte >> rot);
}
#endif

// Three-register operation: rd := rn op rm
static void op_rrr(OPDECL, int rd, int rn, int rm) {
     int dst = reg(rd), lhs = reg(rn), rhs = reg(rm);

     vm_debug2("%s %s, %s, %s", mnem, regname[rd], regname[rn], regname[rm]);
     instr(op, dst, lhs, rhs);
     vm_done();
}

// Multiply rn := rm * rs.  The destination occupies the field that
// instr() calls rn, and the rd field is left as zero.
static void op_mul(OPDECL, int rn, int rm, int rs) {
     int factors = reg(rm) | reg(rs)<<8;

     vm_debug2("%s %s, %s, %s", mnem, regname[rn], regname[rm], regname[rs]);
     instr(op, 0, reg(rn), factors);
     vm_done();
}

// rd := rn op imm, where imm is an already-formatted shifter operand
// (such as the imm_field computed by immediate())
static void op_rri(OPDECL, int rd, int rn, int imm) {
     int code = op | IMMED;
     int field = imm12(imm);

     vm_debug2("%s %s, %s, #%s", mnem,
               regname[rd], regname[rn], fmt_val(decode(imm)));
     instr(code, reg(rd), reg(rn), field);
     vm_done();
}

#ifdef USE_MOVW
// rd := op imm16 -- the 16-bit constant is split into a 4-bit part
// that goes in the rn field and a 12-bit part in the low-order bits
static void op_ri16(OPDECL, int rd, int imm) {
     int top4 = (imm>>12)&0xf;
     int low12 = imm12(imm);

     vm_debug2("%s %s, #%#x", mnem, regname[rd], imm);
     instr(op, reg(rd), top4, low12);
     vm_done();
}
#endif

// rd := rm shift rs -- shift by an amount held in a register
static void shift_r(OPDECL, int rd, int rm, int rs) {
     int code = op | RSHIFT;
     int operand = reg(rm) | reg(rs)<<8;

     vm_debug2("mov %s, %s, %s %s", 
               regname[rd], regname[rm], mnem, regname[rs]);
     instr(code, reg(rd), 0, operand);
     vm_done();
}

// rd := rm shift c -- shift by a constant amount
//
// A zero shift count cannot be encoded directly for every shift type:
// on the ARM a zero in the shift-amount field means LSR #32, ASR #32
// or RRX for the LSR, ASR and ROR encodings respectively.  Shifting by
// zero is therefore emitted as a plain register move (which is what
// LSL #0 encodes), whatever the shift opcode.
static void shift_i(OPDECL, int rd, int rm, int c) {
     if (c == 0) {
          vm_debug2("mov %s, %s", regname[rd], regname[rm]);
          instr(GETOP(opMOV), reg(rd), 0, reg(rm));
          vm_done();
          return;
     }

     vm_debug2("%s %s, %s, #%d", mnem, regname[rd], regname[rm], c);
     instr(op, reg(rd), 0, reg(rm)|shift_imm(c));
     vm_done();
}

// rd := rn op (rm << s) -- second operand scaled by a left shift
static void op_rrrs(OPDECL, int rd, int rn, int rm, int s) {
     int scaled = reg(rm) | shift_imm(s);

     vm_debug2("%s %s, %s, %s, LSL #%d", mnem,
               regname[rd], regname[rn], regname[rm], s);
     instr(op, reg(rd), reg(rn), scaled);
     vm_done();
}

// rd := op rm -- two-register operation; the rn field is left zero
static void op_rr(OPDECL, int rd, int rm) {
     int dst = reg(rd), src = reg(rm);

     vm_debug2("%s %s, %s", mnem, regname[rd], regname[rm]);
     instr(op, dst, 0, src);
     vm_done();
}

// compare register rn with register rm; the rd field is left zero
static void cmp_r(OPDECL, int rn, int rm) {
     int lhs = reg(rn), rhs = reg(rm);

     vm_debug2("%s %s, %s", mnem, regname[rn], regname[rm]);
     instr(op, 0, lhs, rhs);
     vm_done();
}

// compare rn with an already-formatted immediate operand
static void cmp_i(OPDECL, int rn, int imm) {
     int code = op | IMMED;
     int field = imm12(imm);

     vm_debug2("%s %s, #%s", mnem, regname[rn], fmt_val(decode(imm)));
     instr(code, 0, reg(rn), field);
     vm_done();
}

// rd := op imm -- used for moves and conditional moves; imm is an
// already-formatted immediate operand
static void op_ri(OPDECL, int rd, int imm) {
     int code = op | IMMED;
     int field = imm12(imm);

     vm_debug2("%s %s, #%s", mnem, regname[rd], fmt_val(decode(imm)));
     instr(code, reg(rd), 0, field);
     vm_done();
}

// rd :=: mem[rn +/- off] -- off is the magnitude of the offset, and
// the caller must put UBIT in the opcode if it is to be added
static void ldst_ri(OPDECL, int rd, int rn, int off) {
     int field = imm12(off);

     vm_debug2("%s %s, %s", mnem, regname[rd], fmt_addr(rn, off, op));
     instr(op, reg(rd), reg(rn), field);
     vm_done();
}

#define RRBIT  (0x20<<20) // Double-reg indirect

// rd :=: mem[rn + rm<<s] -- the index register is always added
static void ldst_rr(OPDECL, int rd, int rn, int rm, int s) {
     int code = op | RRBIT | UBIT;
     int index = reg(rm) | shift_imm(s);

     vm_debug2("%s %s, [%s, %s]", mnem, regname[rd], regname[rn],
               fmt_shift(rm, s));
     instr(code, reg(rd), reg(rn), index);
     vm_done();
}


// Loads and stores for less common types

#define IBIT  (0x04<<20)

// The 8-bit offset is split into two nibbles, in bits [11..8] and [3..0]
#define offx(n) ((((n)&0xf0)<<4)|((n)&0xf))

// rd :=: mem[rn +/- off] with an immediate offset; the caller
// must specify UBIT for addition
static void ldst2_ri(OPDECL, int rd, int rn, int off) {
     int code = op | IBIT;
     int field = offx(off);

     vm_debug2("%s %s, %s", mnem, regname[rd], fmt_addr(rn, off, op));
     instr(code, reg(rd), reg(rn), field);
     vm_done();
}

// rd :=: mem[rn + rm] -- register-indexed form, index always added
static void ldst2_rr(OPDECL, int rd, int rn, int rm) {
     int code = op | UBIT;

     vm_debug2("%s %s, [%s, %s]", mnem, regname[rd], regname[rn], regname[rm]);
     instr(code, reg(rd), reg(rn), reg(rm));
     vm_done();
}

// load/store float -- off is an offset in words, shown in bytes in the
// debugging output; "+1" marks use of the odd register selected by DBIT
static void ldstf_ri(OPDECL, int rd, int rn, int off) {
     int field = immed(off);

     vm_debug2("%s %s%s, %s", mnem, regname[rd], (op&DBIT ? "+1" : ""),
               fmt_addr(rn, off*4, op));
     instr(op, reg(rd), reg(rn), field);
     vm_done();
}

/* bit -- mask with only bit r set */
#define bit(r) (1<<(r))

/* range -- mask with bits a..b inclusive set; requires 0 <= a <= b < 31.
   The shifts are done on an unsigned value, because left-shifting a
   negative int is undefined, and both arguments are parenthesized so
   that compound expressions can be passed safely.  The result is cast
   back to int to match the type of the other instruction fields. */
#define range(a, b) ((int) ((~0u << (a)) & ~(~0u << ((b)+1))))


// Branches

// branch with a 24-bit displacement field (truncated from dest)
static void branch_i(OPDECL, int dest) {
     int disp = ((int) dest)&0xffffff;

     vm_debug2("%s %d", mnem, dest);
     instr(op, 0, 0, disp);
     vm_done();
}

// jump or call via register rm; the three remaining register fields
// are filled with ones
static void jump_r(OPDECL, int rm) {
     int target = reg(rm) | 0xf<<8;

     vm_debug2("%s %s", mnem, regname[rm]);
     instr(op, 0xf, 0xf, target);
     vm_done();
}

// Copy FP to integer status, so that the conditional instructions can
// test the outcome of a floating point comparison
static void _fmstat(OPDECL) {
     const int rd_field = 0xf;

     vm_debug2("%s", mnem);
     instr(op, rd_field, 0, 0);
     vm_done();
}

#define fmstat() _fmstat(opFMSTAT)

// move from int reg to single-prec FP register rn := rd
static void _fmsr(OPDECL, int rn, int rd) {
     int fdst = reg(rn), isrc = reg(rd);

     vm_debug2("%s %s, %s", mnem, regname[rn], regname[rd]);
     instr(op, isrc, fdst, 0);
     vm_done();
}

#define fmsr(rn, rd) _fmsr(opFMSR, rn, rd)

// move from single-prec FP register to int reg rd := rn
static void _fmrs(OPDECL, int rd, int rn) {
     int idst = reg(rd), fsrc = reg(rn);

     vm_debug2("%s %s, %s", mnem, regname[rd], regname[rn]);
     instr(op, idst, fsrc, 0);
     vm_done();
}

#define fmrs(rd, rn) _fmrs(opFMRS, rd, rn)


// LITERAL TABLE

#ifndef USE_MOVW

#define MAXLITS 256

/* Parallel arrays: the values already placed in the literal pool and
   the address where each of them lives */
static int literals[MAXLITS];
static code_addr litloc[MAXLITS];
static int nlits;

/* make_literal -- return the address of a pool entry holding val,
   reusing an existing entry if there is one and otherwise allocating
   a fresh four-byte slot */
code_addr make_literal(int val) {
     int k = 0;

     /* Linear search for an existing copy */
     while (k < nlits && literals[k] != val)
          k++;

     if (k < nlits)
          return litloc[k];

     if (nlits >= MAXLITS)
          vm_panic("too many literals");

     /* Not found: allocate, fill in and record a new entry */
     code_addr where = vm_literal(4);
     * (int *) where = val;

     literals[nlits] = val;
     litloc[nlits] = where;
     nlits++;
     return where;
}

#endif

// VIRTUAL INSTRUCTIONS

static unsigned imm_field;      /* Formatted immediate field */

/* immediate -- try to express imm as an 8-bit value with an even
   rotation.  Sets imm_field to the 12-bit encoding and returns nonzero
   on success; on failure returns zero and leaves imm_field without
   any useful meaning. */
static int immediate(int imm) {
     unsigned bits = imm;
     int steps;

     /* Rotate right two places at a time until the value fits in eight
        bits, or all fifteen non-trivial rotations have been tried */
     for (steps = 0; steps < 15 && bits >= 256; steps++)
          bits = (bits << 30) | (bits >> 2);

     /* The hardware undoes our rotation by rotating right by the
        complementary amount, so record 16-steps (mod 16) */
     imm_field = bits | (((16-steps)&0xf) << 8);

     return (bits < 256);
}
 
/* move_immed -- move constant into specified register
 *
 * Uses a single MOV or MVN when imm or its complement can be formatted
 * as a shifter operand.  Otherwise the constant is built with
 * MOVW/MOVT (if USE_MOVW is defined) or loaded PC-relative from the
 * literal pool.
 */
static void move_immed(int r, int imm) {
     if (immediate(imm))
          op_ri(opMOV, r, imm_field);
     else if (immediate(~imm))
          op_ri(opMVN, r, imm_field);
     else {
#ifdef USE_MOVW
          op_ri16(opMOVW, r, imm);
          if ((unsigned) imm >> 16 != 0)
               op_ri16(opMOVT, r, imm>>16);
#else
          code_addr loc = make_literal(imm);

          /* The PC reads as the address of the load plus 8 */
          int off = loc - (pc+8);

          /* The offset field holds a 12-bit magnitude: refuse to
             generate a load that would silently address the wrong
             word, as we do for branches that are out of range. */
          if (off <= -4096 || off >= 4096)
               vm_panic("literal out of range");

          /* The direction is given by UBIT, so a literal that lies
             before the load needs the subtracting form. */
          if (off >= 0)
               ldst_ri(SETBIT(opLDR, UBIT), r, PC, off);
          else
               ldst_ri(opLDR, r, PC, -off);
#endif
     }
}

/* const_reg -- load a constant into the scratch register IP and
   return that register's number */
static int const_reg(int imm) {
     const int scratch = IP;

     move_immed(scratch, imm);
     return scratch;
}

/* index_reg -- compute ra + (rb << s) in the scratch register IP and
   return that register's number */
static int index_reg(int ra, int rb, int s) {
     const int scratch = IP;

     op_rrrs(opADD, scratch, ra, rb, s);
     return scratch;
}

/* compare_immed -- compare register rn with a constant, using CMP with
   an immediate operand if imm can be formatted, CMN if -imm can, and
   otherwise a register comparison against the scratch register */
void compare_immed(int rn, int imm) {
     if (immediate(imm)) {
          cmp_i(opCMP, rn, imm_field);
          return;
     }

     if (immediate(-imm)) {
          cmp_i(opCMN, rn, imm_field);
          return;
     }

     /* Loading the constant may still use MVN where CMN failed */
     cmp_r(opCMP, rn, const_reg(imm));
}

/* arith_signed -- rd := rn op imm.  If imm cannot be formatted as an
   immediate operand but -imm can, the operation op2 (if supplied) is
   applied to -imm instead; failing that, the constant is loaded into
   the scratch register first. */
static void arith_signed(OPDECL, OPDECL2, int rd, int rn, int imm) {
     if (immediate(imm)) {
          op_rri(OP, rd, rn, imm_field);
          return;
     }

     if (op2 != 0 && immediate(-imm)) {
          op_rri(OP2, rd, rn, imm_field);
          return;
     }

     /* The constant load might itself use MVN */
     op_rrr(OP, rd, rn, const_reg(imm));
}

/* arith_immed -- as arith_signed, for operations with no opposite */
#define arith_immed(op, rd, rn, imm)	\
     arith_signed(op, NULLOP, rd, rn, imm)

/* add_immed -- ra := rb + c, subtracting -c where that fits better */
#define add_immed(ra, rb, c) \
     arith_signed(opADD, opSUB, ra, rb, c)

/* arith_compl -- rd := rn op imm.  Like arith_signed, except that the
   alternative operation op2 is tried with the bitwise complement of
   imm, not its negation. */
static void arith_compl(OPDECL, OPDECL2, int rd, int rn, int imm) {
     if (immediate(imm)) {
          op_rri(OP, rd, rn, imm_field);
          return;
     }

     if (op2 != 0 && immediate(~imm)) {
          op_rri(OP2, rd, rn, imm_field);
          return;
     }

     op_rrr(OP, rd, rn, const_reg(imm));
}

/* boolcond -- set r to 0, then overwrite it with 1 by means of the
   conditional move supplied as the opcode */
static void boolcond(OPDECL, int r) {
     const int one = 1;         /* Needs no rotation as an immediate */

     move_immed(r, 0);
     op_ri(OP, r, one);
}

/* vm_patch -- fill in the 24-bit offset field of the branch
   instruction at loc so that it leads to lab */
void vm_patch(code_addr loc, code_addr lab) {
     /* We must hope that segments are allocated near enough to each
        other for any branch that crosses between them to be in range. */

     int *slot = (int *) loc;
     int disp = lab - loc - 8;  // in bytes

     assert((disp & 0x3) == 0);
     disp >>= 2;                // now in words

     if (disp >= 0x800000 || disp < -0x800000)
          vm_panic("branch offset out of range");

     *slot = (disp & 0xffffff) | (*slot & ~0xffffff);
}

/* branch -- emit a branch with an empty offset field and register it
   with vm_branch against the label lab */
static void branch(OPDECL, vmlabel lab) {
     code_addr here = pc;       /* Address of the branch itself */

     branch_i(OP, 0);
     vm_branch(BRANCH, here, lab);
}

/* Comparisons that deliver a Boolean result in ra: perform the
   comparison, then use boolcond with the conditional move op.  The
   floating point versions copy the FP status across with fmstat. */

#define bool_reg(op, ra, rb, rc) \
     cmp_r(opCMP, rb, rc), boolcond(op, ra)

#define bool_immed(op, ra, rb, c) \
     compare_immed(rb, c), boolcond(op, ra)

#define bool_reg_f(op, ra, rb, rc) \
     op_rr(opFCMPS, rb, rc), fmstat(), boolcond(op, ra)

#define bool_reg_d(op, ra, rb, rc) \
     op_rr(opFCMPD, rb, rc), fmstat(), boolcond(op, ra)

/* Conditional branches: perform the comparison, then branch to lab
   with the conditional branch op. */

#define br_reg(op, ra, rb, lab) \
     cmp_r(opCMP, ra, rb), branch(op, lab)

#define br_reg_f(op, ra, rb, lab) \
     op_rr(opFCMPS, ra, rb), fmstat(), branch(op, lab)
 
#define br_reg_d(op, ra, rb, lab) \
     op_rr(opFCMPD, ra, rb), fmstat(), branch(op, lab)
 
#define br_immed(op, ra, b, lab) \
     compare_immed(ra, b), branch(op, lab)

/* Loads and stores for word and unsigned byte */

/* load_store -- ra :=: mem[rb + c].  With no base register the
   address c is loaded into the scratch register; offsets that do not
   fit in twelve bits are added from the scratch register instead. */
static void load_store(OPDECL, int ra, int rb, int c) {
     if (rb == NOREG) {
          ldst_ri(SETBIT(OP, UBIT), ra, const_reg(c), 0);
          return;
     }

     if (c <= -4096 || c >= 4096) {
          ldst_rr(OP, ra, rb, const_reg(c), 0);
          return;
     }

     /* Small offset: encode its magnitude, with UBIT as the sign */
     if (c >= 0)
          ldst_ri(SETBIT(OP, UBIT), ra, rb, c);
     else
          ldst_ri(OP, ra, rb, -c);
}

#define load_word(ra, rb, off) \
     load_store(opLDR, ra, rb, off)

/* Other integer loads and stores */

/* load_store2_ri -- ra :=: mem[rb + c] for the types whose instructions
   have only an eight-bit offset field */
static void load_store2_ri(OPDECL, int ra, int rb, int c) {
     if (rb == NOREG) {
          ldst2_ri(SETBIT(OP, UBIT), ra, const_reg(c), 0);
          return;
     }

     if (c <= -256 || c >= 256) {
          /* Offset too big: put it in the scratch register */
          ldst2_rr(OP, ra, rb, const_reg(c));
          return;
     }

     if (c >= 0)
          ldst2_ri(SETBIT(OP, UBIT), ra, rb, c);
     else
          ldst2_ri(OP, ra, rb, -c);
}

/* load_store2_rrs -- ra :=: mem[rb + (rc << s)].  These instructions
   cannot scale the index, so a nonzero shift means forming the
   address in the scratch register first. */
static void load_store2_rrs(OPDECL, int ra, int rb, int rc, int s) {
     if (s != 0) {
          ldst2_ri(OP, ra, index_reg(rb, rc, s), 0);
          return;
     }

     ldst2_rr(OP, ra, rb, rc);
}

/* Floating point loads and stores */

/* load_store_f -- ra :=: mem[rb + c] for a float register.  The byte
   offset c must be a multiple of 4; the instruction holds it as a
   count of words in an eight-bit field. */
static void load_store_f(OPDECL, int ra, int rb, int c) {
     assert((c&3) == 0);

     if (rb == NOREG) {
          ldstf_ri(SETBIT(OP, UBIT), ra, const_reg(c), 0);
          return;
     }

     if (c <= -1024 || c >= 1024) {
          /* Out of range: form the address in the scratch register */
          add_immed(IP, rb, c);
          ldstf_ri(SETBIT(OP, UBIT), ra, IP, 0);
          return;
     }

     if (c >= 0)
          ldstf_ri(SETBIT(OP, UBIT), ra, rb, c/4);
     else
          ldstf_ri(OP, ra, rb, (-c)/4);
}     

/* Load or store double, perhaps unaligned */
static void load_store_d(OPDECL, int ra, int rb, int c) {
     // The transfer is done as two single-word operations, the second
     // of them on the odd register selected by DBIT, so as to avoid
     // unaligned addressing.  (That is emulated by a trap handler on
     // the Pi, but only very slowly.)

     if (rb != NOREG) {
          load_store_f(OP, ra, rb, c);
          load_store_f(SETBIT(OP, DBIT), ra, rb, c+4);
          return;
     }

     /* Absolute address: load it once and use word offsets 0 and 1 */
     int base = const_reg(c);
     ldstf_ri(SETBIT(OP, UBIT), ra, base, 0);
     ldstf_ri(SETBIT(OP, UBIT|DBIT), ra, base, 1);
}

/* move_reg -- ra := rb, emitting nothing if they are the same register */
static void move_reg(int ra, int rb) {
     if (ra == rb)
          return;

     op_rr(opMOV, ra, rb);
}

/* Number of arguments announced by PREP that are still to be supplied
   by ARG instructions */
static int argp;

/* proc_call -- call the routine whose address is in register ra; all
   the announced arguments must have been supplied by now */
static void proc_call(int ra) {
     assert(argp == 0);
     jump_r(opBLX, ra);
}


// REGISTER MAP

/* Bitmap with one bit for each integer register that is written */
static unsigned regmap = 0;

/* write_reg -- note that register r is written; float registers are
   not tracked */
static void write_reg(int r) {
     if (isfloat(r))
          return;

     regmap |= bit(r);
}

/* W(r) -- the register r, noted as written on the way */
#define W(r) (write_reg(r), r)


// CODE GENERATION INTERFACE

/* badop -- report an operation that the enclosing function does not
   support; relies on a variable named op being in scope */
#define badop() vm_unknown(__FUNCTION__, op)

/* The load/store cases shared by the vm_gen* functions, defined after
   the functions that first use them */
static void vm_load_store_ri(operation op, int ra, int rb, int c);
static void vm_load_store_rrs(operation op, int ra, int rb, int rc, int s);

/* vm_gen1r -- generate an instruction with one register operand:
   JUMP or CALL through a register, ARG to supply the next argument,
   or ZEROf/ZEROd to clear a float register.  Other operations are
   reported with badop. */
void vm_gen1r(operation op, vmreg rega) {
     int ra = rega->vr_reg;

     vm_debug1(op, 1, rega->vr_name);
     vm_space(0);

     switch (op) {
     case JUMP: 
          jump_r(opBX, ra); break;

     case CALL:
          proc_call(ra); break;
          
     case ARG:
          /* Without this check a surplus ARG would index register -1,
             which the reg() mask turns into the PC */
          assert(argp > 0);
          argp--;
          move_reg(R(argp), ra);
          break;

     case ZEROf:
          fmsr(ra, const_reg(0));
          break;

     case ZEROd:
          /* Put a single-precision zero in the low half of ra, then
             widen it to double in place.  This needs the conversion
             single -> double (FCVTDS); FCVTSD would instead read the
             whole, partly uninitialised, register as a double. */
          fmsr(ra, const_reg(0));
          op_rr(opFCVTDS, ra, ra);
          break;

     default:
          badop();
     }
}

/* vm_gen1i -- generate an instruction with one integer operand: PREP
   to announce the number of arguments of a coming call, or ARG to
   supply a constant argument.  Other operations are reported with
   badop. */
void vm_gen1i(operation op, int a) {
     vm_debug1(op, 1, fmt_val(a));
     vm_space(0);

     switch (op) {
     case PREP:
          /* Arguments go in R0 upwards, and at most three are allowed */
          assert(a >= 0 && a <= 3);
          argp = a;
          break;

     case ARG:
          /* Without this check a surplus ARG would index register -1,
             which the reg() mask turns into the PC */
          assert(argp > 0);
          argp--;
          move_immed(R(argp), a);
          break;

     default:
          badop();
     }
}

/* vm_gen1a -- generate an instruction with one address operand; only
   CALL is supported, and is done by loading the address into the
   scratch register */
void vm_gen1a(operation op, void *a) {
     vm_debug1(op, 1, fmt_val((int) a));
     vm_space(0);

     if (op != CALL) {
          badop();
          return;
     }

     proc_call(const_reg((uint) a));
}

/* vm_gen1j -- generate an instruction with one label operand; only the
   unconditional JUMP is supported */
void vm_gen1j(operation op, vmlabel lab) {
     vm_debug1(op, 1, fmt_lab(lab));
     vm_space(0);

     if (op != JUMP) {
          badop();
          return;
     }

     branch(opB, lab);
}

/* vm_gen2rr -- generate an instruction with two register operands:
   moves, negation, complement and the conversions.  Anything else is
   treated as a load or store with offset zero.  Every integer
   destination register is passed through W() so that it is recorded
   in the register map. */
void vm_gen2rr(operation op, vmreg rega, vmreg regb) {
     int ra = rega->vr_reg, rb = regb->vr_reg;

     vm_debug1(op, 2, rega->vr_name, regb->vr_name);
     vm_space(0);

     switch (op) {
     case MOV:
          if (isfloat(ra) && isfloat(rb))
               op_rr(opFMOVS, ra, rb);
          else if (isfloat(ra))
               fmsr(ra, rb);    // Can only happen via SYSTEM.VAL etc.
          else if (isfloat(rb))
               fmrs(W(ra), rb); // Ditto
          else
               move_reg(W(ra), rb); 
          break;

     case MOVq:
          assert(isfloat(ra) && isfloat(rb));
          op_rr(opFMOVD, ra, rb);
          break;

     case NEG:    
          arith_immed(opRSB, W(ra), rb, 0); break;

     case NOT:    
          op_rr(opMVN, W(ra), rb); break;

     case NEGf:
          op_rr(opFNEGS, ra, rb); break;

     case NEGd:
          op_rr(opFNEGD, ra, rb); break;

     case CONVif:
          /* Transfer the integer bits, then convert in place */
          fmsr(ra, rb); 
          op_rr(opFSITOS, ra, ra); 
          break;

     case CONVid:
          fmsr(ra, rb); 
          op_rr(opFSITOD, ra, ra); 
          break;

     case CONVfi:
          /* Convert into the temporary F14, then transfer the result
             to the integer register, which is thereby written */
          op_rr(opFTOSIZS, F14, rb);
          fmrs(W(ra), F14);
          break;

     case CONVdi:
          op_rr(opFTOSIZD, F14, rb);
          fmrs(W(ra), F14);
          break;

     case CONVfd:
          op_rr(opFCVTDS, ra, rb); break;

     case CONVdf:
          op_rr(opFCVTSD, ra, rb); break;

     case CONVis: 
          op_rr(opSXTH, W(ra), rb); break;

     default:
          vm_load_store_ri(op, ra, rb, 0);
     }
}

/* vm_gen2ri -- generate an instruction with a register and an integer
   operand: MOV of a constant, or GETARG to fetch an incoming argument.
   Anything else is treated as a load or store at an absolute address. */
void vm_gen2ri(operation op, vmreg rega, int b) {
     int ra = rega->vr_reg;

     vm_debug1(op, 2, rega->vr_name, fmt_val(b));
     vm_space(0);

     if (op == MOV)
          move_immed(W(ra), b);
     else if (op == GETARG)
          move_reg(W(ra), R(b));
     else
          vm_load_store_ri(op, ra, NOREG, b);
}

/* vm_gen2ra -- generate an instruction with a register and an address
   operand; only MOV is supported, and loads the address as a constant */
void vm_gen2ra(operation op, vmreg rega, void *b) {
     int ra = rega->vr_reg;

     vm_debug1(op, 2, rega->vr_name, fmt_val((int) b));
     vm_space(0);

     if (op != MOV) {
          badop();
          return;
     }

     move_immed(W(ra), (uint) b);
}

/* vm_gen2rj -- generate an instruction with a register and a label
   operand; only MOV is supported.  It loads the register PC-relative
   from a fresh literal slot, and registers that slot with vm_branch
   as an ABS reference to the label. */
void vm_gen2rj(operation op, vmreg rega, vmlabel b) {
     int ra = rega->vr_reg;

     vm_debug1(op, 2, rega->vr_name, fmt_lab(b));
     vm_space(0);

     if (op != MOV) {
          badop();
          return;
     }

     code_addr slot = vm_literal(4);
     load_store(opLDR, W(ra), PC, slot - (pc+8));
     vm_branch(ABS, slot, b);
}

/* vm_gen3rrr -- generate an instruction with three register operands:
   arithmetic, shifts and comparisons delivering a Boolean.  Anything
   else is treated as an indexed load or store with no scaling. */
void vm_gen3rrr(operation op, vmreg rega, vmreg regb, vmreg regc) {
     int ra = rega->vr_reg, rb = regb->vr_reg, rc = regc->vr_reg;

     vm_debug1(op, 3, rega->vr_name, regb->vr_name, regc->vr_name);
     vm_space(0);

     switch (op) {
     /* Integer arithmetic and logic */
     case ADD:  op_rrr(opADD, W(ra), rb, rc); break;
     case SUB:  op_rrr(opSUB, W(ra), rb, rc); break;
     case MUL:  op_mul(opMUL, W(ra), rb, rc); break;
     case AND:  op_rrr(opAND, W(ra), rb, rc); break;
     case OR:   op_rrr(opORR, W(ra), rb, rc); break;
     case XOR:  op_rrr(opEOR, W(ra), rb, rc); break;

     /* Shifts by an amount in a register */
     case LSH:  shift_r(opLSL, W(ra), rb, rc); break;
     case RSH:  shift_r(opASR, W(ra), rb, rc); break;
     case RSHu: shift_r(opLSR, W(ra), rb, rc); break;
     case ROR:  shift_r(opROR, W(ra), rb, rc); break;

     /* Single-precision arithmetic */
     case ADDf: op_rrr(opFADDS, ra, rb, rc); break;
     case SUBf: op_rrr(opFSUBS, ra, rb, rc); break;
     case MULf: op_rrr(opFMULS, ra, rb, rc); break;
     case DIVf: op_rrr(opFDIVS, ra, rb, rc); break;

     /* Double-precision arithmetic */
     case ADDd: op_rrr(opFADDD, ra, rb, rc); break;
     case SUBd: op_rrr(opFSUBD, ra, rb, rc); break;
     case MULd: op_rrr(opFMULD, ra, rb, rc); break;
     case DIVd: op_rrr(opFDIVD, ra, rb, rc); break;

     /* Integer comparisons */
     case EQ:   bool_reg(opMOVEQ, W(ra), rb, rc); break;
     case NE:   bool_reg(opMOVNE, W(ra), rb, rc); break;
     case LT:   bool_reg(opMOVLT, W(ra), rb, rc); break;
     case LE:   bool_reg(opMOVLE, W(ra), rb, rc); break;
     case GT:   bool_reg(opMOVGT, W(ra), rb, rc); break;
     case GE:   bool_reg(opMOVGE, W(ra), rb, rc); break;

     /* Single-precision comparisons: "less" uses the LO and LS
        conditions in place of LT and LE */
     case EQf:  bool_reg_f(opMOVEQ, W(ra), rb, rc); break;
     case NEf:  bool_reg_f(opMOVNE, W(ra), rb, rc); break;
     case LTf:  bool_reg_f(opMOVLO, W(ra), rb, rc); break;
     case LEf:  bool_reg_f(opMOVLS, W(ra), rb, rc); break;
     case GTf:  bool_reg_f(opMOVGT, W(ra), rb, rc); break;
     case GEf:  bool_reg_f(opMOVGE, W(ra), rb, rc); break;

     /* Double-precision comparisons, with the same conditions */
     case EQd:  bool_reg_d(opMOVEQ, W(ra), rb, rc); break;
     case NEd:  bool_reg_d(opMOVNE, W(ra), rb, rc); break;
     case LTd:  bool_reg_d(opMOVLO, W(ra), rb, rc); break;
     case LEd:  bool_reg_d(opMOVLS, W(ra), rb, rc); break;
     case GTd:  bool_reg_d(opMOVGT, W(ra), rb, rc); break;
     case GEd:  bool_reg_d(opMOVGE, W(ra), rb, rc); break;

     default:
          vm_load_store_rrs(op, ra, rb, rc, 0);
     }
}

/* vm_load_store_rrs -- load or store ra at address rb + (rc << s).
   Float and double transfers first form the address in the scratch
   register, since their instructions take no index register. */
static void vm_load_store_rrs(operation op, int ra, int rb, int rc, int s) {
     switch (op) {
     /* Words: either integer or single-precision float */
     case LDW:
          if (! isfloat(ra))
               ldst_rr(opLDR, W(ra), rb, rc, s);
          else
               load_store_f(opFLDS, ra, index_reg(rb, rc, s), 0);
          break;

     case STW:
          if (! isfloat(ra))
               ldst_rr(opSTR, ra, rb, rc, s);
          else
               load_store_f(opFSTS, ra, index_reg(rb, rc, s), 0);
          break;

     /* Halfwords */
     case LDS:  load_store2_rrs(opLDSH, W(ra), rb, rc, s); break;
     case LDSu: load_store2_rrs(opLDRH, W(ra), rb, rc, s); break;
     case STS:  load_store2_rrs(opSTRH, ra, rb, rc, s); break;

     /* Bytes */
     case LDB:  load_store2_rrs(opLDSB, W(ra), rb, rc, s); break;
     case LDBu: ldst_rr(opLDRB, W(ra), rb, rc, s); break;
     case STB:  ldst_rr(opSTRB, ra, rb, rc, s); break;

     /* Doubles, moved as two single words */
     case LDQ:
          assert(isfloat(ra));
          load_store_d(opFLDS, ra, index_reg(rb, rc, s), 0);
          break;

     case STQ:
          assert(isfloat(ra));
          load_store_d(opFSTS, ra, index_reg(rb, rc, s), 0);
          break;

     default:
          badop();
     }
}

/* vm_gen3rri -- generate an instruction with two register operands and
   a constant: arithmetic, shifts and comparisons delivering a Boolean.
   Anything else is treated as a load or store with offset c. */
void vm_gen3rri(operation op, vmreg rega, vmreg regb, int c) {
     int ra = rega->vr_reg, rb = regb->vr_reg;

     vm_debug1(op, 3, rega->vr_name, regb->vr_name, fmt_val(c));
     vm_space(0);

     switch (op) {
     /* Arithmetic and logic: ADD/SUB and AND/BIC can stand in for each
        other when that makes the constant fit an immediate field */
     case ADD:  add_immed(W(ra), rb, c); break;
     case SUB:  arith_signed(opSUB, opADD, W(ra), rb, c); break;
     case AND:  arith_compl(opAND, opBIC, W(ra), rb, c); break;
     case OR:   arith_immed(opORR, W(ra), rb, c); break;
     case XOR:  arith_immed(opEOR, W(ra), rb, c); break;
     case MUL:  op_mul(opMUL, W(ra), rb, const_reg(c)); break;

     /* Shifts by a constant amount */
     case LSH:  shift_i(opLSL, W(ra), rb, c); break;
     case RSH:  shift_i(opASR, W(ra), rb, c); break;
     case RSHu: shift_i(opLSR, W(ra), rb, c); break;
     case ROR:  shift_i(opROR, W(ra), rb, c); break;

     /* Comparisons with a constant */
     case EQ:   bool_immed(opMOVEQ, W(ra), rb, c); break;
     case NE:   bool_immed(opMOVNE, W(ra), rb, c); break;
     case LT:   bool_immed(opMOVLT, W(ra), rb, c); break;
     case LE:   bool_immed(opMOVLE, W(ra), rb, c); break;
     case GT:   bool_immed(opMOVGT, W(ra), rb, c); break;
     case GE:   bool_immed(opMOVGE, W(ra), rb, c); break;

     default:
          vm_load_store_ri(op, ra, rb, c);
     }
}

/* vm_load_store_ri -- translate a load or store with operands ra, rb
   and the constant c.  Called from vm_gen3rri for opcodes it does not
   handle itself; an opcode not listed here ends in badop().  W() is
   applied to ra only for loads into an integer register. */
static void vm_load_store_ri(operation op, int ra, int rb, int c) {
     switch (op) {
     /* Word-sized accesses: choose the float or integer form by register */
     case LDW:
          if (!isfloat(ra))
               load_store(opLDR, W(ra), rb, c);
          else
               load_store_f(opFLDS, ra, rb, c);
          break;
     case STW:
          if (!isfloat(ra))
               load_store(opSTR, ra, rb, c);
          else
               load_store_f(opFSTS, ra, rb, c);
          break;

     /* Halfword accesses */
     case LDS:
          load_store2_ri(opLDSH, W(ra), rb, c);
          break;
     case LDSu:
          load_store2_ri(opLDRH, W(ra), rb, c);
          break;
     case STS:
          load_store2_ri(opSTRH, ra, rb, c);
          break;

     /* Byte accesses: only the signed load goes through load_store2_ri */
     case LDB:
          load_store2_ri(opLDSB, W(ra), rb, c);
          break;
     case LDBu:
          load_store(opLDRB, W(ra), rb, c);
          break;
     case STB:
          load_store(opSTRB, ra, rb, c);
          break;

     /* Double-width accesses, passed to load_store_d */
     case LDQ:
          load_store_d(opFLDS, ra, rb, c);
          break;
     case STQ:
          load_store_d(opFSTS, ra, rb, c);
          break;

     default:
          badop();
          break;
     }
}

/* vm_gen3rrj -- translate a conditional branch that compares two registers
   and jumps to a label.  Integer comparisons go through br_reg, and the
   single and double floating-point ones through br_reg_f and br_reg_d.
   An opcode not listed here ends in badop(). */
void vm_gen3rrj(operation op, vmreg rega, vmreg regb, vmlabel lab) {
     int a = rega->vr_reg;
     int b = regb->vr_reg;

     vm_debug1(op, 3, rega->vr_name, regb->vr_name, fmt_lab(lab));
     vm_space(0);

     switch (op) {
     /* Signed integer comparisons */
     case BEQ:
          br_reg(opBEQ, a, b, lab);
          break;
     case BNE:
          br_reg(opBNE, a, b, lab);
          break;
     case BLT:
          br_reg(opBLT, a, b, lab);
          break;
     case BLE:
          br_reg(opBLE, a, b, lab);
          break;
     case BGT:
          br_reg(opBGT, a, b, lab);
          break;
     case BGE:
          br_reg(opBGE, a, b, lab);
          break;

     /* Unsigned integer comparisons */
     case BLTu:
          br_reg(opBLO, a, b, lab);
          break;
     case BLEu:
          br_reg(opBLS, a, b, lab);
          break;
     case BGTu:
          br_reg(opBHI, a, b, lab);
          break;
     case BGEu:
          br_reg(opBHS, a, b, lab);
          break;

/*
In this table, the four central columns correspond to the four
outcomes of a floating-point comparison, shown with their encodings in
the status bits of the ARM.  On the left are the ten conditional
branches supported by Thunder, and on the right the corresponding ARM
instructions.

FCMP result:| <     =     >    Unord|
NZCV bits:  |1000  0110  0010  0011 |
------------+-----------------------+-------------
  Thunder   |                       |         ARM
            |                       |
  BEQ       |  F     T     F     F  | Z       BEQ
  BLT       |  T     F     F     F  | !C      BLO (or BMI)
  BLE       |  T     T     F     F  | Z|!C    BLS
  BGT       |  F     F     T     F  | !Z&N=V  BGT
  BGE       |  F     T     T     F  | N=V     BGE
  BNE       |  T     F     T     T  | !Z      BNE
  BNLT      |  F     T     T     T  | C       BHS (or BPL)
  BNLE      |  F     F     T     T  | !Z&C    BHI
  BNGT      |  T     T     F     T  | Z|N!=V  BLE
  BNGE      |  T     F     F     T  | N!=V    BLT
*/

     /* Single-precision comparisons, using the table above */
     case BEQf:
          br_reg_f(opBEQ, a, b, lab);
          break;
     case BLTf:
          br_reg_f(opBLO, a, b, lab);
          break;
     case BLEf:
          br_reg_f(opBLS, a, b, lab);
          break;
     case BGTf:
          br_reg_f(opBGT, a, b, lab);
          break;
     case BGEf:
          br_reg_f(opBGE, a, b, lab);
          break;
     case BNEf:
          br_reg_f(opBNE, a, b, lab);
          break;
     case BNLTf:
          br_reg_f(opBHS, a, b, lab);
          break;
     case BNLEf:
          br_reg_f(opBHI, a, b, lab);
          break;
     case BNGTf:
          br_reg_f(opBLE, a, b, lab);
          break;
     case BNGEf:
          br_reg_f(opBLT, a, b, lab);
          break;

     /* Double-precision comparisons, same condition mapping */
     case BEQd:
          br_reg_d(opBEQ, a, b, lab);
          break;
     case BLTd:
          br_reg_d(opBLO, a, b, lab);
          break;
     case BLEd:
          br_reg_d(opBLS, a, b, lab);
          break;
     case BGTd:
          br_reg_d(opBGT, a, b, lab);
          break;
     case BGEd:
          br_reg_d(opBGE, a, b, lab);
          break;
     case BNEd:
          br_reg_d(opBNE, a, b, lab);
          break;
     case BNLTd:
          br_reg_d(opBHS, a, b, lab);
          break;
     case BNLEd:
          br_reg_d(opBHI, a, b, lab);
          break;
     case BNGTd:
          br_reg_d(opBLE, a, b, lab);
          break;
     case BNGEd:
          br_reg_d(opBLT, a, b, lab);
          break;

     default:
          badop();
          break;
     }
}

/* vm_gen3rij -- translate a conditional branch that compares a register
   with the constant b and jumps to a label.  Only the integer branches
   are accepted; any other opcode ends in badop(). */
void vm_gen3rij(operation op, vmreg rega, int b, vmlabel lab) {
     int a = rega->vr_reg;

     vm_debug1(op, 3, rega->vr_name, fmt_val(b), fmt_lab(lab));
     vm_space(0);

     switch (op) {
     /* Signed comparisons */
     case BEQ:
          br_immed(opBEQ, a, b, lab);
          break;
     case BNE:
          br_immed(opBNE, a, b, lab);
          break;
     case BLT:
          br_immed(opBLT, a, b, lab);
          break;
     case BLE:
          br_immed(opBLE, a, b, lab);
          break;
     case BGT:
          br_immed(opBGT, a, b, lab);
          break;
     case BGE:
          br_immed(opBGE, a, b, lab);
          break;

     /* Unsigned comparisons */
     case BLTu:
          br_immed(opBLO, a, b, lab);
          break;
     case BLEu:
          br_immed(opBLS, a, b, lab);
          break;
     case BGTu:
          br_immed(opBHI, a, b, lab);
          break;
     case BGEu:
          br_immed(opBHS, a, b, lab);
          break;

     default:
          badop();
          break;
     }
}

/* vm_gen4rrrs -- translate an instruction with three registers and a
   constant s.  ADD is handled directly by op_rrrs; every other opcode is
   passed to vm_load_store_rrs.
   NOTE(review): s is presumably a shift applied to regc -- confirm
   against op_rrrs. */
void vm_gen4rrrs(operation op, vmreg rega, vmreg regb, vmreg regc, int s) {
     int a = rega->vr_reg;
     int b = regb->vr_reg;
     int x = regc->vr_reg;

     vm_debug1(op, 4, rega->vr_name, regb->vr_name, regc->vr_name, fmt_val(s));
     vm_space(0);

     if (op == ADD) {
          op_rrrs(opADD, W(a), b, x, s);
     } else {
          vm_load_store_rrs(op, a, b, x, s);
     }
}


/* Prelude and postlude */

/* entry -- value of pc at the start of the current procedure; set by
   vm_prelude and used by vm_postlude to patch the word at entry + 4 */
static code_addr entry;
/* locals -- the locs argument of vm_prelude rounded up to a multiple
   of 8; this amount is subtracted from SP in the prelude */
static int locals;

/* vm_prelude -- begin compiling a procedure.  Resets the per-procedure
   state, emits the entry sequence and returns the entry address cast
   to int.  The parameter n is not used here; locs is rounded up to a
   multiple of 8 before being subtracted from SP. */
int vm_prelude(int n, int locs) {
     /* Start afresh: no registers recorded in regmap yet */
     regmap = 0;
     locals = (locs + 7) & ~7;
#ifndef USE_MOVW
     nlits = 0;
#endif

     entry = pc;

     move_reg(IP, SP);

     /* Emit a zero word as a placeholder: vm_postlude overwrites the
        word at entry + 4 with the stmfd once regmap is known */
     vm_debug2("stmfd sp!, ...\n");
     word(0);

     move_reg(FP, SP);

     if (locals > 0) {
          arith_immed(opSUB, SP, SP, locals);
     }

     return (int) entry;
}

/* vm_chain -- emit a branch at the current pc and patch it to refer
   to p; unless USE_MOVW is defined, also reset nlits to zero */
void vm_chain(code_addr p) {
     code_addr branch_at = pc;  /* where the branch is about to go */

     branch_i(opB, 0);          /* emitted with operand 0, patched below */
     vm_patch(branch_at, p);

#ifndef USE_MOVW
     nlits = 0;
#endif
}

/* parity -- parity of a 16-bit quantity: returns 1 if x has an odd
   number of one bits and 0 if it has an even number */
int parity(unsigned short x) {
     unsigned folded = x;
     int shift;

     /* Fold the upper half onto the lower half by XOR, halving the
        width each time (8, 4, 2, 1), so that bit 0 ends up holding
        the XOR of all sixteen original bits */
     for (shift = 8; shift >= 1; shift /= 2)
          folded ^= folded >> shift;

     return (int) (folded & 1);
}

/* vm_postlude -- finish compiling procedure.  Reduces regmap to the
   registers that need saving, overwrites the placeholder word at
   entry + 4 (emitted by vm_prelude) with the register-save instruction,
   and emits the ldmfd that restores the registers and loads PC. */
void vm_postlude(void) {
     // Only save relevant registers r4 -- r10
     regmap &= range(4, 10);
     
     // Must save an even number of registers overall.  FP, IP and LR are
     // always in the list, so regmap itself must have an odd number of
     // bits set.  (Presumably this keeps the stack 8-byte aligned -- confirm.)
     if (parity(regmap) == 0)
          // Add another register: adding 0x10 carries past the registers
          // already present from r4 upwards, so the OR sets the lowest
          // clear bit, e.g. if regmap = 0x0030 it becomes 0x0070
          regmap |= regmap+0x10;

     vm_debug2("regmap = %#x\n", regmap);

     // stmfd sp!, {r4-r10, fp, ip, lr}
     // Overwrites the word at entry + 4; this assumes the move_reg(IP, SP)
     // at the start of the prelude occupies exactly 4 bytes.
     * (int *) (entry + 4) =
          fmt_instr(GETOP(opSTMFDw), 0, reg(SP),
		    regmap|bit(FP)|bit(IP)|bit(LR));

     // ldmfd fp, {r4-r10, fp, sp, pc}
     // The list has the same number of registers as the stmfd above, with
     // SP and PC in place of IP and LR.
     vm_debug2("ldmfd fp, ...\n");
     instr(GETOP(opLDMFD), 0, reg(FP),
           regmap|bit(FP)|bit(SP)|bit(PC));
}

#ifdef DEBUG
/* vm_print -- print the word at address p as eight hexadecimal digits
   and return 4 (presumably the number of bytes printed -- confirm
   against the caller) */
int vm_print(code_addr p) {
     unsigned insn = * (unsigned *) p;

     printf("%08x", insn);
     return 4;
}
#endif
