mruby/実行コード生成を読むのバックアップの現在との差分(No.4)

バックアップ一覧
差分を表示
ソースを表示
バックアップを表示
mruby/実行コード生成を読むへ行く。
- 1 (2012-05-24 (木) 06:25:05)
- 2 (2012-05-27 (日) 23:03:41)
- 3 (2012-06-06 (水) 22:42:38)
- 4 (2012-06-07 (木) 06:13:51)
- 5 (2012-06-07 (木) 21:19:06)
- 6 (2012-06-07 (木) 21:19:06)
追加された行はこの色です。
削除された行はこの色です。
[[mrubyを読む]]
 
 #contents
 
 *はじめに [#ud081715]
 
 スクリプトの解析が終わったら今度はコード生成をするようです。ここら辺、MRI((MRIとはMatz Ruby Implementationの略であり、Magnetic Resonance Imagingのことではない))よりYARVと似ているような気がします。というわけで、mrb_generate_code()を読むことにします。
 
 * mrb_generate_code(src/codegen.c) [#qc9db2a0]
 
 それではさっそくmrb_generate_code()を見てみましょう。
 
 #code(C){{
 int
 mrb_generate_code(mrb_state *mrb, node *tree)
 {
   int start = mrb->irep_len;
   int n;
 
   n = codegen_start(mrb, tree);
   if (n < 0) return n;
 
   return start;
 }
 
 static int
 codegen_start(mrb_state *mrb, node *tree)
 {
   codegen_scope *scope = scope_new(mrb, 0, 0);
 
   if (!scope) {
     return -1;
   }
   scope->mrb = mrb;
 
   if (setjmp(scope->jmp) != 0) {
     return -1;
   }
   // prepare irep
   codegen(scope, tree, NOVAL);
   return 0;
 }
 }}
 
 codegen()はnodeの種類に応じて実行コード生成を行う巨大なswitch文です。このcodegen()が実行コード生成の本体のようです。また、codegen_scopeというのが現在のスコープに関する情報を持っているようなので実行コード生成読解の鍵となるデータ構造と考えていいでしょう。
 
 * コード生成してみる [#udb8a47b]
 
 コード生成時にデータ構造が具体的にどのようになるかは実例で確認、てことでさっそく実際にコード生成をしてみましょう。スクリプト生成の時にも書きましたがmrubyに--verboseオプションを付けると実行コードがダンプされます。
 
  ./mruby.exe -c --verbose ../../montecarlo.rb
  irep 116 nregs=7 nlocals=4 pools=2 syms=5
  000 OP_LOADNIL  R4
  001 OP_LOADNIL  R5
  002 OP_CLASS    R4      'MonteCarlo'
  003 OP_EXEC     R4      I(117)
  004 OP_LOADI    R4      10000
  005 OP_LOADI    R5      10000
  006 OP_LOADNIL  R6
  007 OP_SEND     R4      '*'     1
  008 OP_MOVE     R1      R4
  009 OP_GETCONST R4      'MonteCarlo'
  010 OP_LOADNIL  R5
  011 OP_SEND     R4      'new'   0
  012 OP_MOVE     R5      R1
  013 OP_LOADNIL  R6
  014 OP_SEND     R4      'pi'    1
  015 OP_MOVE     R2      R4
  016 OP_LOADSELF R4
  017 OP_STRING   R5      'pi = '
  018 OP_MOVE     R6      R2
  019 OP_STRCAT   R5      R6
  020 OP_STRING   R6      ''
  021 OP_STRCAT   R5      R6
  022 OP_LOADNIL  R6
  023 OP_SEND     R4      'puts'  1
  024 OP_STOP
  
  irep 117 nregs=3 nlocals=2 pools=0 syms=1
  000 OP_TCLASS   R2
  001 OP_LAMBDA   R3      I(118)  1
  002 OP_METHOD   R2      'pi'
  003 OP_LOADNIL  R2
  004 OP_RETURN   R4
  
  irep 118 nregs=7 nlocals=5 pools=0 syms=4
  000 OP_ENTER    1:0:0:0:0:0:0
  001 OP_LOADI    R3      0
  002 OP_LOADI    R5      1
  003 OP_MOVE     R6      R1
  004 OP_RANGE    R5      R5      0
  005 OP_LAMBDA   R6      I(119)  2
  006 OP_SEND     R5      'each'  0
  007 OP_MOVE     R5      R3
  008 OP_LOADNIL  R6
  009 OP_SEND     R5      'to_f'  0
  010 OP_MOVE     R6      R1
  011 OP_LOADNIL  R7
  012 OP_SEND     R5      '/'     1
  013 OP_LOADI    R6      4
  014 OP_LOADNIL  R7
  015 OP_SEND     R5      '*'     1
  016 OP_RETURN   R5
  
  irep 119 nregs=7 nlocals=4 pools=0 syms=4
  000 OP_LOADSELF R4
  001 OP_LOADNIL  R5
  002 OP_SEND     R4      'rand'  0
  003 OP_MOVE     R1      R4
  004 OP_LOADSELF R4
  005 OP_LOADNIL  R5
  006 OP_SEND     R4      'rand'  0
  007 OP_MOVE     R2      R4
  008 OP_MOVE     R4      R1
  009 OP_MOVE     R5      R1
  010 OP_LOADNIL  R6
  011 OP_SEND     R4      '*'     1
  012 OP_MOVE     R5      R2
  013 OP_MOVE     R6      R2
  014 OP_LOADNIL  R7
  015 OP_SEND     R5      '*'     1
  016 OP_LOADNIL  R6
  017 OP_ADD      R4      '+'     1
  018 OP_LOADI    R5      1
  019 OP_LOADNIL  R6
  020 OP_LE       R4      '<='    1
  021 OP_JMPNOT   R4      028
  022 OP_GETUPVAR R4      3       0
  023 OP_LOADI    R5      1
  024 OP_LOADNIL  R6
  025 OP_ADD      R4      '+'     2
  026 OP_SETUPVAR R4      3       0
  027 OP_LOADNIL  R4
  028 OP_RETURN   R4
 
 トップレベル、クラス定義、メソッド定義、ブロックの4つのコードブロックが出力されることがわかります。また、以下のことがわかります。
 
 -symsはコードブロック内で使っているシンボルの数と思われる
 -poolsはコードブロック内で使っている文字列の数と思われる
 -nlocalsはコードブロック内のローカル変数の数と思われるがやや数が合わない
 -nregsはコードブロック内で使っているレジスタ数な気がするがややずれてるところがある
 
 なお、各実行コードの説明はsrc/opcode.h内に書かれています。
 
 ところで、irepのrepって何の略なんでしょうね？iはInstructionだと思いますが。
 
 ちなみに、irepの番号がいきなり116から始まっているのは初期化時にRubyで書かれたライブラリの定義コードを読み込んでいるためと思われます。（つまり、115以前はライブラリ定義のirep）
 
 ** NODE_SCOPE [#laf52f6e]
 
 ではnodeツリーをたどってどうやってコードが生成されていくかを眺めていきましょう。まずはルートのNODE_SCOPEです。なお、今後示すcase文はcodegen()の巨大なswitch文中のcase文です。
 
 #code(C){{
   case NODE_SCOPE:
     scope_body(s, tree);
     break;
 }}
 
 #code(C){{
 static int
 scope_body(codegen_scope *s, node *tree)
 {
   codegen_scope *scope = scope_new(s->mrb, s, tree->car);
   int idx = scope->idx;
 
   if (!s->iseq) {
     codegen(scope, tree->cdr, VAL);
     genop(scope, MKOP_A(OP_STOP, 0));
   }
   else {
     codegen(scope, tree->cdr, VAL);
     genop(scope, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
   }
   scope_finish(scope, idx);
 
   return idx - s->idx;
 }
 }}
 
 というわけで新しいスコープを作ってcodegen()を再帰呼び出ししています。トップレベルなのでs->iseqはNULL、すなわち、実行コードの最後はOP_STOPが埋め込まれるようです。
 
 scope_new()、scope_finish()も見てみましょう。
 
 #code(C){{
 static codegen_scope*
 scope_new(mrb_state *mrb, codegen_scope *prev, node *lv)
 {
   mrb_pool *pool = mrb_pool_open(mrb);
   codegen_scope *p = mrb_pool_alloc(pool, sizeof(codegen_scope));
   if (!p) return 0;
 
   memset(p, 0, sizeof(codegen_scope));
   p->mrb = mrb;
   p->mpool = pool;
   if (!prev) return p;
   p->prev = prev;
   p->ainfo = -1;
 
   p->mrb = prev->mrb;
   p->icapa = 1024;
   p->iseq = mrb_malloc(mrb, sizeof(mrb_code)*p->icapa);
 
   p->pcapa = 32;
   p->pool = mrb_malloc(mrb, sizeof(mrb_value)*p->pcapa);
 
   p->syms = mrb_malloc(mrb, sizeof(mrb_sym)*256);
 
   p->lv = lv;
   p->sp += node_len(lv)+2;
   p->nlocals = p->sp;
 
   p->idx = mrb->irep_len++;
 
   return p;
 }
 
 static void
 scope_finish(codegen_scope *s, int idx)
 {
   mrb_state *mrb = s->mrb;
   mrb_irep *irep;
 
   mrb_add_irep(mrb, idx);
   irep = mrb->irep[idx] = mrb_malloc(mrb, sizeof(mrb_irep));
 
   irep->idx = idx;
   irep->flags = 0;
   if (s->iseq) {
     irep->iseq = codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->pc);
     irep->ilen = s->pc;
   }
   if (s->pool) {
     irep->pool = codegen_realloc(s, s->pool, sizeof(mrb_value)*s->plen);
     irep->plen = s->plen;
   }
   if (s->syms) {
     irep->syms = codegen_realloc(s, s->syms, sizeof(mrb_sym)*s->slen);
     irep->slen = s->slen;
   }
 
   irep->nlocals = s->nlocals;
   irep->nregs = s->nregs;
 
   mrb_pool_close(s->mpool);
 }
 }}
 
 nlocalsがローカル変数の数と一致しない理由がわかりました。そういえばYARVでも特殊変数用にスタックを割り当てていた気がします。本当にそうなのかはコード実行のところで見ることにします。
 nlocalsがローカル変数の数と一致しない理由がわかりました。そういえばYARVでも特殊変数用にスタックを割り当てていた気がします。本当にそうなのかはコード実行のところで見ることにします。((ちなみに2012/10/16にnode_len(lv)+2からnode_len(lv)+1に直されました))
 
 また、scope_finish()を見るとわかるようにスコープを閉じるとmrb_stateにirepが追加されるようです。
 
 **NODE_CLASS [#mc225f51]
 
 次にNODE_CLASSです。
 
 #code(C){{
   case NODE_CLASS:
     {
       int idx;
 
       if (tree->car->car == (node*)0) {
         genop(s, MKOP_A(OP_LOADNIL, cursp()));
         push();
       }
       else if (tree->car->car == (node*)1) {
         genop(s, MKOP_A(OP_OCLASS, cursp()));
         push();
       }
       else {
         codegen(s, tree->car->car, VAL);
       }
       if (tree->cdr->car) {
         codegen(s, tree->cdr->car, VAL);
       }
       else {
         genop(s, MKOP_A(OP_LOADNIL, cursp()));
         push();
       }
       pop(); pop();
       idx = new_msym(s, (mrb_sym)tree->car->cdr);
       genop(s, MKOP_AB(OP_CLASS, cursp(), idx));
       idx = scope_body(s, tree->cdr->cdr->car);
       genop(s, MKOP_ABx(OP_EXEC, cursp(), idx));
       if (val) {
         push();
       }
     }
     break;
 }}
 
 parse.yに書かれたNODE_CLASSの構造を考えると初めのif文で生成しているコードはクラスを定義するモジュールの参照、2つ目のif文でスーパークラスを参照しているようです。今回はトップレベルに、スーパークラス指定はなしなので両方OP_LOADNILが埋め込まれることになります。その後、クラス定義をしているirepを作ってそれを呼び出すコードを生成しています。
 
 **NODE_DEF [#g15f08d4]
 
 深さ優先にクラス定義の中に入っていくことにします。というわけでNODE_DEFを見てみます。
 
 #code(C){{
   case NODE_DEF:
     {
       int sym = new_msym(s, (mrb_sym)tree->car);
       int idx = lambda_body(s, tree->cdr, 0);
 
       genop(s, MKOP_A(OP_TCLASS, cursp()));
       push();
       genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_METHOD));
       pop();
       genop(s, MKOP_AB(OP_METHOD, cursp(), sym));
       if (val) {
         genop(s, MKOP_A(OP_LOADNIL, cursp()));
       }
     }
     break;
 }}
 
 lambda_body()ですがちょっと長めです。部分部分にわけて説明します。
 
 #code(C){{
 static int
 lambda_body(codegen_scope *s, node *tree, int blk)
 {
   int idx, base = s->idx;
   mrb_code c;
 
   s = scope_new(s->mrb, s, tree->car);
   idx = s->idx;
 }}
 
 まず新しいスコープを作っています。
 
 #code(C){{
   if (blk) {
     struct loopinfo *lp = loop_push(s, LOOP_BLOCK);
     lp->pc1 = new_label(s);
   }
 }
 }}
 
 ブロック定義の場合の処理がされていますが今はメソッド定義なので飛ばします。
 
 #code(C){{
   tree = tree->cdr;
   if (tree->car) {
     int ma, oa, ra, pa, ka, kd, ba, a;
     int pos, i;
     node *n, *opt;
 
     ma = node_len(tree->car->car);
     n = tree->car->car;
     while (n) {
       n = n->cdr;
     }
     oa = node_len(tree->car->cdr->car);
     ra = tree->car->cdr->cdr->car ? 1 : 0;
     pa = node_len(tree->car->cdr->cdr->cdr->car);
     ka = kd = 0;
     ba = tree->car->cdr->cdr->cdr->cdr ? 1 : 0;
 
     a = ((ma & 0x1f) << 18)
       | ((oa & 0x1f) << 13)
       | ((ra & 1) << 12)
       | ((pa & 0x1f) << 7)
       | ((ka & 0x1f) << 2)
       | ((kd & 1)<< 1)
       | (ba & 1);
     s->ainfo = (((ma+oa) & 0x3f) << 6) /* (12bits = 6:1:5) */
       | ((ra & 1) << 5)
       | (pa & 0x1f);
     genop(s, MKOP_Ax(OP_ENTER, a));
 }}
 
 tree->carは引数情報が入っています。それぞれ、
 
 :ma|通常引数
 :oa|オプション引数
 :ra|残余引数
 :pa|ポスト引数((いつの間にかこんな引数種別増えたんですねぇ、処理系が大変になるだけでは・・・))
 :ka|キーワード引数((CRubyでもまだサポートされていません))
 :ba|ブロック引数
 
 を示しており、その情報がコードに埋め込まれるようです。
 
 #code(C){{
     pos = new_label(s);
     for (i=0; i<oa; i++) {
       new_label(s);
       genop(s, MKOP_Ax(OP_JMP, 0));
     }
     if (oa > 0) {
       genop(s, MKOP_Ax(OP_JMP, 0));
     }
     opt = tree->car->cdr->car;
     i = 0;
     while (opt) {
       int idx;
 
       dispatch(s, pos+i);
       codegen(s, opt->car->cdr, VAL);
       idx = lv_idx(s, (mrb_sym)opt->car->car);
       pop();
       genop_peep(s, MKOP_AB(OP_MOVE, idx, cursp()), NOVAL);
       i++;
       opt = opt->cdr;
     }
     if (oa > 0) {
       dispatch(s, pos+i);
     }
   }
 }}
 
 オプション引数の処理を行っています。コードをそのまま解説してもわかる人は少数でしょうからまず実例を示します。
 
  def foo(a = 1, b = 'xxx')
  end
 
 上記のRubyスクリプトの実行コードは以下のようになります。
 
  $ ./mruby.exe -c --verbose ../../optarg.rb
  irep 117 nregs=6 nlocals=5 pools=1 syms=0
  000 OP_ENTER    0:2:0:0:0:0:0
  001 OP_JMP              004
  002 OP_JMP              005
  003 OP_JMP              006
  004 OP_LOADI    R1      1
  005 OP_STRING   R2      'xxx'
  006 OP_RETURN   R4
 
 実行系はまだ見てませんが何となく以下の挙動をするんだろうなぁということがわかります。
 
 -foo()と呼んだら001から実行される（つまり、004から実行される）
 -foo(2)と呼んだら002から実行される（つまり、005から実行される）
 -foo(3, 'zzz')と呼んだら003から実行される（つまり、006から実行される）
 
 なんでこんなめんどくさいことを・・・、と思われるかもしれませんがRubyのオプション引数はデフォルト値ではなく、デフォルト「式」なのでこうしないといけません。例えば、以下のスクリプトのように、
 
  def bar
    123
  end
  
  def foo(a = 1, b = bar)
  end
 
  ./mruby.exe -c --verbose ../../optarg2.rb
  irep 118 nregs=6 nlocals=5 pools=0 syms=1
  000 OP_ENTER    0:2:0:0:0:0:0
  001 OP_JMP              004
  002 OP_JMP              005
  003 OP_JMP              009
  004 OP_LOADI    R1      1
  005 OP_LOADSELF R5
  006 OP_LOADNIL  R6
  007 OP_SEND     R5      'bar'   0
  008 OP_MOVE     R2      R5
  009 OP_RETURN   R4
 
 と、bの値を決定するために別メソッドが呼び出されることもあるのです。
 
 話が長くなりましたが以上を踏まえてオプション引数処理として何をやっているかというと、
 
 +まず、OP_JMPを埋め込み
 +各オプション引数について
 ++OP_JMPのジャンプ先を決定し（dispatch()に書かれています）
 ++デフォルト式に対応するコードを生成し
 ++デフォルト式の結果を
 ++デフォルト式の結果を格納
 +最後にメソッド本体へのOP_JMP先を決定して終わり
 
 ということをしています。ちなみに、genop_peep()というのは冗長なコードを簡約する処理なようです。また、上記のコードではOP_JMPが絶対アドレスジャンプになっていますが、実際のコードは相対アドレスジャンプになっているようなのでdispatch()を見るときは注意してください。
 
 #code(C){{
   codegen(s, tree->cdr->car, VAL);
   pop();
   c = s->iseq[s->pc-1];
   if (GET_OPCODE(c) != OP_RETURN || GETARG_B(c) != OP_R_NORMAL || s->pc == s->lastlabel) {
     genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
   }
   if (blk) {
     loop_pop(s, NOVAL);
   }
   scope_finish(s, idx);
 
   return idx - base;
 }
 }}
 
 ようやく、メソッド定義のコードを生成し、最後にreturnの処理を入れて((Rubyはreturnを書かないと最後の式がメソッドの戻り値になるということを思い出してください))終わりです。
 
 **NODE_ASGN [#f2f06f88]
 
 次はNODE_ASGNです。
 
 #code(C){{
   case NODE_ASGN:
     codegen(s, tree->cdr, VAL);
     pop();
     gen_assignment(s, tree->car, cursp(), val);
     break;
 }}
 
 cdrは右辺、carが格納先の左辺です。
 
 gen_assignment()は代入先の種類に応じて生成するコードを変更しています。ちょっと長いので今回対象となるNODE_LVAR以外は省略して貼り付け（そのNODE_LVARが一番長いのですが）
 
 #code(C){{
 static void
 gen_assignment(codegen_scope *s, node *node, int sp, int val)
 {
   int idx;
   int type = (intptr_t)node->car;
 
   node = node->cdr;
   switch ((intptr_t)type) {
   case NODE_GVAR:
     ...
   case NODE_LVAR:
     idx = lv_idx(s, (mrb_sym)node);
     if (idx > 0) {
       if (idx != sp) {
         genop_peep(s, MKOP_AB(OP_MOVE, idx, sp), val);
       }
       break;
     }
     else {                      /* upvar */
       int lv = 0;
       codegen_scope *up = s->prev;
 
       while (up) {
         idx = lv_idx(up, (mrb_sym)node);
         if (idx > 0) {
           genop_peep(s, MKOP_ABC(OP_SETUPVAR, sp, idx, lv), val);
           break;
         }
         lv++;
         up = up->prev;
       }
       //      assert(up!=0);
     }
     break;
   case NODE_IVAR:
     ...
   case NODE_CVAR:
     ...
   case NODE_CONST:
     ...
   case NODE_COLON2:
     ...
   case NODE_CALL:
     ...
   default:
     ...
   }
   if (val) push();
 }
 }}
 
 前半部分は普通のローカル変数、後半（else部分）はブロック内でブロックの外で定義されたローカル変数を参照するための処理です。mrubyではブロックの外のローカル変数はupvarという名前が使われているようです。YARVはdvarだったかな。
 
 ちなみに、NODE_CALLのcaseは配列要素やハッシュ要素への代入の時に使われるようです。
 
 **NODE_CALL [#n32d968f]
 
 次にNODE_CALL。
 
 #code(C){{
   case NODE_CALL:
     gen_call(s, tree, 0, 0, val);
     break;
 }}
 
 gen_call()に進みます。長いのでまたわけて解説します。
 
 #code(C){{
 static void
 gen_call(codegen_scope *s, node *tree, mrb_sym name, int sp, int val)
 {
   mrb_sym sym = name ? name : (mrb_sym)tree->cdr->car;
   int idx;
   int n = 0, noop = 0, sendv = 0;
 
   codegen(s, tree->car, VAL); /* receiver */
 }}
 
 コメント通りにレシーバーを生成しているようです。
 
 #code(C){{
   idx = new_msym(s, sym);
   tree = tree->cdr->cdr->car;
   if (tree) {
     n = gen_values(s, tree->car);
     if (n < 0) {
       n = noop = sendv = 1;
       push();
     }
   }
   if (sp) {
     if (sendv) {
       pop();
       genop(s, MKOP_AB(OP_ARYPUSH, cursp(), sp));
       push();
     }
     else {
       genop(s, MKOP_AB(OP_MOVE, cursp(), sp));
       push();
       n++;
     }
   }
 }}
 
 引数を設定するコードを生成しています。gen_values()は引数展開（*argと書くやつ）があると-1が返されるようです。if(sp){}でやっている処理は先ほどのgen_assignment()のNODE_CALLの場合（つまり、配列要素やハッシュ要素への代入）の場合に通るようです。
 
 #code(C){{
   if (tree && tree->cdr) {
     noop = 1;
     codegen(s, tree->cdr, VAL);
     pop();
   }
   else {
     genop(s, MKOP_A(OP_LOADNIL, cursp()));
   }
   pop_n(n+1);
 }}
 
 ブロックを積む処理をしているようです。
 
 #code(C){{
   {
     const char *name = mrb_sym2name(s->mrb, sym);
 
     if (!noop && name[0] == '+' && strlen(name) == 1)  {
       genop(s, MKOP_ABC(OP_ADD, cursp(), idx, n));
     }
     (中略。-, <, <=, >, >=について同様の特別処理)
     else {
       if (sendv) n = CALL_MAXARGS;
       genop(s, MKOP_ABC(OP_SEND, cursp(), idx, n));
     }
   }
   if (val) {
     push();
   }
 }
 }}
 
 最後にメソッド呼び出しのコードを埋め込んでいます。+など、一部の演算子は専用の命令があるようです。
 
 （以下執筆中）
 **NODE_BLOCK [#v274929e]
 
 **NODE_BLOCK [#u07e2b83]
 次はNODE_BLOCKです。
 
 #code(C){{
   case NODE_BLOCK:
     {
       int idx = lambda_body(s, tree, 1);
 
       genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_BLOCK));
       push();
     }
     break;
 }}
 
 **NODE_IF [#z2dba5af]
 NODE_CALLと同様にlambda_body()を呼び出してます。ただし、今回はblk=1で呼び出しているのでブロック用の処理が行われます。
 
 #code(C){{
 static int
 lambda_body(codegen_scope *s, node *tree, int blk)
 {
   int idx, base = s->idx;
   mrb_code c;
 
   s = scope_new(s->mrb, s, tree->car);
   idx = s->idx;
 
   if (blk) {
     struct loopinfo *lp = loop_push(s, LOOP_BLOCK);
     lp->pc1 = new_label(s);
   }
   (本体処理。詳しくは上記参照)
   if (blk) {
     loop_pop(s, NOVAL);
   }
   scope_finish(s, idx);
 
   return idx - base;
 }
 }}
 
 loopinfoにはブロック内でnextやbreakを実行したときにどこに飛べばいいかといった情報が入るようですが扱っているサンプルスクリプトではnextやbreakはしないのでこれ以上は突っ込みません:-P
 
 **NODE_IF [#mdc96c26]
 
 次にNODE_IFです。
 
 #code(C){{
   case NODE_IF:
     {
       int pos1, pos2;
       node *e = tree->cdr->cdr->car;
 
       codegen(s, tree->car, VAL);
       pop();
       pos1 = new_label(s);
       genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0));
 
       codegen(s, tree->cdr->car, val);
       if (e) {
         if (val) pop();
         pos2 = new_label(s);
         genop(s, MKOP_sBx(OP_JMP, 0));
         dispatch(s, pos1);
         codegen(s, e, val);
         dispatch(s, pos2);
       }
       else {
         if (val) {
           pop();
           genop(s, MKOP_A(OP_LOADNIL, cursp()));
           push();
         }
         dispatch(s, pos1);
       }
     }
     break;
 }}
 
 やっていることは以下の通りです。
 
 +条件節に対応するコードを生成
 +条件が成り立たないときのジャンプ命令を埋め込み
 +then節に対応するコードを生成
 +else節がある場合
 ++then節の末尾にif全体の末尾にジャンプする命令を埋め込み
 ++条件が成り立たない場合のジャンプ先としてelse節の先頭を指定
 ++else節に対応するコードを生成
 +else節がない場合
 ++条件が成り立たない場合のジャンプ先としてif全体の末尾を指定
 
 言葉だけだとわかりにくいと思うので実際に変換したコードを示します。
 
  if true
    1
  else
    2
  end
 
  irep 116 nregs=3 nlocals=2 pools=0 syms=0
  000 OP_LOADT    R2
  001 OP_JMPNOT   R2      004
  002 OP_LOADI    R2      1
  003 OP_JMP              005
  004 OP_LOADI    R2      2
  005 OP_STOP
 
 **NODE_DSTR [#zeb37344]
 
 最後にNODE_DSTRについて見てみます。
 
 #code(C){{
   case NODE_DSTR:
     if (val) {
       node *n = tree;
 
       codegen(s, n->car, VAL);
       n = n->cdr;
       while (n) {
         codegen(s, n->car, VAL);
         pop(); pop();
         genop(s, MKOP_AB(OP_STRCAT, cursp(), cursp()+1));
         push();
         n = n->cdr;
       }
     }
     else {
       node *n = tree;
 
       while (n) {
         if ((intptr_t)n->car->car != NODE_STR) {
           codegen(s, n->car, NOVAL);
         }
         n = n->cdr;
       }
     }
     break;
 }}
 
 valの値によって大きく処理が分かれています。今まで特に触れませんでしたがvalの値は「その値を後で使うか」を示す値のようです。そう思って上記のコードを眺めるとelseの方で文字列連結処理が省かれているのがわかります。ちなみに、NODE_STR以外はコード生成しているのはコードを実行することで何か副作用があるかもしれないから、だと思います。
 
 **nregs [#y2075d6b]
 
 最後の最後にnregsについて。レジスタ数っぽいと上で書きましたが、
 
 #code(C){{
 #define nregs_update do {if (s->sp > s->nregs) s->nregs = s->sp;} while (0)
 static void
 push_(codegen_scope *s)
 {
   if (s->sp > 511) {
     codegen_error(s, "too complex expression");
   }
   s->sp++;
   nregs_update;
 }
 }}
 
 そのスコープでの最大スタック伸長数のようです。
 
 *おわりに [#p8075c82]
 
 以上、mrubyのコード生成を見てきました。基本的には淡々とNODEを命令コードに変換しているだけなので慣れればサクサク読めるのですが幾分、「これって何やってるのか」というコメントが少ないかなと感じました。コメント書きのコミッタって需要あるかな？
mruby/実行コード生成を読む のバックアップの現在との差分(No.4)

mruby/実行コード生成を読むのバックアップの現在との差分(No.4)