文章归档

数据结构对齐问题

昨天去迅雷参加笔试,有一道基础题与结构体对齐有关,考的是sizeof的取值。回来后感觉不太确定,于是回头翻了翻lcc编译器的代码,看了一下结构体字段的处理过程。
几个基础的数据结构,其中

  • Metrics表示数据类型的大小(size)和对齐值(align),outofline的含义目前不太清楚
  • 结构体类型是通过type中的u.sym表示的,里面包含一个field类型的链表

/*
 * Per-type metrics record.  The interface table in this article lists
 * one such triple (size, align, outofline) for every basic type.
 */
typedef struct metrics {
    unsigned char size;      /* size of the type (the table uses bytes, e.g. 4 for int) */
    unsigned char align;     /* required alignment of the type */
    unsigned char outofline; /* NOTE(review): meaning not established here; 0 or 1 in the table */
} Metrics;

/*
 * Compiler-internal description of a C type.
 * For struct/union types, fields() fills in `align` and `size` and walks
 * the member list reachable through `u.sym`.
 */
struct type {
    int op;             /* type operator; fields() compares it against UNION */
    Type type;          /* related type -- presumably the operand/element type; TODO confirm */
    int align, size;    /* alignment and size of the type */
    union {
        Symbol sym;     /* struct/union: symbol whose u.s.flist is the field list */
        struct {
            unsigned oldstyle:1;
            Type *proto;
        } f;            /* presumably function-type information; TODO confirm */
    } u;
    Xtype x;            /* NOTE(review): purpose not visible in this excerpt */
};

/*
 * One member of a struct or union.  Members are chained through `link`;
 * fields() assigns `offset`, `bitsize` and `lsb`.
 */
struct field {
    char *name;         /* member name; may be NULL or a generated label */
    Type type;          /* declared type of the member */
    int offset;         /* offset of the member within the aggregate */
    short bitsize, lsb; /* bit-field width, and bit position (non-zero marks a bit-field) */
    Field link;         /* next member in the list, or NULL */
};

在x86平台上,基础类型的Metrics值(依次为 size、align、outofline)如下:

Interface x86linuxIR = {
        /* Each row is one Metrics triple: size, align, outofline. */
        1, 1, 0,  /* char */
        2, 2, 0,  /* short */
        4, 4, 0,  /* int */
        4, 4, 0,  /* long */
        4, 4, 0,  /* long long */
        4, 4, 1,  /* float */
        8, 4, 1,  /* double */
        8, 4, 1,  /* long double */
        4, 4, 0,  /* T * */
        0, 1, 0,  /* struct */
...
};

然后我们来看看编译器是如何计算结构体字段的偏移量和大小的,相关代码在src/decl.c的fields()函数里,调用栈如下:

#0  fields (ty=0x6b39d0) at src/decl.c:709
#1  0x000000000040413d in structdcl (op=9) at src/decl.c:600
#2  0x000000000040263b in specifier (sclass=0x7fffffffd7f4) at src/decl.c:119
#3  0x00000000004029dc in decl (dcl=0x402de9 <dclglobal>) at src/decl.c:184
#4  0x00000000004023fb in program () at src/decl.c:40
#5  0x00000000004014a5 in main (argc=4, argv=0x7fffffffd998) at src/main.c:85

fields()函数代码:

/*
 * fields - parse the member declarations of the struct/union type `ty`
 * and then lay the members out.
 *
 * The function works in two passes:
 *   1. Parse declarations while the current token starts a type name,
 *      creating one Field per declarator and validating bit-fields.
 *   2. Walk the resulting field list, assigning each member an offset
 *      (and bit position for bit-fields), and compute ty->align and
 *      ty->size.
 *
 * Errors and warnings are reported through error()/warning(); nothing is
 * returned.
 */
static void fields(Type ty)
{
  // Pass 1: lexical and syntactic analysis of the member declarations
  {
    int n = 0;  /* number of fields declared so far */
    while (istypename(t, tsym)) {
      /* passed to test() below; presumably a token set used for error recovery */
      static char stop[] = { IF, CHAR, '}', 0 };
      Type ty1 = specifier(NULL);
      /* one iteration per declarator in a comma-separated list */
      for (;;) {
        Field p;
        char *id = NULL;
        Type fty = dclr(ty1, &id, NULL, 0);
        p = newfield(id, ty, fty);
        if (Aflag >= 1 && !hasproto(p->type))
          warning("missing prototype\n");
        if (t == ':') {
          /* bit-field: only (qualified) int and unsigned are accepted */
          if (unqual(p->type) != inttype
            &&  unqual(p->type) != unsignedtype) {
            error("`%t' is an illegal bit-field type\n",
                p->type);
            p->type = inttype;
          }
          t = gettok();
          p->bitsize = intexpr(0, 0);
          /* width must lie in [0, number of bits in an int]; otherwise clamp */
          if (p->bitsize > 8*inttype->size || p->bitsize < 0) { 
            error("`%d' is an illegal bit-field size\n", p->bitsize);
            p->bitsize = 8*inttype->size;
          } else if (p->bitsize == 0 && id) {
            warning("extraneous 0-width bit field `%t %s' ignored\n", 
                    p->type, id);
            /* rename to a generated label; presumably numeric, so that the
               digit test in pass 2 drops this field -- TODO confirm */
            p->name = stringd(genlabel(1));
          }
          /* non-zero lsb marks the field as a bit-field for pass 2 */
          p->lsb = 1;
        } else {
          if (id == NULL)
            error("field name missing\n");
          else if (isfunc(p->type))
            error("`%t' is an illegal field type\n", p->type);
          else if (p->type->size == 0)
            error("undefined size for field `%t %s'\n",
                p->type, id);
        }
        /* record on the aggregate's symbol that it has const/volatile members */
        if (isconst(p->type))
          ty->u.sym->u.s.cfields = 1;
        if (isvolatile(p->type))
          ty->u.sym->u.s.vfields = 1;
        n++;
        if (Aflag >= 2 && n == 128)
          warning("more than 127 fields in `%t'\n", ty);
        if (t != ',')
          break;
        t = gettok();
      }
      test(';', stop);
    }
  }

  // Pass 2: compute the offset of every field and the size of the aggregate
  {
    /*
     * bits: 0 when no bit-field storage unit is open; otherwise one more
     *       than the number of bits already used in the current unit.
     * off:  running offset of the next member.
     * overflow: never assigned directly here; presumably set by the
     *       add()/chkoverflow() helpers -- TODO confirm.
     */
    int bits = 0, off = 0, overflow = 0;
    /* q points at the link slot where the next kept field is stored */
    Field p, *q = &ty->u.sym->u.s.flist;
    /* start from the target's minimum struct alignment */
    ty->align = IR->structmetric.align;
    for (p = *q; p; p = p->link) {
      /* a: alignment for this member (at least 1) */
      int a = p->type->align ? p->type->align : 1;
      if (p->lsb)
        a = unsignedtype->align;  /* bit-fields are aligned like unsigned */
      if (ty->op == UNION)
        off = bits = 0;  /* every union member starts at offset 0 */
      else if (p->bitsize == 0 || bits == 0
           || bits - 1 + p->bitsize > 8*unsignedtype->size) {
        /* Start a new storage position: the member has bitsize 0 (ordinary
           member or zero-width bit-field), no unit is open, or the
           bit-field does not fit in the bits left of the current unit.
           Skip past the bytes used so far and round up to alignment a. */
        off = add(off, bits2bytes(bits-1));
        bits = 0;
        chkoverflow(off, a - 1);
        off = roundup(off, a);
      }
      /* the aggregate is aligned to its most strictly aligned member */
      if (a > ty->align)
        ty->align = a;
      p->offset = off;

      if (p->lsb) {
        /* bit-field: replace the marker in lsb with its bit position */
        if (bits == 0)
          bits = 1;
        if (IR->little_endian)
          p->lsb = bits;
        else
          p->lsb = 8*unsignedtype->size - bits + 1
            - p->bitsize + 1;
        bits += p->bitsize;
      } else
        off = add(off, p->type->size);
      /* size covers the furthest extent reached by any member */
      if (off + bits2bytes(bits-1) > ty->size)
        ty->size = off + bits2bytes(bits-1);
      /* keep the field in the list unless its name starts with a digit 1-9 */
      if (p->name == NULL
        || !('1' <= *p->name && *p->name <= '9')) { *q = p; q = &p->link;
      }
    }
    /* terminate the filtered list */
    *q = NULL;
    /* pad the total size up to a multiple of the alignment */
    chkoverflow(ty->size, ty->align - 1);
    ty->size = roundup(ty->size, ty->align);
    if (overflow) {
      error("size of `%t' exceeds %d bytes\n", ty, inttype->u.sym->u.limits.max.i);
      /* clamp to the largest int value, rounded down to the alignment */
      ty->size = inttype->u.sym->u.limits.max.i&(~(ty->align - 1));
    }
  }
}

代码这么清晰,就不继续码字了。

Leave a Reply

You can use these HTML tags

<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>