Datatypes

Major data types:


struct Token

There is no struct Token in tcc.

A standard design might use a token data structure to represent a node in the parse tree. A parsed lexeme would be packaged into a token, which would include standard attributes such as text, value, line number, etc. tcc does things a little differently. First off, it doesn't use such a token struct; some parse tokens - operators, keywords, punctuation, etc. - do not need attributes (except maybe for line number for debugging purposes and error reporting), so supplying every token with such attribute fields would waste space and time. tcc only uses attribute information when necessary.

A recognized lexeme is treated as a token. tcc encodes the token type of a parsed lexeme using a global variable of type int (named tok) that carries a token code. It encodes the token value attribute in a global variable of type union CValue (named tokc). Various other attributes are recorded in other data items: struct CType carries the C language type associated with the token; struct Sym (see below); and in particular struct TokenSym is a hash table link node that carries token attribute values (see below for details).

Caveat: ordinarily a "token" would be a data structure representing a parsed lexeme - its token type, value, text, and other attributes. In tcc, the term is often a little ambiguous, since there is no such single data type. The global tok is sometimes referred to as a token, but in fact it is just a token type code for the most recently recognized lexeme. The attributes that would usually be encoded in a struct token are encoded in union CValue, etc. So a token in tcc is an abstract notion.


typedef int tok_t

Currently tcc just uses int to encode token type information, along with a set of preprocessor macros such as

/* Token codes for constant lexemes. As each comment notes, the constant's
   value (or a pointer to it) travels separately in tokc. */
#define TOK_CINT   0xb3 /* number in tokc */
#define TOK_CCHAR 0xb4 /* char constant in tokc */
#define TOK_STR   0xb5 /* pointer to string in tokc */

The problem is that int v used as a formal argument to a function is not very reader-friendly; using a typedef would improve this:

static inline Sym *struct_find(tok_t tok)

instead of

static inline Sym *struct_find(int v)

struct TokenSym

/* token symbol management */
/* Hash-table linked-list node carrying attribute information for one
   token/symbol. The four sym_* members point at the Sym currently
   associated with this token in each of four categories: define, label,
   structure, identifier. */
typedef struct TokenSym {
    struct TokenSym *hash_next; /* next node on the same hash chain */
    struct Sym *sym_define; /* direct pointer to define */
    struct Sym *sym_label; /* direct pointer to label */
    struct Sym *sym_struct; /* direct pointer to structure */
    struct Sym *sym_identifier; /* direct pointer to identifier */
    int tok; /* token number */
    int len; /* presumably the length of str -- TODO confirm */
    char str[1]; /* presumably the token text; declared with one element, so
                    storage for longer text would have to be over-allocated
                    by whoever creates the node -- TODO confirm */
} TokenSym;

/* NOTE(review): presumably a dynamically sized array of TokenSym pointers
   (one per known token) -- confirm against the tcc source. */
static TokenSym **table_ident;
/* Fixed array of TOK_HASH_SIZE TokenSym pointers; presumably the hash
   bucket heads, with collisions chained through TokenSym.hash_next --
   TODO confirm. */
static TokenSym *hash_ident[TOK_HASH_SIZE];

In terms of classic compiler design, TokenSym is something like a symbol table entry with token attributes. It is a hash table linked list node that carries some information about a token/symbol.


struct Sym

/* Symbol record, linked to other symbols through next/prev/prev_tok.
   NOTE(review): the `<<==` markers after the first three members are not C.
   They appear to be this page's suggested typedef'd replacements for the
   plain `int` members (e.g. tok_t instead of int for the token code) --
   confirm with the page author before treating them as real declarations. */
typedef struct Sym {
    int v;    /* symbol token */ <<==  tok_t tok;
    int r;    /* associated register */ <<== reg_t r;
    int c;    /* associated number */   <<== ELFSym_idx
    CType type;    /* associated type */
    struct Sym *next; /* next related symbol */
    struct Sym *prev; /* prev symbol in stack */
    struct Sym *prev_tok; /* previous symbol for this token */
} Sym;

--

struct CType

/* type definition */
/* Describes a C language type as an integer code plus an optional
   reference to a Sym. */
typedef struct CType {
    int t;            /* type code; the encoding is not shown on this page */
    struct Sym *ref;  /* NOTE(review): presumably the symbol giving further
                         detail for derived types (pointer, struct,
                         function) -- confirm against the tcc source */
} CType;

union CValue

/* constant value */
/* Union: all members share the same storage, so only the member that
   matches the kind of constant being carried is meaningful. */
typedef union CValue {
    long double ld;          /* long double constant */
    double d;                /* double constant */
    float f;                 /* float constant */
    int i;                   /* int constant */
    unsigned int ui;         /* unsigned int constant */
    unsigned int ul; /* address (should be unsigned long on 64 bit cpu) */
    long long ll;            /* long long constant */
    unsigned long long ull;  /* unsigned long long constant */
    struct CString *cstr;    /* pointer to a CString; presumably used for
                                string constants -- TODO confirm */
    void *ptr;               /* generic pointer value */
    int tab[1];              /* NOTE(review): presumably gives int-array
                                access to the raw storage -- confirm */
} CValue;

struct SValue

/* value on stack */
/* Bundles a value's type, the register(s)/flags describing where it is
   held, its constant payload and an optional associated symbol. */
typedef struct SValue {
    CType type;      /* type */
    unsigned short r;      /* register + flags */
    unsigned short r2;     /* second register, used for 'long long'
                              type. If not used, set to VT_CONST */
    CValue c;              /* constant, if VT_CONST */
    struct Sym *sym;       /* symbol, if (VT_SYM | VT_CONST) */
} SValue;

IO


struct BufferedFile

/* State for one input file read through an in-struct buffer, plus the
   bookkeeping (line number, #ifndef tracking, names) kept with it. */
typedef struct BufferedFile {
    uint8_t *buf_ptr;   /* presumably the current read position in buffer
                           -- TODO confirm */
    uint8_t *buf_end;   /* presumably the end of the valid data in buffer
                           -- TODO confirm */
    int fd;             /* presumably the OS file descriptor being read
                           -- TODO confirm */
    int line_num;    /* current line number - here to simplify code */
    int ifndef_macro;  /* #ifndef macro / #endif search */
    int ifndef_macro_saved; /* saved ifndef_macro */
    int *ifdef_stack_ptr; /* ifdef_stack value at the start of the file */
    char inc_type;          /* type of include */
    char inc_filename[512]; /* filename specified by the user */
    char filename[1024];    /* current filename - here to simplify code */
    unsigned char buffer[IO_BUF_SIZE + 1]; /* extra size for CH_EOB char */
} BufferedFile;

typedef struct ParseState

/* parsing state (used to save parser state to reparse part of the
   source several times) */
typedef struct ParseState {
    int *macro_ptr;  /* NOTE(review): presumably the saved position in the
                        macro token stream being read -- confirm */
    int line_num;    /* saved line number */
    int tok;         /* saved token code; same name and type as the global
                        tok this page describes */
    CValue tokc;     /* saved token value; same name and type as the global
                        tokc this page describes */
} ParseState;

typedef struct TokenString

/* used to record tokens */
typedef struct TokenString {
    int *str;           /* buffer of ints holding the recorded tokens */
    int len;            /* presumably the number of ints currently used in
                           str -- TODO confirm */
    int allocated_len;  /* presumably the capacity of str -- TODO confirm */
    int last_line_num;  /* NOTE(review): presumably the line number of the
                           most recently recorded token -- confirm */
} TokenString;

typedef struct CachedInclude

/* include file cache, used to find files faster and also to eliminate
   inclusion if the include file is protected by #ifndef ... #endif */
typedef struct CachedInclude {
    int ifndef_macro; /* NOTE(review): presumably the token of the guarding
                         #ifndef macro, if any -- confirm */
    int hash_next; /* -1 if none */
    char type; /* '"' or '>' to give include type */
    char filename[1]; /* path specified in #include */
} CachedInclude;

Output File


typedef struct Section

/* One output section: its data buffer, the ELF section-header values kept
   for it (the sh_* members), and links to related sections. */
typedef struct Section {
    unsigned long data_offset; /* current data offset */
    unsigned char *data;       /* section data */
    unsigned long data_allocated; /* used for realloc() handling */
    int sh_name;             /* elf section name (only used during output) */
    int sh_num;              /* elf section number */
    int sh_type;             /* elf section type */
    int sh_flags;            /* elf section flags */
    int sh_info;             /* elf section info */
    int sh_addralign;        /* elf section alignment */
    int sh_entsize;          /* elf entry size */
    unsigned long sh_size;   /* section size (only used during output) */
    unsigned long sh_addr;      /* address at which the section is relocated */
    unsigned long sh_offset;    /* file offset */
    int nb_hashed_syms;      /* used to resize the hash table */
    struct Section *link;    /* link to another section */
    struct Section *reloc;   /* corresponding section for relocation, if any */
    struct Section *hash;     /* hash table for symbols */
    struct Section *next;    /* NOTE(review): presumably the next section in
                                some list of sections -- confirm */
    char name[1];           /* section name */
} Section;

typedef struct DLLReference

/* Record for one referenced DLL / shared library (judging by the type
   name; its use is not shown on this page). */
typedef struct DLLReference {
    int level;     /* NOTE(review): meaning not shown here; presumably a
                      dependency/reference depth -- confirm */
    char name[1];  /* presumably the library name; declared with one
                      element -- TODO confirm how it is allocated */
} DLLReference;

typedef struct AttributeDef

/* GNUC attribute definition */
typedef struct AttributeDef {
    int aligned;      /* presumably the value given to the aligned
                         attribute -- TODO confirm */
    int packed; /* presumably set when the packed attribute is present
                   -- TODO confirm */
    Section *section; /* presumably the section named by a section
                         attribute -- TODO confirm */
    unsigned char func_call; /* FUNC_CDECL, FUNC_STDCALL, FUNC_FASTCALLx */
    unsigned char dllexport; /* presumably set when dllexport is requested
                                -- TODO confirm */
} AttributeDef;
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License