First Working Assembler

I finally got the virtual machine and the XieXie assembler working!

Here is my first XieXie test program to run in the virtual-machine (It just prints “Test” 10 times on the console):

void Main() {
    const string str1 := "Test"
    const float Math.PI := 3.141592654
    for i := 1 .. 10 {
        Print(str1)
    }
}

And here is the corresponding XieXie assembler code (hand-written – for now I’m only testing the assembler, not the ASM code generator):

JMP Main                ; Jump to 'Main' procedure
str1:                   ; Constant name 'str1'
DATA.string "Test\n"    ; String data field
Math.PI:                ; Constant name 'Math.PI'
DATA.float 3.141592654  ; Floating-point data field (not used here, but for testing the assembler)
Main:                   ; 'Main' procedure
XOR i0, i0              ; i0 := 0
MOV i1, 10              ; i1 := 10
.Lfor_begin:            ; Local label of 'for'-loop
PUSH str1               ; Push address of 'str1'
CALL 0x003fff21         ; SysCall 'PrintTerm(const byte* text)'
INC i0                  ; i0++
CMP i0, i1              ; Condition for next loop iteration
JL .Lfor_begin          ; Jump if comparison is 'less'
STOP                    ; Stop program execution

As you can see, I modeled the instruction set on x86 (or rather IA-32) assembler.
I’m using a register machine (and not a stack-machine, as Java does) with similar instructions to x86, like ‘CALL’, ‘JL’ (for ‘jump-if-less’) etc.

After assembling, the assembler will have generated something like this – but as byte code and not as human-readable text ;-):

JMP (PC) 4       ; Jump to 'Main' procedure with offset 4
0x54657374       ; 'T', 'e', 's', 't'
0x0a000000       ; '\n', '\0', '\0', '\0'
0x40490fdb       ; 3.141592654 represented as HEX in IEEE 754 32-bit floating-point format
XOR i0, i0
MOV i1, 10
PUSH 0x00000001  ; Push address of 'str1' at location 1
CALL 0x003fff21
INC i0
CMP i0, i1
JL (PC) -4       ; Jump if comparison is 'less' to offset -4
STOP

I also want to show you a little larger program:

void Main() {
    for row := 0 .. 9 {
        for col := 0 .. 9 {
            if row % 2 ^ col % 2 = 0 {
                PrintTerm("#")
            } else {
                PrintTerm(" ")
            }
        }
        PrintTerm("\n")
    }
}

This is the output (35 instructions, 1745 cycles):

# # # # # 
 # # # # #
# # # # # 
 # # # # #
# # # # # 
 # # # # #
# # # # # 
 # # # # #
# # # # # 
 # # # # #

And this is my hand-written XieXie assembler code:

JMP Main					; Jump to 'Main' procedure

str_chr:
DATA.string "#"
str_space:
DATA.string " "
str_nl:
DATA.string "\n"
str_pause:					; (not referenced anywhere in this listing)
DATA.string "pause"

Main:
XOR i0, i0					; row := 0
MOV i1, 10					; i1 := 10 (upper bound used by both loops)

.Lwhile1:					; while row < 10
CMP i0, i1
JGE .Lwhile1_end
	
	XOR i2, i2				; col := 0
	
	.Lwhile2:				; while col < 10
	CMP i2, i1
	JGE .Lwhile2_end
		
		PUSH i0				; (store 'row' on stack)
		PUSH i2				; (store 'col' on stack)
		
		MOD i0, 2			; i0 := row % 2
		MOD i2, 2			; i2 := col % 2
		XOR i0, i2			; i0 := (row % 2) ^ (col % 2)
		XOR i2, i2			; i2 := 0 (right-hand side of the comparison)
		
		CMP i0, i2			; if (row % 2 ^ col % 2) = 0
		JNE .Lelse
		.Lif:
			
			PUSH str_chr
			CALL 0x003fff21	; PrintTerm("#")
			
		JMP .Lfi
		.Lelse:
			
			PUSH str_space
			CALL 0x003fff21	; PrintTerm(" ")
			
		.Lfi:
		
		POP i2				; (restore 'col' from stack)
		POP i0				; (restore 'row' from stack)
		
		INC i2				; col++
		
	JMP .Lwhile2
	.Lwhile2_end:
	
	PUSH str_nl
	CALL 0x003fff21			; PrintTerm("\n")
	
	INC i0					; row++
	
JMP .Lwhile1
.Lwhile1_end:

STOP

I’m very happy that everything works exactly as I expected and planned :-D

First Byte-Code Executed

I got the first (hand-written) byte code running inside my virtual machine :-D.
Concurrency works pretty well, too :-)

Here are the two first test programs:

; Print(1 + 2*(3 - 4))
MOV i0, 3       ; i0 = 3
MOV i1, 4       ; i1 = 4
SUB i0, i1      ; i0 = i0 - i1
MOV i1, 2       ; i1 = 2
MUL i0, i1      ; i0 = i0 * i1
INC i0          ; i0++
CALL 0x03ffff20 ; SysCall for 'Print' (currently just for debugging)
STOP            ; Stop program execution
; int i  while i < 100 { Print(i++) }
MOV i1, 100     ; i1 = 100
XOR i0, i0      ; i0 = 0
CALL 0x03ffff20 ; SysCall for 'Print' (currently just for debugging)
INC i0          ; i0++
CMP i0, i1      ; Compare i0 with i1
JL 0x00000002   ; Jump-If-Less to address 2
STOP            ; Stop program execution

100 Commits

Today I made my 100th commit on the Git repository :-D

I also extended the ‘enumeration’ declarations. Their code generation is now about as complex as that of the ‘flags enumerations’:

// XieXie
enum Types {
	Unknown	("unknown")			:= 0
	Foo		("this is foo")		:= 5
	Bar		("and this is bar") := 6
}

// C++
struct Types
{
    //! Raw enumeration. The trailing sentinel marks a default-constructed value.
    enum __XX__Enum
    {
        Unknown = 0,
        Foo = 5,
        Bar = 6,
        __XX__Uninitialized
    };

    //! Creates a value that holds the sentinel entry.
    Types() : __XX__Entry(__XX__Uninitialized) {}

    //! Wraps the given enumeration entry.
    //! \param[in] entry Entry to store.
    Types(const __XX__Enum& entry) : __XX__Entry(entry) {}

    //! Implicit conversion back to the raw enumeration value.
    operator __XX__Enum () const { return __XX__Entry; }

    //! \return Number of named entries (the sentinel is not counted).
    size_t Num() const { return 3; }

    //! \return Descriptive string of the stored entry, or an empty string
    //! when the stored value has no description (e.g. the sentinel).
    std::string Str() const
    {
        if (__XX__Entry == Unknown) return "unknown";
        if (__XX__Entry == Foo)     return "this is foo";
        if (__XX__Entry == Bar)     return "and this is bar";
        return "";
    }

    //! Currently stored entry.
    __XX__Enum __XX__Entry;
};

Lambdas

Just right now I’ve added a syntax for lambda expressions:

// XieXie
lambda bool(int a, int b) { ret a < b }

// C++
[&](int a, int b) -> bool { return a < b; }

For XieXie I simplified the feature set of lambdas by always using automatic variable capturing (the ‘[&]’ syntax in C++).

In XieXie I’m using lots of different keywords, in contrast to C++, where keywords such as ‘const’ are massively overloaded.
I think this helps readability.

Math Expression Abbreviated Form

I’m currently testing a syntax of math expressions in an abbreviated form:

// XieXie
bool b1 := x < y < z
bool b2 := 0 < num < 10
bool b3 := x > y + 3 >= z/2 > 0

// C++
bool b1 = x < y && y < z;
bool b2 = 0 < num && num < 10;
bool b3 = x > y + 3 && y + 3 >= z/2 && z/2 > 0;

In C++ those expressions are allowed as well but usually end up in an unexpected result, e.g. consider the following code sample:

bool b = x < y < z;

A C++ compiler will generate code that first evaluates (x < y), which results in true or false.
This boolean value is then compared with z, i.e. (… < z), which is almost certainly not what you expected.

In Java, those expressions are not allowed, since ‘<’, ‘<=’, ‘>’ and ‘>=’ comparisons between boolean and integer are not defined.
In XieXie those comparisons are not allowed either, but in the above examples you can make use of the math expressions in the abbreviated form.

This is actually not a big deal, but I currently have some trouble getting the parser to distinguish the template syntax from the relational binary operators.
Because templates use a syntax similar to that of C++, you should not mix ‘<’ and ‘>’ comparisons in one chain, i.e. don’t write expressions like the following example:

// False example
bool b := x < y > z

The parser will assume that ‘x’ is a template and that ‘y’ is its argument. What you can do in such situations is split the expression up:

// XieXie
bool b := x < y < z && (u > v > w)

// C++
bool b = x < y && y < z && (u > v && v > w);

This feature can be useful for instance in GUI programming:

class Button {
    bool MouseOver(int mx, int my) readonly {
        ret x <= mx <= x + w &&
            y <= my <= y + h
    }
private:
    [[set,get]] int x, y, w, h // <-- X, Y, Width, Height
}

Initializer Lists and new Attributes Syntax

Today I’ve added initializer lists, which should work similarly to their C++11 equivalent.

// --- Already implemented: ---
int[] array1 := { 1, 2, 3 }
string[] array2 := { "Hello", "World" }
int[][] array3 := { { 1, 2, 3 }, { 42, 19 } }

// --- Currently just in planning: ---

// Test class
class Test { int foo, bar }

// Default initialization
Test t1 := { 12, 5 }

// Initialization with member denomination (like with the named parameters)
Test t2 := { foo: 12, bar: 5 }
Test t3 := { bar: 5, foo: 12 }

I also changed the syntax for the attributes. Now you can write several attributes for a statement:

void DoSomething(int x) { /* ... */ }

// Auto-unroll loop
[[unroll]]
for i := 1 .. 3 {
    DoSomething(i)
}

// Pack class (and disable alignment)
[[pack]] class Foo {
    [[set,get]] int bar
}

And this is the generated code:

// Example.xx.cpp
// XieXie generated source file
// Sat Feb 01 21:10:13 2014

//! DoSomething function
//! \param[in] x Input parameter x.
void DoSomething(int x)
{
}

// Unrolled Range-Based For Loop
{
    // Iteration 1
    unsigned char i = 1;
    {
        DoSomething(i);
    }
    
    // Iteration 2
    i = 2;
    {
        DoSomething(i);
    }
    
    // Iteration 3
    i = 3;
    {
        DoSomething(i);
    }
    
}
// Structure Packing Alignment
// Selects how the class below is packed: MSVC uses '#pragma pack', GCC-compatible
// compilers use a trailing type attribute, and other compilers get no packing.
#if defined(_MSC_VER)
#   pragma pack(push, packing)
#   pragma pack(1)
#   define __XX__PACK_STRUCT__
#elif defined(__GNUC__)
#   define __XX__PACK_STRUCT__ __attribute__((packed))
#else
#   define __XX__PACK_STRUCT__
#endif

//! Foo class (packed, i.e. without alignment padding).
class Foo
{
    public:
        int bar;
        //! Initializes "bar" with 0.
        Foo() :
            bar(0)
        {
        }
        
        ~Foo()
        {
        }
        
    public:
        //! Setter for "bar" member variable.
        //! \param[in] bar Specifies the new value.
        //! \see bar
        inline void SetBar(const int& bar)
        {
            this->bar = bar;
        }
        //! Getter for "bar" member variable.
        //! \see bar
        //! \return Current value (returned by value: a reference must not be
        //! bound to a member of a packed class, because the member may be
        //! misaligned and GCC would bind the reference to a temporary copy).
        inline int GetBar() const
        {
            return bar;
        }
        
} __XX__PACK_STRUCT__;

// /Structure Packing Alignment
#ifdef _MSC_VER
#   pragma pack(pop, packing)
#endif
#undef __XX__PACK_STRUCT__

// ================

The old syntax with the ‘#’ character was just a temporary solution.
The new one works correctly together with the rest of the grammar.