zahl

Log | Files | Refs | README

commit 1068dd9d4b352cba1f2db33bee4969e00aaba2d0
parent b00ee4a4db002dfed750f357f7b5b5f3bdafd619
Author: Thomas Vigouroux <thomas.vigouroux@univ-grenoble-alpes.fr>
Date:   Tue, 11 Jun 2024 16:39:21 +0200

feat: functions / jumps

Diffstat:
Msrc/chunk.zig | 39+++++++++++++++++++++++++++++++--------
Msrc/compile.zig | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/main.zig | 61++++++++++++++++++++++++++++++++++++++++++-------------------
Msrc/scan.zig | 7++++++-
Asrc/symintern.zig | 45+++++++++++++++++++++++++++++++++++++++++++++
Msrc/value.zig | 50++++++++++++++++++++++++++++++++++++++++++++++----
Msrc/vm.zig | 136+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
7 files changed, 399 insertions(+), 85 deletions(-)

diff --git a/src/chunk.zig b/src/chunk.zig @@ -5,14 +5,26 @@ pub const Op = union(enum) { // Finish execution of current function @"return", + // Set current pc to this if the condition is false + jumpfalse: u32, + + // Jump absolute to the target + jump: u32, + // ..[a] -> .. pop, // .. -> ..[c] - @"constant": usize, + @"constant": u32, // ..[v].. -> ..[v]..[v] - @"local": usize, + @"local": u32, + + // .. -> ..[g] + get_global: u32, + + // ..[g] -> .. + define_global: u32, // ..[scope...][top] -> ..[top] popscope: usize, @@ -30,6 +42,8 @@ pub const Op = union(enum) { div }; +var _ = std.debug.assert(@sizeOf(Op) == 4); + pub const Chunk = struct { const Self = @This(); @@ -60,19 +74,19 @@ pub const Chunk = struct { try self.instrs.append(instr); } - pub fn addConstant(self: *Self, val: *value.Value) !usize { + pub fn addConstant(self: *Self, val: *value.Value) !u32 { try self.values.append(val); - return self.values.items.len - 1; + return @intCast(self.values.items.len - 1); } pub fn disassembleInstr(self: Self, instr: Op) void { switch (instr) { - .@"constant" => |vindex| { + inline .@"constant", .get_global, .define_global => |vindex, tag| { const num = self.values.items[vindex]; - std.debug.print("constant {}\n", .{num.formatter()}); + std.debug.print("{s} {}\n", .{@tagName(tag), num.formatter()}); }, - inline .@"local", .popscope => |vindex, tag| { + inline .@"local", .popscope, .jumpfalse, .jump => |vindex, tag| { std.debug.print("{s} {}\n", .{@tagName(tag), vindex}); }, @@ -83,9 +97,18 @@ pub const Chunk = struct { } pub fn disassemble(self: Self) void { - for (self.instrs.items) |instr| { + for (self.instrs.items, 0..) |instr, pc| { + std.debug.print("{} |\t", .{pc}); self.disassembleInstr(instr); } } + + pub fn lastInstrRef(self: *Self) *Op { + return &self.instrs.items[self.instrs.items.len - 1]; + } + + pub fn nextPc(self: *const Self) u32 { + return @intCast(self.instrs.items.len); + } }; diff --git a/src/compile.zig b/src/compile.zig @@ -4,32 +4,37 @@ const c = @import("chunk.zig"); const complog = std.log.scoped(.compile); -const Storage = union(enum) { local: usize, value: *v.Value }; +const Storage = union(enum) { local: u32, value: *v.Value }; + +const CompileError = error{ InvalidNumberOfArguments, InvalidArgType, UnbalancedBindings, Undefined, OutOfMemory }; pub const Compiler = struct { const Self = @This(); all: std.mem.Allocator, val: *v.Value, - chunk: c.Chunk, + func: v.Function, // Variable management (+ constant folding) sym_map: std.StringHashMap(Storage), - scope_depth: usize, + scope_depth: u32, pub fn init(all: std.mem.Allocator, val: *v.Value) !Self { return Self{ .all = all, .val = val.ref(), - .chunk = c.Chunk.init(all), + .func = v.Function.init(all), .sym_map = std.StringHashMap(Storage).init(all), .scope_depth = 0, }; } + fn getChunk(self: *Self) *c.Chunk { + return &self.func.code; + } + pub fn deinit(self: *Self) void { self.val.unref(); - self.chunk.deinit(); self.sym_map.deinit(); } @@ -43,7 +48,7 @@ pub const Compiler = struct { if (self.sym_map.get(sym)) |store| { switch (store) { .value => |cval| return cval, - else => {} + else => {}, } } }, @@ -52,28 +57,27 @@ pub const Compiler = struct { return null; } - fn compileInner(self: *Self, val: *v.Value) !void { + fn compileInner(self: *Self, val: *v.Value) CompileError!void { switch (val.val) { - .number, .nil, .true, .false => { - const cst = try self.chunk.addConstant(val.ref()); - try self.chunk.write(.{ .constant = cst }); - }, + .function => |_| @panic("Unreachable"), .symbol => |sym| { if (self.sym_map.get(sym)) |store| { switch (store) { .local => |index| { - try self.chunk.write(.{ .local = index }); + try self.getChunk().write(.{ .local = index }); }, // This variant takes care of constant folding // actual constant folding is done when generating the symbol .value => |cst| { - const cval = try self.chunk.addConstant(cst.ref()); - try self.chunk.write(.{ .constant = cval }); - } + const cval = try self.getChunk().addConstant(cst.ref()); + try self.getChunk().write(.{ .constant = cval }); + }, } } else { - return error.UndefinedVariable; + // Late binding of global variables + const symval = try self.getChunk().addConstant(val.ref()); + try self.getChunk().write(.{ .get_global = symval }); } }, .special => @panic("Cannot compile special"), @@ -85,23 +89,23 @@ pub const Compiler = struct { try self.compileInner(lst[1]); for (lst[2..]) |inner| { try self.compileInner(inner); - try self.chunk.write(if (tag == .@"+") .add else .multiply); + try self.getChunk().write(if (tag == .@"+") .add else .multiply); } }, .@"-" => { if (lst.len == 2) { try self.compileInner(lst[1]); - try self.chunk.write(.negate); + try self.getChunk().write(.negate); } else { try self.compileInner(lst[2]); try self.compileInner(lst[1]); - try self.chunk.write(.substract); + try self.getChunk().write(.substract); } }, .@"^" => { try self.compileInner(lst[2]); try self.compileInner(lst[1]); - try self.chunk.write(.pow); + try self.getChunk().write(.pow); }, .@"/" => { @panic("TODO: multiple return values"); @@ -113,15 +117,15 @@ pub const Compiler = struct { // (let* (bindings...) expr) .@"let*" => { if (lst.len != 3) { - return error.InvalidNumberOfArguments; + return CompileError.InvalidNumberOfArguments; } if (!lst[1].isA(.list)) { - return error.InvalidArgType; + return CompileError.InvalidArgType; } const blist = lst[1].val.list; if (blist.len % 2 != 0) { - return error.UnbalancedLet; + return CompileError.UnbalancedBindings; } for (0..blist.len / 2) |idx| { @@ -129,7 +133,7 @@ pub const Compiler = struct { const sval = blist[2 * idx + 1]; if (!sym.isA(.symbol)) { - return error.InvalidArgType; + return CompileError.InvalidArgType; } // Perform constant folding !! @@ -151,7 +155,7 @@ pub const Compiler = struct { // Now, remove the bindings of the variables var to_pop: usize = 0; for (0..blist.len / 2) |idx| { - const got = self.sym_map.fetchRemove(blist[2*idx].val.symbol).?; + const got = self.sym_map.fetchRemove(blist[2 * idx].val.symbol).?; switch (got.value) { .local => { to_pop += 1; @@ -161,18 +165,102 @@ pub const Compiler = struct { } } if (to_pop != 0) { - try self.chunk.write(.{.popscope = to_pop}); + try self.getChunk().write(.{ .popscope = to_pop }); + } + }, + + // (if cond then else) + .@"if" => { + if (lst.len != 4) { + return CompileError.InvalidNumberOfArguments; } + + // Generate cond + try self.compileInner(lst[1]); + + // If falsy then jump to else + try self.getChunk().write(.{ .jumpfalse = undefined }); + const jmpthen = self.getChunk().lastInstrRef(); + + try self.compileInner(lst[2]); + + try self.getChunk().write(.{ .jump = undefined }); + const jmpelse = self.getChunk().lastInstrRef(); + + jmpthen.* = .{ .jumpfalse = self.getChunk().nextPc() }; + + try self.compileInner(lst[3]); + jmpelse.* = .{ .jump = self.getChunk().nextPc() }; }, + + // (def! name value) + .@"def!" => { + if (lst.len != 3) { + return CompileError.InvalidNumberOfArguments; + } + + if (!lst[1].isA(.symbol)) { + return CompileError.InvalidArgType; + } + + const sym = try self.getChunk().addConstant(lst[1].ref()); + + try self.compileInner(lst[2]); + + try self.getChunk().write(.{ .define_global = sym }); + }, + + // (fn* (args...) expr) + .@"fn*" => { + if (lst.len != 3) { + return CompileError.InvalidNumberOfArguments; + } + + if (!lst[1].isA(.list)) { + return CompileError.InvalidArgType; + } + + const args = lst[1].val.list; + + for (args) |arg| { + if (!arg.isA(.symbol)) { + return CompileError.InvalidArgType; + } + } + + const arity = args.len; + + var inner = try Compiler.init(self.all, lst[2]); + defer inner.deinit(); + + // Declare the parameters + for (args) |arg| { + const loc: Storage = .{.local = inner.scope_depth}; + inner.scope_depth+=1; + try inner.sym_map.put(arg.val.symbol, loc); + } + + // Now finish compilation + var res = try inner.compile(); + res.arity = arity; + const valres = try v.Value.newFunction(self.all, res); + + const vindex = try self.getChunk().addConstant(valres); + try self.getChunk().write(.{.constant = vindex}); + } } }, + .number, .nil, .true, .false => { + const cst = try self.getChunk().addConstant(val.ref()); + try self.getChunk().write(.{ .constant = cst }); + }, } } - pub fn compile(self: *Self) !*const c.Chunk { + pub fn compile(self: *Self) CompileError!v.Function { try self.compileInner(self.val); - try self.chunk.write(.@"return"); + try self.getChunk().write(.@"return"); - return &self.chunk; + return self.func; } }; diff --git a/src/main.zig b/src/main.zig @@ -6,35 +6,53 @@ const vm = @import("vm.zig"); const cl = @cImport(@cInclude("crossline.h")); const s = @import("scan.zig"); const comp = @import("compile.zig"); +const symintern = @import("symintern.zig"); const GPAll = std.heap.GeneralPurposeAllocator(.{}); pub const std_options = .{ .log_level = std.log.Level.debug, .log_scope_levels = &.{.{ .scope = .read, .level = .err }}, + .logFn = myLogFn, }; +pub fn myLogFn( + comptime level: std.log.Level, + comptime scope: @TypeOf(.EnumLiteral), + comptime format: []const u8, + args: anytype, +) void { + // Ignore all non-error logging from sources other than + // .my_project, .nice_library and the default + const scope_prefix = "(" ++ @tagName(scope) ++ "): "; + + // Print the message to stderr, silently ignoring any errors + std.debug.lockStdErr(); + defer std.debug.unlockStdErr(); + const stderrf = std.io.getStdErr(); + const stderr = stderrf.writer(); + + const config = std.io.tty.detectConfig(stderrf); + + config.setColor(stderr, switch (level) { + .debug => .blue, + .info => .green, + .warn => .orange, + .err => .red, + }) catch return; + stderr.writeAll(comptime level.asText()) catch return; + config.setColor(stderr, .reset) catch return; + stderr.print(scope_prefix ++ format ++ "\n", args) catch return; +} + pub fn main() !void { var gp = GPAll{}; const all = gp.allocator(); defer std.debug.assert(gp.deinit() == .ok); - // Initialize bytecode - // var c = chunk.Chunk.init(all); - // defer c.deinit(); - // - // const cst = try c.addConstant(try value.Value.newInt(all, 50000000)); - // try c.write(.{ .@"constant" = cst }); - // - // const cst2 = try c.addConstant(try value.Value.newInt(all, 439134597143958149358)); - // try c.write(.{ .@"constant" = cst2 }); - // - // try c.write(.negate); - // try c.write(.multiply); - // try c.write(.negate); - // - // try c.write(.@"return"); + symintern.init(all); + defer symintern.deinit(); var v = try vm.VM.init(all); defer v.deinit(); @@ -48,6 +66,9 @@ pub fn main() !void { } else { var buf: [1024:0]u8 = undefined; + + const stdout = std.io.getStdOut(); + while (cl.crossline_readline("> ", &buf, buf.len)) |obuf| { const input = obuf[0..std.mem.len(obuf)]; var reader = try s.Reader.init(input, all); @@ -61,15 +82,17 @@ pub fn main() !void { var compiler = try comp.Compiler.init(all, val); defer compiler.deinit(); - const c = try compiler.compile(); + var c = try compiler.compile(); + defer c.deinit(); std.log.debug("Compiled version of {}", .{val.formatter()}); - c.disassemble(); + c.code.disassemble(); - const result = try v.interpret(c); + const result = try v.interpret(&c); defer result.unref(); - std.log.info("Result: {f}", .{result.formatter()}); std.debug.assert(v.stack.items.len == 0); + + try std.fmt.format(stdout.writer(), "Result: {}\n", .{result.formatter()}); } } diff --git a/src/scan.zig b/src/scan.zig @@ -1,5 +1,6 @@ const std = @import("std"); const v = @import("value.zig"); +const symintern = @import("symintern.zig"); const ascii = std.ascii; @@ -145,8 +146,12 @@ pub const Reader = struct { return if (ueql(sym, "nil")) v.Value.newNil(self.all) + else if (ueql(sym, "true")) + v.Value.newBool(self.all, true) + else if (ueql(sym, "false")) + v.Value.newBool(self.all, false) else - v.Value.newSymbol(self.all, try self.all.dupe(u8, sym)); + v.Value.newSymbol(self.all, try symintern.intern(sym)); }, .cparen, .eof => return error.Unexpected } diff --git a/src/symintern.zig b/src/symintern.zig @@ -0,0 +1,45 @@ +const std = @import("std"); + +const Interner = struct { + all: std.mem.Allocator, + values: std.StringHashMap([]const u8), + + fn init(all: std.mem.Allocator) @This() { + return .{ .all = all, .values = std.StringHashMap([]const u8).init(all) }; + } + + fn intern(self: *@This(), val: []const u8) ![]const u8 { + if (self.values.get(val)) |found| { + return found; + } else { + const copied = try self.all.dupe(u8, val); + try self.values.put(copied, copied); + + return copied; + } + } + + fn deinit(self: *@This()) void { + var iter = self.values.valueIterator(); + + while (iter.next()) |val| { + self.all.free(val.*); + } + + self.values.deinit(); + } +}; + +var interner: Interner = undefined; + +pub fn init(all: std.mem.Allocator) void { + interner = Interner.init(all); +} + +pub fn deinit() void { + interner.deinit(); +} + +pub fn intern(val: []const u8) ![]const u8 { + return interner.intern(val); +} diff --git a/src/value.zig b/src/value.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const c = @import("chunk.zig"); pub const Int = std.math.big.int.Managed; pub const Special = enum { @@ -10,6 +11,30 @@ pub const Special = enum { @"-", @"let*", + @"if", + @"def!", + + @"fn*", +}; + +pub const Function = struct { + const Self = @This(); + + code: c.Chunk, + all: std.mem.Allocator, + arity: usize, + + pub fn init(all: std.mem.Allocator) Self { + return Self { + .code = c.Chunk.init(all), + .all = all, + .arity = 0, + }; + } + + pub fn deinit(self: *Self) void { + self.code.deinit(); + } }; const vallog = std.log.scoped(.val); @@ -19,8 +44,9 @@ pub const Value = struct { const Payload = union(enum) { number: Int, list: []*Value, - symbol: []u8, + symbol: []const u8, special: Special, + function: Function, true, false, nil, @@ -48,7 +74,7 @@ pub const Value = struct { } // orig has to be allocated with all - pub fn newSymbol(all: std.mem.Allocator, orig: []u8) !*Self { + pub fn newSymbol(all: std.mem.Allocator, orig: []const u8) !*Self { return try Self.init(.{ .symbol = orig }, all); } @@ -69,6 +95,10 @@ pub const Value = struct { return try Self.init(if (val) .true else .false, all); } + pub fn newFunction(all: std.mem.Allocator, val: Function) !*Self { + return try Self.init(.{ .function = val }, all); + } + pub fn ref(self: *Self) *Self { self.refcount += 1; return self; @@ -99,8 +129,12 @@ pub const Value = struct { } self.all.free(lst); }, - .symbol => |sym| { - self.all.free(sym); + .symbol => |_| { + // Symbols are interned + // self.all.free(sym); + }, + .function => |*fun| { + fun.deinit(); }, .nil, .special, .true, .false => {}, } @@ -111,6 +145,13 @@ pub const Value = struct { return @as(Tag, self.val) == tag; } + pub fn isFalsy(self: Self) bool { + return switch (self.val) { + .false, .nil => true, + else => false, + }; + } + pub fn formatter(self: *const Self) std.fmt.Formatter(Self.format) { return .{ .data = self }; } @@ -138,6 +179,7 @@ pub const Value = struct { }, .symbol => |sym| try writer.writeAll(sym), .special => |s| try writer.writeAll(@tagName(s)), + .function => |_| try writer.writeAll("<function>"), inline .nil, .true, .false => |_, tag| try writer.writeAll(@tagName(tag)), } } diff --git a/src/vm.zig b/src/vm.zig @@ -7,29 +7,64 @@ const vmlog = std.log.scoped(.vm); // TODO: efficient stack implementation at some point ? const Stack = std.ArrayList(*v.Value); +const VMError = error { + UndefinedSymbol, + InvalidType, + OutOfMemory, + NegativeIntoUnsigned, + TargetTooSmall +}; + +const CallFrame = struct { + ip: []const c.Op, + offset: usize, + func: *const v.Function, +}; + +const CallStack = std.ArrayList(CallFrame); + pub const VM = struct { const Self = @This(); stack: Stack, - cchunk: *const c.Chunk, all: std.mem.Allocator, - ip: []const c.Op, + call_stack: CallStack, + + // Global variables + globals: std.StringHashMap(*v.Value), pub fn init(all: std.mem.Allocator) !Self { - return Self{ .cchunk = undefined, .ip = undefined, .stack = try Stack.initCapacity(all, 256), .all = all }; + return Self{ + .stack = try Stack.initCapacity(all, 256), + .all = all, + .globals = std.StringHashMap(*v.Value).init(all), + .call_stack = CallStack.init(all), + }; } fn push(self: *Self, val: *v.Value) !void { try self.stack.append(val); } - fn pop(self: *Self) !*v.Value { + fn pop(self: *Self) *v.Value { return self.stack.pop(); } - pub fn interpret(self: *Self, chunk: *const c.Chunk) !*v.Value { - self.cchunk = chunk; - self.ip = self.cchunk.instrs.items; + fn peek(self: *Self) *v.Value { + return self.stack.getLast(); + } + + fn currentFrame(self: *Self) *CallFrame { + return &self.call_stack.items[self.call_stack.items.len - 1]; + } + + pub fn interpret(self: *Self, func: *const v.Function) VMError!*v.Value { + std.debug.assert(self.call_stack.items.len == 0); + try self.call_stack.append(.{ + .offset = 0, + .func = func, + .ip = func.code.instrs.items, + }); const start_time = std.time.microTimestamp(); const res = self.run(); @@ -40,8 +75,10 @@ pub const VM = struct { } fn run(self: *Self) !*v.Value { - while (true) : (self.ip = self.ip[1..]) { - const instr = self.ip[0]; + while (true) : (self.currentFrame().ip = self.currentFrame().ip[1..]) { + const cframe = self.currentFrame(); + const instr = cframe.ip[0]; + const cchunk = cframe.func.code; if (comptime std.log.logEnabled(.debug, .vm)) { // TODO: proper logging here @@ -49,39 +86,81 @@ pub const VM = struct { std.debug.print("[{}] ", .{val.formatter()}); } std.debug.print("\n", .{}); - self.cchunk.disassembleInstr(instr); + cchunk.disassembleInstr(instr); } switch (instr) { .constant => |vindex| { - const val = self.cchunk.values.items[vindex]; + const val = cchunk.values.items[vindex]; try self.push(val.ref()); }, - .@"local" => |vindex| { - const val = self.stack.items[vindex]; + .local => |vindex| { + const val = self.stack.items[vindex + cframe.offset]; try self.push(val.ref()); }, + .get_global => |vindex| { + const sym = cchunk.values.items[vindex]; + if (!sym.isA(.symbol)) { + return VMError.InvalidType; + } + + vmlog.debug("Getting {s}", .{sym.val.symbol}); + + if (self.globals.get(sym.val.symbol)) |found| { + try self.push(found.ref()); + } else { + return VMError.UndefinedSymbol; + } + }, + + .define_global => |index| { + const sym = cchunk.values.items[index]; + const val = self.pop(); + + std.debug.assert(sym.isA(.symbol)); + + vmlog.debug("Defining {s}", .{sym.val.symbol}); + try self.globals.put(sym.val.symbol, val); + }, + + .jumpfalse => |offset| { + const val = self.pop(); + defer val.unref(); + + if (val.isFalsy()) { + cframe.ip = cchunk.instrs.items[offset - 1 ..]; + } + }, + .jump => |offset| { + cframe.ip = cchunk.instrs.items[offset - 1 ..]; + }, + .@"return" => { + _ = self.call_stack.pop(); // XXX: no unref because we return - return try self.pop(); + if (self.stack.items.len > 0) { + return self.pop(); + } else { + return try v.Value.newNil(self.all); + } }, - .pop => (try self.pop()).unref(), + .pop => self.pop().unref(), .popscope => |amount| { - const topval = try self.pop(); + const topval = self.pop(); for (0..amount) |_| { - (try self.pop()).unref(); + self.pop().unref(); } try self.push(topval); }, .negate => { - var val = try self.pop(); + var val = self.pop(); defer val.unref(); std.debug.assert(val.isA(.number)); @@ -96,10 +175,10 @@ pub const VM = struct { }, .div => { - var left = try self.pop(); + var left = self.pop(); defer left.unref(); - var right = try self.pop(); + var right = self.pop(); defer right.unref(); var div = try v.Int.init(self.all); @@ -112,10 +191,10 @@ pub const VM = struct { }, inline .add, .multiply, .substract, .pow => |_, tag| { - var left = try self.pop(); + var left = self.pop(); defer left.unref(); - var right = try self.pop(); + var right = self.pop(); defer right.unref(); const lnum = &left.val.number; @@ -131,11 +210,11 @@ pub const VM = struct { .pow => try new.pow(lnum, try rnum.to(u32)), else => { // Unreachable - } + }, } try self.push(try v.Value.newInt(self.all, new)); - } + }, } } } @@ -145,5 +224,14 @@ pub const VM = struct { val.unref(); } self.stack.deinit(); + + var iter = self.globals.iterator(); + + while (iter.next()) |entry| { + entry.value_ptr.*.unref(); + } + self.globals.deinit(); + + self.call_stack.deinit(); } };