zahl

Log | Files | Refs | README

commit b6d8ae1d294e540e396b01ab9d88a8375356c2aa
parent 0eb2eea9a25d0b034bbc2082f66e5d75e757c51b
Author: Thomas Vigouroux <thomas.vigouroux@univ-grenoble-alpes.fr>
Date:   Thu,  6 Jun 2024 09:45:20 +0200

feat: parsing and compilation

Diffstat:
AREADME.md | 34++++++++++++++++++++++++++++++++++
Msrc/chunk.zig | 8+++++---
Asrc/compile.zig | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/main.zig | 78+++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Msrc/scan.zig | 50+++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/value.zig | 45++++++++++++++++++++++++++++++++++++---------
Msrc/vm.zig | 71+++++++++++++++++++++++++++++++++++++++++++++--------------------------
7 files changed, 269 insertions(+), 70 deletions(-)

diff --git a/README.md b/README.md @@ -0,0 +1,34 @@ +# `zahl` + +This is a toy language, made for fun and by following "Crafting +Interpreters" (especially the part on bytecode intepreters). + +## LISP-y + +The base language is lisp-like, that for multiple reasons: +- Easy parsing +- Macros to simplify writing code + +## Integer only + +I mainly intend to use that as a playground to implement various +algorithms on integers (extracted from Numberphile videos), and +recreational mathematics. + +This means that for now the only datatype will be integers, and that +the standard library will be augmented with "standard" number theory +functions. + +Also, this means that the bytecode will be geared towards that, with +special instructions for some number theory operations. + +## Zig-based + +This is just for fun, I like zig, and I like writing software using +zig. + +Zig also has big integers in its standard library, which is neat. + +## Reference-counting based + +I don't like garbage collection, and I like reference counting. diff --git a/src/chunk.zig b/src/chunk.zig @@ -3,14 +3,16 @@ const value = @import("value.zig"); pub const Op = union(enum) { @"return", + pop, @"constant": usize, negate, add, substract, - multiply, + pow, + div }; pub const Chunk = struct { @@ -52,10 +54,10 @@ pub const Chunk = struct { switch (instr) { .@"constant" => |vindex| { const num = self.values.items[vindex]; - std.debug.print("constant {}\n", .{num.val.number}); + std.debug.print("constant {}\n", .{num.formatter()}); }, - inline .@"return", .negate, .add, .substract, .multiply => |_, tag| { + inline .@"return", .negate, .add, .substract, .multiply, .pow, .div, .pop => |_, tag| { std.debug.print("{s}\n", .{@tagName(tag)}); }, } diff --git a/src/compile.zig b/src/compile.zig @@ -0,0 +1,53 @@ +const std = @import("std"); +const v = @import("value.zig"); +const c = @import("chunk.zig"); + +inline fn ueql(left: []const u8, right: []const u8) bool { + return std.mem.eql(u8, left, right); +} + +pub fn compile(val: *v.Value, chunk: *c.Chunk) !void { + switch (val.val) { + .number, .nil, .symbol => { + const cst = try chunk.addConstant(val.ref()); + try chunk.write(.{ .constant = cst }); + }, + .list => |lst| { + const fstsym = lst[0].val.symbol; + // TODO: maybe this can be something else + if (ueql(fstsym, "+")) { + try compile(lst[1], chunk); + for (lst[2..]) |inner| { + try compile(inner, chunk); + try chunk.write(.add); + } + } else if (ueql(fstsym, "*")) { + try compile(lst[1], chunk); + for (lst[2..]) |inner| { + try compile(inner, chunk); + try chunk.write(.multiply); + } + } else if (ueql(fstsym, "-")) { + if (lst.len == 2) { + try compile(lst[1], chunk); + try chunk.write(.negate); + } else { + try compile(lst[2], chunk); + try compile(lst[1], chunk); + try chunk.write(.substract); + } + } else if (ueql(fstsym, "^")) { + try compile(lst[2], chunk); + try compile(lst[1], chunk); + try chunk.write(.pow); + } else if (ueql(fstsym, "/")) { + try compile(lst[2], chunk); + try compile(lst[1], chunk); + try chunk.write(.div); + try chunk.write(.pop); + } else { + @panic("TODO"); + } + }, + } +} diff --git a/src/main.zig b/src/main.zig @@ -5,6 +5,7 @@ const chunk = @import("chunk.zig"); const vm = @import("vm.zig"); const cl = @cImport(@cInclude("crossline.h")); const s = @import("scan.zig"); +const comp = @import("compile.zig"); const GPAll = std.heap.GeneralPurposeAllocator(.{}); @@ -20,39 +21,58 @@ pub fn main() !void { defer std.debug.assert(gp.deinit() == .ok); // Initialize bytecode - var c = chunk.Chunk.init(all); - defer c.deinit(); + // var c = chunk.Chunk.init(all); + // defer c.deinit(); + // + // const cst = try c.addConstant(try value.Value.newInt(all, 50000000)); + // try c.write(.{ .@"constant" = cst }); + // + // const cst2 = try c.addConstant(try value.Value.newInt(all, 439134597143958149358)); + // try c.write(.{ .@"constant" = cst2 }); + // + // try c.write(.negate); + // try c.write(.multiply); + // try c.write(.negate); + // + // try c.write(.@"return"); - const cst = try c.addConstant(try value.Value.newInt(all, 50000000)); - try c.write(.{ .@"constant" = cst }); + var v = try vm.VM.init(all); + defer v.deinit(); - const cst2 = try c.addConstant(try value.Value.newInt(all, 439134597143958149358)); - try c.write(.{ .@"constant" = cst2 }); + var args = std.process.args(); + _ = args.next(); - try c.write(.multiply); + if (args.next()) |file| { + // TODO: file parsing stuff + _ = file; + } else { + var buf: [1024:0]u8 = undefined; - try c.write(.@"return"); + while (cl.crossline_readline("> ", &buf, buf.len)) |obuf| { + const input = obuf[0..std.mem.len(obuf)]; + var reader = try s.Reader.init(input, all); - var v = try vm.VM.init(all); - defer v.deinit(); + var val = reader.readValue() catch { + std.log.err("Parsing error", .{}); + continue; + }; + defer val.unref(); - // var args = std.process.args(); - // _ = args.next(); - // - // if (args.next()) |file| { - // // TODO: file parsing stuff - // _ = file; - // } else { - // var buf: [1024:0]u8 = undefined; - // - // while (cl.crossline_readline("> ", &buf, buf.len)) |obuf| { - // const input = obuf[0..std.mem.len(obuf)]; - // var reader = try s.Reader.init(input, all); - // - // while (reader.peekTok() != .eof) : (try reader.nextTok()) {} - // } - // } - // - // - try v.interpret(&c); + var c = chunk.Chunk.init(all); + defer c.deinit(); + + try comp.compile(val, &c); + try c.write(.@"return"); + + std.log.debug("Compiled version of {}", .{val.formatter()}); + c.disassemble(); + + const result = try v.interpret(&c); + defer result.unref(); + std.log.info("Result: {f}", .{result.formatter()}); + } + } + + + // try v.interpret(&c); } diff --git a/src/scan.zig b/src/scan.zig @@ -51,7 +51,7 @@ pub const Reader = struct { } fn isSymChar(char: u8) bool { - return switch (char) { + return !ascii.isWhitespace(char) and switch (char) { '(', ')', ';' => false, else => true, }; @@ -69,14 +69,21 @@ pub const Reader = struct { } self.curtok = switch (self.input[self.curpos]) { - '(' => onechar: { + inline '(', ')' => |val| onechar: { self.curpos += 1; - break :onechar .oparen; + break :onechar switch (val) { + '(' => .oparen, + ')' => .cparen, + else => @panic("Unreachable") + }; }, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => .{ .number = try self.tokInt() }, else => sym: { var offset = self.curpos; while (offset < self.input.len and isSymChar(self.input[offset])) : (offset += 1) {} + + // Check to avoid infinite loop + std.debug.assert(offset > self.curpos); const tok = .{ .symbol = self.input[self.curpos..offset] }; self.curpos = offset; break :sym tok; @@ -98,4 +105,41 @@ pub const Reader = struct { return ret; } + + // Reader functions + pub fn readValue(self: *Self) !*v.Value { + switch (self.peekTok()) { + .oparen => { + var acc = std.ArrayList(*v.Value).init(self.all); + defer acc.deinit(); + errdefer { + for (acc.items) |val| { + val.unref(); + } + } + + try self.nextTok(); + while (self.peekTok() != .cparen) : (try self.nextTok()) { + if (self.peekTok() == .eof) { + return error.Eof; + } + + try acc.append(try self.readValue()); + } + + return v.Value.newList(self.all, try acc.toOwnedSlice()); + }, + .number => |num| { + return v.Value.newInt(self.all, num); + }, + .symbol => |sym| { + if (std.mem.eql(u8, sym, "nil")) { + return v.Value.newNil(self.all); + } else { + return v.Value.newSymbol(self.all, try self.all.dupe(u8, sym)); + } + }, + .cparen, .eof => return error.Unexpected + } + } }; diff --git a/src/value.zig b/src/value.zig @@ -7,9 +7,12 @@ pub const Value = struct { const Payload = union(enum) { number: Int, - list: []Value, + list: []*Value, + symbol: []u8, + nil, }; + // XXX: It is very important that payload is not modified ! val: Payload, all: std.mem.Allocator, refcount: usize, @@ -26,9 +29,22 @@ pub const Value = struct { } pub fn newInt(all: std.mem.Allocator, val: anytype) !*Self { - var payload = try Int.initSet(all, val); - errdefer payload.deinit(); - return try Self.init(.{ .number = payload }, all); + const payload = if (@TypeOf(val) == Int) val else (try Int.initSet(all, val)); + return try Self.init(.{ .number = payload }, all); + } + + // orig has to be allocated with all + pub fn newSymbol(all: std.mem.Allocator, orig: []u8) !*Self { + return try Self.init(.{ .symbol = orig }, all); + } + + // lst has to be allocated with all + pub fn newList(all: std.mem.Allocator, lst: []*Value) !*Self { + return try Self.init(.{ .list = lst }, all); + } + + pub fn newNil(all: std.mem.Allocator) !*Self { + return try Self.init(.nil, all); } pub fn ref(self: *Self) *Self { @@ -56,11 +72,15 @@ pub const Value = struct { num.deinit(); }, .list => |lst| { - for (lst) |*val| { + for (lst) |val| { val.unref(); } self.all.free(lst); - } + }, + .symbol => |sym| { + self.all.free(sym); + }, + .nil => {}, } self.all.destroy(self); } @@ -76,7 +96,13 @@ pub const Value = struct { fn format(self: *const Self, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { switch (self.val) { .number => |num| { - try std.fmt.format(writer, "{}", .{num}); + if (fmt.len > 0 and fmt[0] == 'f') { + const digits = try num.toString(self.all, 10, std.fmt.Case.upper); + defer self.all.free(digits); + try writer.writeAll(digits); + } else { + try std.fmt.format(writer, "{}", .{num}); + } }, .list => |lst| { try writer.writeByte('('); @@ -87,8 +113,9 @@ pub const Value = struct { try sub.format(fmt, options, writer); } try writer.writeByte(')'); - } + }, + .symbol => |sym| try writer.writeAll(sym), + .nil => try writer.writeAll("nil"), } } }; - diff --git a/src/vm.zig b/src/vm.zig @@ -27,13 +27,19 @@ pub const VM = struct { return self.stack.pop(); } - pub fn interpret(self: *Self, chunk: *const c.Chunk) !void { + pub fn interpret(self: *Self, chunk: *const c.Chunk) !*v.Value { self.cchunk = chunk; self.ip = self.cchunk.instrs.items; - return self.run(); + + const start_time = std.time.microTimestamp(); + const res = self.run(); + const end_time = std.time.microTimestamp(); + vmlog.debug("Eval in {} us", .{end_time - start_time}); + + return res; } - fn run(self: *Self) !void { + fn run(self: *Self) !*v.Value { while (true) : (self.ip = self.ip[1..]) { const instr = self.ip[0]; @@ -47,36 +53,55 @@ pub const VM = struct { } switch (instr) { + // .. -> ..[c] .constant => |vindex| { const val = self.cchunk.values.items[vindex]; try self.push(val.ref()); }, .@"return" => { - var val = try self.pop(); - defer val.unref(); - - const stdout = std.io.getStdOut(); - try std.fmt.format(stdout.writer(), "{}\n", .{val.formatter()}); - return; + // XXX: no unref because we return + return try self.pop(); }, + .pop => (try self.pop()).unref(), + + // ..[a] -> ..[-a] .negate => { var val = try self.pop(); defer val.unref(); std.debug.assert(val.isA(.number)); - var new = try v.Value.newInt(self.all, 0); - errdefer new.unref(); + var n = try v.Int.cloneWithDifferentAllocator(val.val.number, self.all); + n.negate(); - try new.val.number.copy(val.val.number.toConst()); - new.val.number.negate(); + var new = try v.Value.newInt(self.all, n); + errdefer new.unref(); try self.push(new); }, - inline .add, .multiply, .substract => |_, tag| { + // Pops 2 pushes 2 + // ..[r][l] -> ..[l/r][l%r] + .div => { + var left = try self.pop(); + defer left.unref(); + + var right = try self.pop(); + defer right.unref(); + + var div = try v.Int.init(self.all); + var rem = try v.Int.init(self.all); + + try div.divTrunc(&rem, &left.val.number, &right.val.number); + + try self.push(try v.Value.newInt(self.all, div)); + try self.push(try v.Value.newInt(self.all, rem)); + }, + + // ..[r][l] -> ..[l op r] + inline .add, .multiply, .substract, .pow => |_, tag| { var left = try self.pop(); defer left.unref(); @@ -86,25 +111,19 @@ pub const VM = struct { const lnum = &left.val.number; const rnum = &right.val.number; - var new = try v.Value.newInt(self.all, 0); - errdefer new.unref(); + var new = try v.Int.init(self.all); switch (tag) { - .add => { - try new.val.number.add(lnum, rnum); - }, - .substract => { - try new.val.number.sub(lnum, rnum); - }, - .multiply => { - try new.val.number.mul(lnum, rnum); - }, + .add => try new.add(lnum, rnum), + .substract => try new.sub(lnum, rnum), + .multiply => try new.mul(lnum, rnum), + .pow => try new.pow(lnum, try rnum.to(u32)), else => { // Unreachable } } - try self.push(new); + try self.push(try v.Value.newInt(self.all, new)); } } }