commit b6d8ae1d294e540e396b01ab9d88a8375356c2aa
parent 0eb2eea9a25d0b034bbc2082f66e5d75e757c51b
Author: Thomas Vigouroux <thomas.vigouroux@univ-grenoble-alpes.fr>
Date: Thu, 6 Jun 2024 09:45:20 +0200
feat: parsing and compilation
Diffstat:
A | README.md | | | 34 | ++++++++++++++++++++++++++++++++++ |
M | src/chunk.zig | | | 8 | +++++--- |
A | src/compile.zig | | | 53 | +++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | src/main.zig | | | 78 | +++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
M | src/scan.zig | | | 50 | +++++++++++++++++++++++++++++++++++++++++++++++--- |
M | src/value.zig | | | 45 | ++++++++++++++++++++++++++++++++++++--------- |
M | src/vm.zig | | | 71 | +++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
7 files changed, 269 insertions(+), 70 deletions(-)
diff --git a/README.md b/README.md
@@ -0,0 +1,34 @@
+# `zahl`
+
+This is a toy language, made for fun and by following "Crafting
+Interpreters" (especially the part on bytecode intepreters).
+
+## LISP-y
+
+The base language is lisp-like, that for multiple reasons:
+- Easy parsing
+- Macros to simplify writing code
+
+## Integer only
+
+I mainly intend to use that as a playground to implement various
+algorithms on integers (extracted from Numberphile videos), and
+recreational mathematics.
+
+This means that for now the only datatype will be integers, and that
+the standard library will be augmented with "standard" number theory
+functions.
+
+Also, this means that the bytecode will be geared towards that, with
+special instructions for some number theory operations.
+
+## Zig-based
+
+This is just for fun, I like zig, and I like writing software using
+zig.
+
+Zig also has big integers in its standard library, which is neat.
+
+## Reference-counting based
+
+I don't like garbage collection, and I like reference counting.
diff --git a/src/chunk.zig b/src/chunk.zig
@@ -3,14 +3,16 @@ const value = @import("value.zig");
pub const Op = union(enum) {
@"return",
+ pop,
@"constant": usize,
negate,
add,
substract,
-
multiply,
+ pow,
+ div
};
pub const Chunk = struct {
@@ -52,10 +54,10 @@ pub const Chunk = struct {
switch (instr) {
.@"constant" => |vindex| {
const num = self.values.items[vindex];
- std.debug.print("constant {}\n", .{num.val.number});
+ std.debug.print("constant {}\n", .{num.formatter()});
},
- inline .@"return", .negate, .add, .substract, .multiply => |_, tag| {
+ inline .@"return", .negate, .add, .substract, .multiply, .pow, .div, .pop => |_, tag| {
std.debug.print("{s}\n", .{@tagName(tag)});
},
}
diff --git a/src/compile.zig b/src/compile.zig
@@ -0,0 +1,53 @@
+const std = @import("std");
+const v = @import("value.zig");
+const c = @import("chunk.zig");
+
+inline fn ueql(left: []const u8, right: []const u8) bool {
+ return std.mem.eql(u8, left, right);
+}
+
+pub fn compile(val: *v.Value, chunk: *c.Chunk) !void {
+ switch (val.val) {
+ .number, .nil, .symbol => {
+ const cst = try chunk.addConstant(val.ref());
+ try chunk.write(.{ .constant = cst });
+ },
+ .list => |lst| {
+ const fstsym = lst[0].val.symbol;
+ // TODO: maybe this can be something else
+ if (ueql(fstsym, "+")) {
+ try compile(lst[1], chunk);
+ for (lst[2..]) |inner| {
+ try compile(inner, chunk);
+ try chunk.write(.add);
+ }
+ } else if (ueql(fstsym, "*")) {
+ try compile(lst[1], chunk);
+ for (lst[2..]) |inner| {
+ try compile(inner, chunk);
+ try chunk.write(.multiply);
+ }
+ } else if (ueql(fstsym, "-")) {
+ if (lst.len == 2) {
+ try compile(lst[1], chunk);
+ try chunk.write(.negate);
+ } else {
+ try compile(lst[2], chunk);
+ try compile(lst[1], chunk);
+ try chunk.write(.substract);
+ }
+ } else if (ueql(fstsym, "^")) {
+ try compile(lst[2], chunk);
+ try compile(lst[1], chunk);
+ try chunk.write(.pow);
+ } else if (ueql(fstsym, "/")) {
+ try compile(lst[2], chunk);
+ try compile(lst[1], chunk);
+ try chunk.write(.div);
+ try chunk.write(.pop);
+ } else {
+ @panic("TODO");
+ }
+ },
+ }
+}
diff --git a/src/main.zig b/src/main.zig
@@ -5,6 +5,7 @@ const chunk = @import("chunk.zig");
const vm = @import("vm.zig");
const cl = @cImport(@cInclude("crossline.h"));
const s = @import("scan.zig");
+const comp = @import("compile.zig");
const GPAll = std.heap.GeneralPurposeAllocator(.{});
@@ -20,39 +21,58 @@ pub fn main() !void {
defer std.debug.assert(gp.deinit() == .ok);
// Initialize bytecode
- var c = chunk.Chunk.init(all);
- defer c.deinit();
+ // var c = chunk.Chunk.init(all);
+ // defer c.deinit();
+ //
+ // const cst = try c.addConstant(try value.Value.newInt(all, 50000000));
+ // try c.write(.{ .@"constant" = cst });
+ //
+ // const cst2 = try c.addConstant(try value.Value.newInt(all, 439134597143958149358));
+ // try c.write(.{ .@"constant" = cst2 });
+ //
+ // try c.write(.negate);
+ // try c.write(.multiply);
+ // try c.write(.negate);
+ //
+ // try c.write(.@"return");
- const cst = try c.addConstant(try value.Value.newInt(all, 50000000));
- try c.write(.{ .@"constant" = cst });
+ var v = try vm.VM.init(all);
+ defer v.deinit();
- const cst2 = try c.addConstant(try value.Value.newInt(all, 439134597143958149358));
- try c.write(.{ .@"constant" = cst2 });
+ var args = std.process.args();
+ _ = args.next();
- try c.write(.multiply);
+ if (args.next()) |file| {
+ // TODO: file parsing stuff
+ _ = file;
+ } else {
+ var buf: [1024:0]u8 = undefined;
- try c.write(.@"return");
+ while (cl.crossline_readline("> ", &buf, buf.len)) |obuf| {
+ const input = obuf[0..std.mem.len(obuf)];
+ var reader = try s.Reader.init(input, all);
- var v = try vm.VM.init(all);
- defer v.deinit();
+ var val = reader.readValue() catch {
+ std.log.err("Parsing error", .{});
+ continue;
+ };
+ defer val.unref();
- // var args = std.process.args();
- // _ = args.next();
- //
- // if (args.next()) |file| {
- // // TODO: file parsing stuff
- // _ = file;
- // } else {
- // var buf: [1024:0]u8 = undefined;
- //
- // while (cl.crossline_readline("> ", &buf, buf.len)) |obuf| {
- // const input = obuf[0..std.mem.len(obuf)];
- // var reader = try s.Reader.init(input, all);
- //
- // while (reader.peekTok() != .eof) : (try reader.nextTok()) {}
- // }
- // }
- //
- //
- try v.interpret(&c);
+ var c = chunk.Chunk.init(all);
+ defer c.deinit();
+
+ try comp.compile(val, &c);
+ try c.write(.@"return");
+
+ std.log.debug("Compiled version of {}", .{val.formatter()});
+ c.disassemble();
+
+ const result = try v.interpret(&c);
+ defer result.unref();
+ std.log.info("Result: {f}", .{result.formatter()});
+ }
+ }
+
+
+ // try v.interpret(&c);
}
diff --git a/src/scan.zig b/src/scan.zig
@@ -51,7 +51,7 @@ pub const Reader = struct {
}
fn isSymChar(char: u8) bool {
- return switch (char) {
+ return !ascii.isWhitespace(char) and switch (char) {
'(', ')', ';' => false,
else => true,
};
@@ -69,14 +69,21 @@ pub const Reader = struct {
}
self.curtok = switch (self.input[self.curpos]) {
- '(' => onechar: {
+ inline '(', ')' => |val| onechar: {
self.curpos += 1;
- break :onechar .oparen;
+ break :onechar switch (val) {
+ '(' => .oparen,
+ ')' => .cparen,
+ else => @panic("Unreachable")
+ };
},
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => .{ .number = try self.tokInt() },
else => sym: {
var offset = self.curpos;
while (offset < self.input.len and isSymChar(self.input[offset])) : (offset += 1) {}
+
+ // Check to avoid infinite loop
+ std.debug.assert(offset > self.curpos);
const tok = .{ .symbol = self.input[self.curpos..offset] };
self.curpos = offset;
break :sym tok;
@@ -98,4 +105,41 @@ pub const Reader = struct {
return ret;
}
+
+ // Reader functions
+ pub fn readValue(self: *Self) !*v.Value {
+ switch (self.peekTok()) {
+ .oparen => {
+ var acc = std.ArrayList(*v.Value).init(self.all);
+ defer acc.deinit();
+ errdefer {
+ for (acc.items) |val| {
+ val.unref();
+ }
+ }
+
+ try self.nextTok();
+ while (self.peekTok() != .cparen) : (try self.nextTok()) {
+ if (self.peekTok() == .eof) {
+ return error.Eof;
+ }
+
+ try acc.append(try self.readValue());
+ }
+
+ return v.Value.newList(self.all, try acc.toOwnedSlice());
+ },
+ .number => |num| {
+ return v.Value.newInt(self.all, num);
+ },
+ .symbol => |sym| {
+ if (std.mem.eql(u8, sym, "nil")) {
+ return v.Value.newNil(self.all);
+ } else {
+ return v.Value.newSymbol(self.all, try self.all.dupe(u8, sym));
+ }
+ },
+ .cparen, .eof => return error.Unexpected
+ }
+ }
};
diff --git a/src/value.zig b/src/value.zig
@@ -7,9 +7,12 @@ pub const Value = struct {
const Payload = union(enum) {
number: Int,
- list: []Value,
+ list: []*Value,
+ symbol: []u8,
+ nil,
};
+ // XXX: It is very important that payload is not modified !
val: Payload,
all: std.mem.Allocator,
refcount: usize,
@@ -26,9 +29,22 @@ pub const Value = struct {
}
pub fn newInt(all: std.mem.Allocator, val: anytype) !*Self {
- var payload = try Int.initSet(all, val);
- errdefer payload.deinit();
- return try Self.init(.{ .number = payload }, all);
+ const payload = if (@TypeOf(val) == Int) val else (try Int.initSet(all, val));
+ return try Self.init(.{ .number = payload }, all);
+ }
+
+ // orig has to be allocated with all
+ pub fn newSymbol(all: std.mem.Allocator, orig: []u8) !*Self {
+ return try Self.init(.{ .symbol = orig }, all);
+ }
+
+ // lst has to be allocated with all
+ pub fn newList(all: std.mem.Allocator, lst: []*Value) !*Self {
+ return try Self.init(.{ .list = lst }, all);
+ }
+
+ pub fn newNil(all: std.mem.Allocator) !*Self {
+ return try Self.init(.nil, all);
}
pub fn ref(self: *Self) *Self {
@@ -56,11 +72,15 @@ pub const Value = struct {
num.deinit();
},
.list => |lst| {
- for (lst) |*val| {
+ for (lst) |val| {
val.unref();
}
self.all.free(lst);
- }
+ },
+ .symbol => |sym| {
+ self.all.free(sym);
+ },
+ .nil => {},
}
self.all.destroy(self);
}
@@ -76,7 +96,13 @@ pub const Value = struct {
fn format(self: *const Self, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
switch (self.val) {
.number => |num| {
- try std.fmt.format(writer, "{}", .{num});
+ if (fmt.len > 0 and fmt[0] == 'f') {
+ const digits = try num.toString(self.all, 10, std.fmt.Case.upper);
+ defer self.all.free(digits);
+ try writer.writeAll(digits);
+ } else {
+ try std.fmt.format(writer, "{}", .{num});
+ }
},
.list => |lst| {
try writer.writeByte('(');
@@ -87,8 +113,9 @@ pub const Value = struct {
try sub.format(fmt, options, writer);
}
try writer.writeByte(')');
- }
+ },
+ .symbol => |sym| try writer.writeAll(sym),
+ .nil => try writer.writeAll("nil"),
}
}
};
-
diff --git a/src/vm.zig b/src/vm.zig
@@ -27,13 +27,19 @@ pub const VM = struct {
return self.stack.pop();
}
- pub fn interpret(self: *Self, chunk: *const c.Chunk) !void {
+ pub fn interpret(self: *Self, chunk: *const c.Chunk) !*v.Value {
self.cchunk = chunk;
self.ip = self.cchunk.instrs.items;
- return self.run();
+
+ const start_time = std.time.microTimestamp();
+ const res = self.run();
+ const end_time = std.time.microTimestamp();
+ vmlog.debug("Eval in {} us", .{end_time - start_time});
+
+ return res;
}
- fn run(self: *Self) !void {
+ fn run(self: *Self) !*v.Value {
while (true) : (self.ip = self.ip[1..]) {
const instr = self.ip[0];
@@ -47,36 +53,55 @@ pub const VM = struct {
}
switch (instr) {
+ // .. -> ..[c]
.constant => |vindex| {
const val = self.cchunk.values.items[vindex];
try self.push(val.ref());
},
.@"return" => {
- var val = try self.pop();
- defer val.unref();
-
- const stdout = std.io.getStdOut();
- try std.fmt.format(stdout.writer(), "{}\n", .{val.formatter()});
- return;
+ // XXX: no unref because we return
+ return try self.pop();
},
+ .pop => (try self.pop()).unref(),
+
+ // ..[a] -> ..[-a]
.negate => {
var val = try self.pop();
defer val.unref();
std.debug.assert(val.isA(.number));
- var new = try v.Value.newInt(self.all, 0);
- errdefer new.unref();
+ var n = try v.Int.cloneWithDifferentAllocator(val.val.number, self.all);
+ n.negate();
- try new.val.number.copy(val.val.number.toConst());
- new.val.number.negate();
+ var new = try v.Value.newInt(self.all, n);
+ errdefer new.unref();
try self.push(new);
},
- inline .add, .multiply, .substract => |_, tag| {
+ // Pops 2 pushes 2
+ // ..[r][l] -> ..[l/r][l%r]
+ .div => {
+ var left = try self.pop();
+ defer left.unref();
+
+ var right = try self.pop();
+ defer right.unref();
+
+ var div = try v.Int.init(self.all);
+ var rem = try v.Int.init(self.all);
+
+ try div.divTrunc(&rem, &left.val.number, &right.val.number);
+
+ try self.push(try v.Value.newInt(self.all, div));
+ try self.push(try v.Value.newInt(self.all, rem));
+ },
+
+ // ..[r][l] -> ..[l op r]
+ inline .add, .multiply, .substract, .pow => |_, tag| {
var left = try self.pop();
defer left.unref();
@@ -86,25 +111,19 @@ pub const VM = struct {
const lnum = &left.val.number;
const rnum = &right.val.number;
- var new = try v.Value.newInt(self.all, 0);
- errdefer new.unref();
+ var new = try v.Int.init(self.all);
switch (tag) {
- .add => {
- try new.val.number.add(lnum, rnum);
- },
- .substract => {
- try new.val.number.sub(lnum, rnum);
- },
- .multiply => {
- try new.val.number.mul(lnum, rnum);
- },
+ .add => try new.add(lnum, rnum),
+ .substract => try new.sub(lnum, rnum),
+ .multiply => try new.mul(lnum, rnum),
+ .pow => try new.pow(lnum, try rnum.to(u32)),
else => {
// Unreachable
}
}
- try self.push(new);
+ try self.push(try v.Value.newInt(self.all, new));
}
}
}