zahl

Log | Files | Refs | README

scan.zig (5471B)


      1 const std = @import("std");
      2 const v = @import("value.zig");
      3 const symintern = @import("symintern.zig");
      4 
      5 const ascii = std.ascii;
      6 
      7 const readlog = std.log.scoped(.read);
      8 
      9 inline fn ueql(left: []const u8, right: []const u8) bool {
     10     return std.mem.eql(u8, left, right);
     11 }
     12 
     13 pub const Reader = struct {
     14     const Self = @This();
     15 
     16     const Token = union(enum) {
     17         number: v.Int,
     18         symbol: []const u8,
     19         oparen,
     20         cparen,
     21         eof,
     22     };
     23 
     24     input: []const u8,
     25     curpos: usize,
     26     curtok: Token,
     27     all: std.mem.Allocator,
     28 
     29     pub fn init(input: []const u8, all: std.mem.Allocator, err_offset: *?usize) !Self {
     30         var self: Self = .{
     31             .input = input,
     32             .curpos = 0,
     33             .curtok = undefined,
     34             .all = all,
     35         };
     36 
     37         try self.nextTok(err_offset);
     38 
     39         return self;
     40     }
     41 
     42     fn skipWhitespaceComment(self: *Self) void {
     43         while (self.curpos < self.input.len and
     44             (ascii.isWhitespace(self.input[self.curpos]) or
     45             self.input[self.curpos] == ';')) : (self.curpos += 1)
     46         {
     47             readlog.debug("Skip over offset {} char '{c}'", .{self.curpos, self.input[self.curpos]});
     48             // skip comments
     49             if (self.input[self.curpos] == ';') {
     50                 const endcomment = std.mem.indexOfScalar(u8, self.input[self.curpos..], '\n') orelse {
     51                     self.curpos = self.input.len;
     52                     break;
     53                 };
     54                 self.curpos += endcomment - 1;
     55             }
     56         }
     57     }
     58 
     59     fn isSymChar(char: u8) bool {
     60         return !ascii.isWhitespace(char) and switch (char) {
     61             '(', ')', ';' => false,
     62             else => true,
     63         };
     64     }
     65 
     66     pub inline fn peekTok(self: *const Reader) Token {
     67         return self.curtok;
     68     }
     69 
     70     pub fn nextTok(self: *Reader, err_offset: *?usize) !void {
     71         self.skipWhitespaceComment();
     72         if (self.curpos >= self.input.len) {
     73             self.curtok = .eof;
     74             return;
     75         }
     76 
     77         errdefer if (err_offset.* == null) {
     78             err_offset.* = self.curpos;
     79         };
     80 
     81         self.curtok = switch (self.input[self.curpos]) {
     82             inline '(', ')' => |val| onechar: {
     83                 self.curpos += 1;
     84                 break :onechar switch (val) {
     85                     '(' => .oparen,
     86                     ')' => .cparen,
     87                     else => @compileError("Unreachable")
     88                 };
     89             },
     90             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => .{ .number = try self.tokInt(err_offset) },
     91             else => sym: {
     92                 var offset = self.curpos;
     93                 while (offset < self.input.len and isSymChar(self.input[offset])) : (offset += 1) {}
     94 
     95                 // Check to avoid infinite loop
     96                 std.debug.assert(offset > self.curpos);
     97                 const tok = .{ .symbol = self.input[self.curpos..offset] };
     98                 self.curpos = offset;
     99                 break :sym tok;
    100             },
    101         };
    102         readlog.debug("Curtok: {}", .{self.curtok});
    103     }
    104 
    105     fn tokInt(self: *Self, err_offset: *?usize) !v.Int {
    106         var offset = self.curpos;
    107         errdefer err_offset.* = offset;
    108 
    109         // TODO: hex/binary...
    110         while (offset < self.input.len and ascii.isDigit(self.input[offset])) : (offset += 1) {}
    111 
    112         var ret = try v.Int.init(self.all);
    113         try ret.setString(10, self.input[self.curpos..offset]);
    114 
    115         self.curpos = offset;
    116 
    117         return ret;
    118     }
    119 
    120     // Reader functions
    121     pub fn readValue(self: *Self, err_offset: *?usize) !*v.Value {
    122         errdefer if (err_offset.* == null) {
    123             err_offset.* = self.curpos;
    124         };
    125 
    126         switch (self.peekTok()) {
    127             .oparen => {
    128                 readlog.debug("Start parse list", .{});
    129                 var acc = std.ArrayList(*v.Value).init(self.all);
    130                 defer acc.deinit();
    131                 errdefer {
    132                     for (acc.items) |val| {
    133                         val.unref();
    134                     }
    135                 }
    136 
    137                 try self.nextTok(err_offset);
    138                 while (self.peekTok() != .cparen) {
    139                     if (self.peekTok() == .eof) {
    140                         return error.Eof;
    141                     }
    142 
    143                     try acc.append(try self.readValue(err_offset));
    144                 }
    145                 // Consume the closing parenthesis
    146                 try self.nextTok(err_offset);
    147 
    148                 return v.Value.newList(self.all, try acc.toOwnedSlice());
    149             },
    150             .number => |num| {
    151                 try self.nextTok(err_offset);
    152                 return v.Value.newInt(self.all, num);
    153             },
    154             .symbol => |sym| {
    155                 try self.nextTok(err_offset);
    156 
    157                 inline for (comptime std.enums.values(v.Special)) |tag| {
    158                     if (ueql(sym, @tagName(tag))) {
    159                         return v.Value.newSpecial(self.all, tag);
    160                     }
    161                 }
    162 
    163                 return if (ueql(sym, "nil"))
    164                     v.Value.newNil(self.all)
    165                 else if (ueql(sym, "true"))
    166                     v.Value.newBool(self.all, true)
    167                 else if (ueql(sym, "false"))
    168                     v.Value.newBool(self.all, false)
    169                 else
    170                     v.Value.newSymbol(self.all, try symintern.intern(sym));
    171             },
    172             .cparen, .eof => return error.Eof
    173         }
    174     }
    175 };