From fc65b1c3b2eae3e550653fa0583bd9f4e652e9d0 Mon Sep 17 00:00:00 2001 From: Masaya Tojo Date: Fri, 2 Apr 2021 01:41:15 +0900 Subject: Add acl2 lexer. * language/acl2/lexer.scm: New file. --- language/acl2/lexer.scm | 194 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 language/acl2/lexer.scm diff --git a/language/acl2/lexer.scm b/language/acl2/lexer.scm new file mode 100644 index 0000000..af6fb3e --- /dev/null +++ b/language/acl2/lexer.scm @@ -0,0 +1,194 @@ +;;; Guile-ACL2 --- Guile's ACL2 compiler +;;; Copyright © 2021 Masaya Tojo +;;; +;;; This file is part of Guile-ACL2. +;;; +;;; Guile-ACL2 is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU Lesser General Public License as +;;; published by the Free Software Foundation; either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; Guile-ACL2 is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with Guile-ACL2. If not, see +;;; . + +(define-module (language acl2 lexer) + #:use-module (ice-9 control) + #:use-module (srfi srfi-8) + #:use-module (srfi srfi-9) + #:use-module (srfi srfi-14) + #:export (lexer)) + +(define char-set:acl2 char-set:ascii) + +(define char-set:acl2-digit + (char-set-intersection char-set:acl2 char-set:digit)) + +(define char-set:acl2-letter + (char-set-intersection char-set:acl2 char-set:letter)) + +(define char-set:acl2-whitespace + (char-set-intersection char-set:acl2 char-set:whitespace)) + +(define char-set:acl2-paren-open + (char-set #\()) + +(define char-set:acl2-paren-close + (char-set #\))) + +(define char-set:acl2-quote + (char-set #\')) + +(define char-set:acl2-quasiquote + (char-set #\`)) + +(define char-set:acl2-unquote + (char-set #\,)) + +(define char-set:acl2-double-quote + (char-set #\")) + +(define char-set:acl2-separator + (char-set-union char-set:acl2-whitespace + char-set:acl2-paren-open + char-set:acl2-paren-close + char-set:acl2-quote + char-set:acl2-double-quote + char-set:acl2-quasiquote + char-set:acl2-unquote)) + +(define char-set:acl2-dot + (char-set #\.)) + +(define char-set:acl2-escape + (char-set #\\)) + +(define (skip-whitespaces p) + (let ((c (peek-char p))) + (cond + ((char-set-contains? char-set:acl2-whitespace c) + (read-char p) + (skip-whitespaces p)) + (else + (lex p))))) + +(define (next v f) + (shift k (values v + (lambda () + (reset (f) (k #f)))))) + +(define (set-acl2-source-properties! p obj) + (let ((out (open-output-string))) + (set-source-property! obj 'filename (if (file-port? p) + (port-filename p) + #f)) + (set-source-property! obj 'line (1+ (port-line p))) + (set-source-property! obj 'column (1+ (port-column p))))) + +(define (open-output-string-source p) + (let ((out (open-output-string))) + (set-acl2-source-properties! p out) + out)) + +(define (token-source p token-symbol) + (let ((token (cons token-symbol #f))) + (set-acl2-source-properties! p token) + token)) + +(define* (lex-numeric-or-symbol p #:optional (out (open-output-string-source p))) + (let ((c (peek-char p))) + (cond + ((or (eof-object? c) + (char-set-contains? char-set:acl2-separator c)) + (next (cons-source out ' (get-output-string out)) + (lambda () (lex p)))) + ((char-set-contains? char-set:digit c) + (write-char (read-char p) out) + (lex-numeric-or-symbol p out)) + (else + (lex-symbol p out))))) + +(define* (lex-symbol p #:optional (out (open-output-string-source p))) + (let ((c (peek-char p))) + (cond + ((or (eof-object? c) + (char-set-contains? char-set:acl2-separator c)) + (next (cons-source out ' (get-output-string out)) + (lambda () (lex p)))) + ((char-set-contains? char-set:acl2-escape c) + (read-char p) + (let ((c2 (peek-char p))) + (unless (eof-object? c2) + (write-char (read-char p) out)) + (lex-symbol p out))) + (else + (write-char (read-char p) out) + (lex-symbol p out))))) + +(define* (lex-string p #:optional (out (open-output-string-source p))) + (let ((c (peek-char p))) + (cond + ((eof-object? c) + (next (cons-source out ' (get-output-string out)) + (lambda () (lex p)))) + ((char-set-contains? char-set:acl2-double-quote c) + (display (read-char p)) + (newline) + (next (cons-source out ' (get-output-string out)) + (lambda () (lex p)))) + ((char-set-contains? char-set:acl2-escape c) + (read-char p) + (let ((c2 (peek-char p))) + (unless (eof-object? c2) + (write-char (read-char p) out)) + (lex-string p out))) + (else + (write-char (read-char p) out) + (lex-string p out))))) + +(define (lex p) + (let ((c (peek-char p))) + (cond + ((eof-object? c) + (next (token-source p ') + (lambda () (lex p)))) + ((char-set-contains? char-set:acl2-whitespace c) + (skip-whitespaces p)) + ((char-set-contains? char-set:acl2-digit c) + (lex-numeric-or-symbol p)) + ((char-set-contains? char-set:acl2-double-quote c) + (read-char p) + (lex-string p)) + ((char-set-contains? char-set:acl2-paren-open c) + (next (token-source p ') + (lambda () (read-char p) (lex p)))) + ((char-set-contains? char-set:acl2-paren-close c) + (next (token-source p ') + (lambda () (read-char p) (lex p)))) + ((char-set-contains? char-set:acl2-quote c) + (next (token-source p ') + (lambda () (read-char p) (lex p)))) + ((char-set-contains? char-set:acl2-quasiquote c) + (next (token-source p ') + (lambda () (read-char p) (lex p)))) + ((char-set-contains? char-set:acl2-unquote c) + (next (token-source p ') + (lambda () (read-char p) (lex p)))) + ((char-set-contains? char-set:acl2-dot c) + (read-char) + (let ((c2 (peek-char c))) + (if (char-set-contains? char-set:acl2-separator c2) + (next (token-source p ') + (lambda () (lex p))) + (let ((out (open-output-string-source p))) + (write-char c out) + (lex-symbol p out))))) + (else (lex-symbol p))))) + +(define (lexer p) + (lambda () (reset (lex p)))) -- cgit v1.2.3