aboutsummaryrefslogtreecommitdiff
;;; Guile-ACL2 --- Guile's ACL2 compiler
;;; Copyright © 2021 Masaya Tojo <masaya@tojo.tokyo>
;;;
;;; This file is part of Guile-ACL2.
;;;
;;; Guile-ACL2 is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Lesser General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; Guile-ACL2 is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with Guile-ACL2.  If not, see
;;; <http://www.gnu.org/licenses/>.

(define-module (language acl2 lexer)
  #:use-module (ice-9 control)
  #:use-module (srfi srfi-8)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-14)
  #:export (lexer))

(define char-set:acl2 char-set:ascii)

(define char-set:acl2-digit
  (char-set-intersection char-set:acl2 char-set:digit))

(define char-set:acl2-letter
  (char-set-intersection char-set:acl2 char-set:letter))

(define char-set:acl2-whitespace
  (char-set-intersection char-set:acl2 char-set:whitespace))

(define char-set:acl2-paren-open
  (char-set #\())

(define char-set:acl2-paren-close
  (char-set #\)))

(define char-set:acl2-quote
  (char-set #\'))

(define char-set:acl2-quasiquote
  (char-set #\`))

(define char-set:acl2-unquote
  (char-set #\,))

(define char-set:acl2-double-quote
  (char-set #\"))

(define char-set:acl2-separator
  (char-set-union char-set:acl2-whitespace
                  char-set:acl2-paren-open
                  char-set:acl2-paren-close
                  char-set:acl2-quote
                  char-set:acl2-double-quote
                  char-set:acl2-quasiquote
                  char-set:acl2-unquote))

(define char-set:acl2-dot
  (char-set #\.))

(define char-set:acl2-escape
  (char-set #\\))

(define (skip-whitespaces p)
  (let ((c (peek-char p)))
    (cond
     ((char-set-contains? char-set:acl2-whitespace c)
      (read-char p)
      (skip-whitespaces p))
     (else
      (lex p)))))

(define (next v f)
  (shift k (values v
                   (lambda ()
                     (reset (f) (k #f))))))

(define (set-acl2-source-properties! p obj)
  (let ((out (open-output-string)))
    (set-source-property! obj 'filename (if (file-port? p)
                                            (port-filename p)
                                            #f))
    (set-source-property! obj 'line (1+ (port-line p)))
    (set-source-property! obj 'column (1+ (port-column p)))))

(define (open-output-string-source p)
  (let ((out (open-output-string)))
    (set-acl2-source-properties! p out)
    out))

(define (token-source p token-symbol)
  (let ((token (cons token-symbol #f)))
    (set-acl2-source-properties! p token)
    token))

(define* (lex-numeric-or-symbol p #:optional (out (open-output-string-source p)))
  (let ((c (peek-char p)))
    (cond
     ((or (eof-object? c)
          (char-set-contains? char-set:acl2-separator c))
      (next (cons-source out '<number> (get-output-string out))
            (lambda () (lex p))))
     ((char-set-contains? char-set:digit c)
      (write-char (read-char p) out)
      (lex-numeric-or-symbol p out))
     (else
      (lex-symbol p out)))))

(define* (lex-symbol p #:optional (out (open-output-string-source p)))
  (let ((c (peek-char p)))
    (cond
     ((or (eof-object? c)
          (char-set-contains? char-set:acl2-separator c))
      (next (cons-source out '<symbol> (get-output-string out))
            (lambda () (lex p))))
     ((char-set-contains? char-set:acl2-escape c)
      (read-char p)
      (let ((c2 (peek-char p)))
        (unless (eof-object? c2)
          (write-char (read-char p) out))
        (lex-symbol p out)))
     (else
      (write-char (read-char p) out)
      (lex-symbol p out)))))

(define* (lex-string p #:optional (out (open-output-string-source p)))
  (let ((c (peek-char p)))
    (cond
     ((eof-object? c)
      (next (cons-source out '<string> (get-output-string out))
            (lambda () (lex p))))
     ((char-set-contains? char-set:acl2-double-quote c)
      (display (read-char p))
      (newline)
      (next (cons-source out '<string> (get-output-string out))
            (lambda () (lex p))))
     ((char-set-contains? char-set:acl2-escape c)
      (read-char p)
      (let ((c2 (peek-char p)))
        (unless (eof-object? c2)
          (write-char (read-char p) out))
        (lex-string p out)))
     (else
      (write-char (read-char p) out)
      (lex-string p out)))))

(define (lex p)
  (let ((c (peek-char p)))
    (cond
     ((eof-object? c)
      (next (token-source p '<eof>)
            (lambda () (lex p))))
     ((char-set-contains? char-set:acl2-whitespace c)
      (skip-whitespaces p))
     ((char-set-contains? char-set:acl2-digit c)
      (lex-numeric-or-symbol p))
     ((char-set-contains? char-set:acl2-double-quote c)
      (read-char p)
      (lex-string p))
     ((char-set-contains? char-set:acl2-paren-open c)
      (next (token-source p '<paren-open>)
            (lambda () (read-char p) (lex p))))
     ((char-set-contains? char-set:acl2-paren-close c)
      (next (token-source p '<paren-close>)
            (lambda () (read-char p) (lex p))))
     ((char-set-contains? char-set:acl2-quote c)
      (next (token-source p '<quote>)
            (lambda () (read-char p) (lex p))))
     ((char-set-contains? char-set:acl2-quasiquote c)
      (next (token-source p '<quasiquote>)
            (lambda () (read-char p) (lex p))))
     ((char-set-contains? char-set:acl2-unquote c)
      (next (token-source p '<unquote>)
            (lambda () (read-char p) (lex p))))
     ((char-set-contains? char-set:acl2-dot c)
      (read-char)
      (let ((c2 (peek-char c)))
        (if (char-set-contains? char-set:acl2-separator c2)
            (next (token-source p '<dot>)
                  (lambda () (lex p)))
            (let ((out (open-output-string-source p)))
              (write-char c out)
              (lex-symbol p out)))))
     (else (lex-symbol p)))))

(define (lexer p)
  (lambda () (reset (lex p))))