csharpfftfsharpintegrationinterpolationlinear-algebramathdifferentiationmatrixnumericsrandomregressionstatisticsmathnet
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
343 lines
13 KiB
343 lines
13 KiB
(*------------------------------------------------------------------------
|
|
* (c) Microsoft Corporation. All rights reserved
|
|
*
|
|
* Helper functions for the F# lexer lex.mll
|
|
*-----------------------------------------------------------------------*)
|
|
|
|
#light
|
|
|
|
module Microsoft.FSharp.Compiler.Lexhelp
|
|
|
|
open Internal.Utilities
|
|
open Internal.Utilities.Text
|
|
open Internal.Utilities.Pervasives
|
|
open Internal.Utilities.Text.Lexing
|
|
open Microsoft.FSharp.Compiler
|
|
open Microsoft.FSharp.Compiler.AbstractIL
|
|
open Microsoft.FSharp.Compiler.AbstractIL.Internal
|
|
open Microsoft.FSharp.Compiler.Lib
|
|
open Microsoft.FSharp.Compiler.Ast
|
|
open Microsoft.FSharp.Compiler.PrettyNaming
|
|
open Microsoft.FSharp.Compiler.ErrorLogger
|
|
open Microsoft.FSharp.Compiler.AbstractIL.Diagnostics
|
|
open Microsoft.FSharp.Compiler.Range
|
|
open Microsoft.FSharp.Compiler.Parser
|
|
|
|
let set_pos (lexbuf:UnicodeLexing.Lexbuf) p = lexbuf.EndPos <- p
|
|
|
|
/// Lexer args: status of #light processing. Mutated when a #light
|
|
/// directive is processed. This alters the behaviour of the lexfilter.
|
|
[<Sealed>]
|
|
type LightSyntaxStatus(initial:bool,warn:bool) =
|
|
let mutable status = None
|
|
member x.Status
|
|
with get() = match status with None -> initial | Some v -> v
|
|
and set v = status <- Some(v)
|
|
member x.ExplicitlySet = status.IsSome
|
|
member x.WarnOnMultipleTokens = warn
|
|
|
|
|
|
/// Manage lexer resources (string interning)
|
|
[<Sealed>]
|
|
type LexResourceManager() =
|
|
let strings = new System.Collections.Generic.Dictionary<string,string>(100)
|
|
member x.InternString(s) =
|
|
let mutable res = "" in
|
|
let ok = strings.TryGetValue(s,&res) in
|
|
if ok then res
|
|
else
|
|
(strings.[s] <- s; s)
|
|
|
|
/// Lexer parameters
|
|
type lexargs =
|
|
{ defines: string list;
|
|
ifdefStack: ifdefStack;
|
|
resourceManager: LexResourceManager;
|
|
getSourceDirectory: (unit -> string);
|
|
lightSyntaxStatus : LightSyntaxStatus;
|
|
errorLogger: ErrorLogger }
|
|
|
|
let mkLexargs (srcdir,filename,defines,lightSyntaxStatus,resourceManager,ifdefStack,errorLogger) =
|
|
(* resetLexbufPos filename lexbuf; *) (* called explicitly from usingLexbufForParsing *)
|
|
{ defines = defines;
|
|
ifdefStack= ifdefStack;
|
|
lightSyntaxStatus=lightSyntaxStatus;
|
|
resourceManager=resourceManager;
|
|
getSourceDirectory=srcdir;
|
|
errorLogger=errorLogger }
|
|
|
|
/// Set some (buffer local) mutable variables on the currently active lexbuf.
|
|
let registerLexbufForParsing (lexbuf:UnicodeLexing.Lexbuf) concreteSyntaxSink =
|
|
LexbufLocalXmlDocStore.ClearXmlDoc lexbuf;
|
|
SetConcreteSyntaxSink lexbuf concreteSyntaxSink
|
|
|
|
/// Register the lexbuf and call the given function
|
|
let reusingLexbufForParsing(lexbuf,concreteSyntaxSink) f =
|
|
registerLexbufForParsing lexbuf concreteSyntaxSink;
|
|
try
|
|
f ()
|
|
with e ->
|
|
raise (WrappedError(e,(try GetLexerRange lexbuf with _ -> range0)))
|
|
|
|
let resetLexbufPos filename (lexbuf: UnicodeLexing.Lexbuf) =
|
|
lexbuf.EndPos <- {lexbuf.EndPos with pos_fname= encode_file filename;
|
|
pos_cnum=0;
|
|
pos_lnum=1 }
|
|
|
|
/// Reset the lexbuf, configure the initial position with the given filename and call the given function
|
|
let usingLexbufForParsing (lexbuf:UnicodeLexing.Lexbuf,filename,concreteSyntaxSink) f =
|
|
resetLexbufPos filename lexbuf;
|
|
reusingLexbufForParsing(lexbuf,concreteSyntaxSink) (fun () -> f lexbuf)
|
|
|
|
(*------------------------------------------------------------------------
|
|
!* Functions to manipulate lexer transient state
|
|
*-----------------------------------------------------------------------*)
|
|
|
|
let default_string_finish = (fun endm b s -> STRING (Bytes.unicode_bytes_as_string s))
|
|
|
|
let call_string_finish fin buf endm b = fin endm b (Bytes.Bytebuf.close buf)
|
|
|
|
let add_string buf x = Bytes.Bytebuf.emit_bytes buf (Bytes.string_as_unicode_bytes x)
|
|
|
|
let add_int_char buf c =
|
|
Bytes.Bytebuf.emit_int_as_byte buf (c % 256);
|
|
Bytes.Bytebuf.emit_int_as_byte buf (c / 256)
|
|
|
|
let add_unichar buf c = add_int_char buf (int c)
|
|
let add_byte_char buf (c:char) = add_int_char buf (int32 c % 256)
|
|
|
|
/// When lexing bytearrays we don't expect to see any unicode stuff.
|
|
/// Likewise when lexing string constants we shouldn't see any trigraphs > 127
|
|
/// So to turn the bytes collected in the string buffer back into a bytearray
|
|
/// we just take every second byte we stored. Note all bytes > 127 should have been
|
|
/// stored using add_int_char
|
|
let stringbuf_as_bytes buf =
|
|
let bytes = Bytes.Bytebuf.close buf
|
|
Bytes.make (fun i -> Bytes.get bytes (i*2)) (Bytes.length bytes / 2)
|
|
|
|
/// Sanity check that high bytes are zeros. Further check each low byte <= 127
|
|
let stringbuf_is_bytes buf =
|
|
let bytes = Bytes.Bytebuf.close buf
|
|
let mutable ok = true
|
|
for i = 0 to Bytes.length bytes/2-1 do
|
|
if Bytes.get bytes (i*2+1) <> 0 then ok <- false
|
|
ok
|
|
|
|
let newline (lexbuf:LexBuffer<_>) =
|
|
lexbuf.EndPos <- lexbuf.EndPos.NextLine
|
|
|
|
let trigraph c1 c2 c3 =
|
|
let digit (c:char) = int c - int '0'
|
|
char (digit c1 * 100 + digit c2 * 10 + digit c3)
|
|
|
|
let digit d =
|
|
if d >= '0' && d <= '9' then int32 d - int32 '0'
|
|
else failwith "digit"
|
|
|
|
let hexdigit d =
|
|
if d >= '0' && d <= '9' then digit d
|
|
elif d >= 'a' && d <= 'f' then int32 d - int32 'a' + 10
|
|
elif d >= 'A' && d <= 'F' then int32 d - int32 'A' + 10
|
|
else failwith "hexdigit"
|
|
|
|
let unicodegraph_short s =
|
|
if String.length s <> 4 then failwith "unicodegraph";
|
|
uint16 (hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3])
|
|
|
|
let hexgraph_short s =
|
|
if String.length s <> 2 then failwith "hexgraph";
|
|
uint16 (hexdigit s.[0] * 16 + hexdigit s.[1])
|
|
|
|
let unicodegraph_long s =
|
|
if String.length s <> 8 then failwith "unicodegraph_long";
|
|
let high = hexdigit s.[0] * 4096 + hexdigit s.[1] * 256 + hexdigit s.[2] * 16 + hexdigit s.[3] in
|
|
let low = hexdigit s.[4] * 4096 + hexdigit s.[5] * 256 + hexdigit s.[6] * 16 + hexdigit s.[7] in
|
|
if high = 0 then None, uint16 low
|
|
else
|
|
(* A surrogate pair - see http://www.unicode.org/unicode/uni2book/ch03.pdf, section 3.7 *)
|
|
Some (uint16 (0xD800 + ((high * 0x10000 + low - 0x10000) / 0x400))),
|
|
uint16 (0xDF30 + ((high * 0x10000 + low - 0x10000) % 0x400))
|
|
|
|
let escape c =
|
|
match c with
|
|
| '\\' -> '\\'
|
|
| '\'' -> '\''
|
|
| 'a' -> char 7
|
|
| 'f' -> char 12
|
|
| 'v' -> char 11
|
|
| 'n' -> '\n'
|
|
| 't' -> '\t'
|
|
| 'b' -> '\b'
|
|
| 'r' -> '\r'
|
|
| c -> c
|
|
|
|
/// Token skipper. Colorizers for language modes such as Visual Studio see some tokens
|
|
/// that the parser does not see.
|
|
let inline skipToken skip (skippedToken: token) (lexer: bool -> UnicodeLexing.Lexbuf -> token) lexbuf =
|
|
// NOTE: The "lexer lexbuf" call MUST be a tailcall - this is a
|
|
// recursive loop back to the lexer.
|
|
if skip then lexer skip lexbuf else skippedToken
|
|
|
|
(*------------------------------------------------------------------------
|
|
!* Keyword table
|
|
*-----------------------------------------------------------------------*)
|
|
|
|
exception ReservedKeyword of string * range
|
|
exception IndentationProblem of string * range
|
|
|
|
module Keywords =
|
|
type private compatibilityMode =
|
|
| ALWAYS (* keyword *)
|
|
| FSHARP (* keyword, but an identifier under --ml-compatibility mode *)
|
|
|
|
let private keywordList =
|
|
[ FSHARP, "abstract", ABSTRACT;
|
|
ALWAYS, "and" ,AND;
|
|
ALWAYS, "as" ,AS;
|
|
ALWAYS, "assert" ,ASSERT;
|
|
ALWAYS, "asr" ,INFIX_STAR_STAR_OP "asr";
|
|
ALWAYS, "base" ,BASE;
|
|
ALWAYS, "begin" ,BEGIN;
|
|
ALWAYS, "class" ,CLASS;
|
|
FSHARP, "default" ,DEFAULT;
|
|
FSHARP, "delegate" ,DELEGATE;
|
|
ALWAYS, "do" ,DO;
|
|
ALWAYS, "done" ,DONE;
|
|
FSHARP, "downcast" ,DOWNCAST;
|
|
ALWAYS, "downto" ,DOWNTO;
|
|
FSHARP, "elif" ,ELIF;
|
|
ALWAYS, "else" ,ELSE;
|
|
ALWAYS, "end" ,END;
|
|
ALWAYS, "exception" ,EXCEPTION;
|
|
FSHARP, "extern" ,EXTERN;
|
|
ALWAYS, "false" ,FALSE;
|
|
ALWAYS, "finally" ,FINALLY;
|
|
ALWAYS, "for" ,FOR;
|
|
ALWAYS, "fun" ,FUN;
|
|
ALWAYS, "function" ,FUNCTION;
|
|
ALWAYS, "if" ,IF;
|
|
ALWAYS, "in" ,IN;
|
|
ALWAYS, "inherit" ,INHERIT;
|
|
FSHARP, "inline" ,INLINE;
|
|
FSHARP, "interface" ,INTERFACE;
|
|
FSHARP, "internal" ,INTERNAL;
|
|
ALWAYS, "land" ,INFIX_STAR_DIV_MOD_OP "land";
|
|
ALWAYS, "lazy" ,LAZY;
|
|
ALWAYS, "let" ,LET(false);
|
|
ALWAYS, "lor" ,INFIX_STAR_DIV_MOD_OP "lor";
|
|
ALWAYS, "lsl" ,INFIX_STAR_STAR_OP "lsl";
|
|
ALWAYS, "lsr" ,INFIX_STAR_STAR_OP "lsr";
|
|
ALWAYS, "lxor" ,INFIX_STAR_DIV_MOD_OP "lxor";
|
|
ALWAYS, "match" ,MATCH;
|
|
FSHARP, "member" ,MEMBER;
|
|
ALWAYS, "mod" ,INFIX_STAR_DIV_MOD_OP "mod";
|
|
ALWAYS, "module" ,MODULE;
|
|
ALWAYS, "mutable" ,MUTABLE;
|
|
FSHARP, "namespace" ,NAMESPACE;
|
|
ALWAYS, "new" ,NEW;
|
|
FSHARP, "null" ,NULL;
|
|
ALWAYS, "of" ,OF;
|
|
ALWAYS, "open" ,OPEN;
|
|
ALWAYS, "or" ,OR;
|
|
FSHARP, "override" ,OVERRIDE;
|
|
ALWAYS, "private" ,PRIVATE;
|
|
FSHARP, "public" ,PUBLIC;
|
|
ALWAYS, "rec" ,REC;
|
|
FSHARP, "return" ,YIELD(false);
|
|
ALWAYS, "sig" ,SIG;
|
|
FSHARP, "static" ,STATIC;
|
|
ALWAYS, "struct" ,STRUCT;
|
|
ALWAYS, "then" ,THEN;
|
|
ALWAYS, "to" ,TO;
|
|
ALWAYS, "true" ,TRUE;
|
|
ALWAYS, "try" ,TRY;
|
|
ALWAYS, "type" ,TYPE;
|
|
FSHARP, "upcast" ,UPCAST;
|
|
FSHARP, "use" ,LET(true);
|
|
ALWAYS, "val" ,VAL;
|
|
ALWAYS, "virtual" ,VIRTUAL;
|
|
FSHARP, "void" ,VOID;
|
|
ALWAYS, "when" ,WHEN;
|
|
ALWAYS, "while" ,WHILE;
|
|
ALWAYS, "with" ,WITH;
|
|
FSHARP, "yield" ,YIELD(true);
|
|
ALWAYS, "_" ,UNDERSCORE;
|
|
(*------- for prototyping and explaining offside rule *)
|
|
FSHARP, "__token_OBLOCKSEP" ,OBLOCKSEP;
|
|
FSHARP, "__token_OWITH" ,OWITH;
|
|
FSHARP, "__token_ODECLEND" ,ODECLEND;
|
|
FSHARP, "__token_OTHEN" ,OTHEN;
|
|
FSHARP, "__token_OELSE" ,OELSE;
|
|
FSHARP, "__token_OEND" ,OEND;
|
|
FSHARP, "__token_ODO" ,ODO;
|
|
FSHARP, "__token_OLET" ,OLET(true);
|
|
FSHARP, "__token_constraint",CONSTRAINT;
|
|
]
|
|
(*------- reserved keywords which are ml-compatibility ids *)
|
|
@ List.map (fun s -> (FSHARP,s,RESERVED))
|
|
[ "atomic"; "break";
|
|
"checked"; "component"; "const"; "constraint"; "constructor"; "continue";
|
|
"eager";
|
|
"fixed"; "fori"; "functor"; "global";
|
|
"include"; (* "instance"; *)
|
|
"method"; "mixin";
|
|
"object"; "parallel"; "params"; "process"; "protected"; "pure"; (* "pattern"; *)
|
|
"sealed"; "trait"; "tailcall";
|
|
"volatile"; ]
|
|
|
|
let private unreserve_words =
|
|
keywordList |> List.choose (function (mode,keyword,_) -> if mode = FSHARP then Some keyword else None)
|
|
|
|
(*------------------------------------------------------------------------
|
|
!* Keywords
|
|
*-----------------------------------------------------------------------*)
|
|
|
|
let keywordNames =
|
|
keywordList |> List.map (fun (_, w, _) -> w)
|
|
|
|
let keywordTable =
|
|
let tab = Hashtbl.create 1000 in
|
|
List.iter (fun (mode,keyword,token) -> Hashtbl.add tab keyword token) keywordList;
|
|
tab
|
|
|
|
let KeywordToken s = keywordTable.[s]
|
|
|
|
(* REVIEW: get rid of this element of global state *)
|
|
let permitFsharpKeywords = ref true
|
|
|
|
let IdentifierToken args (lexbuf:UnicodeLexing.Lexbuf) (s:string) =
|
|
if IsCompilerGeneratedName s then
|
|
let m = GetLexerRange lexbuf
|
|
warning(Error("Identifiers containing '@' are reserved for use in F# code generation",m));
|
|
IDENT (args.resourceManager.InternString(s))
|
|
|
|
let KeywordOrIdentifierToken args (lexbuf:UnicodeLexing.Lexbuf) s =
|
|
if not !permitFsharpKeywords && List.mem s unreserve_words then
|
|
IdentifierToken args lexbuf s
|
|
elif Hashtbl.mem keywordTable s then
|
|
let v = KeywordToken s
|
|
if v = RESERVED then
|
|
let m = GetLexerRange lexbuf
|
|
warning(ReservedKeyword("The identifier '"^s^"' is reserved for future use by F#.",m));
|
|
IdentifierToken args lexbuf s
|
|
else v
|
|
else
|
|
match s with
|
|
| "__SOURCE_DIRECTORY__" ->
|
|
STRING (args.getSourceDirectory())
|
|
| "__SOURCE_FILE__" ->
|
|
STRING (System.IO.Path.GetFileName((file_of_file_idx (decode_file_idx lexbuf.StartPos.FileName))))
|
|
| "__LINE__" ->
|
|
STRING (string lexbuf.StartPos.Line)
|
|
| _ ->
|
|
IdentifierToken args lexbuf s
|
|
|
|
/// A utility to help determine if an identifier needs to be quoted
|
|
let QuoteIdentifierIfNeeded (s : string) : string =
|
|
let isKeyword (n : string) : bool = List.exists ((=) n) keywordNames
|
|
if isKeyword s || not (String.for_all IsLongIdentifierPartCharacter s) then
|
|
"``" + s + "``"
|
|
else
|
|
s
|
|
|
|
|
|
|