-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
208 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
diff --git a/src/Skylighting/Regex.hs b/src/Skylighting/Regex.hs | ||
index a6f6207..4140276 100644 | ||
--- a/src/Skylighting/Regex.hs | ||
+++ b/src/Skylighting/Regex.hs | ||
@@ -2,11 +2,14 @@ | ||
{-# LANGUAGE DeriveDataTypeable #-} | ||
{-# LANGUAGE DeriveGeneric #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
+{-# LANGUAGE PatternSynonyms #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
|
||
module Skylighting.Regex ( | ||
Regex(..) | ||
- , RE(..) | ||
+ , RE | ||
+ , pattern RE, reCaseSensitive, reString | ||
+ , compileRE | ||
, compileRegex | ||
, matchRegex | ||
, testRegex | ||
@@ -14,25 +17,81 @@ module Skylighting.Regex ( | ||
) where | ||
|
||
import Data.Aeson | ||
-import Data.Binary (Binary) | ||
+import Data.Binary (Binary(..)) | ||
import qualified Data.ByteString.Base64 as Base64 | ||
import qualified Data.ByteString.Char8 as BS | ||
import Data.Data | ||
import qualified Data.Text as Text | ||
import qualified Data.Text.Encoding as TE | ||
-import GHC.Generics (Generic) | ||
#if !MIN_VERSION_base(4,13,0) | ||
import Control.Monad.Fail (MonadFail) | ||
#endif | ||
import Regex.KDE | ||
|
||
+import Text.Read hiding (get) | ||
+ | ||
-- | A representation of a regular expression. | ||
-data RE = RE{ | ||
- reString :: BS.ByteString | ||
- , reCaseSensitive :: Bool | ||
-} deriving (Show, Read, Ord, Eq, Data, Typeable, Generic) | ||
+data RE = RE'{ | ||
+ _reString :: BS.ByteString | ||
+ , _reCaseSensitive :: Bool | ||
+ , reCompiled' :: Either String Regex | ||
+} deriving Typeable | ||
+ | ||
+-- We define a smart constructor which also holds the compiled regex, to avoid | ||
+-- recompiling each time we tokenize. | ||
+ | ||
+{-# COMPLETE RE #-} | ||
+pattern RE :: BS.ByteString -> Bool -> RE | ||
+pattern RE {reString, reCaseSensitive} <- RE' reString reCaseSensitive _ where | ||
+ RE str caseSensitive = RE' str caseSensitive (compileRegex caseSensitive str) | ||
+ | ||
+-- Unfortunately this means we need to derive all the instances ourselves. | ||
+ | ||
+instance Show RE where | ||
+ showsPrec d (RE str caseSensitive) = showParen (d > 10) | ||
+ $ showString "RE {reString = " | ||
+ . showsPrec 11 str | ||
+ . showString ", reCaseSensitive = " | ||
+ . showsPrec 11 caseSensitive | ||
+ . showString "}" | ||
+ | ||
+instance Read RE where | ||
+ readPrec = parens . prec 10 $ do | ||
+ Ident "RE" <- lexP | ||
+ Punc "{" <- lexP | ||
+ Ident "reString" <- lexP | ||
+ Punc "=" <- lexP | ||
+ str <- readPrec | ||
+ Punc "," <- lexP | ||
+ Ident "reCaseSensitive" <- lexP | ||
+ Punc "=" <- lexP | ||
+ caseSensitive <- readPrec | ||
+ Punc "}" <- lexP | ||
+ pure (RE str caseSensitive) | ||
|
||
-instance Binary RE | ||
+toComparisonKey :: RE -> (BS.ByteString, Bool) | ||
+toComparisonKey (RE x y) = (x, y) | ||
+ | ||
+instance Eq RE where | ||
+ x == y = toComparisonKey x == toComparisonKey y | ||
+ | ||
+instance Ord RE where | ||
+ x `compare` y = toComparisonKey x `compare` toComparisonKey y | ||
+ | ||
+conRE :: Constr | ||
+conRE = mkConstr tyRE "RE" [] Prefix | ||
+tyRE :: DataType | ||
+tyRE = mkDataType "Skylighting.Regex.RE" [conRE] | ||
+ | ||
+instance Data RE where | ||
+ gfoldl k z (RE s c) = z RE `k` s `k` c | ||
+ gunfold k z _ = k (k (z RE)) | ||
+ toConstr _ = conRE | ||
+ dataTypeOf _ = tyRE | ||
+ | ||
+instance Binary RE where | ||
+ put (RE x y) = put x >> put y | ||
+ get = RE <$> get <*> get | ||
|
||
instance ToJSON RE where | ||
toJSON re = object [ "reString" .= encodeToText (reString re) | ||
@@ -49,3 +108,6 @@ encodeToText = TE.decodeUtf8 . Base64.encode | ||
|
||
decodeFromText :: (Monad m, MonadFail m) => Text.Text -> m BS.ByteString | ||
decodeFromText = either fail return . Base64.decode . TE.encodeUtf8 | ||
+ | ||
+compileRE :: RE -> Either String Regex | ||
+compileRE = reCompiled' | ||
\ No newline at end of file | ||
diff --git a/src/Skylighting/Tokenizer.hs b/src/Skylighting/Tokenizer.hs | ||
index 42c9b83..27119a4 100644 | ||
--- a/src/Skylighting/Tokenizer.hs | ||
+++ b/src/Skylighting/Tokenizer.hs | ||
@@ -56,7 +56,6 @@ data TokenizerState = TokenizerState{ | ||
, column :: Int | ||
, lineContinuation :: Bool | ||
, firstNonspaceColumn :: Maybe Int | ||
- , compiledRegexes :: Map.Map RE Regex | ||
} | ||
|
||
-- | Configuration options for 'tokenize'. | ||
@@ -163,7 +162,6 @@ tokenize config syntax inp = | ||
, column = 0 | ||
, lineContinuation = False | ||
, firstNonspaceColumn = Nothing | ||
- , compiledRegexes = Map.empty | ||
} | ||
|
||
info :: String -> TokenizerM () | ||
@@ -552,18 +550,11 @@ regExpr dynamic re inp = do | ||
-- return $! traceShowId $! (reStr, inp) | ||
let reStr = reString re | ||
when (BS.take 2 reStr == "\\b") $ wordBoundary inp | ||
- compiledREs <- gets compiledRegexes | ||
- regex <- case Map.lookup re compiledREs of | ||
- Nothing -> do | ||
- cre <- case compileRegex (reCaseSensitive re) reStr of | ||
- Right r -> return r | ||
- Left e -> throwError $ | ||
- "Error compiling regex " ++ | ||
- UTF8.toString reStr ++ ": " ++ e | ||
- modify $ \st -> st{ compiledRegexes = | ||
- Map.insert re cre (compiledRegexes st) } | ||
- return cre | ||
- Just cre -> return cre | ||
+ regex <- case compileRE re of | ||
+ Right r -> return r | ||
+ Left e -> throwError $ | ||
+ "Error compiling regex " ++ | ||
+ UTF8.toString reStr ++ ": " ++ e | ||
regex' <- if dynamic | ||
then subDynamic regex | ||
else return regex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{-# LANGUAGE BlockArguments, GeneralizedNewtypeDeriving, TypeFamilies #-} | ||
|
||
-- | Take the | ||
module Shake.Digest (digestRules, getFileDigest) where | ||
|
||
import qualified Data.ByteString.Lazy as LazyBS | ||
import Data.Digest.Pure.SHA | ||
import Data.Typeable | ||
|
||
import Development.Shake.Classes (Hashable, Binary, NFData) | ||
import Development.Shake | ||
|
||
newtype FileDigest = FileDigest FilePath | ||
deriving (Show, Typeable, Eq, Hashable, Binary, NFData) | ||
|
||
type instance RuleResult FileDigest = String | ||
|
||
-- | Shake rules required for compiling KaTeX equations. | ||
digestRules :: Rules () | ||
digestRules = versioned 1 do | ||
_ <- addOracle \(FileDigest f) -> do | ||
need [f] | ||
take 8 . showDigest . sha256 <$> liftIO (LazyBS.readFile f) | ||
pure () | ||
|
||
-- | Get parsed preamble | ||
getFileDigest :: FilePath -> Action String | ||
getFileDigest = askOracle . FileDigest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters