{-# LANGUAGE CPP                        #-}
{-# LANGUAGE ConstraintKinds            #-}
{-# LANGUAGE DeriveAnyClass             #-}
{-# LANGUAGE DeriveGeneric              #-}
{-# LANGUAGE DerivingStrategies         #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE PackageImports             #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE ScopedTypeVariables        #-}
{-# LANGUAGE StrictData                 #-}
{-# LANGUAGE TemplateHaskell            #-}
{-# LANGUAGE TypeApplications           #-}
{-# LANGUAGE UndecidableInstances       #-}

-- |
-- Module      : Codex.Xlsx.Parser.Stream
-- Description : Stream parser for xlsx files
-- Copyright   :
--   (c) Adam, 2021
--   (c) Supercede, 2021
-- License     : MIT
-- Stability   : experimental
-- Portability : POSIX
--
-- Parse @.xlsx@ sheets in constant memory.
--
-- All actions on an xlsx file run inside the 'XlsxM' monad, and must
-- be run with 'runXlsxM'. XlsxM is not a monad transformer, a design
-- inherited from the "zip" package's ZipArchive monad.
--
-- Inside the XlsxM monad, you can stream 'SheetItem's (a row) from a
-- particular sheet, using 'readSheetByIndex', which is callback-based and tied to IO.
--
module Codec.Xlsx.Parser.Stream
  ( XlsxM
  , runXlsxM
  , WorkbookInfo(..)
  , SheetInfo(..)
  , wiSheets
  , getOrParseSharedStringss
  , getWorkbookInfo
  , CellRow
  , readSheet
  , countRowsInSheet
  , collectItems
  -- ** Index
  , SheetIndex
  , makeIndex
  , makeIndexFromName
  -- ** SheetItem
  , SheetItem(..)
  , si_sheet_index
  , si_row
  -- ** Row
  , Row(..)
  , ri_row_index
  , ri_cell_row
  -- * Errors
  , SheetErrors(..)
  , AddCellErrors(..)
  , CoordinateErrors(..)
  , TypeError(..)
  , WorkbookError(..)
  ) where

import qualified "zip" Codec.Archive.Zip as Zip
import Codec.Xlsx.Types.Cell
import Codec.Xlsx.Types.Common
import Codec.Xlsx.Types.Internal (RefId (..))
import Codec.Xlsx.Types.Internal.Relationships (Relationship (..),
                                                Relationships (..))
import Conduit (PrimMonad, (.|))
import qualified Conduit as C
import qualified Data.Vector as V
#ifdef USE_MICROLENS
import Lens.Micro
import Lens.Micro.GHC ()
import Lens.Micro.Mtl
import Lens.Micro.Platform
import Lens.Micro.TH
#else
import Control.Lens
#endif
import Codec.Xlsx.Parser.Internal
import Control.Monad
import Control.Monad.Catch
import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bifunctor
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Conduit (ConduitT)
import qualified Data.DList as DL
import Data.Foldable
import Data.IORef
import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.Map.Strict as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Read as Read
import Data.Traversable (for)
import Data.XML.Types
import GHC.Generics
import Control.DeepSeq
import Codec.Xlsx.Parser.Internal.Memoize

import qualified Codec.Xlsx.Parser.Stream.HexpatInternal as HexpatInternal
import Control.Monad.Base
import Control.Monad.Trans.Control
import Text.XML.Expat.Internal.IO as Hexpat
import Text.XML.Expat.SAX as Hexpat

#ifdef USE_MICROLENS
(<>=) :: (MonadState s m, Monoid a) => ASetter' s a -> a -> m ()
l <>= a = modify (l <>~ a)
#else
#endif

type CellRow = IntMap Cell

-- | Sheet item
--
-- The current sheet at a time, every sheet is constructed of these items.
data SheetItem = MkSheetItem
  { SheetItem -> Int
_si_sheet_index :: Int       -- ^ The sheet number
  , SheetItem -> Row
_si_row         :: ~Row
  } deriving stock ((forall x. SheetItem -> Rep SheetItem x)
-> (forall x. Rep SheetItem x -> SheetItem) -> Generic SheetItem
forall x. Rep SheetItem x -> SheetItem
forall x. SheetItem -> Rep SheetItem x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. SheetItem -> Rep SheetItem x
from :: forall x. SheetItem -> Rep SheetItem x
$cto :: forall x. Rep SheetItem x -> SheetItem
to :: forall x. Rep SheetItem x -> SheetItem
Generic, Int -> SheetItem -> ShowS
[SheetItem] -> ShowS
SheetItem -> FilePath
(Int -> SheetItem -> ShowS)
-> (SheetItem -> FilePath)
-> ([SheetItem] -> ShowS)
-> Show SheetItem
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetItem -> ShowS
showsPrec :: Int -> SheetItem -> ShowS
$cshow :: SheetItem -> FilePath
show :: SheetItem -> FilePath
$cshowList :: [SheetItem] -> ShowS
showList :: [SheetItem] -> ShowS
Show)
    deriving anyclass SheetItem -> ()
(SheetItem -> ()) -> NFData SheetItem
forall a. (a -> ()) -> NFData a
$crnf :: SheetItem -> ()
rnf :: SheetItem -> ()
NFData

data Row = MkRow
  { Row -> RowIndex
_ri_row_index   :: RowIndex  -- ^ Row number
  , Row -> CellRow
_ri_cell_row    :: ~CellRow  -- ^ Row itself
  } deriving stock ((forall x. Row -> Rep Row x)
-> (forall x. Rep Row x -> Row) -> Generic Row
forall x. Rep Row x -> Row
forall x. Row -> Rep Row x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Row -> Rep Row x
from :: forall x. Row -> Rep Row x
$cto :: forall x. Rep Row x -> Row
to :: forall x. Rep Row x -> Row
Generic, Int -> Row -> ShowS
[Row] -> ShowS
Row -> FilePath
(Int -> Row -> ShowS)
-> (Row -> FilePath) -> ([Row] -> ShowS) -> Show Row
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Row -> ShowS
showsPrec :: Int -> Row -> ShowS
$cshow :: Row -> FilePath
show :: Row -> FilePath
$cshowList :: [Row] -> ShowS
showList :: [Row] -> ShowS
Show)
    deriving anyclass Row -> ()
(Row -> ()) -> NFData Row
forall a. (a -> ()) -> NFData a
$crnf :: Row -> ()
rnf :: Row -> ()
NFData

makeLenses 'MkSheetItem
makeLenses 'MkRow

type SharedStringsMap = V.Vector Text

-- | Type of the excel value
--
-- Note: Some values are untyped and rules of their type resolution are not known.
-- They may be treated simply as strings as well as they may be context-dependent.
-- By far we do not bother with it.
data ExcelValueType
  = TS      -- ^ shared string
  | TStr    -- ^ either an inline string ("inlineStr") or a formula string ("str")
  | TN      -- ^ number
  | TB      -- ^ boolean
  | TE      -- ^ excell error, the sheet can contain error values, for example if =1/0, causes division by zero
  | Untyped -- ^ Not all values have types
  deriving stock ((forall x. ExcelValueType -> Rep ExcelValueType x)
-> (forall x. Rep ExcelValueType x -> ExcelValueType)
-> Generic ExcelValueType
forall x. Rep ExcelValueType x -> ExcelValueType
forall x. ExcelValueType -> Rep ExcelValueType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. ExcelValueType -> Rep ExcelValueType x
from :: forall x. ExcelValueType -> Rep ExcelValueType x
$cto :: forall x. Rep ExcelValueType x -> ExcelValueType
to :: forall x. Rep ExcelValueType x -> ExcelValueType
Generic, Int -> ExcelValueType -> ShowS
[ExcelValueType] -> ShowS
ExcelValueType -> FilePath
(Int -> ExcelValueType -> ShowS)
-> (ExcelValueType -> FilePath)
-> ([ExcelValueType] -> ShowS)
-> Show ExcelValueType
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ExcelValueType -> ShowS
showsPrec :: Int -> ExcelValueType -> ShowS
$cshow :: ExcelValueType -> FilePath
show :: ExcelValueType -> FilePath
$cshowList :: [ExcelValueType] -> ShowS
showList :: [ExcelValueType] -> ShowS
Show)

-- | State for parsing sheets
data SheetState = MkSheetState
  { SheetState -> CellRow
_ps_row             :: ~CellRow        -- ^ Current row
  , SheetState -> Int
_ps_sheet_index     :: Int             -- ^ Current sheet ID (AKA 'sheetInfoSheetId')
  , SheetState -> RowIndex
_ps_cell_row_index  :: RowIndex        -- ^ Current row number
  , SheetState -> ColumnIndex
_ps_cell_col_index  :: ColumnIndex     -- ^ Current column number
  , SheetState -> Maybe Int
_ps_cell_style      :: Maybe Int
  , SheetState -> Bool
_ps_is_in_val       :: Bool            -- ^ Flag for indexing wheter the parser is in value or not
  , SheetState -> SharedStringsMap
_ps_shared_strings  :: SharedStringsMap -- ^ Shared string map
  , SheetState -> ExcelValueType
_ps_type            :: ExcelValueType  -- ^ The last detected value type

  , SheetState -> Text
_ps_text_buf        :: Text
  -- ^ for hexpat only, which can break up char data into multiple events
  , SheetState -> Bool
_ps_worksheet_ended :: Bool
  -- ^ For hexpat only, which can throw errors right at the end of the sheet
  -- rather than ending gracefully.
  } deriving stock ((forall x. SheetState -> Rep SheetState x)
-> (forall x. Rep SheetState x -> SheetState) -> Generic SheetState
forall x. Rep SheetState x -> SheetState
forall x. SheetState -> Rep SheetState x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. SheetState -> Rep SheetState x
from :: forall x. SheetState -> Rep SheetState x
$cto :: forall x. Rep SheetState x -> SheetState
to :: forall x. Rep SheetState x -> SheetState
Generic, Int -> SheetState -> ShowS
[SheetState] -> ShowS
SheetState -> FilePath
(Int -> SheetState -> ShowS)
-> (SheetState -> FilePath)
-> ([SheetState] -> ShowS)
-> Show SheetState
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetState -> ShowS
showsPrec :: Int -> SheetState -> ShowS
$cshow :: SheetState -> FilePath
show :: SheetState -> FilePath
$cshowList :: [SheetState] -> ShowS
showList :: [SheetState] -> ShowS
Show)
makeLenses 'MkSheetState

-- | State for parsing shared strings
data SharedStringsState = MkSharedStringsState
  { SharedStringsState -> Builder
_ss_string :: TB.Builder -- ^ String we are parsing
  -- TODO: At the moment SharedStrings can be used only to create CellText values.
  -- We should add support for CellRich values.
  , SharedStringsState -> DList Text
_ss_list   :: DL.DList Text -- ^ list of shared strings
  } deriving stock ((forall x. SharedStringsState -> Rep SharedStringsState x)
-> (forall x. Rep SharedStringsState x -> SharedStringsState)
-> Generic SharedStringsState
forall x. Rep SharedStringsState x -> SharedStringsState
forall x. SharedStringsState -> Rep SharedStringsState x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. SharedStringsState -> Rep SharedStringsState x
from :: forall x. SharedStringsState -> Rep SharedStringsState x
$cto :: forall x. Rep SharedStringsState x -> SharedStringsState
to :: forall x. Rep SharedStringsState x -> SharedStringsState
Generic, Int -> SharedStringsState -> ShowS
[SharedStringsState] -> ShowS
SharedStringsState -> FilePath
(Int -> SharedStringsState -> ShowS)
-> (SharedStringsState -> FilePath)
-> ([SharedStringsState] -> ShowS)
-> Show SharedStringsState
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SharedStringsState -> ShowS
showsPrec :: Int -> SharedStringsState -> ShowS
$cshow :: SharedStringsState -> FilePath
show :: SharedStringsState -> FilePath
$cshowList :: [SharedStringsState] -> ShowS
showList :: [SharedStringsState] -> ShowS
Show)
makeLenses 'MkSharedStringsState

type HasSheetState = MonadState SheetState
type HasSharedStringsState = MonadState SharedStringsState

-- | Represents sheets from the workbook.xml file. E.g.
-- <sheet name="Data" sheetId="1" state="hidden" r:id="rId2" /
data SheetInfo = SheetInfo
  { SheetInfo -> Text
sheetInfoName    :: Text,
    -- | The r:id attribute value.
    SheetInfo -> RefId
sheetInfoRelId   :: RefId,
    -- | The sheetId attribute value
    SheetInfo -> Int
sheetInfoSheetId :: Int
  } deriving (Int -> SheetInfo -> ShowS
[SheetInfo] -> ShowS
SheetInfo -> FilePath
(Int -> SheetInfo -> ShowS)
-> (SheetInfo -> FilePath)
-> ([SheetInfo] -> ShowS)
-> Show SheetInfo
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetInfo -> ShowS
showsPrec :: Int -> SheetInfo -> ShowS
$cshow :: SheetInfo -> FilePath
show :: SheetInfo -> FilePath
$cshowList :: [SheetInfo] -> ShowS
showList :: [SheetInfo] -> ShowS
Show, SheetInfo -> SheetInfo -> Bool
(SheetInfo -> SheetInfo -> Bool)
-> (SheetInfo -> SheetInfo -> Bool) -> Eq SheetInfo
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SheetInfo -> SheetInfo -> Bool
== :: SheetInfo -> SheetInfo -> Bool
$c/= :: SheetInfo -> SheetInfo -> Bool
/= :: SheetInfo -> SheetInfo -> Bool
Eq)

-- | Information about the workbook contained in xl/workbook.xml
-- (currently a subset)
data WorkbookInfo = WorkbookInfo
  { WorkbookInfo -> [SheetInfo]
_wiSheets :: [SheetInfo]
  } deriving Int -> WorkbookInfo -> ShowS
[WorkbookInfo] -> ShowS
WorkbookInfo -> FilePath
(Int -> WorkbookInfo -> ShowS)
-> (WorkbookInfo -> FilePath)
-> ([WorkbookInfo] -> ShowS)
-> Show WorkbookInfo
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> WorkbookInfo -> ShowS
showsPrec :: Int -> WorkbookInfo -> ShowS
$cshow :: WorkbookInfo -> FilePath
show :: WorkbookInfo -> FilePath
$cshowList :: [WorkbookInfo] -> ShowS
showList :: [WorkbookInfo] -> ShowS
Show
makeLenses 'WorkbookInfo

data XlsxMState = MkXlsxMState
  { XlsxMState -> Memoized SharedStringsMap
_xs_shared_strings :: Memoized (V.Vector Text)
  , XlsxMState -> Memoized WorkbookInfo
_xs_workbook_info  :: Memoized WorkbookInfo
  , XlsxMState -> Memoized Relationships
_xs_relationships  :: Memoized Relationships
  }

newtype XlsxM a = XlsxM {forall a. XlsxM a -> ReaderT XlsxMState ZipArchive a
_unXlsxM :: ReaderT XlsxMState Zip.ZipArchive a}
  deriving newtype
    ( (forall a b. (a -> b) -> XlsxM a -> XlsxM b)
-> (forall a b. a -> XlsxM b -> XlsxM a) -> Functor XlsxM
forall a b. a -> XlsxM b -> XlsxM a
forall a b. (a -> b) -> XlsxM a -> XlsxM b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
$cfmap :: forall a b. (a -> b) -> XlsxM a -> XlsxM b
fmap :: forall a b. (a -> b) -> XlsxM a -> XlsxM b
$c<$ :: forall a b. a -> XlsxM b -> XlsxM a
<$ :: forall a b. a -> XlsxM b -> XlsxM a
Functor,
      Functor XlsxM
Functor XlsxM =>
(forall a. a -> XlsxM a)
-> (forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b)
-> (forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c)
-> (forall a b. XlsxM a -> XlsxM b -> XlsxM b)
-> (forall a b. XlsxM a -> XlsxM b -> XlsxM a)
-> Applicative XlsxM
forall a. a -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM b
forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
forall (f :: * -> *).
Functor f =>
(forall a. a -> f a)
-> (forall a b. f (a -> b) -> f a -> f b)
-> (forall a b c. (a -> b -> c) -> f a -> f b -> f c)
-> (forall a b. f a -> f b -> f b)
-> (forall a b. f a -> f b -> f a)
-> Applicative f
$cpure :: forall a. a -> XlsxM a
pure :: forall a. a -> XlsxM a
$c<*> :: forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
<*> :: forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
$cliftA2 :: forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
liftA2 :: forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
$c*> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
*> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
$c<* :: forall a b. XlsxM a -> XlsxM b -> XlsxM a
<* :: forall a b. XlsxM a -> XlsxM b -> XlsxM a
Applicative,
      Applicative XlsxM
Applicative XlsxM =>
(forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b)
-> (forall a b. XlsxM a -> XlsxM b -> XlsxM b)
-> (forall a. a -> XlsxM a)
-> Monad XlsxM
forall a. a -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM b
forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
forall (m :: * -> *).
Applicative m =>
(forall a b. m a -> (a -> m b) -> m b)
-> (forall a b. m a -> m b -> m b)
-> (forall a. a -> m a)
-> Monad m
$c>>= :: forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
>>= :: forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
$c>> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
>> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
$creturn :: forall a. a -> XlsxM a
return :: forall a. a -> XlsxM a
Monad,
      Monad XlsxM
Monad XlsxM => (forall a. IO a -> XlsxM a) -> MonadIO XlsxM
forall a. IO a -> XlsxM a
forall (m :: * -> *).
Monad m =>
(forall a. IO a -> m a) -> MonadIO m
$cliftIO :: forall a. IO a -> XlsxM a
liftIO :: forall a. IO a -> XlsxM a
MonadIO,
      MonadThrow XlsxM
MonadThrow XlsxM =>
(forall e a.
 (HasCallStack, Exception e) =>
 XlsxM a -> (e -> XlsxM a) -> XlsxM a)
-> MonadCatch XlsxM
forall e a.
(HasCallStack, Exception e) =>
XlsxM a -> (e -> XlsxM a) -> XlsxM a
forall (m :: * -> *).
MonadThrow m =>
(forall e a.
 (HasCallStack, Exception e) =>
 m a -> (e -> m a) -> m a)
-> MonadCatch m
$ccatch :: forall e a.
(HasCallStack, Exception e) =>
XlsxM a -> (e -> XlsxM a) -> XlsxM a
catch :: forall e a.
(HasCallStack, Exception e) =>
XlsxM a -> (e -> XlsxM a) -> XlsxM a
MonadCatch,
      MonadCatch XlsxM
MonadCatch XlsxM =>
(forall b.
 HasCallStack =>
 ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b)
-> (forall b.
    HasCallStack =>
    ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b)
-> (forall a b c.
    HasCallStack =>
    XlsxM a
    -> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c))
-> MonadMask XlsxM
forall b.
HasCallStack =>
((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
forall a b c.
HasCallStack =>
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
forall (m :: * -> *).
MonadCatch m =>
(forall b. HasCallStack => ((forall a. m a -> m a) -> m b) -> m b)
-> (forall b.
    HasCallStack =>
    ((forall a. m a -> m a) -> m b) -> m b)
-> (forall a b c.
    HasCallStack =>
    m a -> (a -> ExitCase b -> m c) -> (a -> m b) -> m (b, c))
-> MonadMask m
$cmask :: forall b.
HasCallStack =>
((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
mask :: forall b.
HasCallStack =>
((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
$cuninterruptibleMask :: forall b.
HasCallStack =>
((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
uninterruptibleMask :: forall b.
HasCallStack =>
((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
$cgeneralBracket :: forall a b c.
HasCallStack =>
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
generalBracket :: forall a b c.
HasCallStack =>
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
MonadMask,
      Monad XlsxM
Monad XlsxM =>
(forall e a. (HasCallStack, Exception e) => e -> XlsxM a)
-> MonadThrow XlsxM
forall e a. (HasCallStack, Exception e) => e -> XlsxM a
forall (m :: * -> *).
Monad m =>
(forall e a. (HasCallStack, Exception e) => e -> m a)
-> MonadThrow m
$cthrowM :: forall e a. (HasCallStack, Exception e) => e -> XlsxM a
throwM :: forall e a. (HasCallStack, Exception e) => e -> XlsxM a
MonadThrow,
      MonadReader XlsxMState,
      MonadBase IO,
      MonadBaseControl IO
    )

-- | Initial parsing state
initialSheetState :: SheetState
initialSheetState :: SheetState
initialSheetState = MkSheetState
  { _ps_row :: CellRow
_ps_row             = CellRow
forall a. Monoid a => a
mempty
  , _ps_sheet_index :: Int
_ps_sheet_index     = Int
0
  , _ps_cell_row_index :: RowIndex
_ps_cell_row_index  = RowIndex
0
  , _ps_cell_col_index :: ColumnIndex
_ps_cell_col_index  = ColumnIndex
0
  , _ps_is_in_val :: Bool
_ps_is_in_val       = Bool
False
  , _ps_shared_strings :: SharedStringsMap
_ps_shared_strings  = SharedStringsMap
forall a. Monoid a => a
mempty
  , _ps_type :: ExcelValueType
_ps_type            = ExcelValueType
Untyped
  , _ps_text_buf :: Text
_ps_text_buf        = Text
forall a. Monoid a => a
mempty
  , _ps_worksheet_ended :: Bool
_ps_worksheet_ended = Bool
False
  , _ps_cell_style :: Maybe Int
_ps_cell_style      = Maybe Int
forall a. Maybe a
Nothing
  }

-- | Initial parsing state
initialSharedStrings :: SharedStringsState
initialSharedStrings :: SharedStringsState
initialSharedStrings = MkSharedStringsState
  { _ss_string :: Builder
_ss_string = Builder
forall a. Monoid a => a
mempty
  , _ss_list :: DList Text
_ss_list = DList Text
forall a. Monoid a => a
mempty
  }

-- | Parse shared string entry from xml event and return it once
-- we've reached the end of given element
{-# SCC parseSharedStrings #-}
parseSharedStrings
  :: ( MonadThrow m
     , HasSharedStringsState m
     )
  => HexpatEvent -> m (Maybe Text)
parseSharedStrings :: forall (m :: * -> *).
(MonadThrow m, HasSharedStringsState m) =>
HexpatEvent -> m (Maybe Text)
parseSharedStrings = \case
  -- TODO: Add parsing of text styles to further create CellRich values.
  StartElement ByteString
"si" [(ByteString, Text)]
_ -> Maybe Text
forall a. Maybe a
Nothing Maybe Text -> m () -> m (Maybe Text)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Builder -> Identity Builder)
-> SharedStringsState -> Identity SharedStringsState
Lens' SharedStringsState Builder
ss_string ((Builder -> Identity Builder)
 -> SharedStringsState -> Identity SharedStringsState)
-> Builder -> m ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Builder
forall a. Monoid a => a
mempty)
  EndElement ByteString
"si"     -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> (Builder -> Text) -> Builder -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LazyText -> Text
LT.toStrict (LazyText -> Text) -> (Builder -> LazyText) -> Builder -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> LazyText
TB.toLazyText (Builder -> Maybe Text) -> m Builder -> m (Maybe Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (SharedStringsState -> Builder) -> m Builder
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets SharedStringsState -> Builder
_ss_string
  CharacterData Text
txt   -> Maybe Text
forall a. Maybe a
Nothing Maybe Text -> m () -> m (Maybe Text)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Builder -> Identity Builder)
-> SharedStringsState -> Identity SharedStringsState
Lens' SharedStringsState Builder
ss_string ((Builder -> Identity Builder)
 -> SharedStringsState -> Identity SharedStringsState)
-> Builder -> m ()
forall s (m :: * -> *) a.
(MonadState s m, Semigroup a) =>
ASetter' s a -> a -> m ()
<>= Text -> Builder
TB.fromText Text
txt)
  HexpatEvent
_                   -> Maybe Text -> m (Maybe Text)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe Text
forall a. Maybe a
Nothing

-- | Run a series of actions on an Xlsx file
runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a
runXlsxM :: forall (m :: * -> *) a. MonadIO m => FilePath -> XlsxM a -> m a
runXlsxM FilePath
xlsxFile (XlsxM ReaderT XlsxMState ZipArchive a
act) = IO a -> m a
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO a -> m a) -> IO a -> m a
forall a b. (a -> b) -> a -> b
$ do
  -- TODO: don't run the withArchive multiple times but use liftWith or runInIO instead
  _xs_workbook_info  <- IO WorkbookInfo -> IO (Memoized WorkbookInfo)
forall a. IO a -> IO (Memoized a)
memoizeRef (FilePath -> ZipArchive WorkbookInfo -> IO WorkbookInfo
forall (m :: * -> *) a.
MonadIO m =>
FilePath -> ZipArchive a -> m a
Zip.withArchive FilePath
xlsxFile ZipArchive WorkbookInfo
readWorkbookInfo)
  _xs_relationships  <- memoizeRef (Zip.withArchive xlsxFile readWorkbookRelationships)
  _xs_shared_strings <- memoizeRef (Zip.withArchive xlsxFile parseSharedStringss)
  Zip.withArchive xlsxFile $ runReaderT act $ MkXlsxMState{..}

liftZip :: Zip.ZipArchive a -> XlsxM a
liftZip :: forall a. ZipArchive a -> XlsxM a
liftZip = ReaderT XlsxMState ZipArchive a -> XlsxM a
forall a. ReaderT XlsxMState ZipArchive a -> XlsxM a
XlsxM (ReaderT XlsxMState ZipArchive a -> XlsxM a)
-> (ZipArchive a -> ReaderT XlsxMState ZipArchive a)
-> ZipArchive a
-> XlsxM a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (XlsxMState -> ZipArchive a) -> ReaderT XlsxMState ZipArchive a
forall r (m :: * -> *) a. (r -> m a) -> ReaderT r m a
ReaderT ((XlsxMState -> ZipArchive a) -> ReaderT XlsxMState ZipArchive a)
-> (ZipArchive a -> XlsxMState -> ZipArchive a)
-> ZipArchive a
-> ReaderT XlsxMState ZipArchive a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ZipArchive a -> XlsxMState -> ZipArchive a
forall a b. a -> b -> a
const

parseSharedStringss :: Zip.ZipArchive (V.Vector Text)
parseSharedStringss :: ZipArchive SharedStringsMap
parseSharedStringss = do
      sharedStrsSel <- FilePath -> ZipArchive EntrySelector
forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/sharedStrings.xml"
      hasSharedStrs <- Zip.doesEntryExist sharedStrsSel
      if not hasSharedStrs
        then pure mempty
        else do
          let state0 = SharedStringsState
initialSharedStrings
          byteSrc <- Zip.getEntrySource sharedStrsSel
          st <- liftIO $ runExpat state0 byteSrc $ \[HexpatEvent]
evs -> [HexpatEvent]
-> (HexpatEvent -> StateT SharedStringsState IO ())
-> StateT SharedStringsState IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs ((HexpatEvent -> StateT SharedStringsState IO ())
 -> StateT SharedStringsState IO ())
-> (HexpatEvent -> StateT SharedStringsState IO ())
-> StateT SharedStringsState IO ()
forall a b. (a -> b) -> a -> b
$ \HexpatEvent
ev -> do
            mTxt <- HexpatEvent -> StateT SharedStringsState IO (Maybe Text)
forall (m :: * -> *).
(MonadThrow m, HasSharedStringsState m) =>
HexpatEvent -> m (Maybe Text)
parseSharedStrings HexpatEvent
ev
            for_ mTxt $ \Text
txt ->
              (DList Text -> Identity (DList Text))
-> SharedStringsState -> Identity SharedStringsState
Lens' SharedStringsState (DList Text)
ss_list ((DList Text -> Identity (DList Text))
 -> SharedStringsState -> Identity SharedStringsState)
-> (DList Text -> DList Text) -> StateT SharedStringsState IO ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> (a -> b) -> m ()
%= (DList Text -> Text -> DList Text
forall a. DList a -> a -> DList a
`DL.snoc` Text
txt)
          pure $ V.fromList $ DL.toList $ _ss_list st

{-# SCC getOrParseSharedStringss #-}
getOrParseSharedStringss :: XlsxM (V.Vector Text)
getOrParseSharedStringss :: XlsxM SharedStringsMap
getOrParseSharedStringss = Memoized SharedStringsMap -> XlsxM SharedStringsMap
forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized (Memoized SharedStringsMap -> XlsxM SharedStringsMap)
-> XlsxM (Memoized SharedStringsMap) -> XlsxM SharedStringsMap
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (XlsxMState -> Memoized SharedStringsMap)
-> XlsxM (Memoized SharedStringsMap)
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized SharedStringsMap
_xs_shared_strings

readWorkbookInfo :: Zip.ZipArchive WorkbookInfo
readWorkbookInfo :: ZipArchive WorkbookInfo
readWorkbookInfo = do
   sel <- FilePath -> ZipArchive EntrySelector
forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/workbook.xml"
   src <- Zip.getEntrySource sel
   sheets <- liftIO $ runExpat [] src $ \[HexpatEvent]
evs -> [HexpatEvent]
-> (HexpatEvent -> StateT [SheetInfo] IO ())
-> StateT [SheetInfo] IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs ((HexpatEvent -> StateT [SheetInfo] IO ())
 -> StateT [SheetInfo] IO ())
-> (HexpatEvent -> StateT [SheetInfo] IO ())
-> StateT [SheetInfo] IO ()
forall a b. (a -> b) -> a -> b
$ \case
     StartElement (ByteString
"sheet" :: ByteString) [(ByteString, Text)]
attrs -> do
       nm <- ByteString -> [(ByteString, Text)] -> StateT [SheetInfo] IO Text
forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"name" [(ByteString, Text)]
attrs
       sheetId <- lookupBy "sheetId" attrs
       rId <- lookupBy "r:id" attrs
       sheetNum <- either (throwM . ParseDecimalError sheetId) pure $ eitherDecimal sheetId
       modify' (SheetInfo nm (RefId rId) sheetNum :)
     HexpatEvent
_ -> () -> StateT [SheetInfo] IO ()
forall a. a -> StateT [SheetInfo] IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
   pure $ WorkbookInfo sheets

lookupBy :: MonadThrow m => ByteString -> [(ByteString, Text)] -> m Text
lookupBy :: forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
fields [(ByteString, Text)]
attrs = m Text -> (Text -> m Text) -> Maybe Text -> m Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (WorkbookError -> m Text
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (WorkbookError -> m Text) -> WorkbookError -> m Text
forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> ByteString -> WorkbookError
LookupError [(ByteString, Text)]
attrs ByteString
fields) Text -> m Text
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Text -> m Text) -> Maybe Text -> m Text
forall a b. (a -> b) -> a -> b
$ ByteString -> [(ByteString, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ByteString
fields [(ByteString, Text)]
attrs

-- | Returns information about the workbook, found in
-- xl/workbook.xml. The result is cached so the XML will only be
-- decompressed and parsed once inside a larger XlsxM action.
getWorkbookInfo :: XlsxM WorkbookInfo
getWorkbookInfo :: XlsxM WorkbookInfo
getWorkbookInfo = Memoized WorkbookInfo -> XlsxM WorkbookInfo
forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized (Memoized WorkbookInfo -> XlsxM WorkbookInfo)
-> XlsxM (Memoized WorkbookInfo) -> XlsxM WorkbookInfo
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (XlsxMState -> Memoized WorkbookInfo)
-> XlsxM (Memoized WorkbookInfo)
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized WorkbookInfo
_xs_workbook_info

readWorkbookRelationships :: Zip.ZipArchive Relationships
readWorkbookRelationships :: ZipArchive Relationships
readWorkbookRelationships = do
   sel <- FilePath -> ZipArchive EntrySelector
forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/_rels/workbook.xml.rels"
   src <- Zip.getEntrySource sel
   liftIO $ fmap Relationships $ runExpat mempty src $ \[HexpatEvent]
evs -> [HexpatEvent]
-> (HexpatEvent -> StateT (Map RefId Relationship) IO ())
-> StateT (Map RefId Relationship) IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs ((HexpatEvent -> StateT (Map RefId Relationship) IO ())
 -> StateT (Map RefId Relationship) IO ())
-> (HexpatEvent -> StateT (Map RefId Relationship) IO ())
-> StateT (Map RefId Relationship) IO ()
forall a b. (a -> b) -> a -> b
$ \case
     StartElement (ByteString
"Relationship" :: ByteString) [(ByteString, Text)]
attrs -> do
       rId <- ByteString
-> [(ByteString, Text)] -> StateT (Map RefId Relationship) IO Text
forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"Id" [(ByteString, Text)]
attrs
       rTarget <- lookupBy "Target" attrs
       rType <- lookupBy "Type" attrs
       modify' $ M.insert (RefId rId) $
         Relationship { relType = rType,
                        relTarget = T.unpack rTarget
                       }
     HexpatEvent
_ -> () -> StateT (Map RefId Relationship) IO ()
forall a. a -> StateT (Map RefId Relationship) IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Gets relationships for the workbook (this means the filenames in
-- the relationships map are relative to "xl/" base path within the
-- zip file.
--
-- The relationships xml file will only be parsed once when called
-- multiple times within a larger XlsxM action.
getWorkbookRelationships :: XlsxM Relationships
getWorkbookRelationships :: XlsxM Relationships
getWorkbookRelationships = Memoized Relationships -> XlsxM Relationships
forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized (Memoized Relationships -> XlsxM Relationships)
-> XlsxM (Memoized Relationships) -> XlsxM Relationships
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (XlsxMState -> Memoized Relationships)
-> XlsxM (Memoized Relationships)
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized Relationships
_xs_relationships

type HexpatEvent = SAXEvent ByteString Text

relIdToEntrySelector :: RefId -> XlsxM (Maybe Zip.EntrySelector)
relIdToEntrySelector :: RefId -> XlsxM (Maybe EntrySelector)
relIdToEntrySelector RefId
rid = do
  Relationships rels <- XlsxM Relationships
getWorkbookRelationships
  for (M.lookup rid rels) $ \Relationship
rel -> do
    FilePath -> XlsxM EntrySelector
forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector (FilePath -> XlsxM EntrySelector)
-> FilePath -> XlsxM EntrySelector
forall a b. (a -> b) -> a -> b
$ FilePath
"xl/" FilePath -> ShowS
forall a. Semigroup a => a -> a -> a
<> Relationship -> FilePath
relTarget Relationship
rel

sheetIdToRelId :: Int -> XlsxM (Maybe RefId)
sheetIdToRelId :: Int -> XlsxM (Maybe RefId)
sheetIdToRelId Int
sheetId = do
  WorkbookInfo sheets <- XlsxM WorkbookInfo
getWorkbookInfo
  pure $ sheetInfoRelId <$> find ((== sheetId) . sheetInfoSheetId) sheets

sheetIdToEntrySelector :: Int -> XlsxM (Maybe Zip.EntrySelector)
sheetIdToEntrySelector :: Int -> XlsxM (Maybe EntrySelector)
sheetIdToEntrySelector Int
sheetId = do
  Int -> XlsxM (Maybe RefId)
sheetIdToRelId Int
sheetId XlsxM (Maybe RefId)
-> (Maybe RefId -> XlsxM (Maybe EntrySelector))
-> XlsxM (Maybe EntrySelector)
forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Maybe RefId
Nothing  -> Maybe EntrySelector -> XlsxM (Maybe EntrySelector)
forall a. a -> XlsxM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe EntrySelector
forall a. Maybe a
Nothing
    Just RefId
rid -> RefId -> XlsxM (Maybe EntrySelector)
relIdToEntrySelector RefId
rid

-- If the given sheet number exists, returns Just a conduit source of the stream
-- of XML events in a particular sheet. Returns Nothing when the sheet doesn't
-- exist.
{-# SCC getSheetXmlSource #-}
getSheetXmlSource ::
  (PrimMonad m, MonadThrow m, C.MonadResource m) =>
  Int ->
  XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource :: forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId = do
  -- TODO: The Zip library may throw exceptions that aren't exposed from this
  -- module, so downstream library users would need to add the 'zip' package to
  -- handle them. Consider re-wrapping zip library exceptions, or just
  -- re-export them?
  mSheetSel <- Int -> XlsxM (Maybe EntrySelector)
sheetIdToEntrySelector Int
sheetId
  sheetExists <- maybe (pure False) (liftZip . Zip.doesEntryExist) mSheetSel
  case mSheetSel of
    Just EntrySelector
sheetSel
      | Bool
sheetExists ->
          ConduitT () ByteString m () -> Maybe (ConduitT () ByteString m ())
forall a. a -> Maybe a
Just (ConduitT () ByteString m ()
 -> Maybe (ConduitT () ByteString m ()))
-> XlsxM (ConduitT () ByteString m ())
-> XlsxM (Maybe (ConduitT () ByteString m ()))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ZipArchive (ConduitT () ByteString m ())
-> XlsxM (ConduitT () ByteString m ())
forall a. ZipArchive a -> XlsxM a
liftZip (EntrySelector -> ZipArchive (ConduitT () ByteString m ())
forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
EntrySelector -> ZipArchive (ConduitT () ByteString m ())
Zip.getEntrySource EntrySelector
sheetSel)
    Maybe EntrySelector
_ -> Maybe (ConduitT () ByteString m ())
-> XlsxM (Maybe (ConduitT () ByteString m ()))
forall a. a -> XlsxM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe (ConduitT () ByteString m ())
forall a. Maybe a
Nothing

{-# SCC runExpat #-}
runExpat :: forall state tag text.
  (GenericXMLString tag, GenericXMLString text) =>
  state ->
  ConduitT () ByteString (C.ResourceT IO) () ->
  ([SAXEvent tag text] -> StateT state IO ()) ->
  IO state
runExpat :: forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat state
initialState ConduitT () ByteString (ResourceT IO) ()
byteSource [SAXEvent tag text] -> StateT state IO ()
handler = do
  -- Set up state
  ref <- state -> IO (IORef state)
forall a. a -> IO (IORef a)
newIORef state
initialState
  -- Set up parser and callbacks
  (parseChunk, _getLoc) <- Hexpat.hexpatNewParser Nothing Nothing False
  let noExtra p
_ b
offset = ((), b) -> f ((), b)
forall a. a -> f a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((), b
offset)
      {-# SCC processChunk #-}
      {-# INLINE processChunk #-}
      processChunk Bool
isFinalChunk ByteString
chunk = do
        (buf, len, mError) <- HParser
parseChunk ByteString
chunk Bool
isFinalChunk
        saxen <- HexpatInternal.parseBuf buf len noExtra
        case mError of
          Just XMLParseError
err -> FilePath -> IO ()
forall a. HasCallStack => FilePath -> a
error (FilePath -> IO ()) -> FilePath -> IO ()
forall a b. (a -> b) -> a -> b
$ FilePath
"expat error: " FilePath -> ShowS
forall a. Semigroup a => a -> a -> a
<> XMLParseError -> FilePath
forall a. Show a => a -> FilePath
show XMLParseError
err
          Maybe XMLParseError
Nothing -> do
            state0 <- IO state -> IO state
forall a. IO a -> IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO state -> IO state) -> IO state -> IO state
forall a b. (a -> b) -> a -> b
$ IORef state -> IO state
forall a. IORef a -> IO a
readIORef IORef state
ref
            state1 <-
              {-# SCC "runExpat_runStateT_call" #-}
              execStateT (handler $ map fst saxen) state0
            writeIORef ref state1
  C.runConduitRes $
    byteSource .|
    C.awaitForever (liftIO . processChunk False)
  processChunk True BS.empty
  readIORef ref

runExpatForSheet ::
  SheetState ->
  ConduitT () ByteString (C.ResourceT IO) () ->
  (SheetItem -> IO ()) ->
  XlsxM ()
runExpatForSheet :: SheetState
-> ConduitT () ByteString (ResourceT IO) ()
-> (SheetItem -> IO ())
-> XlsxM ()
runExpatForSheet SheetState
initState ConduitT () ByteString (ResourceT IO) ()
byteSource SheetItem -> IO ()
inner =
  XlsxM SheetState -> XlsxM ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (XlsxM SheetState -> XlsxM ()) -> XlsxM SheetState -> XlsxM ()
forall a b. (a -> b) -> a -> b
$ IO SheetState -> XlsxM SheetState
forall a. IO a -> XlsxM a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO SheetState -> XlsxM SheetState)
-> IO SheetState -> XlsxM SheetState
forall a b. (a -> b) -> a -> b
$ SheetState
-> ConduitT () ByteString (ResourceT IO) ()
-> ([HexpatEvent] -> StateT SheetState IO ())
-> IO SheetState
forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat SheetState
initState ConduitT () ByteString (ResourceT IO) ()
byteSource [HexpatEvent] -> StateT SheetState IO ()
forall {m :: * -> *} {t :: * -> *}.
(Foldable t, MonadState SheetState m, MonadThrow m, MonadIO m) =>
t HexpatEvent -> m ()
handler
  where
    sheetName :: Int
sheetName = SheetState -> Int
_ps_sheet_index SheetState
initState
    handler :: t HexpatEvent -> m ()
handler t HexpatEvent
evs = t HexpatEvent -> (HexpatEvent -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ t HexpatEvent
evs ((HexpatEvent -> m ()) -> m ()) -> (HexpatEvent -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \HexpatEvent
ev -> do
      parseRes <- ExceptT SheetErrors m (Maybe CellRow)
-> m (Either SheetErrors (Maybe CellRow))
forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (ExceptT SheetErrors m (Maybe CellRow)
 -> m (Either SheetErrors (Maybe CellRow)))
-> ExceptT SheetErrors m (Maybe CellRow)
-> m (Either SheetErrors (Maybe CellRow))
forall a b. (a -> b) -> a -> b
$ HexpatEvent -> ExceptT SheetErrors m (Maybe CellRow)
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
HexpatEvent -> m (Maybe CellRow)
matchHexpatEvent HexpatEvent
ev
      case parseRes of
        Left SheetErrors
err -> SheetErrors -> m ()
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM SheetErrors
err
        Right (Just CellRow
cellRow)
          | Bool -> Bool
not (CellRow -> Bool
forall a. IntMap a -> Bool
IntMap.null CellRow
cellRow) -> do
              rowNum <- Getting RowIndex SheetState RowIndex -> m RowIndex
forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Getting RowIndex SheetState RowIndex
Lens' SheetState RowIndex
ps_cell_row_index
              liftIO $ inner $ MkSheetItem sheetName $ MkRow rowNum cellRow
        Either SheetErrors (Maybe CellRow)
_ -> () -> m ()
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | this will collect the sheetitems in a list.
--   useful for cases were memory is of no concern but a sheetitem
--   type in a list is needed.
collectItems ::
  SheetIndex ->
  XlsxM [SheetItem]
collectItems :: SheetIndex -> XlsxM [SheetItem]
collectItems SheetIndex
sheetId = do
 res <- IO (IORef [SheetItem]) -> XlsxM (IORef [SheetItem])
forall a. IO a -> XlsxM a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef [SheetItem]) -> XlsxM (IORef [SheetItem]))
-> IO (IORef [SheetItem]) -> XlsxM (IORef [SheetItem])
forall a b. (a -> b) -> a -> b
$ [SheetItem] -> IO (IORef [SheetItem])
forall a. a -> IO (IORef a)
newIORef []
 void $ readSheet sheetId $ \SheetItem
item ->
   IO () -> IO ()
forall a. IO a -> IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IORef [SheetItem] -> ([SheetItem] -> [SheetItem]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [SheetItem]
res (SheetItem
item SheetItem -> [SheetItem] -> [SheetItem]
forall a. a -> [a] -> [a]
:))
 fmap reverse $ liftIO $ readIORef res

-- | datatype representing a sheet index, looking it up by name
--   can be done with 'makeIndexFromName', which is the preferred approach.
--   although 'makeIndex' is available in case it's already known.
newtype SheetIndex = MkSheetIndex Int
 deriving newtype SheetIndex -> ()
(SheetIndex -> ()) -> NFData SheetIndex
forall a. (a -> ()) -> NFData a
$crnf :: SheetIndex -> ()
rnf :: SheetIndex -> ()
NFData

-- | This does *no* checking if the index exists or not.
--   you could have index out of bounds issues because of this.
makeIndex :: Int -> SheetIndex
makeIndex :: Int -> SheetIndex
makeIndex = Int -> SheetIndex
MkSheetIndex

-- | Look up the index of a case insensitive sheet name
makeIndexFromName :: Text -> XlsxM (Maybe SheetIndex)
makeIndexFromName :: Text -> XlsxM (Maybe SheetIndex)
makeIndexFromName Text
sheetName = do
  wi <- XlsxM WorkbookInfo
getWorkbookInfo
  -- The Excel UI does not allow a user to create two sheets whose
  -- names differ only in alphabetic case (at least for ascii...)
  let sheetNameCI = Text -> Text
T.toLower Text
sheetName
      findRes :: Maybe SheetInfo
      findRes = (SheetInfo -> Bool) -> [SheetInfo] -> Maybe SheetInfo
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
sheetNameCI) (Text -> Bool) -> (SheetInfo -> Text) -> SheetInfo -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.toLower (Text -> Text) -> (SheetInfo -> Text) -> SheetInfo -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SheetInfo -> Text
sheetInfoName) ([SheetInfo] -> Maybe SheetInfo) -> [SheetInfo] -> Maybe SheetInfo
forall a b. (a -> b) -> a -> b
$ WorkbookInfo -> [SheetInfo]
_wiSheets WorkbookInfo
wi
  pure $ makeIndex . sheetInfoSheetId <$> findRes

readSheet ::
  SheetIndex ->
  -- | Function to consume the sheet's rows
  (SheetItem -> IO ()) ->
  -- | Returns False if sheet doesn't exist, or True otherwise
  XlsxM Bool
readSheet :: SheetIndex -> (SheetItem -> IO ()) -> XlsxM Bool
readSheet (MkSheetIndex Int
sheetId) SheetItem -> IO ()
inner = do
  mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <-
    Int -> XlsxM (Maybe (ConduitT () ByteString (ResourceT IO) ()))
forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId
  let
  case mSrc of
    Maybe (ConduitT () ByteString (ResourceT IO) ())
Nothing -> Bool -> XlsxM Bool
forall a. a -> XlsxM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
    Just ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml -> do
      sharedStrs <- XlsxM SharedStringsMap
getOrParseSharedStringss
      let sheetState0 = SheetState
initialSheetState
            SheetState -> (SheetState -> SheetState) -> SheetState
forall a b. a -> (a -> b) -> b
& (SharedStringsMap -> Identity SharedStringsMap)
-> SheetState -> Identity SheetState
Lens' SheetState SharedStringsMap
ps_shared_strings ((SharedStringsMap -> Identity SharedStringsMap)
 -> SheetState -> Identity SheetState)
-> SharedStringsMap -> SheetState -> SheetState
forall s t a b. ASetter s t a b -> b -> s -> t
.~ SharedStringsMap
sharedStrs
            SheetState -> (SheetState -> SheetState) -> SheetState
forall a b. a -> (a -> b) -> b
& (Int -> Identity Int) -> SheetState -> Identity SheetState
Lens' SheetState Int
ps_sheet_index ((Int -> Identity Int) -> SheetState -> Identity SheetState)
-> Int -> SheetState -> SheetState
forall s t a b. ASetter s t a b -> b -> s -> t
.~ Int
sheetId
      runExpatForSheet sheetState0 sourceSheetXml inner
      pure True

-- | Returns number of rows in the given sheet (identified by the
-- sheet's ID, AKA the sheetId attribute, AKA 'sheetInfoSheetId'), or Nothing
-- if the sheet does not exist. Does not perform a full parse of the
-- XML into 'SheetItem's, so it should be more efficient than counting
-- via 'readSheetByIndex'.
countRowsInSheet :: SheetIndex -> XlsxM (Maybe Int)
countRowsInSheet :: SheetIndex -> XlsxM (Maybe Int)
countRowsInSheet (MkSheetIndex Int
sheetId) = do
  mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <-
    Int -> XlsxM (Maybe (ConduitT () ByteString (ResourceT IO) ()))
forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId
  for mSrc $ \ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml -> do
    IO Int -> XlsxM Int
forall a. IO a -> XlsxM a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> XlsxM Int) -> IO Int -> XlsxM Int
forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat @Int @ByteString @ByteString Int
0 ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml (([SAXEvent ByteString ByteString] -> StateT Int IO ()) -> IO Int)
-> ([SAXEvent ByteString ByteString] -> StateT Int IO ()) -> IO Int
forall a b. (a -> b) -> a -> b
$ \[SAXEvent ByteString ByteString]
evs ->
      [SAXEvent ByteString ByteString]
-> (SAXEvent ByteString ByteString -> StateT Int IO ())
-> StateT Int IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [SAXEvent ByteString ByteString]
evs ((SAXEvent ByteString ByteString -> StateT Int IO ())
 -> StateT Int IO ())
-> (SAXEvent ByteString ByteString -> StateT Int IO ())
-> StateT Int IO ()
forall a b. (a -> b) -> a -> b
$ \case
        StartElement ByteString
"row" [(ByteString, ByteString)]
_ -> (Int -> Int) -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        SAXEvent ByteString ByteString
_                    -> () -> StateT Int IO ()
forall a. a -> StateT Int IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Return row from the state and empty it
popRow :: HasSheetState m => m CellRow
popRow :: forall (m :: * -> *). HasSheetState m => m CellRow
popRow = do
  row <- Getting CellRow SheetState CellRow -> m CellRow
forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Getting CellRow SheetState CellRow
Lens' SheetState CellRow
ps_row
  ps_row .= mempty
  pure row

data AddCellErrors
  = ReadError -- ^ Could not read current cell value
      Text    -- ^ Original value
      String  -- ^ Error message
  | SharedStringsNotFound -- ^ Could not find string by index in shared string table
      Int                -- ^ Given index
      (V.Vector Text)      -- ^ Given shared strings to lookup in
  deriving Int -> AddCellErrors -> ShowS
[AddCellErrors] -> ShowS
AddCellErrors -> FilePath
(Int -> AddCellErrors -> ShowS)
-> (AddCellErrors -> FilePath)
-> ([AddCellErrors] -> ShowS)
-> Show AddCellErrors
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> AddCellErrors -> ShowS
showsPrec :: Int -> AddCellErrors -> ShowS
$cshow :: AddCellErrors -> FilePath
show :: AddCellErrors -> FilePath
$cshowList :: [AddCellErrors] -> ShowS
showList :: [AddCellErrors] -> ShowS
Show

-- | Parse the given value
--
-- If it's a string, we try to get it our of a shared string table
{-# SCC parseValue #-}
parseValue :: SharedStringsMap -> Text -> ExcelValueType -> Either AddCellErrors CellValue
parseValue :: SharedStringsMap
-> Text -> ExcelValueType -> Either AddCellErrors CellValue
parseValue SharedStringsMap
sstrings Text
txt = \case
  ExcelValueType
TS -> do
    (idx, _) <- Text -> FilePath -> AddCellErrors
ReadError Text
txt (FilePath -> AddCellErrors)
-> Either FilePath (Int, Text) -> Either AddCellErrors (Int, Text)
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
`first` forall a. Integral a => Reader a
Read.decimal @Int Text
txt
    string <- maybe (Left $ SharedStringsNotFound idx sstrings) Right $ {-# SCC "sstrings_lookup_scc" #-}  (sstrings ^? ix idx)
    Right $ CellText string
  ExcelValueType
TStr -> CellValue -> Either AddCellErrors CellValue
forall a. a -> Either AddCellErrors a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (CellValue -> Either AddCellErrors CellValue)
-> CellValue -> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Text -> CellValue
CellText Text
txt
  ExcelValueType
TN -> (FilePath -> AddCellErrors)
-> ((Double, Text) -> CellValue)
-> Either FilePath (Double, Text)
-> Either AddCellErrors CellValue
forall a b c d. (a -> b) -> (c -> d) -> Either a c -> Either b d
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap (Text -> FilePath -> AddCellErrors
ReadError Text
txt) (Double -> CellValue
CellDouble (Double -> CellValue)
-> ((Double, Text) -> Double) -> (Double, Text) -> CellValue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, Text) -> Double
forall a b. (a, b) -> a
fst) (Either FilePath (Double, Text) -> Either AddCellErrors CellValue)
-> Either FilePath (Double, Text) -> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Reader Double
Read.double Text
txt
  ExcelValueType
TE -> (FilePath -> AddCellErrors)
-> ((ErrorType, Text) -> CellValue)
-> Either FilePath (ErrorType, Text)
-> Either AddCellErrors CellValue
forall a b c d. (a -> b) -> (c -> d) -> Either a c -> Either b d
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap (Text -> FilePath -> AddCellErrors
ReadError Text
txt) (ErrorType -> CellValue
CellError (ErrorType -> CellValue)
-> ((ErrorType, Text) -> ErrorType)
-> (ErrorType, Text)
-> CellValue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ErrorType, Text) -> ErrorType
forall a b. (a, b) -> a
fst) (Either FilePath (ErrorType, Text)
 -> Either AddCellErrors CellValue)
-> Either FilePath (ErrorType, Text)
-> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Reader ErrorType
forall a. FromAttrVal a => Reader a
fromAttrVal Text
txt
  ExcelValueType
TB | Text
txt Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"1" -> CellValue -> Either AddCellErrors CellValue
forall a b. b -> Either a b
Right (CellValue -> Either AddCellErrors CellValue)
-> CellValue -> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Bool -> CellValue
CellBool Bool
True
     | Text
txt Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"0" -> CellValue -> Either AddCellErrors CellValue
forall a b. b -> Either a b
Right (CellValue -> Either AddCellErrors CellValue)
-> CellValue -> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Bool -> CellValue
CellBool Bool
False
     | Bool
otherwise -> AddCellErrors -> Either AddCellErrors CellValue
forall a b. a -> Either a b
Left (AddCellErrors -> Either AddCellErrors CellValue)
-> AddCellErrors -> Either AddCellErrors CellValue
forall a b. (a -> b) -> a -> b
$ Text -> FilePath -> AddCellErrors
ReadError Text
txt FilePath
"Could not read Excel boolean value (expected 0 or 1)"
  ExcelValueType
Untyped -> CellValue -> Either AddCellErrors CellValue
forall a b. b -> Either a b
Right (Text -> CellValue
parseUntypedValue Text
txt)

-- TODO: some of the cells are untyped and we need to test whether
-- they all are strings or something more complicated
parseUntypedValue :: Text -> CellValue
parseUntypedValue :: Text -> CellValue
parseUntypedValue = Text -> CellValue
CellText

-- | Adds a cell to row in state monad
{-# SCC addCellToRow #-}
addCellToRow
  :: ( MonadError SheetErrors m
     , HasSheetState m
     )
  => Text -> m ()
addCellToRow :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
Text -> m ()
addCellToRow Text
txt = do
  st <- m SheetState
forall s (m :: * -> *). MonadState s m => m s
get
  style <- use ps_cell_style
  when (_ps_is_in_val st) $ do
    val <- liftEither $ first ParseCellError $ parseValue (_ps_shared_strings st) txt (_ps_type st)
    put $ st { _ps_row = IntMap.insert (unColumnIndex $ _ps_cell_col_index st)
                         (Cell { _cellStyle   = style
                               , _cellValue   = Just val
                               , _cellComment = Nothing
                               , _cellFormula = Nothing
                               }) $ _ps_row st}

data SheetErrors
  = ParseCoordinateError CoordinateErrors -- ^ Error while parsing coordinates
  | ParseTypeError TypeError              -- ^ Error while parsing types
  | ParseCellError AddCellErrors          -- ^ Error while parsing cells
  | ParseStyleErrors StyleError
  | HexpatParseError Hexpat.XMLParseError
  deriving stock Int -> SheetErrors -> ShowS
[SheetErrors] -> ShowS
SheetErrors -> FilePath
(Int -> SheetErrors -> ShowS)
-> (SheetErrors -> FilePath)
-> ([SheetErrors] -> ShowS)
-> Show SheetErrors
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetErrors -> ShowS
showsPrec :: Int -> SheetErrors -> ShowS
$cshow :: SheetErrors -> FilePath
show :: SheetErrors -> FilePath
$cshowList :: [SheetErrors] -> ShowS
showList :: [SheetErrors] -> ShowS
Show
  deriving anyclass Show SheetErrors
Typeable SheetErrors
(Typeable SheetErrors, Show SheetErrors) =>
(SheetErrors -> SomeException)
-> (SomeException -> Maybe SheetErrors)
-> (SheetErrors -> FilePath)
-> (SheetErrors -> Bool)
-> Exception SheetErrors
SomeException -> Maybe SheetErrors
SheetErrors -> Bool
SheetErrors -> FilePath
SheetErrors -> SomeException
forall e.
(Typeable e, Show e) =>
(e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> (e -> Bool)
-> Exception e
$ctoException :: SheetErrors -> SomeException
toException :: SheetErrors -> SomeException
$cfromException :: SomeException -> Maybe SheetErrors
fromException :: SomeException -> Maybe SheetErrors
$cdisplayException :: SheetErrors -> FilePath
displayException :: SheetErrors -> FilePath
$cbacktraceDesired :: SheetErrors -> Bool
backtraceDesired :: SheetErrors -> Bool
Exception

type SheetValue = (ByteString, Text)
type SheetValues = [SheetValue]

data CoordinateErrors
  = CoordinateNotFound SheetValues         -- ^ If the coordinate was not specified in "r" attribute
  | NoListElement SheetValue SheetValues   -- ^ If the value is empty for some reason
  | NoTextContent Content SheetValues      -- ^ If the value has something besides @ContentText@ inside
  | DecodeFailure Text SheetValues         -- ^ If malformed coordinate text was passed
  deriving stock Int -> CoordinateErrors -> ShowS
[CoordinateErrors] -> ShowS
CoordinateErrors -> FilePath
(Int -> CoordinateErrors -> ShowS)
-> (CoordinateErrors -> FilePath)
-> ([CoordinateErrors] -> ShowS)
-> Show CoordinateErrors
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CoordinateErrors -> ShowS
showsPrec :: Int -> CoordinateErrors -> ShowS
$cshow :: CoordinateErrors -> FilePath
show :: CoordinateErrors -> FilePath
$cshowList :: [CoordinateErrors] -> ShowS
showList :: [CoordinateErrors] -> ShowS
Show
  deriving anyclass Show CoordinateErrors
Typeable CoordinateErrors
(Typeable CoordinateErrors, Show CoordinateErrors) =>
(CoordinateErrors -> SomeException)
-> (SomeException -> Maybe CoordinateErrors)
-> (CoordinateErrors -> FilePath)
-> (CoordinateErrors -> Bool)
-> Exception CoordinateErrors
SomeException -> Maybe CoordinateErrors
CoordinateErrors -> Bool
CoordinateErrors -> FilePath
CoordinateErrors -> SomeException
forall e.
(Typeable e, Show e) =>
(e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> (e -> Bool)
-> Exception e
$ctoException :: CoordinateErrors -> SomeException
toException :: CoordinateErrors -> SomeException
$cfromException :: SomeException -> Maybe CoordinateErrors
fromException :: SomeException -> Maybe CoordinateErrors
$cdisplayException :: CoordinateErrors -> FilePath
displayException :: CoordinateErrors -> FilePath
$cbacktraceDesired :: CoordinateErrors -> Bool
backtraceDesired :: CoordinateErrors -> Bool
Exception

data TypeError
  = TypeNotFound SheetValues
  | TypeNoListElement SheetValue SheetValues
  | UnkownType Text SheetValues
  | TypeNoTextContent Content SheetValues
  deriving Int -> TypeError -> ShowS
[TypeError] -> ShowS
TypeError -> FilePath
(Int -> TypeError -> ShowS)
-> (TypeError -> FilePath)
-> ([TypeError] -> ShowS)
-> Show TypeError
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> TypeError -> ShowS
showsPrec :: Int -> TypeError -> ShowS
$cshow :: TypeError -> FilePath
show :: TypeError -> FilePath
$cshowList :: [TypeError] -> ShowS
showList :: [TypeError] -> ShowS
Show
  deriving anyclass Show TypeError
Typeable TypeError
(Typeable TypeError, Show TypeError) =>
(TypeError -> SomeException)
-> (SomeException -> Maybe TypeError)
-> (TypeError -> FilePath)
-> (TypeError -> Bool)
-> Exception TypeError
SomeException -> Maybe TypeError
TypeError -> Bool
TypeError -> FilePath
TypeError -> SomeException
forall e.
(Typeable e, Show e) =>
(e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> (e -> Bool)
-> Exception e
$ctoException :: TypeError -> SomeException
toException :: TypeError -> SomeException
$cfromException :: SomeException -> Maybe TypeError
fromException :: SomeException -> Maybe TypeError
$cdisplayException :: TypeError -> FilePath
displayException :: TypeError -> FilePath
$cbacktraceDesired :: TypeError -> Bool
backtraceDesired :: TypeError -> Bool
Exception

data WorkbookError = LookupError { WorkbookError -> [(ByteString, Text)]
lookup_attrs :: [(ByteString, Text)], WorkbookError -> ByteString
lookup_field :: ByteString }
                   | ParseDecimalError Text String
  deriving Int -> WorkbookError -> ShowS
[WorkbookError] -> ShowS
WorkbookError -> FilePath
(Int -> WorkbookError -> ShowS)
-> (WorkbookError -> FilePath)
-> ([WorkbookError] -> ShowS)
-> Show WorkbookError
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> WorkbookError -> ShowS
showsPrec :: Int -> WorkbookError -> ShowS
$cshow :: WorkbookError -> FilePath
show :: WorkbookError -> FilePath
$cshowList :: [WorkbookError] -> ShowS
showList :: [WorkbookError] -> ShowS
Show
  deriving anyclass Show WorkbookError
Typeable WorkbookError
(Typeable WorkbookError, Show WorkbookError) =>
(WorkbookError -> SomeException)
-> (SomeException -> Maybe WorkbookError)
-> (WorkbookError -> FilePath)
-> (WorkbookError -> Bool)
-> Exception WorkbookError
SomeException -> Maybe WorkbookError
WorkbookError -> Bool
WorkbookError -> FilePath
WorkbookError -> SomeException
forall e.
(Typeable e, Show e) =>
(e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> (e -> Bool)
-> Exception e
$ctoException :: WorkbookError -> SomeException
toException :: WorkbookError -> SomeException
$cfromException :: SomeException -> Maybe WorkbookError
fromException :: SomeException -> Maybe WorkbookError
$cdisplayException :: WorkbookError -> FilePath
displayException :: WorkbookError -> FilePath
$cbacktraceDesired :: WorkbookError -> Bool
backtraceDesired :: WorkbookError -> Bool
Exception

{-# SCC matchHexpatEvent #-}
matchHexpatEvent ::
  ( MonadError SheetErrors m,
    HasSheetState m
  ) =>
  HexpatEvent ->
  m (Maybe CellRow)
matchHexpatEvent :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
HexpatEvent -> m (Maybe CellRow)
matchHexpatEvent HexpatEvent
ev = case HexpatEvent
ev of
  CharacterData Text
txt -> {-# SCC "handle_CharData" #-} do
    inVal <- Getting Bool SheetState Bool -> m Bool
forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Getting Bool SheetState Bool
Lens' SheetState Bool
ps_is_in_val
    when inVal $
      {-# SCC "append_text_buf" #-} (ps_text_buf <>= txt)
    pure Nothing
  StartElement ByteString
"c" [(ByteString, Text)]
attrs -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m () -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ([(ByteString, Text)] -> m ()
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setCoord [(ByteString, Text)]
attrs m () -> m () -> m ()
forall a b. m a -> m b -> m b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [(ByteString, Text)] -> m ()
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setType [(ByteString, Text)]
attrs m () -> m () -> m ()
forall a b. m a -> m b -> m b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [(ByteString, Text)] -> m ()
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setStyle [(ByteString, Text)]
attrs)
  StartElement ByteString
"is" [(ByteString, Text)]
_ -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m () -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Bool -> Identity Bool) -> SheetState -> Identity SheetState
Lens' SheetState Bool
ps_is_in_val ((Bool -> Identity Bool) -> SheetState -> Identity SheetState)
-> Bool -> m ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True)
  EndElement ByteString
"is" -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m () -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ m ()
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue
  StartElement ByteString
"v" [(ByteString, Text)]
_ -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m () -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Bool -> Identity Bool) -> SheetState -> Identity SheetState
Lens' SheetState Bool
ps_is_in_val ((Bool -> Identity Bool) -> SheetState -> Identity SheetState)
-> Bool -> m ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True)
  EndElement ByteString
"v" -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m () -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ m ()
forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue
  -- If beginning of row, empty the state and return nothing.
  -- We don't know if there is anything in the state, the user may have
  -- decided to <row> <row> (not closing). In any case it's the beginning of a new row
  -- so we clear the state.
  StartElement ByteString
"row" [(ByteString, Text)]
_ -> Maybe CellRow
forall a. Maybe a
Nothing Maybe CellRow -> m CellRow -> m (Maybe CellRow)
forall a b. a -> m b -> m a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ m CellRow
forall (m :: * -> *). HasSheetState m => m CellRow
popRow
  -- If at the end of the row, we have collected the whole row into
  -- the current state. Empty the state and return the row.
  EndElement ByteString
"row" -> CellRow -> Maybe CellRow
forall a. a -> Maybe a
Just (CellRow -> Maybe CellRow) -> m CellRow -> m (Maybe CellRow)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m CellRow
forall (m :: * -> *). HasSheetState m => m CellRow
popRow
  StartElement ByteString
"worksheet" [(ByteString, Text)]
_ -> (Bool -> Identity Bool) -> SheetState -> Identity SheetState
Lens' SheetState Bool
ps_worksheet_ended ((Bool -> Identity Bool) -> SheetState -> Identity SheetState)
-> Bool -> m ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
False m () -> m (Maybe CellRow) -> m (Maybe CellRow)
forall a b. m a -> m b -> m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Maybe CellRow -> m (Maybe CellRow)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe CellRow
forall a. Maybe a
Nothing
  EndElement ByteString
"worksheet" -> (Bool -> Identity Bool) -> SheetState -> Identity SheetState
Lens' SheetState Bool
ps_worksheet_ended ((Bool -> Identity Bool) -> SheetState -> Identity SheetState)
-> Bool -> m ()
forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True m () -> m (Maybe CellRow) -> m (Maybe CellRow)
forall a b. m a -> m b -> m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Maybe CellRow -> m (Maybe CellRow)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe CellRow
forall a. Maybe a
Nothing
  -- Skip everything else, e.g. the formula elements <f>
  FailDocument XMLParseError
err -> do
    -- this event is emitted at the end the xml stream (possibly
    -- because the xml files in xlsx archives don't end in a
    -- newline, but that's a guess), so we use state to determine if
    -- it's expected.
    finished <- Getting Bool SheetState Bool -> m Bool
forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Getting Bool SheetState Bool
Lens' SheetState Bool
ps_worksheet_ended
    unless finished $
      throwError $ HexpatParseError err
    pure Nothing
  HexpatEvent
_ -> Maybe CellRow -> m (Maybe CellRow)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe CellRow
forall a. Maybe a
Nothing

{-# INLINE finaliseCellValue #-}
finaliseCellValue ::
  ( MonadError SheetErrors m, HasSheetState m ) => m ()
finaliseCellValue :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue = do
  txt <- (SheetState -> Text) -> m Text
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets SheetState -> Text
_ps_text_buf
  addCellToRow txt
  modify' $ \SheetState
st ->
    SheetState
st { _ps_is_in_val = False
       , _ps_text_buf = mempty
       }

-- | Update state coordinates accordingly to @parseCoordinates@
{-# SCC setCoord #-}
setCoord
  :: ( MonadError SheetErrors m
     , HasSheetState m
     )
  => SheetValues -> m ()
setCoord :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setCoord [(ByteString, Text)]
list = do
  coordinates <- Either SheetErrors (RowIndex, ColumnIndex)
-> m (RowIndex, ColumnIndex)
forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither (Either SheetErrors (RowIndex, ColumnIndex)
 -> m (RowIndex, ColumnIndex))
-> Either SheetErrors (RowIndex, ColumnIndex)
-> m (RowIndex, ColumnIndex)
forall a b. (a -> b) -> a -> b
$ (CoordinateErrors -> SheetErrors)
-> Either CoordinateErrors (RowIndex, ColumnIndex)
-> Either SheetErrors (RowIndex, ColumnIndex)
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first CoordinateErrors -> SheetErrors
ParseCoordinateError (Either CoordinateErrors (RowIndex, ColumnIndex)
 -> Either SheetErrors (RowIndex, ColumnIndex))
-> Either CoordinateErrors (RowIndex, ColumnIndex)
-> Either SheetErrors (RowIndex, ColumnIndex)
forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)]
-> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates [(ByteString, Text)]
list
  ps_cell_col_index .= (coordinates ^. _2)
  ps_cell_row_index .= (coordinates ^. _1)

-- | Parse type from values and update state accordingly
setType
  :: ( MonadError SheetErrors m
     , HasSheetState m
 )
  => SheetValues -> m ()
setType :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setType [(ByteString, Text)]
list = do
  type' <- Either SheetErrors ExcelValueType -> m ExcelValueType
forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither (Either SheetErrors ExcelValueType -> m ExcelValueType)
-> Either SheetErrors ExcelValueType -> m ExcelValueType
forall a b. (a -> b) -> a -> b
$ (TypeError -> SheetErrors)
-> Either TypeError ExcelValueType
-> Either SheetErrors ExcelValueType
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first TypeError -> SheetErrors
ParseTypeError (Either TypeError ExcelValueType
 -> Either SheetErrors ExcelValueType)
-> Either TypeError ExcelValueType
-> Either SheetErrors ExcelValueType
forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> Either TypeError ExcelValueType
parseType [(ByteString, Text)]
list
  ps_type .= type'

-- | Find sheet value by its name
findName :: ByteString -> SheetValues -> Maybe SheetValue
findName :: ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
name = ((ByteString, Text) -> Bool)
-> [(ByteString, Text)] -> Maybe (ByteString, Text)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((ByteString
name ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==) (ByteString -> Bool)
-> ((ByteString, Text) -> ByteString) -> (ByteString, Text) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString, Text) -> ByteString
forall a b. (a, b) -> a
fst)
{-# INLINE findName #-}

setStyle :: (MonadError SheetErrors m, HasSheetState m) => SheetValues -> m ()
setStyle :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setStyle [(ByteString, Text)]
list = do
  style <- Either SheetErrors (Maybe Int) -> m (Maybe Int)
forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither (Either SheetErrors (Maybe Int) -> m (Maybe Int))
-> Either SheetErrors (Maybe Int) -> m (Maybe Int)
forall a b. (a -> b) -> a -> b
$ (StyleError -> SheetErrors)
-> Either StyleError (Maybe Int) -> Either SheetErrors (Maybe Int)
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first StyleError -> SheetErrors
ParseStyleErrors (Either StyleError (Maybe Int) -> Either SheetErrors (Maybe Int))
-> Either StyleError (Maybe Int) -> Either SheetErrors (Maybe Int)
forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> Either StyleError (Maybe Int)
parseStyle [(ByteString, Text)]
list
  ps_cell_style .= style

data StyleError = InvalidStyleRef { StyleError -> Text
seInput:: Text,  StyleError -> FilePath
seErrorMsg :: String}
  deriving Int -> StyleError -> ShowS
[StyleError] -> ShowS
StyleError -> FilePath
(Int -> StyleError -> ShowS)
-> (StyleError -> FilePath)
-> ([StyleError] -> ShowS)
-> Show StyleError
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> StyleError -> ShowS
showsPrec :: Int -> StyleError -> ShowS
$cshow :: StyleError -> FilePath
show :: StyleError -> FilePath
$cshowList :: [StyleError] -> ShowS
showList :: [StyleError] -> ShowS
Show

parseStyle :: SheetValues -> Either StyleError (Maybe Int)
parseStyle :: [(ByteString, Text)] -> Either StyleError (Maybe Int)
parseStyle [(ByteString, Text)]
list =
  case ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"s" [(ByteString, Text)]
list of
    Maybe (ByteString, Text)
Nothing -> Maybe Int -> Either StyleError (Maybe Int)
forall a. a -> Either StyleError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe Int
forall a. Maybe a
Nothing
    Just (ByteString
_nm, Text
valTex) -> case Reader Int
forall a. Integral a => Reader a
Read.decimal Text
valTex of
      Left FilePath
err        -> StyleError -> Either StyleError (Maybe Int)
forall a b. a -> Either a b
Left (Text -> FilePath -> StyleError
InvalidStyleRef Text
valTex FilePath
err)
      Right (Int
i, Text
_rem) -> Maybe Int -> Either StyleError (Maybe Int)
forall a. a -> Either StyleError a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Int -> Either StyleError (Maybe Int))
-> Maybe Int -> Either StyleError (Maybe Int)
forall a b. (a -> b) -> a -> b
$ Int -> Maybe Int
forall a. a -> Maybe a
Just Int
i

-- | Parse value type
{-# SCC parseType #-}
parseType :: SheetValues -> Either TypeError ExcelValueType
parseType :: [(ByteString, Text)] -> Either TypeError ExcelValueType
parseType [(ByteString, Text)]
list =
  case ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"t" [(ByteString, Text)]
list of
    -- NB: According to format specification default value for cells without
    -- `t` attribute is a `n` - number.
    --
    -- <xsd:complexType name="CT_Cell" from spec (see the `CellValue` spec reference)>
    --  ..
    --  <xsd:attribute name="t" type="ST_CellType" use="optional" default="n"/>
    -- </xsd:complexType>
    Maybe (ByteString, Text)
Nothing -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TN
    Just (ByteString
_nm, Text
valText)->
      case Text
valText of
        Text
"n"         -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TN
        Text
"s"         -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TS
         -- "Cell containing a formula string". Probably shouldn't be TStr..
        Text
"str"       -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TStr
        Text
"inlineStr" -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TStr
        Text
"b"         -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TB
        Text
"e"         -> ExcelValueType -> Either TypeError ExcelValueType
forall a b. b -> Either a b
Right ExcelValueType
TE
        Text
other       -> TypeError -> Either TypeError ExcelValueType
forall a b. a -> Either a b
Left (TypeError -> Either TypeError ExcelValueType)
-> TypeError -> Either TypeError ExcelValueType
forall a b. (a -> b) -> a -> b
$ Text -> [(ByteString, Text)] -> TypeError
UnkownType Text
other [(ByteString, Text)]
list

-- | Parse coordinates from a list of xml elements if such were found on "r" key
{-# SCC parseCoordinates #-}
parseCoordinates :: SheetValues -> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates :: [(ByteString, Text)]
-> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates [(ByteString, Text)]
list = do
  (_nm, valText) <- Either CoordinateErrors (ByteString, Text)
-> ((ByteString, Text)
    -> Either CoordinateErrors (ByteString, Text))
-> Maybe (ByteString, Text)
-> Either CoordinateErrors (ByteString, Text)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (CoordinateErrors -> Either CoordinateErrors (ByteString, Text)
forall a b. a -> Either a b
Left (CoordinateErrors -> Either CoordinateErrors (ByteString, Text))
-> CoordinateErrors -> Either CoordinateErrors (ByteString, Text)
forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> CoordinateErrors
CoordinateNotFound [(ByteString, Text)]
list) (ByteString, Text) -> Either CoordinateErrors (ByteString, Text)
forall a b. b -> Either a b
Right (Maybe (ByteString, Text)
 -> Either CoordinateErrors (ByteString, Text))
-> Maybe (ByteString, Text)
-> Either CoordinateErrors (ByteString, Text)
forall a b. (a -> b) -> a -> b
$ ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"r" [(ByteString, Text)]
list
  maybe (Left $ DecodeFailure valText list) Right $ fromSingleCellRef $ CellRef valText