module Text.CSV.Lazy.ByteString
(
CSVTable
, CSVRow
, CSVField(..)
, CSVError(..)
, CSVResult
, csvErrors
, csvTable
, csvTableFull
, csvTableHeader
, parseCSV
, parseDSV
, ppCSVError
, ppCSVField
, ppCSVTable
, ppDSVTable
, fromCSVTable
, toCSVTable
, selectFields
, expectFields
, mkEmptyColumn
, joinCSV
, mkCSVField
) where
import Data.List (groupBy, partition, elemIndex, intercalate, takeWhile
,deleteFirstsBy, nub)
import Data.Function (on)
import Data.Maybe (fromJust)
import qualified Data.ByteString.Lazy.Char8 as BS
import Data.ByteString.Lazy.Char8 (ByteString)
type CSVTable = [CSVRow]
type CSVRow = [CSVField]
data CSVField = CSVField { CSVField -> Int
csvRowNum :: !Int
, CSVField -> Int
csvColNum :: !Int
, CSVField -> (Int, Int)
csvTextStart :: !(Int,Int)
, CSVField -> (Int, Int)
csvTextEnd :: !(Int,Int)
, CSVField -> ByteString
csvFieldContent :: !ByteString
, CSVField -> Bool
csvFieldQuoted :: !Bool }
| CSVFieldError { csvRowNum :: !Int
, csvColNum :: !Int
, csvTextStart :: !(Int,Int)
, csvTextEnd :: !(Int,Int)
, CSVField -> [Char]
csvFieldError :: !String }
deriving (CSVField -> CSVField -> Bool
(CSVField -> CSVField -> Bool)
-> (CSVField -> CSVField -> Bool) -> Eq CSVField
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVField -> CSVField -> Bool
$c/= :: CSVField -> CSVField -> Bool
== :: CSVField -> CSVField -> Bool
$c== :: CSVField -> CSVField -> Bool
Eq,Int -> CSVField -> ShowS
[CSVField] -> ShowS
CSVField -> [Char]
(Int -> CSVField -> ShowS)
-> (CSVField -> [Char]) -> ([CSVField] -> ShowS) -> Show CSVField
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [CSVField] -> ShowS
$cshowList :: [CSVField] -> ShowS
show :: CSVField -> [Char]
$cshow :: CSVField -> [Char]
showsPrec :: Int -> CSVField -> ShowS
$cshowsPrec :: Int -> CSVField -> ShowS
Show)
data CSVError = IncorrectRow { CSVError -> Int
csvRow :: Int
, CSVError -> Int
csvColsExpected :: Int
, CSVError -> Int
csvColsActual :: Int
, CSVError -> [CSVField]
csvFields :: [CSVField] }
| BlankLine { csvRow :: !Int
, csvColsExpected :: !Int
, csvColsActual :: !Int
, CSVError -> CSVField
csvField :: CSVField }
| FieldError { csvField :: CSVField }
| { csvColsExpected :: !Int
, :: !Int
, CSVError -> [Char]
csvDuplicate :: !String }
| NoData
deriving (CSVError -> CSVError -> Bool
(CSVError -> CSVError -> Bool)
-> (CSVError -> CSVError -> Bool) -> Eq CSVError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVError -> CSVError -> Bool
$c/= :: CSVError -> CSVError -> Bool
== :: CSVError -> CSVError -> Bool
$c== :: CSVError -> CSVError -> Bool
Eq,Int -> CSVError -> ShowS
[CSVError] -> ShowS
CSVError -> [Char]
(Int -> CSVError -> ShowS)
-> (CSVError -> [Char]) -> ([CSVError] -> ShowS) -> Show CSVError
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [CSVError] -> ShowS
$cshowList :: [CSVError] -> ShowS
show :: CSVError -> [Char]
$cshow :: CSVError -> [Char]
showsPrec :: Int -> CSVError -> ShowS
$cshowsPrec :: Int -> CSVError -> ShowS
Show)
type CSVResult = [ Either [CSVError] [CSVField] ]
csvTable :: CSVResult -> CSVTable
csvTable :: CSVResult -> CSVTable
csvTable CSVResult
r = [ [CSVField]
row | Right [CSVField]
row <- CSVResult
r ]
csvErrors :: CSVResult -> [CSVError]
csvErrors :: CSVResult -> [CSVError]
csvErrors CSVResult
r = [[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ [CSVError]
err | Left [CSVError]
err <- CSVResult
r ]
csvTableFull:: CSVResult -> CSVTable
csvTableFull :: CSVResult -> CSVTable
csvTableFull = (Either [CSVError] [CSVField] -> [CSVField])
-> CSVResult -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map Either [CSVError] [CSVField] -> [CSVField]
beCareful (CSVResult -> CSVTable)
-> (CSVResult -> CSVResult) -> CSVResult -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> CSVResult
deduplicate
where beCareful :: Either [CSVError] [CSVField] -> [CSVField]
beCareful (Right [CSVField]
row) = [CSVField]
row
beCareful (Left (r :: CSVError
r@IncorrectRow{}:[CSVError]
_)) =
CSVError -> [CSVField]
csvFields CSVError
r [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
++
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- CSVError -> Int
csvColsActual CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) Int
0 ByteString
BS.empty)
beCareful (Left (r :: CSVError
r@BlankLine{}:[CSVError]
_)) =
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) Int
0 ByteString
BS.empty)
beCareful (Left (r :: CSVError
r@DuplicateHeader{}:[CSVError]
_)) =
Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
(Int -> Int -> ByteString -> CSVField
mkCSVField Int
0 Int
0 ByteString
BS.empty)
beCareful (Left (FieldError{}:[CSVError]
r)) = Either [CSVError] [CSVField] -> [CSVField]
beCareful ([CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError]
r)
beCareful (Left (CSVError
NoData:[CSVError]
_)) = []
beCareful (Left []) = []
deduplicate :: CSVResult -> CSVResult
deduplicate (Left (errs :: [CSVError]
errs@(DuplicateHeader{}:[CSVError]
_)):Right [CSVField]
heads:CSVResult
rows) =
[CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right ([CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
errs ([CSVField] -> [Int] -> [(CSVField, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [CSVField]
heads [Int
0..]))
Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
rows
deduplicate CSVResult
rows = CSVResult
rows
replaceInOrder :: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [] [(CSVField, Int)]
headers = ((CSVField, Int) -> CSVField) -> [(CSVField, Int)] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (CSVField, Int) -> CSVField
forall a b. (a, b) -> a
fst [(CSVField, Int)]
headers
replaceInOrder [CSVError]
_ [] = []
replaceInOrder (CSVError
d:[CSVError]
dups) ((CSVField
h,Int
n):[(CSVField, Int)]
headers)
| CSVError -> Int
csvHeaderSerial CSVError
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n = CSVField
h{ csvFieldContent :: ByteString
csvFieldContent = [Char] -> ByteString
BS.pack
(CSVError -> [Char]
csvDuplicate CSVError
d[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
"_"[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n) }
CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
dups [(CSVField, Int)]
headers
| Bool
otherwise = CSVField
hCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder (CSVError
dCSVError -> [CSVError] -> [CSVError]
forall a. a -> [a] -> [a]
:[CSVError]
dups) [(CSVField, Int)]
headers
csvTableHeader :: CSVResult -> [String]
= (CSVField -> [Char]) -> [CSVField] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [Char]
BS.unpack (ByteString -> [Char])
-> (CSVField -> ByteString) -> CSVField -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) ([CSVField] -> [[Char]])
-> (CSVResult -> [CSVField]) -> CSVResult -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> [CSVField]
forall {a} {t}. [Either a t] -> t
firstRow
where firstRow :: [Either a t] -> t
firstRow (Left a
_: [Either a t]
rest) = [Either a t] -> t
firstRow [Either a t]
rest
firstRow (Right t
x: [Either a t]
_) = t
x
parseCSV :: ByteString -> CSVResult
parseCSV :: ByteString -> CSVResult
parseCSV = Bool -> Char -> ByteString -> CSVResult
parseDSV Bool
True Char
','
parseDSV :: Bool -> Char -> ByteString -> CSVResult
parseDSV :: Bool -> Char -> ByteString -> CSVResult
parseDSV Bool
qn Char
delim = CSVTable -> CSVResult
validate
(CSVTable -> CSVResult)
-> (ByteString -> CSVTable) -> ByteString -> CSVResult
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> CSVField -> Bool) -> [CSVField] -> CSVTable
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(==)(Int -> Int -> Bool)
-> (CSVField -> Int) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> Int
csvRowNum)
([CSVField] -> CSVTable)
-> (ByteString -> [CSVField]) -> ByteString -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Char -> ByteString -> [CSVField]
lexCSV Bool
qn Char
delim
validate :: [CSVRow] -> CSVResult
validate :: CSVTable -> CSVResult
validate [] = [[CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError
NoData]]
validate xs :: CSVTable
xs@([CSVField]
x:CSVTable
_) = [CSVField] -> CSVResult -> CSVResult
checkDuplicateHeaders [CSVField]
x (CSVResult -> CSVResult) -> CSVResult -> CSVResult
forall a b. (a -> b) -> a -> b
$ ([CSVField] -> Either [CSVError] [CSVField])
-> CSVTable -> CSVResult
forall a b. (a -> b) -> [a] -> [b]
map (Int -> [CSVField] -> Either [CSVError] [CSVField]
extractErrs ([CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
x)) CSVTable
xs
extractErrs :: Int -> CSVRow -> Either [CSVError] CSVRow
Int
size [CSVField]
row
| [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size Bool -> Bool -> Bool
&& [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
errs0 = [CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right [CSVField]
row0
| [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 Bool -> Bool -> Bool
&& CSVField -> Bool
empty CSVField
field0 = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVField -> CSVError
blankLine CSVField
field0]
| Bool
otherwise = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> CSVError
convert [CSVField]
errs0
[CSVError] -> [CSVError] -> [CSVError]
forall a. [a] -> [a] -> [a]
++ [CSVField] -> [CSVError]
validateColumns [CSVField]
row0)
where
([CSVField]
row0,[CSVField]
errs0) = (CSVField -> Bool) -> [CSVField] -> ([CSVField], [CSVField])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition CSVField -> Bool
isField [CSVField]
row
(CSVField
field0:[CSVField]
_) = [CSVField]
row0
isField :: CSVField -> Bool
isField (CSVField{}) = Bool
True
isField (CSVFieldError{}) = Bool
False
empty :: CSVField -> Bool
empty f :: CSVField
f@(CSVField{}) = ByteString -> Bool
BS.null (CSVField -> ByteString
csvFieldContent CSVField
f)
empty CSVField
_ = Bool
False
convert :: CSVField -> CSVError
convert CSVField
err = FieldError :: CSVField -> CSVError
FieldError {csvField :: CSVField
csvField = CSVField
err}
validateColumns :: [CSVField] -> [CSVError]
validateColumns [CSVField]
r =
if [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size then []
else [ IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow = if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
r then Int
0 else CSVField -> Int
csvRowNum ([CSVField] -> CSVField
forall a. [a] -> a
head [CSVField]
r)
, csvColsExpected :: Int
csvColsExpected = Int
size
, csvColsActual :: Int
csvColsActual = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r
, csvFields :: [CSVField]
csvFields = [CSVField]
r } ]
blankLine :: CSVField -> CSVError
blankLine CSVField
f = BlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow = CSVField -> Int
csvRowNum CSVField
f
, csvColsExpected :: Int
csvColsExpected = Int
size
, csvColsActual :: Int
csvColsActual = Int
1
, csvField :: CSVField
csvField = CSVField
f }
checkDuplicateHeaders :: CSVRow -> CSVResult -> CSVResult
[CSVField]
row CSVResult
result =
let headers :: [CSVField]
headers = [ CSVField
f | f :: CSVField
f@(CSVField{}) <- [CSVField]
row ]
dups :: [CSVField]
dups = (CSVField -> CSVField -> Bool)
-> [CSVField] -> [CSVField] -> [CSVField]
forall a. (a -> a -> Bool) -> [a] -> [a] -> [a]
deleteFirstsBy (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
(==)(ByteString -> ByteString -> Bool)
-> (CSVField -> ByteString) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> ByteString
csvFieldContent)
[CSVField]
headers ([CSVField] -> [CSVField]
forall a. Eq a => [a] -> [a]
nub [CSVField]
headers)
n :: Int
n = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
headers
in if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
dups then CSVResult
result
else [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map (\CSVField
d-> DuplicateHeader :: Int -> Int -> [Char] -> CSVError
DuplicateHeader
{ csvColsExpected :: Int
csvColsExpected = Int
n
, csvHeaderSerial :: Int
csvHeaderSerial = CSVField -> Int
csvColNum CSVField
d
, csvDuplicate :: [Char]
csvDuplicate = ByteString -> [Char]
BS.unpack (CSVField -> ByteString
csvFieldContent CSVField
d)})
[CSVField]
dups)
Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
result
data CSVState = CSVState { CSVState -> Int
tableRow, CSVState -> Int
tableCol :: !Int
, CSVState -> Int
textRow, CSVState -> Int
textCol :: !Int }
deriving Int -> CSVState -> ShowS
[CSVState] -> ShowS
CSVState -> [Char]
(Int -> CSVState -> ShowS)
-> (CSVState -> [Char]) -> ([CSVState] -> ShowS) -> Show CSVState
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [CSVState] -> ShowS
$cshowList :: [CSVState] -> ShowS
show :: CSVState -> [Char]
$cshow :: CSVState -> [Char]
showsPrec :: Int -> CSVState -> ShowS
$cshowsPrec :: Int -> CSVState -> ShowS
Show
incTableRow, incTableCol, incTextRow :: CSVState -> CSVState
incTableRow :: CSVState -> CSVState
incTableRow CSVState
st = CSVState
st { tableRow :: Int
tableRow = CSVState -> Int
tableRow CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 , tableCol :: Int
tableCol = Int
1 }
incTableCol :: CSVState -> CSVState
incTableCol CSVState
st = CSVState
st { tableCol :: Int
tableCol = CSVState -> Int
tableCol CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }
incTextRow :: CSVState -> CSVState
incTextRow CSVState
st = CSVState
st { textRow :: Int
textRow = CSVState -> Int
textRow CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 , textCol :: Int
textCol = Int
1 }
incTextCol :: Int -> CSVState -> CSVState
incTextCol :: Int -> CSVState -> CSVState
incTextCol Int
n CSVState
st = CSVState
st { textCol :: Int
textCol = CSVState -> Int
textCol CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n }
here :: CSVState -> (Int,Int)
here :: CSVState -> (Int, Int)
here CSVState
st = (CSVState -> Int
textRow CSVState
st, CSVState -> Int
textCol CSVState
st)
lexCSV :: Bool -> Char -> ByteString -> [CSVField]
lexCSV :: Bool -> Char -> ByteString -> [CSVField]
lexCSV Bool
qn Char
delim =
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
qn Char
delim
(CSVState :: Int -> Int -> Int -> Int -> CSVState
CSVState{tableRow :: Int
tableRow=Int
1,tableCol :: Int
tableCol=Int
1,textRow :: Int
textRow=Int
1,textCol :: Int
textCol=Int
1}) (Int
1,Int
1)
getFields :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state (Int, Int)
begin ByteString
bs0
= case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs0 of
Maybe (Char, ByteString)
Nothing -> []
Just (Char
'"', ByteString
bs1) -> Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (Int -> CSVState -> CSVState
incTextCol Int
1 CSVState
state) (Int, Int)
begin
ByteString
BS.empty ByteString
bs1
Maybe (Char, ByteString)
_ ->
case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break Char -> Bool
interestingChar ByteString
bs0 of
(ByteString
fieldBs, ByteString
bs1) ->
let field :: CSVField
field = CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField CSVState
end (Int, Int)
begin ByteString
fieldBs Bool
False
end :: CSVState
end = Int -> CSVState -> CSVState
incTextCol (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
state' :: CSVState
state' = CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ Int -> CSVState -> CSVState
incTextCol Int
2 CSVState
end
stateNL :: CSVState
stateNL = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
len :: Int
len = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int64
BS.length ByteString
fieldBs
in case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs1 of
Just (Char
c,ByteString
bs2)
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs2
Just (Char
'\r',ByteString
bs2) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs2 of
Just (Char
'\n',ByteString
bs3)
-> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
Maybe (Char, ByteString)
_ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs2
Just (Char
'\n',ByteString
bs2) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs2
Just (Char
'"', ByteString
_) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
state' (Int, Int)
begin
[Char]
"unexpected quote, resync at EOL"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL)
((Char -> Bool) -> ByteString -> ByteString
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n') ByteString
bs1)
Just (Char, ByteString)
_ -> [CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
state' (Int, Int)
begin [Char]
"XXX Can't happen"]
Maybe (Char, ByteString)
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs1
where interestingChar :: Char -> Bool
interestingChar Char
'\r' = Bool
True
interestingChar Char
'\n' = Bool
True
interestingChar Char
'"' = Bool
True
interestingChar Char
c | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d = Bool
True
interestingChar Char
_ = Bool
False
doStringFieldContent :: Bool -> Char -> CSVState -> (Int, Int) -> ByteString
-> ByteString -> [CSVField]
doStringFieldContent :: Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d CSVState
state (Int, Int)
begin ByteString
acc ByteString
bs1
= case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break Char -> Bool
interestingCharInsideString ByteString
bs1 of
(ByteString
newBs, ByteString
bs2) ->
let fieldBs :: ByteString
fieldBs = ByteString
acc ByteString -> ByteString -> ByteString
`BS.append` ByteString
newBs
field :: CSVField
field = CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField CSVState
end (Int, Int)
begin ByteString
fieldBs Bool
True
end :: CSVState
end = Int -> CSVState -> CSVState
incTextCol (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) CSVState
state
state' :: CSVState
state' = CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ Int -> CSVState -> CSVState
incTextCol Int
3 CSVState
end
stateNL :: CSVState
stateNL = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$ CSVState
state
len :: Int
len = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int64
BS.length ByteString
newBs
in case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs2 of
Just (Char
'\r',ByteString
bs3) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs3 of
Just (Char
'\n',ByteString
bs4) | Bool
q ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (CSVState -> CSVState
incTextRow CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton Char
'\n') ByteString
bs4
Maybe (Char, ByteString)
_ -> Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d CSVState
end (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton Char
'\r') ByteString
bs3
Just (Char
'\n',ByteString
bs3) | Bool
q ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (CSVState -> CSVState
incTextRow CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton Char
'\n') ByteString
bs3
Just (Char
'\n',ByteString
bs3) ->
CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
end (Int, Int)
begin [Char]
"Found newline within quoted field"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
Just (Char
'"', ByteString
bs3) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs3 of
Just (Char
c,ByteString
bs4)
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
d -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs4
Just (Char
'\r',ByteString
bs4) ->
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
bs4 of
Just (Char
'\n',ByteString
bs5) ->
CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs5
Maybe (Char, ByteString)
_ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs4
Just (Char
'\n',ByteString
bs4) -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs4
Just (Char
'"',ByteString
bs4) ->
Bool
-> Char
-> CSVState
-> (Int, Int)
-> ByteString
-> ByteString
-> [CSVField]
doStringFieldContent Bool
q Char
d (Int -> CSVState -> CSVState
incTextCol Int
3 CSVState
end) (Int, Int)
begin
(ByteString
fieldBs ByteString -> ByteString -> ByteString
`BS.append` Char -> ByteString
BS.singleton Char
'"') ByteString
bs4
Just (Char, ByteString)
_ -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
state' (Int, Int)
begin [Char]
"End-quote not followed by comma"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
state' (CSVState -> (Int, Int)
here CSVState
state') ByteString
bs3
Maybe (Char, ByteString)
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: Bool -> Char -> CSVState -> (Int, Int) -> ByteString -> [CSVField]
getFields Bool
q Char
d CSVState
stateNL (CSVState -> (Int, Int)
here CSVState
stateNL) ByteString
bs3
Just (Char, ByteString)
_ -> [CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
state' (Int, Int)
begin [Char]
"XXX Can't happen (string field)"]
Maybe (Char, ByteString)
Nothing -> CSVField
fieldCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
state' (Int, Int)
begin [Char]
"CSV data ends within a quoted string"
CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:[]
where interestingCharInsideString :: Char -> Bool
interestingCharInsideString Char
'\r' = Bool
True
interestingCharInsideString Char
'\n' = Bool
True
interestingCharInsideString Char
'"' = Bool
True
interestingCharInsideString Char
_ = Bool
False
mkField :: CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField :: CSVState -> (Int, Int) -> ByteString -> Bool -> CSVField
mkField CSVState
st (Int, Int)
begin ByteString
bs Bool
q = CSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = CSVState -> Int
tableRow CSVState
st
, csvColNum :: Int
csvColNum = CSVState -> Int
tableCol CSVState
st
, csvTextStart :: (Int, Int)
csvTextStart = (Int, Int)
begin
, csvTextEnd :: (Int, Int)
csvTextEnd = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
, csvFieldContent :: ByteString
csvFieldContent = ByteString
bs
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
q }
mkError :: CSVState -> (Int, Int) -> String -> CSVField
mkError :: CSVState -> (Int, Int) -> [Char] -> CSVField
mkError CSVState
st (Int, Int)
begin [Char]
e = CSVFieldError :: Int -> Int -> (Int, Int) -> (Int, Int) -> [Char] -> CSVField
CSVFieldError { csvRowNum :: Int
csvRowNum = CSVState -> Int
tableRow CSVState
st
, csvColNum :: Int
csvColNum = CSVState -> Int
tableCol CSVState
st
, csvTextStart :: (Int, Int)
csvTextStart = (Int, Int)
begin
, csvTextEnd :: (Int, Int)
csvTextEnd = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
, csvFieldError :: [Char]
csvFieldError = [Char]
e }
ppCSVError :: CSVError -> String
ppCSVError :: CSVError -> [Char]
ppCSVError (err :: CSVError
err@IncorrectRow{}) =
[Char]
"\nRow "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvRow CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" has wrong number of fields."[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\n Expected "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvColsExpected CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" but got "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvColsActual CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
"."[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\n The fields are:"[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> ShowS
indent Int
8 ((CSVField -> [Char]) -> [CSVField] -> [Char]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap CSVField -> [Char]
ppCSVField (CSVError -> [CSVField]
csvFields CSVError
err))
ppCSVError (err :: CSVError
err@BlankLine{}) =
[Char]
"\nRow "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvRow CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" is blank."[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\n Expected "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvColsExpected CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" fields."
ppCSVError (err :: CSVError
err@FieldError{}) = CSVField -> [Char]
ppCSVField (CSVError -> CSVField
csvField CSVError
err)
ppCSVError (err :: CSVError
err@DuplicateHeader{}) =
[Char]
"\nThere are two (or more) identical column headers: "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
ShowS
forall a. Show a => a -> [Char]
show (CSVError -> [Char]
csvDuplicate CSVError
err)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
"."[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\n Column number "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVError -> Int
csvHeaderSerial CSVError
err)
ppCSVError (NoData{}) =
[Char]
"\nNo usable data (after accounting for any other errors)."
ppCSVField :: CSVField -> String
ppCSVField :: CSVField -> [Char]
ppCSVField (f :: CSVField
f@CSVField{}) =
[Char]
"\n"[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ByteString -> [Char]
BS.unpack (Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> ByteString
csvFieldContent CSVField
f))[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\nin row "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> Int
csvRowNum CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" at column "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> Int
csvColNum CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
" (textually from "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" to "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
(Int, Int) -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
")"
ppCSVField (f :: CSVField
f@CSVFieldError{}) =
[Char]
"\n"[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++CSVField -> [Char]
csvFieldError CSVField
f[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
"\nin row "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> Int
csvRowNum CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" at column "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> Int
csvColNum CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
[Char]
" (textually from "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char]
" to "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
(Int, Int) -> [Char]
forall a. Show a => a -> [Char]
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)
ppCSVTable :: CSVTable -> ByteString
ppCSVTable :: CSVTable -> ByteString
ppCSVTable = [ByteString] -> ByteString
BS.unlines ([ByteString] -> ByteString)
-> (CSVTable -> [ByteString]) -> CSVTable -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> ByteString) -> CSVTable -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [ByteString] -> ByteString
BS.intercalate ([Char] -> ByteString
BS.pack [Char]
",") ([ByteString] -> ByteString)
-> ([CSVField] -> [ByteString]) -> [CSVField] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
ppField)
where ppField :: CSVField -> ByteString
ppField CSVField
f = Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> ByteString
csvFieldContent CSVField
f)
ppDSVTable :: Bool -> Char -> CSVTable -> ByteString
ppDSVTable :: Bool -> Char -> CSVTable -> ByteString
ppDSVTable Bool
nl Char
d = [ByteString] -> ByteString
BS.unlines ([ByteString] -> ByteString)
-> (CSVTable -> [ByteString]) -> CSVTable -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> ByteString) -> CSVTable -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [ByteString] -> ByteString
BS.intercalate ([Char] -> ByteString
BS.pack [Char
d]) ([ByteString] -> ByteString)
-> ([CSVField] -> [ByteString]) -> [CSVField] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
ppField)
where ppField :: CSVField -> ByteString
ppField CSVField
f = Bool -> ByteString -> ByteString
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (ByteString -> ByteString
doNL (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ CSVField -> ByteString
csvFieldContent CSVField
f)
doNL :: ByteString -> ByteString
doNL | Bool
nl = ByteString -> ByteString
replaceNL
| Bool
otherwise = ByteString -> ByteString
forall a. a -> a
id
indent :: Int -> String -> String
indent :: Int -> ShowS
indent Int
n = [[Char]] -> [Char]
unlines ([[Char]] -> [Char]) -> ([Char] -> [[Char]]) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char -> [Char]
forall a. Int -> a -> [a]
replicate Int
n Char
' ' [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++) ([[Char]] -> [[Char]])
-> ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
lines
quoted :: Bool -> ByteString -> ByteString
quoted :: Bool -> ByteString -> ByteString
quoted Bool
False ByteString
s = ByteString
s
quoted Bool
True ByteString
s = [ByteString] -> ByteString
BS.concat [[Char] -> ByteString
BS.pack [Char]
"\"", ByteString -> ByteString
escape ByteString
s, [Char] -> ByteString
BS.pack[Char]
"\""]
where escape :: ByteString -> ByteString
escape ByteString
s = let (ByteString
good,ByteString
next) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'"') ByteString
s
in if ByteString -> Bool
BS.null ByteString
next then ByteString
good
else [ByteString] -> ByteString
BS.concat [ ByteString
good, [Char] -> ByteString
BS.pack [Char]
"\"\"", ByteString -> ByteString
escape (ByteString -> ByteString
BS.tail ByteString
next) ]
replaceNL :: ByteString -> ByteString
replaceNL :: ByteString -> ByteString
replaceNL ByteString
s = let (ByteString
good,ByteString
next) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n') ByteString
s
in if ByteString -> Bool
BS.null ByteString
next then ByteString
good
else if ByteString -> Bool
BS.null ByteString
good then ByteString -> ByteString
replaceNL (ByteString -> ByteString
BS.tail ByteString
next)
else [ByteString] -> ByteString
BS.concat [ ByteString
good, [Char] -> ByteString
BS.pack [Char]
" ", ByteString -> ByteString
replaceNL ByteString
next ]
fromCSVTable :: CSVTable -> [[ByteString]]
fromCSVTable :: CSVTable -> [[ByteString]]
fromCSVTable = ([CSVField] -> [ByteString]) -> CSVTable -> [[ByteString]]
forall a b. (a -> b) -> [a] -> [b]
map ((CSVField -> ByteString) -> [CSVField] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> ByteString
csvFieldContent)
toCSVTable :: [[ByteString]] -> ([CSVError], CSVTable)
toCSVTable :: [[ByteString]] -> ([CSVError], CSVTable)
toCSVTable [] = ([CSVError
NoData], [])
toCSVTable rows :: [[ByteString]]
rows@([ByteString]
r:[[ByteString]]
_) = (\ ([[CSVError]]
a,CSVTable
b)-> ([[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[CSVError]]
a, CSVTable
b)) (([[CSVError]], CSVTable) -> ([CSVError], CSVTable))
-> ([[CSVError]], CSVTable) -> ([CSVError], CSVTable)
forall a b. (a -> b) -> a -> b
$
[([CSVError], [CSVField])] -> ([[CSVError]], CSVTable)
forall a b. [(a, b)] -> ([a], [b])
unzip ((Int -> [ByteString] -> ([CSVError], [CSVField]))
-> [Int] -> [[ByteString]] -> [([CSVError], [CSVField])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> [ByteString] -> ([CSVError], [CSVField])
walk [Int
1..] [[ByteString]]
rows)
where
n :: Int
n = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
r
walk :: Int -> [ByteString] -> ([CSVError], CSVRow)
walk :: Int -> [ByteString] -> ([CSVError], [CSVField])
walk Int
rnum [] = ( [Int -> CSVError
blank Int
rnum]
, (Int -> CSVField) -> [Int] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
c-> Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum Int
c (ByteString
BS.empty)) [Int
1..Int
n])
walk Int
rnum [ByteString]
cs = ( if [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
cs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
n then [Int -> [ByteString] -> CSVError
bad Int
rnum [ByteString]
cs] else []
, (Int -> ByteString -> CSVField)
-> [Int] -> [ByteString] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum) [Int
1..Int
n] [ByteString]
cs )
blank :: Int -> CSVError
blank Int
rnum = BlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow = Int
rnum
, csvColsExpected :: Int
csvColsExpected = Int
n
, csvColsActual :: Int
csvColsActual = Int
0
, csvField :: CSVField
csvField = Int -> Int -> ByteString -> CSVField
mkCSVField Int
rnum Int
0 ByteString
BS.empty
}
bad :: Int -> [ByteString] -> CSVError
bad Int
r [ByteString]
cs = IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow = Int
r
, csvColsExpected :: Int
csvColsExpected = Int
n
, csvColsActual :: Int
csvColsActual = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
cs
, csvFields :: [CSVField]
csvFields = (Int -> ByteString -> CSVField)
-> [Int] -> [ByteString] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> ByteString -> CSVField
mkCSVField Int
r) [Int
1..] [ByteString]
cs
}
selectFields :: [String] -> CSVTable -> Either [String] CSVTable
selectFields :: [[Char]] -> CSVTable -> Either [[Char]] CSVTable
selectFields [[Char]]
names CSVTable
table
| CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table = [[Char]] -> Either [[Char]] CSVTable
forall a b. a -> Either a b
Left [[Char]]
names
| Bool -> Bool
not ([[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[Char]]
missing) = [[Char]] -> Either [[Char]] CSVTable
forall a b. a -> Either a b
Left [[Char]]
missing
| Bool
otherwise = CSVTable -> Either [[Char]] CSVTable
forall a b. b -> Either a b
Right (([CSVField] -> [CSVField]) -> CSVTable -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map [CSVField] -> [CSVField]
forall {b}. [b] -> [b]
select CSVTable
table)
where
header :: [[Char]]
header = (CSVField -> [Char]) -> [CSVField] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [Char]
BS.unpack (ByteString -> [Char])
-> (CSVField -> ByteString) -> CSVField -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
missing :: [[Char]]
missing = ([Char] -> Bool) -> [[Char]] -> [[Char]]
forall a. (a -> Bool) -> [a] -> [a]
filter ([Char] -> [[Char]] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [[Char]]
header) [[Char]]
names
reordering :: [Int]
reordering = ([Char] -> Int) -> [[Char]] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Int -> Int) -> ([Char] -> Maybe Int) -> [Char] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\[Char]
n-> [Char] -> [[Char]] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
elemIndex [Char]
n [[Char]]
header)) [[Char]]
names
select :: [b] -> [b]
select [b]
fields = (Int -> b) -> [Int] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ([b]
fields[b] -> Int -> b
forall a. [a] -> Int -> a
!!) [Int]
reordering
expectFields :: [String] -> CSVTable -> Either [String] CSVTable
expectFields :: [[Char]] -> CSVTable -> Either [[Char]] CSVTable
expectFields [[Char]]
names CSVTable
table
| CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table = [[Char]] -> Either [[Char]] CSVTable
forall a b. a -> Either a b
Left [[Char]
"CSV table is empty"]
| Bool -> Bool
not ([[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[Char]]
missing) = [[Char]] -> Either [[Char]] CSVTable
forall a b. a -> Either a b
Left (ShowS -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ([Char]
"CSV table is missing field: "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++)
[[Char]]
missing)
| [[Char]]
header [[Char]] -> [[Char]] -> Bool
forall a. Eq a => a -> a -> Bool
/= [[Char]]
names = [[Char]] -> Either [[Char]] CSVTable
forall a b. a -> Either a b
Left [[Char]
"CSV columns are in the wrong order"
,[Char]
"Expected: "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " [[Char]]
names
,[Char]
"Found: "[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++[Char] -> [[Char]] -> [Char]
forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " [[Char]]
header]
| Bool
otherwise = CSVTable -> Either [[Char]] CSVTable
forall a b. b -> Either a b
Right CSVTable
table
where
header :: [[Char]]
header = (CSVField -> [Char]) -> [CSVField] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (ByteString -> [Char]
BS.unpack (ByteString -> [Char])
-> (CSVField -> ByteString) -> CSVField -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVField -> ByteString
csvFieldContent) (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
missing :: [[Char]]
missing = ([Char] -> Bool) -> [[Char]] -> [[Char]]
forall a. (a -> Bool) -> [a] -> [a]
filter ([Char] -> [[Char]] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [[Char]]
header) [[Char]]
names
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV = ([CSVField] -> [CSVField] -> [CSVField])
-> CSVTable -> CSVTable -> CSVTable
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
(++)
mkEmptyColumn :: String -> CSVTable
mkEmptyColumn :: [Char] -> CSVTable
mkEmptyColumn [Char]
header = [CSVField
headField] [CSVField] -> CSVTable -> CSVTable
forall a. a -> [a] -> [a]
: (Int -> [CSVField]) -> [Int] -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map ((CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:[])(CSVField -> [CSVField]) -> (Int -> CSVField) -> Int -> [CSVField]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> CSVField
emptyField) [Int
2..]
where
headField :: CSVField
headField = (Int -> CSVField
emptyField Int
1) { csvFieldContent :: ByteString
csvFieldContent = [Char] -> ByteString
BS.pack [Char]
header
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
True }
emptyField :: Int -> CSVField
emptyField Int
n = CSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = Int
n
, csvColNum :: Int
csvColNum = Int
0
, csvTextStart :: (Int, Int)
csvTextStart = (Int
0,Int
0)
, csvTextEnd :: (Int, Int)
csvTextEnd = (Int
0,Int
0)
, csvFieldContent :: ByteString
csvFieldContent = ByteString
BS.empty
, csvFieldQuoted :: Bool
csvFieldQuoted = Bool
False
}
mkCSVField :: Int -> Int -> ByteString -> CSVField
mkCSVField :: Int -> Int -> ByteString -> CSVField
mkCSVField Int
n Int
c ByteString
text =
CSVField :: Int
-> Int
-> (Int, Int)
-> (Int, Int)
-> ByteString
-> Bool
-> CSVField
CSVField { csvRowNum :: Int
csvRowNum = Int
n
, csvColNum :: Int
csvColNum = Int
c
, csvTextStart :: (Int, Int)
csvTextStart = (Int
0,Int
0)
, csvTextEnd :: (Int, Int)
csvTextEnd = ( Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
(Int64 -> Int) -> (ByteString -> Int64) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int64
BS.length
(ByteString -> Int64)
-> (ByteString -> ByteString) -> ByteString -> Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n')
(ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ ByteString
text
, Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
(Int64 -> Int) -> (ByteString -> Int64) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int64
BS.length
(ByteString -> Int64)
-> (ByteString -> ByteString) -> ByteString -> Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n')
(ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BS.reverse (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ ByteString
text )
, csvFieldContent :: ByteString
csvFieldContent = ByteString
text
, csvFieldQuoted :: Bool
csvFieldQuoted = (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`[Char]
"\",\n\r") (ByteString -> [Char]
BS.unpack ByteString
text)
}