--------------------------------------------------------------------------------
--  $Id: ParseCSV.hs,v 1.2 2004/04/20 14:50:47 graham Exp $
--
--  Copyright (c) 2004, G. KLYNE.  All rights reserved.
--  See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
--  Module      :  ParseCSV
--  Copyright   :  (c) 2003, Graham Klyne
--  License     :  GPL V2
--
--  Maintainer  :  Graham Klyne
--  Stability   :  provisional
--  Portability :  H98 + hierarchical libraries (Data.Maybe, Control.Monad)
--
--  This module provides basic functions for parsing a CSV string into
--  a list of lists of string values.
--  Newlines define new rows, and commas separate elements within a row.
--  Commas within quoted strings "..." or '...' are not separators.
--  String delimiters within a string value are doubled; e.g. "a""b" is 'a"b'.
--
--  A comma at the end of a row introduces a final empty value, so that
--  "a,b," yields three values.  A single carriage return at the end of
--  a row (CRLF line endings) is discarded.  An empty row yields an empty
--  list of values.
--
--------------------------------------------------------------------------------

module ParseCSV
    ( parseCSV
    )
where

import Control.Monad
    ( mplus )

import Data.Maybe
    ( fromMaybe )

------------------------------------------------------------
--  CSV parser
------------------------------------------------------------

-- | Parse CSV text into a list of rows, each row being a list of values.
--
--  Rows are separated by newlines (see 'lines'); a carriage return
--  immediately before the end of a row is removed first, so CRLF-terminated
--  input gives the same result as LF-terminated input.
parseCSV :: String -> [[String]]
parseCSV = map (csvItems . dropTrailingCR) . lines

--  Remove a single carriage return at the very end of a row, if present.
--  Carriage returns anywhere else in the row are left untouched.
dropTrailingCR :: String -> String
dropTrailingCR "\r"   = ""
dropTrailingCR (c:cs) = c : dropTrailingCR cs
dropTrailingCR ""     = ""

--  Split out CSV values in a single line.
--  A completely empty line has no values at all.
csvItems :: String -> [String]
csvItems "" = []
csvItems ss = csvFields ss

--  Split out the values of a non-empty line, or of the text following a
--  separator.  Unlike 'csvItems', empty input here denotes one empty value:
--  this is what keeps the final value of a row such as "a,b,".
csvFields :: String -> [String]
csvFields ss = case csvItem (skipSpaces ss) of
    (s, Nothing) -> [s]
    (s, Just rs) -> s : csvFields rs

--  Parse next value on line.
--  A quoted-string parse is tried first; if that fails (no opening quote,
--  no closing quote, or unexpected text after the closing quote) the input
--  is re-parsed as an unquoted value, quote characters included.
--  'parseValue' always succeeds, so the default is only a safety net that
--  keeps this function total.
csvItem :: String -> (String, Maybe String)
csvItem ss = fromMaybe ("", Nothing) (parseQuoted ss `mplus` parseValue ss)

--  Parsing functions
--
--  A successful parse returns (matched,more), where 'more' is:
--    Nothing    if the end of the row was reached,
--    Just rest  if a separating comma was consumed; 'rest' is the text
--               after the comma, and may itself be empty.
type ParseItem = String -> Maybe (String, Maybe String)

--  Parse an unquoted value: everything up to the next comma or end of row,
--  with trailing spaces removed.
parseValue :: ParseItem
parseValue = parseValue1 ""

--  s is accumulated value content in reverse order, so applying
--  'skipSpaces' to it before reversing strips the value's trailing spaces.
parseValue1 :: String -> ParseItem
parseValue1 s ""       = Just (reverse (skipSpaces s), Nothing)
parseValue1 s (',':cs) = Just (reverse (skipSpaces s), Just cs)
parseValue1 s (c:cs)   = parseValue1 (c:s) cs

--  Parse a quoted value; the opening character selects the terminator.
--  Fails if the input does not start with a double or single quote.
parseQuoted :: ParseItem
parseQuoted ('"':cs)  = parseQuoted1 '"'  "" cs
parseQuoted ('\'':cs) = parseQuoted1 '\'' "" cs
parseQuoted _         = Nothing

--  parse content of quoted string to closing terminator q
--  s is accumulated string content in reverse order
--  Fails if the row ends before a terminator is seen.
parseQuoted1 :: Char -> String -> ParseItem
parseQuoted1 q s (c:cs)
    | c == q    = parseQuoted2 q s cs
    | otherwise = parseQuoted1 q (c:s) cs
parseQuoted1 _ _ "" = Nothing

--  String terminator character seen:  if doubled then it is an occurrence
--  in the string, otherwise return the complete string
parseQuoted2 :: Char -> String -> ParseItem
parseQuoted2 _ s "" = Just (reverse s, Nothing)
parseQuoted2 q s cr@(c:cs)
    | c == q    = parseQuoted1 q (c:s) cs
    | otherwise = skipComma (reverse s) (skipSpaces cr)

--  Expect comma or end of row;  return rest of row.
--  Anything else after a closing quote makes the quoted parse fail.
skipComma :: String -> ParseItem
skipComma ret ""       = Just (ret, Nothing)
skipComma ret (',':rs) = Just (ret, Just rs)
skipComma _   _        = Nothing

--  Drop leading space characters (only ' ', not tabs or other whitespace).
skipSpaces :: String -> String
skipSpaces (' ':ss) = skipSpaces ss
skipSpaces ss       = ss

--------------------------------------------------------------------------------
--
--  Copyright (c) 2004, G. KLYNE.  All rights reserved.
--
--  This is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  This software is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with Swish; if not, write to:
--    The Free Software Foundation, Inc.,
--    59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--
--------------------------------------------------------------------------------
-- $Source: /file/cvsdev/HaskellUtils/ParseCSV.hs,v $
-- $Author: graham $
-- $Revision: 1.2 $
-- $Log: ParseCSV.hs,v $
-- Revision 1.2 2004/04/20 14:50:47 graham
-- Fix some bugs in the CSV parser
--
-- Revision 1.1 2004/03/10 16:05:14 graham
-- Add CSV parser to Swish, for scraping RDF from exported
-- spreadsheet and database files.
--