module Query where import Prelude hiding (elem,index) -- Data Model: Constructors ---------------------------------------------------- text :: String -> Node elem :: Tag -> [Node] -> Node ref :: Node -> Node year0 :: Node year0 = elem "@year" [ text "1999" ] book0 :: Node book0 = elem "book" [ elem "@year" [ text "1999" ], elem "title" [ text "Data on the Web" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Buneman" ], elem "author" [ text "Suciu" ]] bib0 :: Node bib0 = elem "bib" [ elem "book" [ elem "@year" [ text "1999" ], elem "title" [ text "Data on the Web" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Buneman" ], elem "author" [ text "Suciu" ]], elem "book" [ elem "@year" [ text "1987" ], elem "title" [ text "Foundations of Databases" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Hull" ], elem "author" [ text "Vianu" ]]] -- Data model: Destructors ----------------------------------------------------- isText :: Node -> Bool string :: Node -> String isElem :: Node -> Bool tag :: Node -> Tag children :: Node -> [Node] isRef :: Node -> Bool dereference :: Node -> Node is :: Tag -> Node -> Bool is t x = isElem x && tag x == t q0 = is "@year" year0 q1 = is "book" book0 the :: [a] -> a the [x] = x q2 = string (the (children year0)) q3 = value year0 -- Tuples ---------------------------------------------------------------------- tuple0 :: (Int,String,[String]) tuple0 = (1999, "Data on the Web", ["Abiteboul","Buneman","Suciu"]) year :: (Int,String,[String]) -> Int year (y,t,a) = y title :: (Int,String,[String]) -> String title (y,t,a) = t authors :: (Int,String,[String]) -> [String] authors (y,t,a) = a q4 = year tuple0 q5 = title tuple0 q6 = authors tuple0 -------------------------------------------------------------------------------- -- Comprehensions -------------------------------------------------------------- -------------------------------------------------------------------------------- q7 = [ value x | x <- children book0, is "author" x ] q8 = [ value y | x <- children bib0, is "book" x, y <- children x, is "title" y ] follow :: Tag -> Node -> [Node] follow t x = [ y | y <- children x, is t y ] q9 = [ value x | x <- follow "author" book0 ] q10 = [ value y | x <- follow "book" bib0, y <- follow "title" x ] -- Cartesian product ----------------------------------------------------------- q11 = [ (value y, value z) | x <- follow "book" bib0, y <- follow "title" x, z <- follow "author" x ] -- Nested queries -------------------------------------------------------------- q12 = [ (int (value y), value z, [ value u | u <- follow "author" x ]) | x <- follow "book" bib0, y <- follow "@year" x, z <- follow "title" x ] -- Join ------------------------------------------------------------------------ reviews0 = elem "reviews" [ elem "book" [ elem "title" [ text "Data on the Web" ], elem "review" [ text "This is great!" ]], elem "book" [ elem "title" [ text "Foundations of Databases" ], elem "review" [ text "This is pretty good too!" ]]] q13 = [ (value y, int (value z), value w) | x <- follow "book" bib0, y <- follow "title" x, z <- follow "@year" x, u <- follow "book" reviews0, v <- follow "title" u, w <- follow "review" u, y == v ] -------------------------------------------------------------------------------- -- Other operations ------------------------------------------------------------ -------------------------------------------------------------------------------- -- Append for union ------------------------------------------------------------ -- (++) :: [a] -> [a] -> [a] q14 = follow "title" book0 ++ follow "author" book0 -- Indexing -------------------------------------------------------------------- index :: [a] -> [(Int,a)] q15 = [ x | (i,x) <- index (follow "author" book0), i < 2 ] -- Null for existentials ------------------------------------------------------- -- null :: [a] -> Bool q16 = not (null [ x | (i,x) <- index (follow "author" book0), i >= 1 ]) -------------------------------------------------------------------------------- -- Structural recursion -------------------------------------------------------- -------------------------------------------------------------------------------- value :: Node -> String value x = if isText x then string x else if isElem x then concat [ value y | y <- children x, not (isAttr y) ] else "" q17 = value year0 --

This is not hard

para0 = elem "p" [ text "This is ", elem "b" [ elem "@color" [ text "red"], text "not" ], text " hard."] q18 = value para0 --Peter's query ---------------------------------------------------------------- isbns :: Node -> Node isbns x = if is "book" x then elem "book" [ the (follow "isbn" x) ] else elem (tag x) [ isbns y | y <- children x ] bookstore0 = elem "bookstore" [ elem "fiction" [ elem "sci-fi" [ elem "book" [ elem "isbn" [ text "0006482805" ], elem "title" [ text "Do androids dream of electric sheep" ], elem "author" [ text "Philip K. Dick" ] ] ], elem "fantasy" [ elem "mystery" [ elem "book" [ elem "isbn" [ text "0261102362" ], elem "title" [ text "The two towers" ], elem "author" [ text "JRR Tolkien" ] ] ] ] ] ] q19 = isbns bookstore0 -------------------------------------------------------------------------------- -- Regular expressions --------------------------------------------------------- -------------------------------------------------------------------------------- match :: Reg a -> Node -> [a] reg0 :: Reg (Node,Node,[Node]) reg0 = ([ (x,y,u) | x <- item "@year", y <- item "title", u <- rep (item "author") ]) q20 = match reg0 book0 -- Anywhere -------------------------------------------------------------------- anywhere :: Reg a -> Reg a book1 :: Node book1 = elem "book" [ elem "title" [ text "Data on the Web" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Buneman" ], elem "author" [ text "Suciu" ], elem "@year" [ text "1999" ]] q21 = match reg0 book1 reg1 :: Reg (Node,Node,[Node]) reg1 = ([ (x,y,u) | x <- anywhere (item "@year"), y <- item "title", u <- rep (item "author") ]) q22 = match reg1 book1 -- Alternation ----------------------------------------------------------------- (+++) :: Reg a -> Reg a -> Reg a shortyearbook = elem "book" [ elem "@shortyear" [ text "99" ], elem "title" [ text "Data on the Web" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Buneman" ], elem "author" [ text "Suciu" ]] noyearbook = elem "book" [ elem "title" [ text "Data on the Web" ], elem "author" [ text "Abiteboul" ], elem "author" [ text "Buneman" ], elem "author" [ text "Suciu" ]] yearreg :: Reg Int yearreg = [ int (value y) | y <- item "@year" ] +++ [ 1900 + int (value y) | y <- item "@shortyear" ] +++ [ 1999 | True ] reg2 :: Reg (Int,Node,[Node]) reg2 = ([ (x,y,u) | x <- anywhere yearreg, y <- item "title", u <- rep (item "author") ]) q23 = match reg2 book0 q24 = match reg2 shortyearbook q25 = match reg2 noyearbook -- Step ------------------------------------------------------------------------ stringItem :: Reg String stringItem = ([ string x | x <- textItem ]) reg3 :: Reg [(Int,String,[String])] reg3 = step "bib" (rep (step "book" ([ (int x,y,u) | x <- anywhere (step "@year" stringItem), y <- step "title" stringItem, u <- rep (step "author" stringItem) ]))) q26 = match reg3 (elem "/" [bib0]) -- An example ------------------------------------------------------------------ bib1 :: Node bib1 = elem "/" [ elem "bib" [ elem "book" [ elem "@year" [ text "1999" ], elem "title" [ text "Data on the Web" ], elem "publisher" [ elem "name" [ text "Springer" ]]], elem "book" [ elem "@year" [ text "1987" ], elem "title" [ text "Foundations of Databases" ], elem "publisher" [ elem "name" [ text "Addison-Wesley" ]]]]] -- XML-QL ---------------------------------------------------------------------- -- -- CONSTRUCT { -- WHERE -- -- -- $t -- Addison-Wesley -- -- IN "www.bn.com/bib.xml", -- $y < 1991 -- CONSTRUCT $t -- } -- -------------------------------------------------------------------------------- queryXMLQL :: Node -> Node queryXMLQL x = elem "bib" [ elem "book" [ elem "@year" [text (value y)], elem "title" [text (value t)]] | a <- follow "bib" x, b <- follow "book" a, y <- follow "@year" b, t <- follow "title" b, p <- follow "publisher" b, n <- follow "name" p, int (value y) < 1991, value n == "Addison-Wesley" ] q27 = queryXMLQL bib1 -- YATL ------------------------------------------------------------------------ -- -- make -- bib [ *book [ @year [ $y ], -- title [ $t ] ] ] -- match "www.bn.com/bib.xml" with -- bib [ *book [ @year [ $y ], -- title [ $t ] ], -- publisher [ name [ $n ] ] ] -- where -- $n = "Addison-Wesley" and $y > 1991 -- -------------------------------------------------------------------------------- queryYatl :: Node -> Node queryYatl x = elem "bib" [ elem "book" [ elem "@year" [ text y ], elem "title" [ text t ] ] | (y,t,n) <- the (match bibReg x), int y < 1991, n == "Addison-Wesley" ] bibReg :: Reg [(String,String,String)] bibReg = step "bib" (rep (step "book" ([ (y,t,n) | y <- step "@year" stringItem, t <- step "title" stringItem, n <- step "publisher" (step "name" stringItem) ]))) q28 = queryYatl bib1 -------------------------------------------------------------------------------- -- Implementation details ------------------------------------------------------ -------------------------------------------------------------------------------- -- Data Model ------------------------------------------------------------------ int :: String -> Int int s = read s type Tag = String data Node = Text String | Elem Tag [Node] | Ref Node deriving (Eq,Show) text s = Text s elem t l = Elem t l ref x = Ref x isText x = case x of Text s -> True ; _ -> False isElem x = case x of Elem t l -> True ; _ -> False isRef x = case x of Ref y -> True ; _ -> False string x = case x of Text s -> s tag x = case x of Elem t l -> t children x = case x of Elem t l -> l dereference x = case x of Ref y -> y tagAttr t = case t of '@':s -> True ; _ -> False isAttr x = isElem x && tagAttr (tag x) -- Other operations ------------------------------------------------------------ index l = zip [0..] l data Reg a = REG ([Node] -> [(a,[Node])]) -- Regular expressions --------------------------------------------------------- -- See [Bird98] (Chapter 11, Parsing) for a more detailed explanation -- of how to define regular expressions in Haskell. app :: Reg a -> [Node] -> [(a,[Node])] app (REG f) l = f l instance Monad Reg where return x = REG (\ l -> [(x,l)]) p >>= k = REG (\ l -> [(y,n) | (x,m) <- app p l, (y,n) <- app (k x) m]) fail m = REG (\ l -> []) match p x = [ y | (y,m) <- app p (children x), null m ] p +++ q = REG (\ l -> app p l ++ app q l) anywhere p = REG (\ l -> [ (x, take i l ++ m) | i <- [0..length l-1], (x,m) <- app p (drop i l) ]) lift :: [a] -> Reg a lift m = REG (\ l -> [ (x,l) | x <- m ]) nodeItem :: Reg Node nodeItem = REG (\ l -> if null l then [] else [(head l, tail l)]) textItem :: Reg Node textItem = ([ x | x <- nodeItem, isText x ]) item :: Tag -> Reg Node item t = ([ x | x <- nodeItem, is t x ]) step :: Tag -> Reg a -> Reg a step t p = ([ y | x <- item t, y <- lift (match p x) ]) rep :: Reg a -> Reg [a] rep p = ([ [x]++l | x <- p, l <- rep p ]) +++ ([ [] | True ])