Complete suboptimal folding of canonical RNA structures: no dangling bases
RNA folding in an up-to-date but simplified energy model, disregarding energy contributions from dangling bases at the ends of stacks. (The full energy model is used here.) This version can enumerate all structures in order of increasing energy.
ADP Source Code Try DP online
Haskell header: showcode
> module CanonicalRNAnoDangle where

> import Array
> import List
> import RnaI
> import Energy
> import ADPTriCombinators
The signature: showcode
> data Canonical = Sadd Base Canonical   |
>                  Cadd Closed Canonical |
>                  Nil 
>                                   deriving (Eq, Ord, Show)

> data Closed = Sr Base Closed Base                               |
>               Hl Base Base (Int,Int) Base Base                  |
>               Sp Base Base Closed Base Base                     |
>               Bl (Int,Int) Closed                               |
>               Br Closed (Int,Int)                               |
>               Iln Base Base Inloop Base Base                    |
>               Ml Base Base MultiLoopComp Base Base              |
>               Addss Closed (Int,Int)                            |
>               Ssadd (Int,Int) Closed                            |
>               Cons Closed Closed                                |
>               Ul Closed 
>                                   deriving (Eq, Ord, Show)

> data Inloop = Ils Base Closed Base           |
>               Ilr Base Closed Base (Int,Int) |
>               Ill Inloop Base                |
>               Ill2 (Int,Int) Base Closed     |
>               Ilx Base Inloop Base          
>                                   deriving (Eq, Ord, Show)

> data MultiLoopComp = Combine Closed Closed 
>                                   deriving (Eq, Ord, Show)
The yield grammar: showcode
> canonicals k alg inp = axiom struct where
>  
>     (sadd,cadd,nil,sr,hl,sp,bl,br,iln,
>      ils,ilr,ill,ill2,ilx,ml,addss,ssadd,
>      cons,ul,combine,h,h_i,h_l,h_s) = alg baseArray k

>     struct        = listed ( 
>                     sadd <<< base   -~~ struct |||
>                     cadd <<< closed ~~~ struct |||
>                     nil  <<< empty               ... h_s)

>     closed        = tabulated (
>                     stack ||| hairpin ||| leftB ||| rightB ||| 
>                     iloop ||| multiloop ... h)

>     multiloop     = (ml <<< base -~~ base ~~! ml_components ~~- base ~~- base )
>                                                        `with` stackpairing ... h

>     ml_components = combine <<< block ~~~ comps

>     comps         = tabulated (
>                     cons  <<< block ~~~ comps  |||
>                               block            |||
>                     addss <<< block ~~~ region ... h_l)

>     block         = tabulated (
>                     ul    <<< closed            |||
>                     ssadd <<< region ~~~ closed ... h)

>     stack         = (sr <<< base -~~ closed  ~~- base) `with` basepairing

>     hairpin       = (hl <<< base -~~ base ~~! (region `with` minloopsize 3)
>                          ~~- base ~~- base)
>                     `with` stackpairing

>     leftB         = (sp <<< base -~~ base ~~! (bl <<< region  ~~~ closed)
>                          ~~- base ~~- base)
>                     `with` stackpairing ... h

>     rightB        = (sp <<< base -~~ base ~~! (br <<< closed ~~~ region)
>                         ~~- base ~~- base)
>                     `with` stackpairing ... h 

>     iloop         = (iln <<< base -~~ base ~~! inloop
>                          ~~- base ~~- base)
>                     `with` stackpairing ... h

>     inloop        = tabulated (
>                     ilx <<< base -~~ inloop ~~- base ||| loopend ... h_i)

>     loopend       = tabulated (
>                     ill <<< (ill2 <<< region ~~- base ~~~ closed) ~~- base |||
>                     ilr <<<            base -~~ closed ~~- base ~~~ region |||
>                     ils <<<            base -~~ closed ~~- base        ... h_i)
Bind input: showcode
>     axiom     = axiom' n
>     z         = mk (inp)
>     (_,n)     = bounds z
>     baseArray     = (fst.str2inp) inp
>     base (i,j)= [ j | (i+1) == j ]
>     region (i,j) =  [(i,j) | i < j]

>     tabulated = table n
>     listed :: Parser a -> Parser a
>     listed p = q $ array (0,n) [(j, p (j,n)) | j <- [0..n]] where
>       q t (i,j) = if j==n then t!i else []
>     
>     infixl 7 ~~!
>     (~~!) = (~~*) (2,2) 3
>     infixl 7 ~~!!
>     (~~!!) = (~~*) (3,3) 14
>     minloopsize :: Int -> Filter
>     minloopsize n = match inp where
>       match inp (i,j) = i+n<=j
>     stackpairing :: Filter
>     stackpairing  = match inp where
>       match inp (i,j) = i+3<j && basepair (z!(i+1), z!(j))
>                               && basepair (z!(i+2), z!(j-1))
>     basepairing :: Filter
>     basepairing  = match inp where
>       match inp (i,j) = i+1<j && basepair (z!(i+1), z!(j))
>     basepair ('a','u') = True
>     basepair ('u','a') = True
>     basepair ('c','g') = True
>     basepair ('g','c') = True
>     basepair ('g','u') = True
>     basepair ('u','g') = True
>     basepair ( x , y ) = False
>     merge::(Eq a,Ord a)=>[a]->[a]->[a]
>     merge [] y = y
>     merge xs@(_:_) [] = xs
>     merge xs@(a:x) ys@(b:y) | a <= b    = a:merge x ys
>                         | otherwise = b:merge xs y
Enumeration algebra: showcode
> enum :: Array Int Ebase -> a ->
>         Canonical_Algebra Int (Int,Int) Closed Inloop MultiLoopComp Canonical
> enum seq _ = (sadd,cadd,nil,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>             ml,addss,ssadd,cons,ul,combine,h,h_i,h_l,h_s)
>             where
>    sadd b = Sadd (s b)
>    cadd = Cadd
>    nil _ = Nil
>    sr lb c rb = Sr (s lb) c (s rb)
>    hl  llb lb l rb rrb = Hl  (s llb) (s lb) l (s rb) (s rrb)
>    sp  llb lb c rb rrb = Sp (s llb) (s lb) c (s rb) (s rrb)
>    bl  c bulge        = Bl c bulge
>    br  bulge c        = Br bulge c
>    iln llb lb i rb rrb = Iln (s llb) (s lb) i (s rb) (s rrb)
>    ils lb c rb   = Ils   (s lb) c (s rb)
>    ilr lb c rb   = Ilr   (s lb) c (s rb)
>    ill i rb      = Ill i (s rb)
>    ill2 r lb c   = Ill2 r (s lb) c
>    ilx lb i rb   = Ilx   (s lb) i (s rb)
>    ml llb lb multi rb rrb = Ml (s llb) (s lb) multi (s rb) (s rrb)
>    addss = Addss
>    ssadd = Ssadd
>    cons = Cons
>    ul = Ul
>    combine = Combine
>    h = id
>    h_i = id
>    h_l = id
>    h_s = id
>
>    s i = seq!i
Pretty printing algebra: showcode
> prettyprint :: a -> b ->
>                Canonical_Algebra i (Int,Int) String String String String
> prettyprint _ _ = 
>         (sadd,cadd,nil,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>          ml,addss,ssadd,cons,ul,combine,h,h_i,h_l,h_s) where
>   sadd _  s  = '.':s
>   cadd s1 s2 = s1++s2
>   nil  _     = []
>   sr   _  s _ = '(':s++")"
>   hl   _  _ (h1,h2) _ _   = '(':'(':dots (h2-h1)++"))"
>   sp   _  _ s _ _ = '(':'(':s++"))"
>   bl   (l1,l2) s  = dots (l2-l1)++s
>   br   s (r1,r2)  = s++dots (r2-r1)
>   iln  _  _ s  _  _    = '(':'(':s++"))"
>   ils  _  s _          = '.':s++"."
>   ilr  _  s _  (r1,r2) = '.':s++'.':dots (r2-r1)
>   ill  s _             = s++"."
>   ill2  (l1,l2) _  s   = dots (l2-l1)++'.':s
>   ilx  _  s _          = '.':s++"."
>   ml    _ _ s _ _     = '(':'(':s++"))"
>   addss s (r1,r2) = s++dots (r2-r1)
>   ssadd (l1,l2) s = dots (l2-l1)++s
>   cons  s1  s2    = s1++s2
>   ul  s     = s
>   combine s1 s2 = s1 ++ s2
>   h   = id
>   h_i = id
>   h_l = id
>   h_s = id
>   dots i = replicate i '.'
Counting Algebra: showcode
> count :: a -> b -> Canonical_Algebra i (Int,Int) Integer Integer Integer Integer
> count _ _ =(sadd,cadd,nil,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>             ml,addss,ssadd,cons,ul,combine,h,h_i,h_l,h_s) where 
>    sadd _ b = b
>    cadd a b = a*b
>    nil _ = 1
>    sr _ b _ = b
>    hl _ _ _ _ _ = 1
>    sp _ _ c _ _ = c
>    bl _ d = d
>    br c _ = c
>    iln _ _ c _ _ = c
>    ils _ b _ = b
>    ilr _ b _ _ = b
>    ill a _ = a
>    ill2 _ _ c = c
>    ilx _ b _ = b
>    ml _ _ c _ _ = c
>    addss a _ = a
>    ssadd _ b = b
>    cons a b = a * b
>    ul a  = a

>    combine a b = a*b
>    h [] = []
>    h xs = [sum xs]
>    h_i= h
>    h_l= h
>    h_s= h
Minimal free energy algebra: showcode
> mfe :: Array Int Ebase -> Int ->      -- closed         inloop      multiloop  answer   
>        Canonical_Algebra Int (Int,Int)  (Float,Int,Int) (Float,Int) Float      Float
> mfe array k = (sadd,cadd,nil,sr array,
>              hl array,sp array,bl array,br array,
>              iln array,ils array,ilr array,ill array,ill2 array,ilx,
>              ml array,addss,ssadd,cons,
>              ul,combine,h,h_i,h_l,h_s)                where
>    sadd lb e = e
>    cadd (e1,_,_) e = e1 + e
>    nil _ = 0.0
>    sr inp lb (e,_,_) rb = (e + sr_energy inp (lb,rb),lb,rb)
>    hl inp llb lb loop rb rrb = (hl_energy inp (lb,rb) + sr_energy inp (llb,rrb),llb,rrb)
>    sp inp llb lb (e,_,_) rb rrb = (e + sr_energy inp (llb,rrb), llb,rrb)
>    bl inp (l,r) (e,lend,rend) = (e + bl_energy inp l (l,r) (rend+1) ,l,rend)
>    br inp (e,lend,rend) (l,r) = (e + br_energy inp (lend-1) (l,r) (r+1) ,lend,r)
>    iln inp llb lb (e,_) rb rrb = (e + (top_stack inp lb rb) + (sr_energy inp (llb,rrb)),
>                                        llb,rrb)
>    ils inp lb (e,_,_) rb = (e + (asym 0) + (bot_stack inp lb rb) + (il_ent 2), 2)
>    ilr inp lb (e,_,_) rb (i,j) = (e + (asym (j-i)) + (bot_stack inp lb rb) 
>                                   + (il_ent (j-i+2)), j-i+2)
>    ill inp (e,l) rb = (e,l)
>    ill2 inp (i,j) lb (e,lend,rend) = (e + (asym (j-i)) + (bot_stack inp lb (rend+1)) 
>                                    + (il_ent (j-i+2)), j-i+2)
>    ilx lb (e,k) rb = (e + (il_ent (k+2)) - (il_ent k), k+2)
>    ml inp llb lb e rb rrb = (4.7 + e + sr_energy inp (llb,rrb),llb,rrb)
>    addss (e,lb,_) (i,j) = (e + ss_energy (i,j),lb,j)
>    ssadd (i,j) (e,_,rb) = (0.1 + e + ss_energy (i,j),i,rb)
>    cons (e1,lb,_) (e2,_,rb) = (e1 + e2,lb,rb)
>    ul (e,lb,rb) = (0.1 + e,lb,rb)
>    combine (e1,_,_) (e2,_,_) = e1 + e2
>    h = take k . sortBy compare . nub
>    h_l  = take k . sortBy compare . nub
>    h_s  = take k . sortBy compare . nub
>    h_i  = take k . sortBy compare . nub
Algebra type: showcode
> type Canonical_Algebra alph1 alph2 closed inloop multiloopcomp answer =
>  (alph1 -> answer -> answer,  --sadd
>   closed -> answer -> answer, --cadd
>   () -> answer,               --nil
>   alph1 -> closed -> alph1 -> closed,  --sr
>   alph1 -> alph1 -> alph2 -> alph1 -> alph1 -> closed,  --hl
>   alph1 -> alph1 -> closed -> alph1 -> alph1 -> closed, --sp
>   alph2 -> closed -> closed ,                           --bl         
>   closed -> alph2 -> closed,                            --br
>   alph1 -> alph1 -> inloop -> alph1 -> alph1 -> closed,  --iln
>   alph1 -> closed -> alph1 -> inloop,                    --ils
>   alph1 -> closed -> alph1 -> alph2 -> inloop,           --ilr
>   inloop -> alph1 -> inloop,                             --ill
>   alph2 -> alph1 -> closed -> inloop,                    --ill2
>   alph1 -> inloop -> alph1 -> inloop,                    --ilx
>   alph1 -> alph1 -> multiloopcomp -> alph1 -> alph1 -> closed, --ml
>   closed -> alph2 -> closed,  --addss
>   alph2 -> closed -> closed,  --ssadd
>   closed -> closed -> closed, --cons
>   closed -> closed,                   --ul
>   closed -> closed -> multiloopcomp,  --combine
>   [closed] -> [closed], --h
>   [inloop] -> [inloop], --h_i
>   [closed] -> [closed], --h_l
>   [answer] -> [answer]  --h_s
>   )
Algebra cross product: showcode
> infix ***
> (alg1 *** alg2) basearray k =
>         (sadd,cadd,nil,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>          ml,addss,ssadd,cons,ul,combine,h,h_i,h_l,h_s) where
>    (sadd1,cadd1,nil1,sr1,hl1,sp1,bl1,br1,
>     iln1,ils1,ilr1,ill_1,ill2_1,ilx1,ml1,addss1,ssadd1,cons1,
>     ul1,combine1, h1,h_i1,h_l1,h_s1) = alg1 basearray k
>    (sadd2,cadd2,nil2,sr2,hl2,sp2,bl2,br2,
>     iln2,ils2,ilr2,ill_2,ill2_2,ilx2, ml2,addss2,ssadd2,cons2,
>     ul2,combine2, h2,h_i2,h_l2,h_s2) = alg2 basearray k

>    sadd b (a1,a2) = (sadd1 b a1, sadd2 b a2)
>    cadd (c1,c2) (a1,a2) = (cadd1 c1 a1, cadd2 c2 a2)
>    nil a = (nil1 a, nil2 a)
>    sr b (c1,c2) b' = (sr1 b c1 b', sr2 b c2 b')
>    hl b1 b2 u b2' b1' = (hl1 b1 b2 u b2' b1', hl2 b1 b2 u b2' b1')
>    sp b1 b2 (c1,c2) b2' b1' = (sp1 b1 b2 c1 b2' b1',sp2 b1 b2 c2 b2' b1') 
>    bl u (c1,c2) = (bl1 u c1, bl2 u c2)
>    br (c1,c2) u = (br1 c1 u, br2 c2 u)
>    iln b1 b2 (i1,i2) b2' b1' = (iln1 b1 b2 i1 b2' b1', iln2 b1 b2 i2 b2' b1')
>    ils b (c1,c2) b' = (ils1 b c1 b', ils2 b c2 b')
>    ilr b (c1,c2) b' u = (ilr1 b c1 b' u, ilr2 b c2 b' u)
>    ill (i1,i2) b' = (ill_1 i1 b',ill_2 i2 b')
>    ill2 u b (c1,c2) = (ill2_1 u b c1, ill2_2 u b c2)
>    ilx b (i1,i2) b' = (ilx1 b i1 b', ilx2 b i2 b')
>    ml b1 b2 (m1,m2) b2' b1' = (ml1 b1 b2 m1 b2' b1', ml2 b1 b2 m2 b2' b1')
>    addss (c1,c2) u = (addss1 c1 u, addss2 c2 u)
>    ssadd u (c1,c2) = (ssadd1 u c1, ssadd2 u c2)
>    cons (c1,c2) (c_1,c_2) = (cons1 c1 c_1, cons2 c2 c_2)
>    ul (c1,c2) = (ul1 c1, ul2 c2)
>    combine (c1,c2) (c_1,c_2) = (combine1 c1 c_1, combine2 c2 c_2)
> 
>    h   = hm h1 h2
>    h_i = hm h_i1 h_i2
>    h_l = hm h_l1 h_l2
>    h_s = hm h_s1 h_s2
>
>    hm hm1 hm2 xs = take k [(x1,x2)| x1 <- nub $ hm1 [ y1 | (y1,y2) <- xs],
>                                     x2 <-       hm2 [ y2 | (y1,y2) <- xs, y1 == x1]]
For usage on your local machine:
background image
university bielefeld AG PI BiBiServ
ambient picture