Complete suboptimal folding of canonical RNA structures
RNA folding in the currently accepted energy model. Canonical structures avoid isolated base pairs. This version can enumerate all structures in order of increasing energy.
ADP Source Code Try DP online
Haskell header: showcode
> module CanonicalRNA where

> import Array
> import List
> import RnaI
> import Energy
> import ADPTriCombinators
The signature: showcode
> data Canonical = Sadd Base Canonical   |
>                  Cadd Closed Canonical |
>                  Nil 
>                                   deriving (Eq, Ord, Show)

> data Closed = Edl Base Closed                                   |
>               Edr Closed Base                                   |
>               Edlr Base Closed Base                             |
>               Drem Closed                                       |
>               Sr Base Closed Base                               |
>               Hl Base Base (Int,Int) Base Base                  |
>               Sp Base Base Closed Base Base                     |
>               Bl (Int,Int) Closed                               |
>               Br Closed (Int,Int)                               |
>               Iln Base Base Inloop Base Base                    |
>               Ml Base Base MultiLoopComp Base Base              |
>               Mldr Base Base MultiLoopComp Base Base Base       |
>               Mldlr Base Base Base MultiLoopComp Base Base Base |
>               Mldl Base Base Base MultiLoopComp Base Base       |
>               Addss Closed (Int,Int)                            |
>               Ssadd (Int,Int) Closed                            |
>               Cons Closed Closed                                |
>               Dr Closed Base                                    |
>               Dlr Base Closed Base                              |
>               Dl Base Closed                                    |
>               Ul Closed 
>                                   deriving (Eq, Ord, Show)

> data Inloop = Ils Base Closed Base           |
>               Ilr Base Closed Base (Int,Int) |
>               Ill Inloop Base                |
>               Ill2 (Int,Int) Base Closed     |
>               Ilx Base Inloop Base          
>                                   deriving (Eq, Ord, Show)

> data MultiLoopComp = Combine Closed Closed 
>                             deriving (Eq, Ord, Show)
The yield grammar: showcode
> canonicals k alg inp = axiom struct where
>  
>     (sadd,cadd,nil,edl,edr,edlr,drem,sr,hl,sp,bl,br,iln,
>      ils,ilr,ill,ill2,ilx,ml,mldr,mldlr,mldl,addss,ssadd,
>      cons,dr,dlr,dl,ul,combine,h,h_i,h_l,h_s) = alg baseArray k

>     struct        = listed ( 
>                     sadd <<< base    -~~ struct |||
>                     cadd <<< edangle ~~~ struct |||
>                     nil  <<< empty              ... h_s)

>     edangle       = edl  <<< base -~~ closed          |||
>                     edr  <<<          closed ~~- base |||
>                     edlr <<< base -~~ closed ~~- base |||
>                     drem <<<          closed          ... h

>     closed        = tabulated (
>                     stack ||| hairpin ||| leftB ||| rightB ||| 
>                     iloop ||| multiloop ... h)

>     multiloop     = (mldl  <<< base -~~ base ~~- base ~~!! ml_components
>                            ~~- base ~~- base |||
>                      mldr  <<< base -~~ base ~~!           ml_components ~~- base
>                            ~~- base ~~- base |||
>                      mldlr <<< base -~~ base ~~- base ~~!! ml_components ~~- base
>                            ~~- base ~~- base |||
>                      ml    <<< base -~~ base ~~!           ml_components
>                            ~~- base ~~- base )
>                      `with` stackpairing ... h

>     ml_components = combine <<< block ~~~ comps

>     comps         = tabulated (
>                     cons  <<< block ~~~ comps  |||
>                               block            |||
>                     addss <<< block ~~~ region ... h_l)

>     block         = tabulated (
>                     ul    <<< dangle            |||
>                     ssadd <<< region ~~~ dangle ... h)

>     dangle        = dl   <<< base -~~ closed          |||
>                     dr   <<<          closed ~~- base |||
>                     dlr  <<< base -~~ closed ~~- base |||
>                     drem <<<          closed          ... h

>     stack         = (sr  <<< base -~~ closed ~~- base) `with` basepairing

>     hairpin       = (hl  <<< base -~~ base ~~! (region `with` minloopsize 3)
>                          ~~- base ~~- base)
>                     `with` stackpairing

>     leftB         = (sp  <<< base -~~ base ~~! (bl <<< region  ~~~ closed)
>                          ~~- base ~~- base)
>                     `with` stackpairing ... h

>     rightB        = (sp  <<< base -~~ base ~~! (br <<< closed ~~~ region)
>                          ~~- base ~~- base)
>                     `with` stackpairing ... h 

>     iloop         = (iln <<< base -~~ base ~~! inloop
>                          ~~- base ~~- base)
>                     `with` stackpairing ... h

>     inloop        = tabulated (
>                    ilx <<< base -~~ inloop ~~- base |||
>                                     loopend         ... h_i)

>     loopend       = tabulated (
>                     ill <<< (ill2 <<< region ~~- base ~~~ closed) ~~- base |||
>                     ilr <<<            base -~~ closed ~~- base ~~~ region |||
>                     ils <<<            base -~~ closed ~~- base        ... h_i)
Bind input: showcode
>     axiom     = axiom' n
>     z         = mk (inp)
>     (_,n)     = bounds z
>     baseArray     = (fst.str2inp) inp
>     base (i,j)= [ j | (i+1) == j ]
>     region (i,j) =  [(i,j) | i < j]

>     tabulated = table n
>     listed :: Parser a -> Parser a
>     listed p = q $ array (0,n) [(j, p (j,n)) | j <- [0..n]] where
>       q t (i,j) = if j==n then t!i else []

>     infixl 7 ~~!
>     (~~!) = (~~*) (2,2) 3
>     infixl 7 ~~!!
>     (~~!!) = (~~*) (3,3) 14
>     minloopsize :: Int -> Filter
>     minloopsize n = match inp where
>       match inp (i,j) = i+n<=j
>     stackpairing :: Filter
>     stackpairing  = match inp where
>       match inp (i,j) = i+3<j && basepair (z!(i+1), z!(j))
>                               && basepair (z!(i+2), z!(j-1))
>     basepairing :: Filter
>     basepairing  = match inp where
>       match inp (i,j) = i+1<j && basepair (z!(i+1), z!(j))
>     basepair ('a','u') = True
>     basepair ('u','a') = True
>     basepair ('c','g') = True
>     basepair ('g','c') = True
>     basepair ('g','u') = True
>     basepair ('u','g') = True
>     basepair ( x , y ) = False
>     merge::(Eq a,Ord a)=>[a]->[a]->[a]
>     merge [] y = y
>     merge xs@(_:_) [] = xs
>     merge xs@(a:x) ys@(b:y) | a <= b    = a:merge x ys
>                         | otherwise = b:merge xs y
Enumeration algebra: showcode
> enum :: Array Int Ebase -> a ->

>         Canonical_Algebra Int (Int,Int) Closed Inloop MultiLoopComp Canonical
> enum seq _ = (sadd,cadd,nil,edl,edr,edlr,drem,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>             ml,mldr,mldlr,mldl,addss,ssadd,cons,dr,dlr,dl,ul,combine,h,h_i,h_l,h_s)
>             where
>    sadd b = Sadd (s b)
>    cadd = Cadd
>    nil _ = Nil
>    edl b = Edl (s b)
>    edr c b = Edr c (s b)
>    edlr lb c rb = Edlr (s lb) c (s rb)
>    drem = Drem
>    sr lb c rb = Sr (s lb) c (s rb)
>    hl  llb lb l rb rrb = Hl  (s llb) (s lb) l (s rb) (s rrb)
>    sp  llb lb c rb rrb = Sp (s llb) (s lb) c (s rb) (s rrb)
>    bl  c bulge        = Bl c bulge
>    br  bulge c        = Br bulge c
>    iln llb lb i rb rrb = Iln (s llb) (s lb) i (s rb) (s rrb)
>    ils lb c rb   = Ils   (s lb) c (s rb)
>    ilr lb c rb   = Ilr   (s lb) c (s rb)
>    ill i rb      = Ill i (s rb)
>    ill2 r lb c   = Ill2 r (s lb) c
>    ilx lb i rb   = Ilx   (s lb) i (s rb)
>    ml llb lb multi rb rrb = Ml (s llb) (s lb) multi (s rb) (s rrb)
>    mldr  llb lb    multi dr rb rrb = Mldr  (s llb)(s lb)       multi (s dr)(s rb)(s rrb)
>    mldlr llb lb dl multi dr rb rrb = Mldlr (s llb)(s lb)(s dl) multi (s dr)(s rb)(s rrb)
>    mldl  llb lb dl multi    rb rrb = Mldl  (s llb)(s lb)(s dl) multi       (s rb)(s rrb)
>    addss = Addss
>    ssadd = Ssadd
>    cons = Cons
>    dr     c dr = Dr         c (s dr)
>    dlr dl c dr = Dlr (s dl) c (s dr)
>    dl  dl c    = Dl  (s dl) c
>    ul = Ul
>    combine = Combine
>    h = id
>    h_i = id
>    h_l = id
>    h_s = id
>
>    s i = seq!i
Pretty printing algebra: showcode
> prettyprint :: Char -> Char -> a -> b ->

>                Canonical_Algebra i (Int,Int) String String String String
> prettyprint left right _ _ = 
>         (sadd,cadd,nil,edl,edr,edlr,drem,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>          ml,mldr,mldlr,mldl,addss,ssadd,cons,dr,dlr,dl,ul,combine,h,h_i,h_l,h_s) where
>   sadd _  s  = '.':s
>   cadd s1 s2 = s1++s2
>   nil  _     = []
>   edl  _  s   = left:s
>   edr  s  _   = s++right:[]
>   edlr _  s _ = left:s++right:[]
>   drem        = id
>   sr   _  s _ = '(':s++")"
>   hl   _  _ (h1,h2) _ _   = '(':'(':dots (h2-h1)++"))"
>   sp   _  _ s _ _ = '(':'(':s++"))"
>   bl   (l1,l2) s  = dots (l2-l1)++s
>   br   s (r1,r2)  = s++dots (r2-r1)
>   iln  _  _ s  _  _    = '(':'(':s++"))"
>   ils  _  s _          = '.':s++"."
>   ilr  _  s _  (r1,r2) = '.':s++'.':dots (r2-r1)
>   ill  s _             = s++"."
>   ill2  (l1,l2) _  s   = dots (l2-l1)++'.':s
>   ilx  _  s _          = '.':s++"."
>   ml    _ _ s _ _     = '(':'(':s++"))"
>   mldr  _ _ s _ _ _   = '(':'(':s++left:"))"
>   mldlr _ _ _ s _ _ _ = '(':'(':right:s++left:"))"
>   mldl  _ _ _ s _ _   = '(':'(':right:s++"))"
>   addss s (r1,r2) = s++dots (r2-r1)
>   ssadd (l1,l2) s = dots (l2-l1)++s
>   cons  s1  s2    = s1++s2
>   dr  s _   = s++right:[]
>   dlr _ s _ = left:s++right:[]
>   dl  _ s   = left:s
>   ul  s     = s
>   combine s1 s2 = s1 ++ s2
>   h   = id
>   h_i = id
>   h_l = id
>   h_s = id
>   dots i = replicate i '.'

> prettyprint1 = prettyprint '\\' '/'
> prettyprint2 = prettyprint '/' '\\'
DotBracket algebra: showcode
> dotBracket = prettyprint '.' '.'
Counting Algebra: showcode
> count :: a -> b -> Canonical_Algebra i (Int,Int) Integer Integer Integer Integer
> count _ _ = (sadd,cadd,nil,edl,edr,edlr,drem,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>          ml,mldr,mldlr,mldl,addss,ssadd,cons,dr,dlr,dl,ul,combine,h,h_i,h_l,h_s) where
>    sadd _ b = b
>    cadd a b = a*b
>    nil _ = 1
>    edl _ b = b
>    edr a _ = a
>    edlr _ b _ = b
>    drem a = a
>    sr _ b _ = b
>    hl _ _ _ _ _ = 1
>    sp _ _ c _ _ = c
>    bl _ d = d
>    br c _ = c
>    iln _ _ c _ _ = c
>    ils _ b _ = b
>    ilr _ b _ _ = b
>    ill a _ = a
>    ill2 _ _ c = c
>    ilx _ b _ = b
>    ml _ _ c _ _ = c
>    mldr _ _ c _ _ _ = c
>    mldlr _ _ _ d _ _ _ = d
>    mldl _ _ _ d _ _ = d
>    addss a _ = a
>    ssadd _ b = b
>    cons a b = a * b
>    dr a _ = a
>    dlr _ b _ = b
>    dl _ b = b
>    ul a  = a
>    combine a b = a*b
>    h [] = []
>    h xs = [sum xs]
>    h_i= h
>    h_l= h
>    h_s= h
Minimal free energy algebra: showcode
> mfe :: Array Int Ebase -> Int ->      -- closed         inloop      multiloop  answer   
>        Canonical_Algebra Int (Int,Int)  (Float,Int,Int) (Float,Int) Float      Float
> mfe array k = (sadd,cadd,nil,edl array,edr array,edlr array,drem,sr array,
>              hl array,sp array,bl array,br array,
>              iln array,ils array,ilr array,ill array,ill2 array,ilx,
>              ml array,mldr array,mldlr array,mldl array,addss,ssadd,cons,
>              dr array,dlr array,dl array,ul,combine,h,h_i,h_l,h_s)                where
>    sadd lb e = e
>    cadd (e1,_,_) e = e1 + e
>    nil _ = 0.0
>    edl inp dl (e,lb,rb)     = (e + dl_energy inp (lb,rb),dl,rb)
>    edr inp (e,lb,rb) dr     = (e + dr_energy inp (lb,rb),lb,dr)
>    edlr inp dl (e,lb,rb) dr = (e + dl_energy inp (lb,rb) + dr_energy inp (lb,rb),dl,dr)
>    drem = id
>    sr inp lb (e,_,_) rb = (e + sr_energy inp (lb,rb),lb,rb)
>    hl inp llb lb loop rb rrb = (hl_energy inp (lb,rb) + sr_energy inp (llb,rrb),llb,rrb)
>    sp inp llb lb (e,_,_) rb rrb = (e + sr_energy inp (llb,rrb), llb,rrb)
>    bl inp (l,r) (e,lend,rend) = (e + bl_energy inp l (l,r) (rend+1) ,l,rend)
>    br inp (e,lend,rend) (l,r) = (e + br_energy inp (lend-1) (l,r) (r+1) ,lend,r)
>    iln inp llb lb (e,_) rb rrb = (e + (top_stack inp lb rb) + (sr_energy inp (llb,rrb)),
>                                        llb,rrb)
>    ils inp lb (e,_,_) rb = (e + (asym 0) + (bot_stack inp lb rb) + (il_ent 2), 2)
>    ilr inp lb (e,_,_) rb (i,j) = (e + (asym (j-i)) + (bot_stack inp lb rb) 
>                                   + (il_ent (j-i+2)), j-i+2)
>    ill inp (e,l) rb = (e ,l)
>    ill2 inp (i,j) lb (e,lend,rend) = (e + (asym (j-i)) + (bot_stack inp lb (rend+1)) 
>                                    + (il_ent (j-i+2)), j-i+2)
>    ilx lb (e,k) rb = (e + (il_ent (k+2)) - (il_ent k), k+2)
>    ml inp llb lb e rb rrb = (4.7 + e + sr_energy inp (llb,rrb),llb,rrb)
>    mldr inp llb lb e dr rb rrb = (5.1 + e + dri_energy inp (lb,rb) 
>                                         + sr_energy inp (llb,rrb),llb,rrb)
>    mldlr inp llb lb dl e dr rb rrb = (5.5 + e + dli_energy inp (lb,rb) 
>                                             + dri_energy inp (lb,rb) 
>                                             + sr_energy inp (llb,rrb),llb,rrb)
>    mldl inp llb lb dl e rb rrb = (5.1 + e + dli_energy inp (lb,rb) 
>                                         + sr_energy inp (llb,rrb), llb,rrb)
>    addss (e,lb,_) (i,j) = (e + ss_energy (i,j),lb,j)
>    ssadd (i,j) (e,_,rb) = (0.1 + e + ss_energy (i,j),i,rb)
>    cons (e1,lb,_) (e2,_,rb) = (e1 + e2,lb,rb)
>    dr inp (e,lb,rb) dr     = (e + 0.4 + dr_energy inp (lb,rb),lb,dr)
>    dlr inp dl (e,lb,rb) dr = (e + 0.8 + dl_energy inp (lb,rb) + dr_energy inp (lb,rb),
>                               dl,dr)
>    dl inp dl (e,lb,rb)     = (e + 0.4 + dl_energy inp (lb,rb),dl,rb)
>    ul (e,lb,rb) = (0.1 + e,lb,rb)
>    combine (e1,_,_) (e2,_,_) = e1+e2
>    h  = take k . sortBy compare . nub
>    h_l  = take k . sortBy compare . nub
>    h_s  = take k . sortBy compare . nub
>    h_i  = take k . sortBy compare . nub
Algebra type: showcode
> type Canonical_Algebra alph1 alph2 closed inloop multiloopcomp answer =
>  (alph1 -> answer -> answer,  --sadd
>   closed -> answer -> answer, --cadd
>   () -> answer,               --nil
>   alph1 -> closed -> closed,  --edl
>   closed -> alph1 -> closed,  --edr
>   alph1 -> closed -> alph1 -> closed,  --edlr
>   closed -> closed,                    --drem
>   alph1 -> closed -> alph1 -> closed,  --sr
>   alph1 -> alph1 -> alph2 -> alph1 -> alph1 -> closed,  --hl
>   alph1 -> alph1 -> closed -> alph1 -> alph1 -> closed, --sp
>   alph2 -> closed -> closed ,                           --bl         
>   closed -> alph2 -> closed,                            --br
>   alph1 -> alph1 -> inloop -> alph1 -> alph1 -> closed,  --iln
>   alph1 -> closed -> alph1 -> inloop,                    --ils
>   alph1 -> closed -> alph1 -> alph2 -> inloop,           --ilr
>   inloop -> alph1 -> inloop,                             --ill
>   alph2 -> alph1 -> closed -> inloop,                    --ill2
>   alph1 -> inloop -> alph1 -> inloop,                    --ilx
>   alph1 -> alph1 -> multiloopcomp -> alph1 -> alph1 -> closed,                   --ml
>   alph1 -> alph1 -> multiloopcomp -> alph1 -> alph1 -> alph1 -> closed,          --mldr
>   alph1 -> alph1 -> alph1 -> multiloopcomp -> alph1 -> alph1 -> alph1 -> closed, --mldlr
>   alph1 -> alph1 -> alph1 -> multiloopcomp -> alph1 -> alph1 -> closed,          --mldl
>   closed -> alph2 -> closed,  --addss
>   alph2 -> closed -> closed,  --ssadd
>   closed -> closed -> closed, --cons
>   closed -> alph1 -> closed,          --dr
>   alph1 -> closed -> alph1 -> closed, --dlr
>   alph1 -> closed -> closed,          --dl
>   closed -> closed,                   --ul
>   closed -> closed -> multiloopcomp,  --combine
>   [closed] -> [closed], --h
>   [inloop] -> [inloop], --h_i
>   [closed] -> [closed], --h_l
>   [answer] -> [answer]  --h_s
>   )
Algebra cross product: showcode
> infix ***
> (alg1 *** alg2) basearray k =
>         (sadd,cadd,nil,edl,edr,edlr,drem,sr,hl,sp,bl,br,iln,ils,ilr,ill,ill2,ilx,
>          ml,mldr,mldlr,mldl,addss,ssadd,cons,dr,dlr,dl,ul,combine,h,h_i,h_l,h_s) where
>    (sadd1,cadd1,nil1,edl1,edr1,edlr1,drem1,sr1,hl1,sp1,bl1,br1,
>     iln1,ils1,ilr1,ill_1,ill2_1,ilx1,ml1,mldr1,mldlr1,mldl1,addss1,ssadd1,cons1,
>     dr1,dlr1,dl1,ul1,combine1, h1,h_i1,h_l1,h_s1) = alg1 basearray k
>    (sadd2,cadd2,nil2,edl2,edr2,edlr2,drem2,sr2,hl2,sp2,bl2,br2,
>     iln2,ils2,ilr2,ill_2,ill2_2,ilx2, ml2,mldr2,mldlr2,mldl2,addss2,ssadd2,cons2,
>     dr2,dlr2,dl2,ul2,combine2, h2,h_i2,h_l2,h_s2) = alg2 basearray k
>    sadd b (a1,a2) = (sadd1 b a1, sadd2 b a2)
>    cadd (c1,c2) (a1,a2) = (cadd1 c1 a1, cadd2 c2 a2)
>    nil a = (nil1 a, nil2 a)
>    edl b (c1,c2) = (edl1 b c1, edl2 b c2)
>    edr (c1,c2) b = (edr1 c1 b, edr2 c2 b)
>    edlr b (c1,c2) b' = (edlr1 b c1 b', edlr2 b c2 b')
>    drem (c1,c2) = (drem1 c1, drem2 c2)
>    sr b (c1,c2) b' = (sr1 b c1 b', sr2 b c2 b')
>    hl b1 b2 u b2' b1' = (hl1 b1 b2 u b2' b1', hl2 b1 b2 u b2' b1')
>    sp b1 b2 (c1,c2) b2' b1' = (sp1 b1 b2 c1 b2' b1',sp2 b1 b2 c2 b2' b1') 
>    bl u (c1,c2) = (bl1 u c1, bl2 u c2)
>    br (c1,c2) u = (br1 c1 u, br2 c2 u)
>    iln b1 b2 (i1,i2) b2' b1' = (iln1 b1 b2 i1 b2' b1', iln2 b1 b2 i2 b2' b1')
>    ils b (c1,c2) b' = (ils1 b c1 b', ils2 b c2 b')
>    ilr b (c1,c2) b' u = (ilr1 b c1 b' u, ilr2 b c2 b' u)
>    ill (i1,i2) b' = (ill_1 i1 b',ill_2 i2 b')
>    ill2 u b (c1,c2) = (ill2_1 u b c1, ill2_2 u b c2)
>    ilx b (i1,i2) b' = (ilx1 b i1 b', ilx2 b i2 b')
>    ml b1 b2 (m1,m2) b2' b1' = (ml1 b1 b2 m1 b2' b1', ml2 b1 b2 m2 b2' b1')
>    mldr b1 b2 (m1,m2) d b2' b1' = (mldr1 b1 b2 m1 d b2' b1',
>                                    mldr2 b1 b2 m2 d b2' b1')
>    mldlr b1 b2 d (m1,m2) d_ b2' b1' = (mldlr1 b1 b2 d m1 d_ b2' b1',
>                                        mldlr2 b1 b2 d m2 d_ b2' b1')
>    mldl b1 b2 d (m1,m2) b2' b1' = (mldl1 b1 b2 d m1 b2' b1', mldl2 b1 b2 d m2 b2' b1')
>    addss (c1,c2) u = (addss1 c1 u, addss2 c2 u)
>    ssadd u (c1,c2) = (ssadd1 u c1, ssadd2 u c2)
>    cons (c1,c2) (c_1,c_2) = (cons1 c1 c_1, cons2 c2 c_2)
>    dr (c1,c2) d = (dr1 c1 d, dr2 c2 d)
>    dlr d (c1,c2) d_ = (dlr1 d c1 d_, dlr2 d c2 d_)
>    dl d (c1,c2) = (dl1 d c1, dl2 d c2)
>    ul (c1,c2) = (ul1 c1, ul2 c2)
>    combine (c1,c2) (c_1,c_2) = (combine1 c1 c_1, combine2 c2 c_2)
> 
>    h   = hm h1 h2
>    h_i = hm h_i1 h_i2
>    h_l = hm h_l1 h_l2
>    h_s = hm h_s1 h_s2
>
>    hm hm1 hm2 xs = take k [(x1,x2)| x1 <- nub $ hm1 [ y1 | (y1,y2) <- xs],
>                                     x2 <-       hm2 [ y2 | (y1,y2) <- xs, y1 == x1]]
For usage on your local machine:
background image
university bielefeld AG PI BiBiServ
ambient picture