The original Needleman-Wunsch algorithm (sequence alignment)
The original paper by Needleman and Wunsch is actually a very informal description of the algorithm. It uses an arbitrary scoring function for gaps, which leads to an algorithm of runtime O(n^3). Read the full description in Systematic Dynamic Programming in Bioinformatics.
ADP Source Code Try DP online
Haskell header: showcode
> module NeedlemanWunsch where

> import Array
> import List
> import TTCombinators
The signature: showcode
> data Alignment = Nil                    |
>                  D  Subword Alignment   |
>                  I  Alignment Subword   |
>                  R  Char Alignment Char 
>                                            deriving (Eq, Show)
The yield grammar: showcode
> nw_alignments alg inpX inpY = axiom alignment where
>   (nil, d, i, r, h)    = alg x y
> 
>   alignment  = tabulated (
>                 nil ><< empty                           |||
>                 d   <<< region ++~ alignment            |||
>                 i   <<<            alignment ~++ region |||
>                 r   <<< xbase  -~~ alignment ~~- ybase  ... h)
Bind input: showcode
>   infixl 7 +~~, -~~, ~~+, ~~-
>   (x, y, xbase, ybase, empty, (+~~), (~~+), (-~~), (~~-), tabulated) 
>     = bindParserCombinators inpX inpY
>   infixl 7 ++~,  ~++
>   ((++~), (~++)) = bindSpezialCombinators x y
Enumeration algebra: showcode
> enum :: a -> a-> NW_Algebra Char Subword Alignment
> enum _ _ = (nil, d, i, r, h) where
>    nil = Nil
>    d   = D
>    i   = I
>    r   = R
>    h   = id
Pretty printing algebra: showcode
> prettyprint ::  Array Int Char ->  Array Int Char ->
>                 NW_Algebra Char Subword (String, String)
> prettyprint seqX seqY = (nil, d, i, r, h) where
>   nil         = ("","")
>   d x (l,r)   = (strx x++l, gap' x++r)
>   i (l,r) y   = ((gap' y)++l, stry y++r)
>   r x (l,r) y = (x:l,y:r)
>   h           = id
>   gap         = '-'
>   gapStart    = '='
>   gap' (i,j)	= gapStart:take (j-i-1) (repeat gap)
>   strx (i,j)  = [seqX!k|k<-[i+1..j]]
>   stry (i,j)  = [seqY!k|k<-[i+1..j]]
Counting Algebra: showcode
> count :: a -> a-> NW_Algebra b c Int
> count _ _ = (nil, d, i, r, h) where
>    nil   = 1
>    d _ b = b
>    i a _ = a
>    r _ b _ = b
>    h [] = []
>    h xs = [sum xs]
Affine gap score algebra: showcode
> affine :: a -> a-> NW_Algebra Char Subword Int
> affine _ _ = (nil, d, i, r, h) where
>  nil         = 0
>  d (i,j) s   = s + open + extend * (j-i)
>  i   s (i,j) = s + open + extend * (j-i)
>  r a s b     = s + w a b
>  h []        = []
>  h l         = [maximum l]
>
>  -- simple definitions for open, extend and w:
>  open   = (-15)
>  extend = (-1)
>  w a b  = if a==b then 4 else -3
Unit cost score algebra: showcode
> unit :: a -> a-> NW_Algebra Char Subword Int
> unit _ _  = (nil, d, i, r, h) where
>   nil     = 0
>   d (i,j) s   = s + (j-i)
>   i   s (i,j) = s + (j-i)
>   r a s b = if a==b then s else s+1
>   h []    = []
>   h xs    = [minimum xs]
Algebra type: showcode
> type NW_Algebra alph1 alph2 answer = (
>   answer,                             -- nil
>   alph2 -> answer -> answer,          -- d
>   answer -> alph2 -> answer,          -- i
>   alph1 -> answer -> alph1 -> answer, -- r
>   [answer] -> [answer]                -- h
>   )
Algebra cross product: showcode
> infix ***
> (***):: Eq c => 
>         (Array Int Char -> Array Int Char -> NW_Algebra a b c) 
>      -> (Array Int Char -> Array Int Char -> NW_Algebra a b d) 
>      ->  Array Int Char -> Array Int Char -> NW_Algebra a b (c,d)
> (alg1 *** alg2) x y = (nil, d, i, r, h) where
>    (nil1, d1, i1, r1, h1) = alg1 x y
>    (nil2, d2, i2, r2, h2) = alg2 x y
> 
>    nil = (nil1, nil2)
>    d x (a1,a2) = (d1 x a1, d2 x a2)
>    i (a1,a2) y = (i1 a1 y, i2 a2 y)
>    r x (a1,a2) y = (r1 x a1 y, r2 x a2 y)
> 
>    h xs = [(x1,x2)| x1 <- nub $ h1 [ y1 | (y1,y2) <- xs],
>                     x2 <-       h2 [ y2 | (y1,y2) <- xs, y1 == x1]]
For usage on your local machine:
background image
university bielefeld AG PI BiBiServ
ambient picture