(*
 * linear algebra functions with Bigarrays
 *
 * 9/22/01 - checker
 *
 *)

(* todo
   - should check sizes of matrices (or just let them throw?)
   - use custom float ref for unboxing?  += operator
 *)

  
type ba1d = (float, Bigarray.float64_elt, Bigarray.c_layout) Bigarray.Array1.t
type bapivot = (int, Bigarray.int_elt, Bigarray.c_layout) Bigarray.Array1.t
      
type matrix = {
    height : int;
    width : int;       (* @todo stride so can reuse memory *)
    data : ba1d;
  }

let getw { width = w } = w
let geth { height = h } = h
let getwh { width = w; height = h } = w,h
    
let max_float = 1.7976931348623158e+308
let macheps =
  let rec macheps f = if f +. 1.0 <> 1.0 then macheps (f/.2.0) else f*.2.0 in
  macheps 1.0
      
let setm a i j v = a.data.{i*a.width+j} <- v
let getm a i j = a.data.{i*a.width+j}
let setv a i v = a.data.{i} <- v
let getv a i = a.data.{i}

let addm a i j v = a.data.{i*a.width+j} <- a.data.{i*a.width+j} +. v
let addv a i v = a.data.{i} <- a.data.{i} +. v
let subm a i j v = a.data.{i*a.width+j} <- a.data.{i*a.width+j} -. v
let subv a i v = a.data.{i} <- a.data.{i} -. v

let create_matrix ?init m n =
  let m = { height = m; width = n;
            data = Bigarray.Array1.create Bigarray.float64 Bigarray.c_layout (m*n); } in
  begin match init with
    None -> ()
  | Some v -> Bigarray.Array1.fill m.data v
  end;
  m

let set_submatrix a i j b =
  let m = b.height and
      n = b.width in
  assert ((i+m <= a.height) && (j+n <= a.width));
  for ii = 0 to m-1 do
    for jj = 0 to n-1 do
      setm a (i+ii) (j+jj) (getm b ii jj)
    done
  done
let set_submatrix3 a i j b =
  let m = 3 and 
      n = 3 in
  assert ((i+m <= a.height) && (j+n <= a.width));
  for ii = 0 to m-1 do
    for jj = 0 to n-1 do
      setm a (i+ii) (j+jj) b.(ii*n+jj)
    done
  done
let get_submatrix a i j m n =
  let b = create_matrix m n in
  assert ((i+m <= a.height) && (j+n <= a.width));
  for ii = 0 to m-1 do
    for jj = 0 to n-1 do
      setm b ii jj (getm a (i+ii) (j+jj))
    done
  done;
  b

let get_submatrix_idxset a rows cols =
  let m = Array.length rows and
      n = Array.length cols in
  let b = create_matrix m n in
  for i = 0 to m-1 do
    for j = 0 to n-1 do
      setm b i j (getm a rows.(i) cols.(j))
    done;
  done;
  b
    
    
(* @lame these are 2d vector specific *)
let set_subcolvector2 a i j (x0,x1) =
  setm a (i+0) j x0;
  setm a (i+1) j x1
let get_subcolvector2 a i j =
  (getm a (i+0) j,
   getm a (i+1) j)
let set_subrowvector2 a i j (x0,x1) =
  setm a i (j+0) x0;
  setm a i (j+1) x1
let get_subrowvector2 a i j =
  (getm a i (j+0),
   getm a i (j+1))
let add_subcolvector2 a i j (x0,x1) =
  addm a (i+0) j x0;
  addm a (i+1) j x1
let add_subrowvector2 a i j (x0,x1) =
  addm a i (j+0) x0;
  addm a i (j+1) x1
let sub_subcolvector2 a i j (x0,x1) =
  subm a (i+0) j x0;
  subm a (i+1) j x1
let sub_subrowvector2 a i j (x0,x1) =
  subm a i (j+0) x0;
  subm a i (j+1) x1
(* @lame and now for 3d specific *)
let set_subcolvector3 a i j (x0,x1,x2) =
  setm a (i+0) j x0;
  setm a (i+1) j x1;
  setm a (i+2) j x2;
  ()
let get_subcolvector3 a i j =
  (getm a (i+0) j,
   getm a (i+1) j,
   getm a (i+2) j)
let set_subrowvector3 a i j (x0,x1,x2) =
  setm a i (j+0) x0;
  setm a i (j+1) x1;
  setm a i (j+2) x2;
  ()

let create_pivot n =
  let pivot = Bigarray.Array1.create Bigarray.int Bigarray.c_layout n in
  for i = 0 to n - 1 do
    pivot.{i} <- i;
  done;
  pivot
    
let create_vector ?init n = create_matrix ?init n 1
    
let copy {data = src} {data = dst} =
  Bigarray.Array1.blit src dst

let fill m v = Bigarray.Array1.fill m.data v
    
let iter a f =
  let m = a.height and
      n = a.width in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      f a i j
    done
  done

let create_init m n f =
  let a = create_matrix m n in
  iter a f;
  a

let create_identity n =
  create_init n n (fun a i j -> if i = j then setm a i j 1.0 else setm a i j 0.0)
    
let create_array m n a =  (* @todo should be called of_array? *)
  create_init m n (fun a' i j -> a'.data.{i*n+j} <- a.(i*n+j))

let copy_array src dst =
  let m = dst.height and
      n = dst.width in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      dst.data.{i*n+j} <- src.(i*n+j)
    done
  done;
  dst
    
let multiply a b =
  let m = a.height and
      n = a.width and 
      p = b.width in
  let c = create_matrix m p in
  for i = 0 to m - 1 do
    for j = 0 to p - 1 do
      let t = ref 0. in
      for k = 0 to n - 1 do
        t := !t +. a.data.{i*n+k} *. b.data.{k*p+j}
      done;
      c.data.{i*p+j} <- !t
    done
  done;
  c

let multiplyt a b =
  let m = a.height and
      n = a.width and 
      p = b.width in
  let c = create_matrix m p in
  for i = 0 to m - 1 do
    for j = 0 to p - 1 do
      let t = ref 0. in
      for k = 0 to n - 1 do
        t := !t +. a.data.{k*n+i} *. b.data.{k*p+j}
      done;
      c.data.{i*p+j} <- !t
    done
  done;
  c

let transpose a =
  let m = a.height and
      n = a.width in
  let a' = create_matrix n m in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      a'.data.{j*m+i} <- a.data.{i*n+j}
    done
  done;
  a'

(* @lame why are these _matrix...scoping when open'ed, ugh *)
let negate_matrix a =
  let m = a.height and
      n = a.width in
  let a' = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      a'.data.{i*n+j} <-  -. a.data.{i*n+j}
    done
  done;
  a'

let abs_matrix a =
  let m = a.height and
      n = a.width in
  let a' = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      a'.data.{i*n+j} <-  abs_float a.data.{i*n+j}
    done
  done;
  a'

let inf_normv v =  (* @todo write for both matrices and vectors, and other norms *)
  let vmax = ref 0.0 in
  let n = v.height in
  assert (v.width = 1);
  for i = 0 to n - 1 do
    let vi = abs_float v.data.{i} in
    if !vmax < vi then
      vmax := vi
  done;
  !vmax

let dot_vectors a b =
  let s = ref 0.0 in
  let n = a.height in
  assert (a.width = 1 && b.width = 1 && a.height = b.height);
  for i = 0 to n - 1 do
    s := !s +. a.data.{i} *. b.data.{i}
  done;
  !s
  
let two_normv v = sqrt (dot_vectors v v)
  
let comp_divide a b =
  let m = a.height and
      n = a.width in
  let c = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      c.data.{i*n+j} <- a.data.{i*n+j} /. b.data.{i*n+j}
    done
  done;
  c
let max_vector a b =
  let m = a.height and
      n = a.width in
  let c = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      c.data.{i*n+j} <- max a.data.{i*n+j} b.data.{i*n+j}
    done
  done;
  c


    
let add_matrix a b =
  let m = a.height and
      n = a.width in
  let c = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      c.data.{i*n+j} <- a.data.{i*n+j} +. b.data.{i*n+j}
    done
  done;
  c

let saxpy a x y =  (* @todo not named correctly...use BLAS names *)
  let m = x.height and
      n = x.width in
  assert (y.height = m && y.width = n);
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      y.data.{i*n+j} <- y.data.{i*n+j} +. a *. x.data.{i*n+j}
    done
  done
  
let sub_matrix a b =
  let m = a.height and
      n = a.width in
  let c = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      c.data.{i*n+j} <- a.data.{i*n+j} -. b.data.{i*n+j}
    done
  done;
  c
    
let scale_matrix s a =
  let m = a.height and
      n = a.width in
  let c = create_matrix m n in
  for i = 0 to m - 1 do
    for j = 0 to n - 1 do
      c.data.{i*n+j} <- s *. a.data.{i*n+j}
    done
  done;
  c
    
let back_substitute r b =
  (* @todo compatible? *)
  (* @todo r needs to be square? *)
  let m = r.height and
      n = r.width in
  let x = create_matrix m 1 in
  for i = m - 1 downto 0 do
    x.data.{i} <- b.data.{i};
    for j = m - 1 downto i + 1 do
      x.data.{i} <- x.data.{i} -. r.data.{i*n+j} *. x.data.{j}
    done;
    x.data.{i} <- x.data.{i} /. r.data.{i*n+i};
  done;
  x
    

    
let fprint fmt sci ?prec a =
  Format.fprintf fmt "@[<2>[|";
  let nint,nfrac =
    match prec,sci with
      None,true -> 20,6
    | None,false -> 12,6
    | Some(i,f),_ -> i,f
  in
  for i = 0 to a.height - 1 do
    for j = 0 to a.width - 1 do
      if sci then
        Format.fprintf fmt "%*.*e;" nint nfrac a.data.{i*a.width+j}
      else
        Format.fprintf fmt "%*.*f;" nint nfrac a.data.{i*a.width+j}
    done;
    if i < a.height - 1 then
      Format.pp_force_newline fmt ();
  done;
  Format.fprintf fmt "  |]@]"
let print ?prec m = fprint Format.std_formatter false ?prec m
let printe ?prec m = fprint Format.std_formatter true ?prec m
let print_noprec m = fprint Format.std_formatter false m

let fspy fmt a =
  Format.fprintf fmt "@[   ";
  for j = 0 to a.width - 1 do
    if j > 9 then
      Format.fprintf fmt "%d" (j/10)
    else
      Format.fprintf fmt " "
  done;
  Format.fprintf fmt "@\n   ";
  for j = 0 to a.width - 1 do
    Format.fprintf fmt "%d" (j mod 10)
  done;
  Format.pp_force_newline fmt ();
  for i = 0 to a.height - 1 do
    Format.fprintf fmt "%2d " i;
    for j = 0 to a.width - 1 do
      if getm a i j <> 0.0 then
        Format.fprintf fmt "X"
      else
        Format.fprintf fmt " "
    done;
    if i < a.height - 1 then
      Format.pp_force_newline fmt ();
  done;
  Format.fprintf fmt "@]"
let spy m = fspy Format.std_formatter m
    
let fprint_ba1d fmt a =
  let n = Bigarray.Array1.dim a in
  Format.fprintf fmt "@[<2>[|";
  for i = 0 to n - 1 do
    Format.fprintf fmt "%12.6f;\t@," a.{i};
  done;
  Format.fprintf fmt "|]@]"
let print_ba1d m = fprint_ba1d Format.std_formatter m

let fprint_bapivot fmt a =
  let n = Bigarray.Array1.dim a in
  Format.fprintf fmt "@[<2>[|";
  for i = 0 to n - 1 do
    Format.fprintf fmt "%10d;\t@," a.{i};
  done;
  Format.fprintf fmt "|]@]"
let print_bapivot m = fprint_bapivot Format.std_formatter m

let printn s a =
  Format.printf "@\n%s =@\n" s;
  print a
    
let dot_columns a j b jj =
  let m = a.height and
      an = a.width and
      bn = b.width and
      s = ref 0. in
  for i = 0 to m - 1 do
    s := !s +. (a.data.{i*an+j} *. b.data.{i*bn+jj})
  done;
  !s
    
let mgs a = (* modified gram-schmidt QR factorization *)
  let m = a.height and
      n = a.width in
  let q = create_matrix m n and
      r = create_matrix n n in
  copy a q;     (* @todo I don't think I need to copy *)
  for k = 0 to n - 1 do
    let r_kk = sqrt (dot_columns q k q k) in
    let ir_kk = 1. /. r_kk in
    r.data.{k*n+k} <- r_kk;
    for i = 0 to m - 1 do
      q.data.{i*n+k} <- ir_kk *. q.data.{i*n+k}
    done;
    for j = k + 1 to n - 1 do
      let r_kj = dot_columns q k q j in
      r.data.{k*n+j} <- r_kj;
      for i = 0 to m - 1 do
        q.data.{i*n+j} <- q.data.{i*n+j} -. r_kj *. q.data.{i*n+k}
      done
    done;
    for j = 0 to k - 1 do
      r.data.{k*n+j} <- 0.0
    done
  done;
  (q,r)

(* 
let householder a =
  let m = nth_dim a 0 and
      n = nth_dim a 1 in
  let qr = create float32 c_layout [|m;n|] and
      x = create float32 c_layout [|m|] in
  blit a qr;
  for k = 0 to n - 1 do
    for j = k to m - 1 do
 *)      
    
let qr = mgs


let solve a b =
  let q,r = qr a in
  back_substitute r (multiplyt q b)
    
let solveqr (q,r) b =
  back_substitute r (multiplyt q b)

let inverse a =
  assert (a.width = a.height);
  let n = a.width in
  let q,r = qr a in
  let q' = transpose q in
  for i = 0 to n - 1 do
    let b = get_submatrix q' 0 i n 1 in
    let x = back_substitute r b in
    set_submatrix q' 0 i x;
  done;
  q'
    
let lu_decompose a'' =
  (* @todo *) let a' = create_matrix (a''.width) (a''.width) in
  copy a'' a';
  
  let n = a'.width and 
      a = a'.data in

  let pivot = create_pivot n in (* @todo pass in *)

  (* @todo *)
  let l' = create_matrix ~init:0.0 n n in
  let l = l'.data in

  for i = 0 to n - 1 do
    (* search for largest pivot row *)
    let pmax = ref 0.0 in
    let pidx = ref 0 in
    for ii = i to n - 1 do
(* @todo       let p = abs_float a.{pivot.{ii}*n+i} in *)
      let p = abs_float a.{ii*n+i} in
      if p > !pmax then
        pidx := ii;
    done;
    (* swap the rows *)
(*    let pi = pivot.{!pidx} in
    pivot.{!pidx} <- pivot.{i}; *)
    let pi = !pidx in
    pivot.{i} <- !pidx; 
    (* @todo now really swap the rows *)
    for j = 0 to n - 1 do
      let t = a.{pi*n+j} in
      a.{pi*n+j} <- a.{i*n+j};
      a.{i*n+j} <- t;
    done;
    let pi = i in

    (* invert the pivot *)
    let m = 1.0 /. a.{pi*n+i} in
    l.{i*n+i} <- a.{pi*n+i};
    a.{pi*n+i} <- 1.0;

    (* do the rest of the pivot row *)
    for j = i + 1 to n - 1 do
      a.{pi*n+j} <- m *. a.{pi*n+j};
    done;

    (* eliminate the other rows *)
    for ii = i + 1 to n - 1 do
      (* @todo      let pii = pivot.{ii} in *)
      let pii = ii in
      let mm = a.{pii*n+i} in
      l.{ii*n+i} <- mm;
      a.{pii*n+i} <- 0.0;
      for j = i + 1 to n - 1 do
        a.{pii*n+j} <- a.{pii*n+j} -. mm *. a.{pi*n+j}
      done;
    done;
  done;
  l',a'
    
    

let print_all a =
  printn "a" a;
  printn "a'" (transpose a);
  printn "a'a" (multiply (transpose a) a);
  Printf.printf "\n";
  for i = 0 to 1 do
    for j = 0 to 1 do
       Printf.printf "a'a(%d,%d) = %f\n" i j (dot_columns a i a j)
    done
  done;   
  let (q,r) = qr a in
  printn "q" q;
  printn "r" r;
  printn "qr" (multiply q r);
  printn "q'q" (multiply (transpose q) q);
  printn "q'a" (multiply (transpose q) a);
  ()
    
let test () =
  let a = create_array 3 2 [| 1.;        5.;    5.; 
                              -.7.;      3.;    3.; |] in
  print_all a;
  let a = create_array 3 2 [|1.;        1.;
                             1e-3;      0.;
                             0.;        1e-3; |] in
  print_all a;
  let a'a = multiply (transpose a) a in
  print_all a'a

  (*
  let a = create_init 2 3 (fun a i j -> set a [|i;j|] (if i = 0 && j = 0 then 1. else 0.)) and
      b = create_init 3 2 (fun a i j -> set a [|i;j|] (if i = 0 && j = 1 then 1. else 0.)) in
  print "a" a;
  print "b" b;
  print "ab" (multiply a b)
   *)

let test_axb () =
  let a = create_array 2 2 [|   1.;     5.; 
                              -.7.;     3.; |]
  in
  let b = create_array 2 1 [| 2.5;
                              1.5; |]
  in
  let q,r = qr a in
  let x = back_substitute r (multiply (transpose q) b) in
  print x
  
(* blas *)
(* documentation:
   - comment above function
   - A' = transpose A
   - A` = inverse A
 *)
(* @todo
   - should these return dests?
   - unit tests!
 *)
(* @todo what the fuck, should I use bigarrays or built-in arrays? *)
type blas_codes = [
    `N (* No-transpose 
          Non-unit triangular *)
  | `T (* Transpose *)
  | `U (* Unit triangular
          Upper triangular *)
  | `L (* Lower triangular
          Left *)
  | `R (* Right *)
  ] 

(* dscal: x <- alpha*x *)
let dscal
    alpha {height=m;width=n;data=x}
    =
  (* @todo this is a "level 3" version of dscal since it does a matrix? *)
  (* @todo should I assume packed and just do a single loop? *)
  (* @todo missing incx...figure out plan for submatrices and strides *)
  let tot = m*n in
  for i = 0 to tot - 1 do
    x.{i} <- alpha *. x.{i};
  done

(* daxpy: y <- alpha*x + y *)
let daxpy
    alpha {height=xm;width=xn;data=x} {height=ym;width=yn;data=y}
    =
  (* @todo incs *)
  (* @todo optimize alpha = 1.0? *)
  assert (xm = ym && xn = yn);
  let tot = xm*xn in
  for i = 0 to tot - 1 do
    y.{i} <- alpha *. x.{i} +. y.{i};
  done
(* daxpyz: z <- alpha*x + y
   @todo is there a real blas version of this non-updating function? *)
let daxpyz
    alpha {height=xm;width=xn;data=x} {height=ym;width=yn;data=y} {height=zm;width=zn;data=z}
    =
  (* @todo incs *)
  (* @todo optimize alpha = 1.0? *)
  assert (xm = ym && xn = yn);
  assert (xm = zm && xn = zn);
  let tot = xm*xn in
  for i = 0 to tot - 1 do
    z.{i} <- alpha *. x.{i} +. y.{i};
  done

(* dgemv: y <- alpha*op(A)*x + beta*y, op(A) = A,A' *)
let dgemv
    trans alpha {height=am;width=an;data=a}
    {height=xm;width=xn;data=x}
    beta {height=ym;width=yn;data=y}
    =
  (* @todo what about m, n, lda, ldb, incx, incy  parms? submatrices!?! *)
  (* @todo should this just be dgemm? why bother with this, optimization? *)
  assert (xn = 1 && xm = ym && yn = 1);
  assert (xm = (match trans with `N -> an | `T -> am));
  for i = 0 to ym - 1 do
    y.{i} <- beta *. y.{i};
    if alpha <> 0.0 then
      match trans with
        `N ->
          for j = 0 to an - 1 do
            y.{i} <- y.{i} +. alpha *. a.{i*an+j} *. x.{j};
          done
      | `T ->
          for j = 0 to am - 1 do
            y.{i} <- y.{i} +. alpha *. a.{j*an+i} *. x.{j};
          done
  done
          
(* dtrsv: x <- op(T)*x, op(T) = T`, T`', T upper/lower [non-]unit triangular *)
let dtrsv
    uplo trans diag
    {height=tm;width=tn;data=t}
    {height=xm;width=xn;data=x}
    =
  (* @todo what am I doing writing all these things I don't need? *)
  (* @todo just write what I need for now *)
  (* @todo do I want tail recursive functions for these loops? incx... *)
  assert (xn = 1 && tn = xm);
  match uplo with
    `U ->
      for i = tm - 1 downto 0 do
        let xi = ref x.{i} in
        for j = tm - 1 downto i + 1 do
          xi := !xi -. t.{i*tn+j} *. x.{j}
        done;
        x.{i} <- !xi;
        match diag with
          `N -> x.{i} <- x.{i} /. t.{i*tn+i}
        | `U -> ()
      done
  | `L ->
      for i = 0 to tm - 1 do
        let xi = ref x.{i} in
        for j = 0 to i - 1 do
          xi := !xi -. t.{i*tn+j} *. x.{j}
        done;
        x.{i} <- !xi;
        match diag with
          `N -> x.{i} <- x.{i} /. t.{i*tn+i}
        | `U -> ()
      done