Subscribed unsubscribe Subscribe Subscribe

CPSAM.org

computer, programming, statistics and more!

BWTその2

前回の分では長いリファレンスに対応できない いろいろ参考にして

library(stringr)

wheel <- function(x){
  L <- str_sub(x, 1, 1)
  R <- str_sub(x, 2, str_length(x))
  return(str_c(R, L))
}

blocksort <- function(x){
  SA <- c(x)
  for(i in 1:(str_length(x)-1)){
    SA[i+1] <- wheel(SA[i])
  }
  SA.sorted <- as.matrix(str_sort(SA), ncol=1)
  SA.order <- str_order(SA)
  BWT.char <- paste(str_sub(SA.sorted, -1), collapse="")
  BWT <- factor(str_sub(SA.sorted,-1))
  
  return(list(SA.sorted, SA.order, BWT.char, BWT))
}

C <- function(x){
  sum(FC<x)
}

LFC <- function(r, x){
  C(x) + sum(FC[1:r]==x)  
}

FC <- as.numeric(blocksort(x)[[4]]) # first culumn(right sided)
LC <- sort(FC) #last culumn(left sided)
ord <- blocksort(x)[[2]] #order vector

a <- t[length(t)] #last character of query
low <- C(a)+1 #most upsided # of row permitted for a
high <- C(a+1) #most downsided # of row permitted for a
i <- length(t)-1 #itereter

while(low<=high && i>=1){
  a <- t[i]
  low <- LFC(low-1, a)+1
  high <- LFC(high, a)
  i <- i-1
}
ord[c(low:high)]
Remove all ads