Subscribed unsubscribe Subscribe Subscribe

CPSAM.org

computer, programming, statistics and more!

GenbankID2orgname

EfetchでGenbankIDから生物種名を得たい場合

curl -s "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=CP012745&rettype=fasta&retmode=xml" |\
grep TSeq_orgname |\
cut -d '>' -f 2 |\
cut -d '<' -f 1 |\
tr -d "\n"
echo
Remove all ads

Manjaroでmxnet

git clone --recursive https://github.com/dmlc/mxnet cd mxnet; make -j$(nproc) install.packages("drat", repos="https://cran.rstudio.com") drat:::addRepo("dmlc") install.packages("mxnet")

Rscript -e "install.packages('devtools', repo = 'https://cran.rstudio.com')" cd R-package Rscript -e "library(devtools); library(methods); options(repos=c(CRAN='https://cran.rstudio.com')); install_deps(dependencies = TRUE)" cd .. make rpkg

R CMD INSTALL mxnet_0.5.tar.gz

Remove all ads

Fasta整形

cat data | sed -e '/^>/ s/$/</` | tr -d '\n' | sed -e s/>/\n/gp' | sed -e 's/>/\n/gp' > reference

cat data | sedにデータを渡す

sed -e '/^>/ s/$/</` | /^>/ 行頭に>のある行を選ぶ s/$/</ 行末に<を加える

tr -d '\n' "\n"を削除

sed -e 's/>/\n>/gp' ">"を"\n>"に置換

sed -d 's/</\n/gp' "<"を"\n"に置換

reference

Remove all ads

BWTその2

前回の分では長いリファレンスに対応できない いろいろ参考にして

library(stringr)

wheel <- function(x){
  L <- str_sub(x, 1, 1)
  R <- str_sub(x, 2, str_length(x))
  return(str_c(R, L))
}

blocksort <- function(x){
  SA <- c(x)
  for(i in 1:(str_length(x)-1)){
    SA[i+1] <- wheel(SA[i])
  }
  SA.sorted <- as.matrix(str_sort(SA), ncol=1)
  SA.order <- str_order(SA)
  BWT.char <- paste(str_sub(SA.sorted, -1), collapse="")
  BWT <- factor(str_sub(SA.sorted,-1))
  
  return(list(SA.sorted, SA.order, BWT.char, BWT))
}

C <- function(x){
  sum(FC<x)
}

LFC <- function(r, x){
  C(x) + sum(FC[1:r]==x)  
}

FC <- as.numeric(blocksort(x)[[4]]) # first culumn(right sided)
LC <- sort(FC) #last culumn(left sided)
ord <- blocksort(x)[[2]] #order vector

a <- t[length(t)] #last character of query
low <- C(a)+1 #most upsided # of row permitted for a
high <- C(a+1) #most downsided # of row permitted for a
i <- length(t)-1 #itereter

while(low<=high && i>=1){
  a <- t[i]
  low <- LFC(low-1, a)+1
  high <- LFC(high, a)
  i <- i-1
}
ord[c(low:high)]
Remove all ads

BWA

BWAアルゴリズムをRで実装してみる

require('stringr')

REF <- "CAGGTTGGTTGGTTGG"
reads <- c("AGGT", "GGTT", "TTCA")

wheel <- function(x){
  len <- str_length(x)
  L <- str_sub(x, 1, 1)
  R <- str_sub(x, 2, len)
  return(str_c(R, L))
}

myAline <- function(read, ref){

  tmp <- str_c(REF, "$")
  SA <- tmp

  for(i in 1:(str_length(tmp))-1){
    t <- wheel(SA[i])
    SA <- c(SA, t)
  }

  SA.trim <- (str_split_fixed(SA[str_order(SA)], "\\$", n=2))[,1]
  SA.add <- str_c(SA.trim, "$")

  hits <- list(seq(1:length(SA.add)))

  for(i in 1:str_length(read)){
    hit <- which(str_sub(SA.add[hits[[i]]], i, i)==str_sub(read, i, i))
    if(length(hit) < 1){
      break
    } else {
      hits[[i+1]] <- hits[[i]][hit]
    }
  }

  if(length(hit)<1){
    return(0)
  } else {
    hits.pos <- str_length(REF) - str_length(SA.add[hits[[str_length(read)]]]) + 2
    return(hits.pos)
  }

}

print(REF)

for(i in reads){
  print(c(i, myAline(i, REF)))
}
Remove all ads

牛丼問題の簡素化

require(rstan)

oneGroup <- "
  data {
    int n;
    vector[n] dat;
  }

  parameters {
    real mu;
    real<lower=0> sigma;
  }

  model {
    dat ~ normal(mu, sigma);
  }
"

set.seed(1)
x <- c(76.5,83.9,87.9,70.8,84.6,85.1,79.6,79.8,79.7,78.0)
don <- list(dat=x, n=length(x))

oneGroupMean <- stan(
  model_code = oneGroup,
  data = don,
  iter = 51000,
  warmup = 1000,
  thin = 1,
  chains = 4
)

]

summary(oneGroupMean)$summary

m <- summary(oneGroupMean)$summary[1,1]
s <- summary(oneGroupMean)$summary[2,1]
x.seq <- seq(60,100,1)

plot(x.seq, dnorm(x.seq, m, s))
par(new=T)
hist(x, xlim=c(60,100))
Remove all ads

hello rstan

rstanが動くかどうかの確認コード

library(inline) 
library(Rcpp)
src <- ' 
  std::vector<std::string> s; 
  s.push_back("hello");
  s.push_back("world");
  return Rcpp::wrap(s);
'
hellofun <- cxxfunction(body = src, includes = '', plugin = 'Rcpp', verbose = FALSE)
cat(hellofun(), '\n') 
Remove all ads