Just a collection of (often poor) scripts to do various bits of bioinformatics stuff I find myself needing to do.
*Disclaimer, not all of these were written by me, they just happen to be useful. Some are based on examples available online and modified. Some are also not actually functional scripts - just little reminders of commands I've used that did something useful (often long one liners I might want again).
Some One liners (shamelessly stolen from https://github.com/onceupon/Bash-Oneliner/blob/master/README.md)
##Handy Bash oneliner commands for tsv file editing
##Grep #####extract text between words (e.g. w1,w2)
grep -o -P '(?<=w1).*(?=w2)'#####grep lines without word (e.g. bbo)
grep -v bbo filename#####grep only one/first match (e.g. bbo)
grep -m 1 bbo filename#####grep and count (e.g. bbo)
grep -c bbo filename#####insensitive grep (e.g. bbo/BBO/Bbo)
grep -i "bbo" filename #####count occurrence (e.g. three times a line count three times)
grep -o bbo filename #####COLOR the match (e.g. bbo)!
grep --color bbo filename #####grep search all files in a directory(e.g. bbo)
grep -R bbo /path/to/directory or
grep -r bbo /path/to/directory #####search all files in directory, only output file names with matches(e.g. bbo)
grep -Rl bbo /path/to/directory or
grep -rl bbo /path/to/directory #####grep OR (e.g. A or B or C or D)
grep 'A\|B\|C\|D'
#####grep AND (e.g. A and B)
grep 'A.*B' #####grep all content of a fileA from fileB
grep -f fileA fileB #####grep a tab
grep $'\t' #####grep variable from variable
$echo "$long_str"|grep -q "$short_str"
if [ $? -eq 0 ]; then echo 'found'; fi//grep -q prints nothing and exits with status 0 if a match is found //remember to add spaces inside the [ ]!
#####grep strings between a bracket()
grep -oP '\(\K[^\)]+'#####grep number of characters with known strings in between(e.g. AAEL000001-RA)
grep -o -w "\w\{10\}\-R\w\{1\}"// \w word character [0-9a-zA-Z_] \W not word character
#####a lot examples here http://www.cyberciti.biz/faq/grep-regular-expressions/
##Sed [back to top]
#####remove lines with word (e.g. bbo)
sed "/bbo/d" filename#####edit infile (edit and save)
sed -i "/bbo/d" filename#####when using variable (e.g. $i), use double quotes " " e.g. add >$i to the first line (to make a FASTA file)
sed "1i >$i" //notice the double quotes! in other examples, you can use a single quote, but here, no way! //'1i' means insert to first line
#####delete empty lines
sed '/^\s*$/d' or
sed '/^$/d' #####delete last line
sed '$d' #####delete last character from end of file
sed -i '$ s/.$//' filename#####add string to end of file (e.g. "]")
sed '$s/$/]/' filename#####add string to beginning of every line (e.g. bbo)
sed -e 's/^/bbo/' file#####add string to end of each line (e.g. "}")
sed -e 's/$/\}\]/' filename#####add \n every nth character (e.g. every 4th character)
sed 's/.\{4\}/&\n/g' #####concatenate/combine/join files with a separator and next line (e.g. separate by ",")
sed -s '$a,' *.json > all.json#####substitution (e.g. replace A by B)
sed 's/A/B/g' filename #####select lines start with string (e.g. bbo)
sed -n '/^@S/p' #####delete lines with string (e.g. bbo)
sed '/bbo/d' filename #####print every nth lines
sed -n '0~3p' filename//catch 0: start; 3: step
#####print every odd # lines
sed -n '1~2p' #####print every third line including the first line
sed -n '1p;0~3p' #####remove leading whitespace and tabs
sed -e 's/^[ \t]*//'//notice a whitespace before '\t'!!
#####remove only leading whitespace
sed 's/^ *//'//notice a whitespace before '*'!!
#####remove ending commas
sed 's/,$//g' #####add a column to the end
sed "s/$/\t$i/"//$i is the variable you want to add e.g. add the filename to every last column of the file
for i in $(ls);do sed -i "s/$/\t$i/" $i;done#####add extension of filename to last column
for i in T000086_1.02.n T000086_1.02.p;do sed "s/$/\t${i/*./}/" $i;done >T000086_1.02.np#####remove newline\ nextline
sed ':a;N;$!ba;s/\n//g'#####print a number of lines (e.g. line 10th to line 33 rd)
sed -n '10,33p' <filename#####change delimiter
sed 's=/=\\/=g'#####replace with wildcard (e.g A-1-e or A-2-e or A-3-e....)
sed 's/A-.*-e//g' filename#Awk [back to top]
#####set tab as field separator
awk -F $'\t' #####output as tab separated (also as field separator)
awk -v OFS='\t' #####pass variable
a=bbo;b=obb;
awk -v a="$a" -v b="$b" '$1==a && $10==b' filename #####print number of characters on each line
awk '{print length ($0);}' filename #####find number of columns
awk '{print NF}' #####reverse column order
awk '{print $2, $1}' #####check if there is a comma in a column (e.g. column $1)
awk '$1~/,/ {print}' #####split and do for loop
awk '{split($2, a,",");for (i in a) print $1"\t"a[i]}' filename #####print all lines before nth occurrence of a string (e.g. stop printing lines when bbo appears 7 times)
awk -v N=7 '{print}/bbo/&& --N<=0 {exit}'#####print filename and last line of all files in directory
ls|xargs -n1 -I file awk '{s=$0};END{print FILENAME,s}' file#####add string to the beginning of a column (e.g. add "chr" to column $3)
awk 'BEGIN{OFS="\t"}$3="chr"$3' #####remove lines with string (e.g. bbo)
awk '!/bbo/' file #####column subtraction
cat file| awk -F '\t' 'BEGIN {SUM=0}{SUM+=$3-$2}END{print SUM}'#####usage and meaning of NR and FNR e.g. fileA: a b c fileB: d e
awk '{print FILENAME, NR,FNR,$0}' fileA fileB
//output: fileA 1 1 a, fileA 2 2 b, fileA 3 3 c, fileB 4 1 d, fileB 5 2 e (one per line)
#####and gate
e.g. fileA: 1 0
2 1
3 1
4 0
fileB:
1 0
2 1
3 0
4 1
awk -v OFS='\t' 'NR==FNR{a[$1]=$2;next} NF {print $1,((a[$1]==$2)? $2:"0")}' fileA fileB
1 0
2 1
3 0
4 0
#####round all numbers of file (e.g. to 2 decimal places)
awk '{while (match($0, /[0-9]+\.[0-9]+/)){
printf "%s%.2f", substr($0,1,RSTART-1),substr($0,RSTART,RLENGTH)
$0=substr($0, RSTART+RLENGTH)
}
print
}'#####give number/index to every row
awk '{printf("%s\t%s\n",NR,$0)}'#####break combine column data into rows
e.g. seperate
David cat,dog
into
David cat
David dog
detail here: http://stackoverflow.com/questions/33408762/bash-turning-single-comma-separated-column-into-multi-line-string
awk '{split($2,a,",");for(i in a)print $1"\t"a[i]}' file#####sum up a file (each line in file contains only one number)
awk '{s+=$1} END {print s}' filename#####average a file (each line in file contains only one number)
awk '{s+=$1}END{print s/NR}'#####print field start with string (e.g Linux)
awk '$1 ~ /^Linux/'#####sort a row (e.g. 1 40 35 12 23 --> 1 12 23 35 40)
awk ' {split( $0, a, "\t" ); asort( a ); for( i = 1; i <= length(a); i++ ) printf( "%s\t", a[i] ); printf( "\n" ); }'##Xargs [back to top]
#####set tab as delimiter (default:space)
xargs -d '\t'#####display 3 items per line
echo 1 2 3 4 5 6| xargs -n 3//1 2 3 4 5 6
#####prompt before execution
echo a b c |xargs -p -n 3#####print command along with output
xargs -t abcd///bin/echo abcd //abcd
#####with find and rm
find . -name "*.html"|xargs rm -rf
#####delete files with whitespace in filename (e.g. "hello 2001")
find . -name "*.c" -print0|xargs -0 rm -rf#####show limits
xargs --show-limits#####move files to folder
find . -name "*.bak" -print0|xargs -0 -I {} mv {} ~/old
or
find . -name "*.bak" -print0|xargs -0 -I file mv file ~/old#####move the first 100 files to a directory (e.g. d1)
ls |head -100|xargs -I {} mv {} d1#####parallel
time echo {1..5} |xargs -n 1 -P 5 sleep
a lot faster than
time echo {1..5} |xargs -n1 sleep#####copy all files from A to B
find /dir/to/A -type f -name "*.py" -print0| xargs -0 -r -I file cp -v -p file --target-directory=/path/to/B//v: verbose| //p: keep detail (e.g. owner)
#####with sed
ls |xargs -n1 -I file sed -i '/^Pos/d' file#####add the file name to the first line of file
ls |sed 's/.txt//g'|xargs -n1 -I file sed -i -e '1 i\>file\' file.txt#####count all files
ls |xargs -n1 wc -l#####to filter txt to a single line
ls -l| xargs#####count files within directories
echo mso{1..8}|xargs -n1 bash -c 'echo -n "$1:"; ls -la "$1"| grep -w 74 |wc -l' --// "--" signals the end of options and display further option processing
#####download dependencies files and install (e.g. requirements.txt)
cat requirements.txt| xargs -n1 sudo pip install#####count lines in all file, also count total lines
ls|xargs wc -l#####xargs and grep
cat grep_list |xargs -I{} grep {} filename##Find [back to top] #####list all sub directory/file in the current directory
find .#####list all files under the current directory
find . -type f#####list all directories under the current directory
find . -type d#####edit all files under current directory (e.g. replace 'www' with 'w')
find . -name '*.php' -exec sed -i 's/www/w/g' {} \;
if no subdirectory
replace "www" "w" -- *//a space before *
#####find and output only filename (e.g. "mso")
find mso*/ -name M* -printf "%f\n"#####find and delete file with size less than (e.g. 74 byte)
find . -name "*.mso" -size -74c -delete//M for MB, etc
##Loops [back to top] #####while loop, column subtraction of a file (e.g. a 3 columns file)
while read a b c; do echo $(($c-$b));done < <(head filename)//there is a space between the two '<'s
#####while loop, sum up column subtraction
i=0; while read a b c; do ((i+=$c-$b)); echo $i; done < <(head filename)#####if loop
if (($j==$u+2))//(( )) use for arithmetic operation
if [[ $age -gt 21 ]]//[[ ]] use for comparison; spaces are required inside the brackets, and -gt compares numbers
#####test if file exist
if [ -e $DF ]
then
echo -e "file exists!"
fi#####for loop
for i in $(ls); do echo file $i;done##Download [back to top] #####download all from a page
wget -r -l1 -H -t1 -nd -N -np -A mp3 -e robots=off http://example.com//-r: recursive and download all links on page
//-l1: only one level link
//-H: span host, visit other hosts
//-t1: numbers of retries
//-nd: don't make new directories, download to here
//-N: turn on timestamp
//-np: no parent
//-A: type (separate by ,)
//-e robots=off: ignore the robots.txt file which stop wget from crashing the site, sorry example.com
##Random [back to top] #####random pick 100 lines from a file
shuf -n 100 filename#####random order (lucky draw)
for i in a b c d e; do echo $i; done| shuf#####echo series of random numbers between a range (e.g. generate 15 random numbers from 0-10)
shuf -i 0-10 -n 15#####echo a random number
echo $RANDOM#####random from 0-9
echo $((RANDOM % 10))#####random from 1-10
echo $(((RANDOM %10)+1))##Others [back to top] #####remove newline / nextline
tr --delete '\n' <input.txt >output.txt#####replace newline
tr '\n' ' ' <filename#####compare files (e.g. fileA, fileB)
diff fileA fileB//a: added; d:delete; c:changed
or
sdiff fileA fileB//side-to-side merge of file differences
#####number a file (e.g. fileA)
nl fileA
or
nl -nrz fileA//add leading zeros
#####combine/ paste two files (e.g. fileA, fileB)
paste fileA fileB//default tab seperated
#####reverse string
echo 12345| rev#####read .gz file without extracting
zmore filename
or
zless filename#####run in background, output error file
some_commands &>log &
or
some_commands 2>log &
or
some_commands 2>&1| tee logfile
or
some_commands 2>&1 >>outfile//0: standard input; 1: standard output; 2: standard error
#####send mail
echo 'heres the content'| mail -A 'file.txt' -s 'mail.subject' me@gmail.com//use -a flag to set send from (-a "From: some@mail.tld")
#####.xls to csv
xls2csv filename#####append to file (e.g. hihi)
echo 'hihi' >>filename#####make BEEP sound
speaker-test -t sine -f 1000 -l1#####set beep duration
(speaker-test -t sine -f 1000) & pid=$!;sleep 0.1s;kill -9 $pid#####history edit/ delete
~/.bash_historyor
history -d [line_number]#####get last history/record filename
head !$#####clean screen
clearor
Ctrl+l#####send data to last edited file
cat /directory/to/file
echo 100>!$#####run history number (e.g. 53)
!53#####run last command
!!#####run last command that began with (e.g. cat filename)
!cator
!c//run cat filename again
#####extract .xz
1.unxz filename.tar.xz
2.tar -xf filename.tar
#####install python package
pip install packagename#####Download file if necessary
data=file.txt
url=http://www.example.com/$data
if [ ! -s $data ];then
echo "downloading test data..."
wget $url
fi#####wget to a filename (when a long name)
wget -O filename "http://example.com"#####wget files to a folder
wget -P /path/to/directory "http://example.com"#####delete current bash command
Ctrl+Uor
Ctrl+Cor
Alt+Shift+#//to make it to history
#####add things to history (e.g. "addmetohistory")
#addmetohistory//just add a "#" before~~
#####sleep awhile or wait for a moment or schedule a job
sleep 5;echo hi#####count the time for executing a command
time echo hi#####backup with rsync
rsync -av filename filename.bak
rsync -av directory directory.bak
rsync -av --ignore-existing directory/ directory.bak
rsync -av --update directory directory.bak//skip files that are newer on receiver (i prefer this one!)
#####make all directories at one time!
mkdir -p project/{lib/ext,bin,src,doc/{html,info,pdf},demo/stat}//-p: make parent directory //this will create project/doc/html/; project/doc/info; project/lib/ext ,etc
#####run command only if another command returns zero exit status (well done)
cd tmp/ && tar xvf ~/a.tar#####run command only if another command returns non-zero exit status (not finish)
cd tmp/a/b/c ||mkdir -p tmp/a/b/c#####extract to a path
tar xvf filename.gz -C /path/to/directory#####use backslash "\" to break long command
cd tmp/a/b/c \
> || \
>mkdir -p tmp/a/b/c#####get pwd
VAR=$PWD; cd ~; tar xvf file.tar -C $VAR//PWD need to be capital letter
#####list file type of file (e.g. /tmp/)
file /tmp///tmp/: directory
#####bash script
#!/bin/bash
file=${1#*.}//remove string before a "."
file=${1%.*}//remove string after a "."
#####search from history
Ctrl+r#####python simple HTTP Server
python -m SimpleHTTPServer#####variables
${i/a/,}//for variable i, replace the first 'a' with a comma
${i//a/,}//for variable i, replace all 'a' with a comma
#####read user input
read input
echo $input#####generate sequence 1-10
seq 10#####sum up input list (e.g. seq 10)
seq 10|paste -sd+|bc#####find average of input list/file
i=`wc -l filename|cut -d ' ' -f1`; cat filename| echo "scale=2;(`paste -sd+`)/"$i|bc#####generate all combination (e.g. 1,2)
echo {1,2}{1,2}//1 1, 1 2, 2 1, 2 2
#####generate all combination (e.g. A,T,C,G)
set={A,T,C,G}
group=5
for ((i=0; i<$group; i++));do
repetition=$set$repetition;done
bash -c "echo "$repetition""#####read file content to variable
foo=$(<test1)#####echo size of variable
echo ${#foo}#####echo tab
echo -e ' \t '#####array
declare -A array=()#####send a directory
scp -r directoryname user@ip:/path/to/send#####split file into lines (e.g. 1000 lines/smallfile)
$ split -d -l 1000 bigfilename#####rename all files (e.g. remove ABC from all .gz files)
rename 's/ABC//' *.gz#####remove extension (e.g. remove .gz from filename.gz)
basename filename.gz .gz
zcat filename.gz> $(basename filename.gz .gz).unpacked#####use the squeeze repeat option (e.g. \t\t --> \t)
tr -s "\t" < filename#####do not print nextline with echo
echo -e 'text here \c'#####use the last argument
!$#####check last exit code
echo $?##System [back to top]
#####snapshot of the current processes
ps #####check graphics card
lspci#####show IP address
ip addr show
or
ifconfig#####check system version
cat /etc/*-release#####Linux Programmer's Manual: hier- description of the filesystem hierarchy
man hier#####list job
jobs -l#####export PATH
export PATH=$PATH:~/path/you/want#####make file executable
chmod +x filename//you can now ./filename to execute it
#####list screen
screen -d -r#####echo screen name
screen -ls#####check system (x86-64)
uname -i#####surf the net
links www.google.com#####add user, set passwd
useradd username
passwd username#####edit variable for bash, (e.g. displaying the whole path)
1. joe ~/.bash_profile
2. export PS1='\u@\h:\w\$' //$PS1 is a variable that defines the makeup and style of the command prompt
3. source ~/.bash_profile#####edit environment setting (e.g. alias)
1. joe ~/.bash_profile
2. alias pd="pwd" //no more need to type that 'w'!
3. source ~/.bash_profile#####list environment variables (e.g. PATH)
$echo $PATH//list of directories separated by a colon
#####list all environment variables for current user
$env#####show partition format
lsblk#####soft link program to bin
ln -s /path/to/program /home/usr/bin//must be the whole path to the program
#####show hexadecimal view of data
hexdump -C filename.class#####jump to different node
rsh node_name#####check port (active internet connection)
netstat -tulpn#####find out where a symbolic link points to
readlink filename#####check where a command link to (e.g. python)
which python#####list total size of a directory
du -hs .or
du -sb#####copy directory with permission setting
cp -rp /path/to/directory#####store current directory
pushd . $popd ;dirs -l #####show disk usage
df -h or
du -h or
du -sk /var/log/* |sort -rn |head -10#####show current runlevel
runlevel#####switch runlevel
init 3 or
telinit 3 #####permanently modify runlevel
1. edit /etc/init/rc-sysinit.conf
2. env DEFAULT_RUNLEVEL=2 #####become root
su#####become somebody
su somebody#####report user quotas on device
repquota -auvs#####get entries in a number of important databases
getent database_name(e.g. the 'passwd' database)
getent passwd//list all user account (all local and LDAP) (e.g. fetch list of group accounts)
getent group//store in database 'group'
#####little xwindow tools
xclock
xeyes#####change owner of file
chown user_name filename
chown -R user_name /path/to/directory///chown user:group filename
#####list current mount detail
df#####list current usernames and user-numbers
cat /etc/passwd#####get all username
getent passwd| awk -F: '{print $1}'#####show all users
compgen -u#####show all groups
compgen -g#####show group of user
groups username#####show uid, gid, group of user
id username#####check if it's root
if [ $(id -u) -ne 0 ];then
echo "You are not root!"
exit;
fi//'id -u' outputs 0 if it's root
#####find out CPU information
more /proc/cpuinfoor
lscpu#####set quota for user (e.g. disk soft limit: 120586240; hard limit: 125829120)
setquota username 120586240 125829120 0 0 /home#####show quota for user
quota -v username#####fork bomb
:(){:|:&};://dont try this at home
#####check user login
lastlog#####edit path for all users
joe /etc/environment//edit this file
#####show running processes
ps aux#####find maximum number of processes
cat /proc/sys/kernel/pid_max#####show and set user limit
ulimit -u#####which ports are listening for TCP connections from the network
nmap -sT -O localhost#####print out number of cores/ processors
nproc --all#####check status of each core
- top
- press '1'
#####show jobs and PID
jobs -l#####list all running services
service --status-all#####schedule shutdown server
shutdown -r +5 "Server will restart in 5 minutes. Please save your work."#####cancel scheduled shutdown
shutdown -c#####broadcast to all users
wall -n hihi#####enable X11 forwarding,in order to use graphical application on servers
ssh -X user_name@ip_addressor setting through xhost
#####open pictures from ssh server
ssh -X user_name@ip_address
eog picture.png#####kill all process of a user
pkill -U user_name=-=-=-=-=-A lot more coming!! =-=-=-=-=-=-=-=-=-=waitwait-=-=-=-=-=-=-=-=-=-