Mar402 09:16:06 ~
$ less -S Data/example.gtf | awk '{print $9$10}' | head
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
gene_id"ENSG00000223972";
Mar402 09:17:16 ~
$ less -S Data/example.gtf | awk '{print $9,$10}' | head #打逗号默认是空格不是tab键
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
gene_id "ENSG00000223972";
$ less -S Data/example.gtf | awk -F '\t' '{print $9}' | less -S
$ less -S Data/example.gtf | awk '/UTR/{print $0}'| less -S # 查找有UTR的行,并输出出来
Mar402 09:26:49 ~
$ less -S Data/example.gtf | awk '/UTR/{print $7,$1,$2,$3,$6}'| head #可以更改输出列的顺序,这一点与cut不同(cut只能按原有顺序提取)
+ chr1 ENSEMBL UTR .
+ chr1 ENSEMBL UTR .
+ chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
+ chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
- chr1 ENSEMBL UTR .
# 用BEGIN在处理第一行之前输出 find UTR,用END在处理完所有行之后输出 end (pic2)
Mar402 09:26:55 ~
$ less -S Data/example.gtf | awk 'BEGIN{print "find UTR"} /UTR/{print $3,$4,$5} END{print "end"} '| less -SN
Mar402 09:43:19 ~
$ cat Data/example.gtf | awk '{print $3,$4,$5}' | head #原始
UTR 1737 2090
exon 1737 2090
transcript 1737 4275
gene 1737 4275
exon 1873 1920
transcript 1873 3533
exon 2042 2090
exon 2476 2560
UTR 2476 2584
exon 2476 2584
Mar402 09:42:22 ~
$ cat Data/example.gtf | awk 'BEGIN{OFS=":"} {print $3,$4,$5}' | head #用OFS把输出分隔符设为":",所以两处分隔符都变成":"
UTR:1737:2090
exon:1737:2090
transcript:1737:4275
gene:1737:4275
exon:1873:1920
transcript:1873:3533
exon:2042:2090
exon:2476:2560
UTR:2476:2584
exon:2476:2584
Mar402 09:43:31 ~
$ cat Data/example.gtf | awk '{print $3":"$4"-"$5}' | head #想让分隔符不一样
UTR:1737-2090
exon:1737-2090
transcript:1737-4275
gene:1737-4275
exon:1873-1920
transcript:1873-3533
exon:2042-2090
exon:2476-2560
UTR:2476-2584
exon:2476-2584
Mar402 09:48:51 ~
$ cat Data/example.gtf | awk '{print NR,$3":"$4"-"$5}' | head #加上NR列出当前是第几行
1 UTR:1737-2090
2 exon:1737-2090
3 transcript:1737-4275
4 gene:1737-4275
5 exon:1873-1920
6 transcript:1873-3533
7 exon:2042-2090
8 exon:2476-2560
9 UTR:2476-2584
10 exon:2476-2584
Mar402 12:20:29 ~ #第三列是否为gene,是的话打印一整行
$ cat Data/example.gtf | awk '{if($3=="gene"){print $0}}' | less -S
Mar402 12:28:16 ~ #for循环:i从1开始,只要i<4就输出$i(第i列),然后i++(i自增1);即把每行的第1~3列各占一行输出
$ less -S Data/example.gtf | awk '{for(i=1;i<4;i++){print $i}}' | less -S
Mar402 12:28:54 ~
$ less -S Data/example.gtf | awk '/exon/{print $5-$4}' | less -S
Mar402 09:13:30 ~
$ head Data/example.gtf | awk '{print $10,$12,$14}' | head | sed 's/"//g' | tr -d ';'
ENSG00000223972 ENST00000456328 protein_coding
ENSG00000223972 ENST00000456328 protein_coding
ENSG00000223972 ENST00000456328 protein_coding
ENSG00000223972 ENSG00000223972 protein_coding
ENSG00000223972 ENST00000450305 protein_coding
ENSG00000223972 ENST00000450305 protein_coding
ENSG00000223972 ENST00000450305 protein_coding
ENSG00000223972 ENST00000450305 protein_coding
ENSG00000223972 ENST00000456328 protein_coding
ENSG00000223972 ENST00000456328 protein_coding
----来自生信技能树----
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。