AWK 是一种解释执行的编程语言。它非常强大,专门为处理文本数据而设计。AWK 的名称来自其三位设计者姓氏的首字母 —— Alfred Aho、Peter Weinberger 与 Brian Kernighan。
学习awk能帮你处理哪些问题:
下面我们来回顾一下awk
awk -F':' '{print $1}' /etc/passwd
awk -f ./passwd.awk /etc/passwd
root@awk-pratics:~/workspace# awk -v name=marionxue 'BEGIN{printf "Name=%s\n",name}'
Name=marionxue
上面的部分代码是不是有些难以理解?下面我们会慢慢带你走进 awk,并且使用 awk 真正地处理一些问题:
root@awk-pratics:~/workspace# cat marks.txt
1) Amit 物理 80
2) Rahul 数学 90
3) Shyam 生物 87
4) Kaerda 英语 85
5) Hari 历史 89
root@awk-pratics:~/workspace# awk '{print $3 "\t" $4}' marks.txt #打印第三列和第四列
物理 80
数学 90
生物 87
英语 85
历史 89
# 通过模式匹配输出列,注意此处的输出顺序可以调整
root@awk-pratics:~/workspace# awk '/a/ {print $3 "\t" $4}' marks.txt
数学 90
生物 87
英语 85
历史 89
下面我们准备一些小案例
root@awk-pratics:~/workspace# awk '/a/{++cnt} END {print "Count=",cnt}' marks.txt
Count= 4
root@awk-pratics:~/workspace# awk '{print $3 "," "Length=",length($0)}' marks.txt
物理,Length= 14
数学,Length= 15 # length是awk中内置的函数
生物,Length= 15
英语,Length= 16
历史,Length= 14
root@awk-pratics:~/workspace# awk 'length($0)>15' marks.txt
4) Kaerda 英语 85
root@awk-pratics:~/workspace# awk 'BEGIN{print "Arguments=",ARGC}' One Two Three Four
Arguments= 5
# ARGV 的有效索引范围为 0 到 ARGC-1;注意下面的循环条件是 i < ARGC-1,因此只打印到 ARGV[3],最后一个参数 four 不会被输出
root@awk-pratics:~/workspace# awk 'BEGIN{for(i=0;i< ARGC-1;++i)
{printf "ARGV[%d]=%s\n",i,ARGV[i]}
}' one two three four
ARGV[0]=awk
ARGV[1]=one
ARGV[2]=two
ARGV[3]=three
root@awk-pratics:~/workspace# awk 'BEGIN{print ENVIRON["USER"]}' # 从env命令结果中获取的变量值
root
root@awk-pratics:~/workspace# awk 'END{print FILENAME}' marks.txt # 注意:FILENAME 在 BEGIN 块中未定义(此时尚未读入任何文件),在主体块和 END 块中可用
marks.txt
root@awk-pratics:~/workspace# awk 'BEGIN{print TEXTDOMAIN}'
messages # 输出 messages 是由于 TEXTDOMAIN 的默认值为 messages。上面所有的输出都是英文字符,是因为本地语言环境配置为 en_IN
root@awk-pratics:~/workspace# awk 'BEGIN { print PROCINFO["pid"] }'
6172
root@awk-pratics:~/workspace# awk 'BEGIN{IGNORECASE=1} /amit/' marks.txt
1) Amit 物理 80
root@awk-pratics:~/workspace# echo "12334344343434" | awk -vFIELDWIDTHS="1 2 3 4 5" -vOFS="|" 'NF=NF'
1|23|343|4434|3434
awk 'BEGIN { a = 50; b = 20; print "(a % b) = ", (a % b) }'
root@awk-pratics:~/workspace# echo "121212" | awk 'BEGIN{a=50;b=20;print "(a/b)=",(a/b)} {print $1}'
(a/b)= 2.5
121212
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = ++a; printf "a = %d, b = %d\n", a, b }'
a = 11, b = 11
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = --a; printf "a = %d, b = %d\n", a, b }'
a = 9, b = 9
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = a--; printf "a = %d, b = %d\n", a, b }'
a = 9, b = 10
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = a++; printf "a = %d, b = %d\n", a, b }'
a = 11, b = 10
root@awk-pratics:~/workspace# awk 'BEGIN { name = "Jerry"; print "My name is", name }'
My name is Jerry
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=10; cnt += 10; print "Counter =", cnt }'
Counter = 20
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=10; cnt -= 10; print "Counter =", cnt }'
Counter = 0
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=100; cnt /= 5; print "Counter =", cnt }'
Counter = 20
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=10; cnt *= 10; print "Counter =", cnt }'
Counter = 100
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=100; cnt %= 8; print "Counter =", cnt }'
Counter = 4
root@awk-pratics:~/workspace# awk 'BEGIN { cnt=2; cnt ^= 4; print "Counter =", cnt }'
Counter = 16
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 10; if (a == b) print "a == b" }'
a == b
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 20; if (a != b) print "a != b" }'
a != b
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 20; if (a < b) print "a < b" }'
a < b
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 20; if (a > b) print "a > b" }' # 条件为假,无输出
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 10; if (a <= b) print "a <= b" }'
a <= b
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 20; if (b > a ) print "b > a" }'
b > a
root@awk-pratics:~/workspace# awk 'BEGIN { a = 10; b = 10; if (a >= b) print "a >= b" }'
a >= b
# 如果 expr1 与 expr2 均为真,则最终结果为真;否则为假。请注意,只有当 expr1 为真时才会计算 expr2 的值,若 expr1 为假则直接返回假,而不再计算 expr2 的值。下面的例子判断给定的数字是否是八进制形式(0 到 7):
➜ ~ (☸ kubernetes-admin@kubernetes:default) awk 'BEGIN {num = 5; if (num >= 0 && num <= 7) printf "%d is in octal format\n", num }'
5 is in octal format
# 如果 expr1 与 expr2 至少其中一个为真,则最终结果为真;二者均为假时则为假。请注意,只有当 expr1 为假时才会计算 expr2 的值,若 expr1 为真则不会再计算 expr2 的值。示例如下:
➜ ~ (☸ kubernetes-admin@kubernetes:default) awk 'BEGIN {ch = "\n"; if (ch == " " || ch == "\t" || ch == "\n") print "Current character is whitespace." }'
Current character is whitespace.
# 逻辑非将 expr1 的真值取反。如果 expr1 为真,则返回 0。否则返回 1。下面的示例判断字符串是否为空:
awk 'BEGIN { name = ""; if (! length(name)) print "name is empty string." }'
当条件表达式(condition expression)为真时,statement1 执行,否则 statement2 执行。下面的示例将返回最大数值:
➜ ~ (☸ kubernetes-admin@kubernetes:default) awk 'BEGIN { a = 10; b = 20; (a > b) ? max = a : max = b; print "Max =", max}'
Max = 20
# 空格 (space) 操作符可以完成两个字符串的连接操作
➜ ~ (☸ kubernetes-admin@kubernetes:default) awk 'BEGIN { str1="Hello, "; str2="World"; str3 = str1 str2; print str3 }'
Hello, World
# 数组成员操作符为 in,可用于遍历数组的所有索引,从而访问数组元素。下面的脚本通过 awk -f ./demos.awk 执行:
BEGIN{
arr[1]=2;
arr[2]=3;
arr[3]=4;
for(i in arr)
printf "arr[%d] = %d\n", i, arr[i]
}
awk '$0 ~ 9' marks.txt
awk '$0 !~ 9' marks.txt
root@awk-pratics:~/workspace# echo -e "cat\nbat\nfun\nfin\nfan" | awk '/f.n/'
fun
fin
fan
root@awk-pratics:~/workspace# echo -e "This\nThat\nThere\nTheir\nthese" | awk '/^The/'
There
Their
root@awk-pratics:~/workspace# echo -e "knife\nknow\nfun\nfin\nfan\nnine" | awk '/n$/'
fun
fin
fan
root@awk-pratics:~/workspace# echo -e "Call\nTall\nBall" | awk '/[CT]all/'
Call
Tall
root@awk-pratics:~/workspace# echo -e "Call\nTall\nBall" | awk '/[^CT]all/'
Ball
root@awk-pratics:~/workspace# echo -e "Call\nTall\nBall\nSmall\nShall" | awk '/Call|Ball/'
Call
Ball
root@awk-pratics:~/workspace# echo -e "Colour\nColor" | awk '/Colou?r/'
Colour
Color
root@awk-pratics:~/workspace# echo -e "ca\ncat\ncatt" | awk '/cat*/'
ca
cat
catt
root@awk-pratics:~/workspace# echo -e "111\n22\n123\n234\n456\n222" | awk '/2+/'
22
123
234
222
root@awk-pratics:~/workspace# echo -e "Apple Juice\nApple Pie\nApple Tart\nApple Cake" | awk '/Apple (Juice|Cake)/'
Apple Juice
Apple Cake
AWK 有关联数组这种数据结构,而这种数据结构最好的一个特点就是它的索引值不需要是连续的整数值。我们既可以使用数字也可以使用字符串作为数组的索引。除此之外,关联数组也不需要提前声明其大小,因为它在运行时可以自动地增大或减小。
格式:
array_name[index]=value 其中 array_name 是数组的名称,index 是数组索引,value 为数组中元素所赋予的值
root@awk-pratics:~/workspace# awk 'BEGIN { fruits["mango"]="yellow"; fruits["orange"]="orange"
print fruits["orange"] "\n" fruits["mango"] }'
orange
yellow
root@awk-pratics:~/workspace# awk 'BEGIN { fruits["mango"]="yellow"; fruits["orange"]="orange"; delete fruits["orange"];
print fruits["orange"]
}'
100 200 300
400 500 600
700 800 900
上面的示例中,array[0][0] 存储 100,array[0][1] 存储 200,依次类推。为了在 array[0][0] 处存储 100,我们可以使用这样的表示方法:
# 格式: array["0,0"] = 100 # 此时数组的索引值就是"0,0",是一个字符串
root@awk-pratics:~/workspace# awk 'BEGIN { array["0,0"] = 100; array["0,1"] = 200; array["0,2"] = 300; array["1,0"] = 400; array["1,1"] = 500; array["1,2"] = 600;
# print array elements
print "array[0,0] = " array["0,0"]; print "array[0,1] = " array["0,1"]; print "array[0,2] = " array["0,2"]; print "array[1,0] = " array["1,0"]; print "array[1,1] = " array["1,1"]; print "array[1,2] = " array["1,2"]; }'
array[0,0] = 100
array[0,1] = 200
array[0,2] = 300
array[1,0] = 400
array[1,1] = 500
array[1,2] = 600