I. Review#
Review 1: Common Shell Commands
Get the output result of a command
1. Backticks (` `): line=`ls`
2. $(): line=$(ls)
seq is similar to range in Python
Mainly generates a sequence of ordered numbers
-s specifies the delimiter
-w specifies equal width output
II. sort#
sort command
- By default, sorts by the first character of each line
- -n: sort by integer --> default is ascending
- -r: descending sort (note: reverse order)
- -u: remove duplicates
Specify sort key
- Specify which column of data to sort by
- -k: specify which column is the sort key
cat tt | sort -n -k4
Specify field delimiter
- -t: specify field delimiter (default is whitespace)
sort -t: -n -k3 /etc/passwd
# Note: specify delimiter as :
sort is a sorting command
- By default, sorts by the first character of each line
- English is sorted in the order of a-z; if the first letter is the same, compare the second letter, and so on
- Chinese is sorted by the first letter of the pinyin of the first character
Example 1: cat sort_test.txt | sort
---------------------------------------------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat sort_test.txt # Note: display text content
Chinese 456 1xx 123
abc bcd 3yy dd
Aac XYZ 2zz
San Chuang xixi
[root@sanchuang-linux ~]# cat sort_test.txt | sort # Note: sort the entire text
San Chuang xixi
Chinese 456 1xx 123
Aac XYZ 2zz
abc bcd 3yy dd
--------------------------------------------------------------------------------------------
>>> ord("三") # Use ord() function in Python to check Unicode encoding
19977 # Note: sort does not sort by encoding
>>> ord("中")
20013
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# locale # Note: check encoding format
LANG=zh_CN.UTF-8
LC_CTYPE="zh_CN.UTF-8"
LC_NUMERIC="zh_CN.UTF-8"
LC_TIME="zh_CN.UTF-8"
LC_COLLATE="zh_CN.UTF-8"
LC_MONETARY="zh_CN.UTF-8"
LC_MESSAGES="zh_CN.UTF-8"
LC_PAPER="zh_CN.UTF-8"
LC_NAME="zh_CN.UTF-8"
LC_ADDRESS="zh_CN.UTF-8"
LC_TELEPHONE="zh_CN.UTF-8"
LC_MEASUREMENT="zh_CN.UTF-8"
LC_IDENTIFICATION="zh_CN.UTF-8"
LC_ALL=
============================================================================================
Example 2: cat sort_test.txt | sort -k 2 Specify which column is the sort key
[root@sanchuang-linux ~]# cat sort_test.txt | sort -k2 # Note: specify the second column for sorting
Chinese 456 1xx 123
abc bcd 3yy dd
San Chuang xixi
Aac XYZ 2zz
[root@sanchuang-linux ~]# cat sort_test.txt | sort -k 3 # Note: specify the third column for sorting
San Chuang xixi # Note: whitespace comes first
Chinese 456 1xx 123 # Note: 1
Aac XYZ 2zz # Note: 2
abc bcd 3yy dd # Note: 3
============================================================================================
Example 3: English is sorted in the order of a-z; if the first letter is the same, compare the second letter
[root@sanchuang-linux ~]# cat sort_test.txt
Chinese 456 1xx 123
aac bcd 3yy dd
Aac XYZ 2zz
San Chuang xixi
Xyz cde
Bbc Abc
bbc xxx
ABC
abc
[root@sanchuang-linux ~]# cat sort_test.txt | sort # Note: not sorted by encoding
San Chuang xixi
Chinese 456 1xx 123
aac bcd 3yy dd
Aac XYZ 2zz
abc
ABC
Bbc Abc
bbc xxx
Xyz cde
sort -n#
sort -n sorts numerically
[root@sanchuang-linux ~]# a=123 # Note: the shell stores the value of a as a string
[root@sanchuang-linux ~]# b=234
[root@sanchuang-linux ~]# echo $a+$b # Note: string concatenation
123+234
[root@sanchuang-linux ~]# echo $(($a+$b)) # Note: need to use 2 parentheses for numerical addition
357
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat aa.txt
123
23
4
234
[root@sanchuang-linux ~]# cat aa.txt | sort # Note: by default sorts lines as strings, comparing character by character
123
23
234
4
[root@sanchuang-linux ~]# cat aa.txt | sort -n # Note: sort -n sorts by numerical value
4 # Note: default ascending
23 # Note: -n sorts numerically
123
234
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat aa.txt | sort -n -r # Note: numerical, reverse order sorting, same effect
[root@sanchuang-linux ~]# cat aa.txt | sort -nr # Note: numerical, reverse order sorting, same effect
234
123
23
4
-t Specify the column delimiter#
Specify the column delimiter # head -n7 /etc/passwd | sort -k6 -t :
Default delimiter is whitespace
Use -t to specify the column delimiter
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort # Note: take the first 7 lines and sort
adm:x:3:4:adm:/var/adm:/sbin/nologin
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
sync:x:5:0:sync:/sbin:/bin/sync
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k2 # Note: sort -k2 specifies the second column as the sort key
adm:x:3:4:adm:/var/adm:/sbin/nologin
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
sync:x:5:0:sync:/sbin:/bin/sync
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k6 -t : # *Note: -t : specifies the delimiter as :
bin:x:1:1:bin:/bin:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
sync:x:5:0:sync:/sbin:/bin/sync
daemon:x:2:2:daemon:/sbin:/sbin/nologin
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k6 -t : -r # Note: -r reverse order
III. Exercise: Find the top 5 processes by memory usage#
Find the top 5 processes by memory usage
ps aux | sort -n -k4 -r | head -5 # Note: recommended
Note: Memory usage %MEM
[root@sanchuang-linux ~]# ps aux | tail -n +2 | sort -nr -k4 | head -5
root 960 0.0 2.0 221572 38096 ? S 08:31 0:00 /usr/libexec/sssd/sssd_nss --uid 0 --gid 0 --logger=files
root 930 0.0 1.7 425416 31480 ? Ssl 08:31 0:01 /usr/libexec/platform-python -Es /usr/sbin/tuned -l -P
polkitd 890 0.0 1.2 1625936 23856 ? Ssl 08:31 0:00 /usr/lib/polkit-1/polkitd --no-debug
root 891 0.0 0.9 391216 18088 ? Ssl 08:31 0:00 /usr/sbin/NetworkManager --no-daemon
root 954 0.0 0.8 219700 15416 ? S 08:31 0:00 /usr/libexec/sssd/sssd_be --domain implicit_files --uid 0 --gid 0 --logger=files
--------------------------------------------------------------------------------------------
# ps aux | tail -n +2 | sort -nr -k4 | head -5
Note: tail -n +2 displays from the second line to the end (optional)
Note: sort -nr -k4 -n sorts numerically, -r reverse order, -k4 specifies the 4th column as the sort key
Note: head -5 takes the first 5 lines
IV. uniq#
Usage of the uniq command (remove duplicates)
uniq --> unique
Remove adjacent duplicate lines
Sort first, then remove duplicates
-c counts the number of occurrences
-u displays lines that appear only once
-d displays lines that are repeated
============================================================================================
Example 1: Remove adjacent duplicate lines
[root@sanchuang-linux ~]# cat uniq_test.txt
123 abc
abc 123
45
46
45
45
47
47
48
47
[root@sanchuang-linux ~]# cat uniq_test.txt | uniq # Note: remove adjacent duplicate lines
123 abc
abc 123
45
46
45
47
48
47
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq # Note: sort first, then remove duplicates
abc 123 # Note: generally, sort first, then remove duplicates
45
46
47
48
123 abc
--------------------------------------------------------------------------------------------
Example 1.1 # cat uniq_test.txt | sort -nu
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n -u # Note: sort -u can also remove duplicates
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -nu # Note: same effect
abc 123
45
46
47
48
123 abc
============================================================================================
Example 2: -c counts the number of occurrences
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -c # Note: -c counts the number of occurrences
1 abc 123
3 45
1 46
3 47
1 48
1 123 abc
============================================================================================
Example 3: -u displays lines that appear only once
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -u # Note: -u displays lines that appear only once
abc 123
46
48
123 abc
============================================================================================
Example 4: -d displays lines that are repeated
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -d # Note: -d displays lines that are repeated
45
47
V. Exercise: Count the top ten IPs in 120,000 lines#
Count the top ten IPs in 120,000 lines
[root@sanchuang-linux ~]# cat ips.txt | sort | uniq -c | sort -nr | head
# Note 1: The first sort places identical IPs on adjacent lines (uniq only merges adjacent duplicates)
# Note 2: uniq -c counts occurrences
# Note 3: The second sort -nr sorts the "count IP" lines numerically by count, in descending order
# Note 4: head takes the first ten lines by default
Note: uniq -c is used to count the number of accesses per IP address
[root@localhost ~]# yum install nginx
[root@localhost ~]# nginx
[root@localhost ~]# lsof -i:80 # Note: nginx is up
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
nginx 12765 root 9u IPv4 60060 0t0 TCP *:http (LISTEN)
nginx 12765 root 10u IPv6 60061 0t0 TCP *:http (LISTEN)
nginx 12766 nginx 9u IPv4 60060 0t0 TCP *:http (LISTEN)
nginx 12766 nginx 10u IPv6 60061 0t0 TCP *:http (LISTEN)
nginx 12767 nginx 9u IPv4 60060 0t0 TCP *:http (LISTEN)
nginx 12767 nginx 10u IPv6 60061 0t0 TCP *:http (LISTEN)
[root@sanchuang-linux ~]# iptables -F # Note: flush all iptables firewall rules (this clears the rules; it does not stop a firewall service)
[root@sanchuang-linux ~]# cd /var/log # Note: /var/log stores logs
[root@sanchuang-linux log]# cd nginx
[root@sanchuang-linux nginx]# pwd
/var/log/nginx
[root@sanchuang-linux nginx]# ls
access.log error.log # Note: log files
VI. Exercise: Count the top ten users accessing the web server#
Count the top ten users accessing the web server (note: judged by IP)
# Note: nginx check the top 3 IPs with the most access (common exam question)
# cat access.log | awk '{print $1}' | sort | uniq -c | sort -nr | head -3
[root@sanchuang-linux nginx]# head access.log
192.168.0.42 - - [29/Oct/2020:12:01:01 +0800] "GET / HTTP/1.1" 200 4057 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400" "-"
……………… # Note: awk command extracts the first column data IP address
[root@sanchuang-linux nginx]# cat access.log | awk '{print $1}'
192.168.0.42 # Note: by default, space is the delimiter, print the first column $1
192.168.0.42
192.168.0.42
192.168.0.42
192.168.0.42
192.168.0.193
192.168.0.193
192.168.0.193
192.168.0.193 # Note: awk command extracts the first column data IP address
[root@sanchuang-linux nginx]# cat access.log | awk '{print $1}' | sort | uniq -c | sort -nr | head -3
5 192.168.0.42 # Note: count the top 3 access volumes
4 192.168.0.193
VII. cut#
cut command
- Extract text columns from text files or text streams
cut -options extract range text file
-----------------------------------
Common options
- -c: extract characters from the specified range
- -f: extract fields from the specified range
- -d: specify the delimiter, default delimiter is tab
-----------------------------------
Extract range
n: the nth item
n-: from the nth item to the end of the line
-m: from the start of the line to the mth item
n,m: the nth item and the mth item
n-m: from the nth item to the mth item
Example
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w # Note: w current user login status
14:45:33 up 4:12, 5 users, load average: 0.00, 0.00, 0.03
USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT
root tty1 - San22 15:49 0.08s 0.08s -bash
root pts/0 192.168.0.42 14:29 3.00s 0.02s 0.01s w
root pts/1 192.168.0.42 14:29 15:35 0.00s 0.00s -bash
root pts/3 192.168.0.42 09:48 4:56m 0.03s 0.03s -bash
root pts/4 192.168.0.42 09:50 2:37m 0.52s 0.52s -bash
[root@sanchuang-linux ~]# who # Note: w shows more details
root tty1 2020-10-28 22:15
root pts/0 2020-10-29 14:29 (192.168.0.42)
root pts/1 2020-10-29 14:29 (192.168.0.42)
root pts/3 2020-10-29 09:48 (192.168.0.42)
root pts/4 2020-10-29 09:50 (192.168.0.42)
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w | tr -s " " | cut -d" " -f 1,2,4
14:50:31 4:17, # Note: default delimiter is tab, so must compress
USER TTY LOGIN@ # Note: then specify the delimiter as space
root tty1 San22 # Note: add tr -s " "
root pts/0 14:29
root pts/1 14:29
root pts/3 09:48
root pts/4 09:50
Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w | awk '{print $1,$2,$4}' # Note: awk default delimiter is whitespace
14:51:28 up 5 # Note: print the 1st, 2nd, and 4th columns
USER TTY LOGIN@
root tty1 San22
root pts/0 14:29
root pts/1 14:29
root pts/3 09:48
root pts/4 09:50
Use colon as a delimiter to extract username, user ID, user group#
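Use colon as a delimiter to extract username, user ID, user group (note: the commands below print fields 1, 3, 5; in /etc/passwd field 5 is the comment/GECOS field and the group ID is field 4, so use -f 1,3,4 to get the group ID)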
Writing 1
[root@sanchuang-linux ~]# cat /etc/passwd | cut -d":" -f 1,3,5
Writing 2 (recommended)
[root@sanchuang-linux ~]# cut -d ":" -f 1,3,5 /etc/passwd # Note: extract columns 1, 3, 5
# Note: text processing commands can directly operate on text without needing to use cat
Value range#
Value range
[root@sanchuang-linux ~]# cut -d ":" -f 1,3,5 /etc/passwd # Note: extract columns 1, 3, 5
[root@sanchuang-linux ~]# cut -d ":" -f 1-5 /etc/passwd # Note: extract columns 1-5
[root@sanchuang-linux ~]# cut -d ":" -f 3- /etc/passwd # Note: extract from the 3rd column to the end
[root@sanchuang-linux ~]# cut -d ":" -f -3 /etc/passwd # Note: extract the first 3 columns
============================================================================================
-c: extract characters from the specified range
Example
[root@sanchuang-linux ~]# echo abcdefg | cut -c 2 # Note: extract the 2nd character
b
[root@sanchuang-linux ~]# echo abcdefg | cut -c 2-5 # Note: extract characters 2-5
bcde
[root@sanchuang-linux ~]# echo abcdefg | cut -c 5- # Note: extract from the 5th character to the end
efg
VIII. Exercise#
Exercise
1. Count the top three IPs in access.log
2. Display the sizes of all files in the /boot directory (including files in subdirectories), sorted from smallest to largest
3. Count the number of times each shell is used in /etc/passwd (sorted in descending order)
4. Count how many times the word 'sbin' appears in /etc/passwd
5. Display only the IP address of ens33
============================================================================================
Example 1: Count the top three IPs in access.log
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cut -d "-" -f 1 access.log | sort | uniq -c | sort -nr | head -n3
7 192.168.0.42
6 192.168.0.193
5 192.168.0.38
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c
6 192.168.0.193
5 192.168.0.21
5 192.168.0.32
5 192.168.0.37
5 192.168.0.38
7 192.168.0.42
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c | sort -nr
7 192.168.0.42
6 192.168.0.193
5 192.168.0.38
5 192.168.0.37
5 192.168.0.32
5 192.168.0.21
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c | sort -nr | head -3
7 192.168.0.42
6 192.168.0.193
5 192.168.0.38
============================================================================================
Example 2: Display the sizes of all files in the /boot directory (including files in subdirectories), sorted from smallest to largest
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# du -ak /boot | sort -n # Note: in k bytes
4 /boot/.bashrc
4 /boot/efi/EFI/centos
4 /boot/grub2/device.map
4 /boot/grub2/grubenv
4 /boot/grub2/i386-pc/adler32.mod
4 /boot/grub2/i386-pc/all_video.mod
4 /boot/grub2/i386-pc/aout.mod
Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ll -R | grep root | tr -s " " | cut -d " " -f 5,9 | sort -n
0 1214.txt
0 12244.txt
0 1224.txt
0 12456.txt
0 20
0 20
0 2020-09-24-18_25_03.txt
Method 3
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ll -R | grep root | awk '{print $5,$9}' | sort -n
0 1214.txt
0 12244.txt
0 1224.txt
0 12456.txt
0 20
0 20
0 2020-09-24-18_25_03.txt
0 abcd.txt
============================================================================================
Example 3: Count the number of times each shell is used in /etc/passwd (sorted in descending order)
[root@sanchuang-linux ~]# cut -d : -f7 /etc/passwd | sort | uniq -c | sort -nr
31 /bin/bash
19 /sbin/nologin
1 /sbin/shutdown
1 /sbin/halt
1 /bin/sync
============================================================================================
Example 4: Count how many times the word 'sbin' appears in /etc/passwd
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# grep -o sbin /etc/passwd
sbin
sbin
………………
sbin
sbin
[root@sanchuang-linux ~]# grep -o sbin /etc/passwd | wc -l
25
Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat /etc/passwd | tr ":" "\n" | grep sbin | wc -l
25 # Note: replace : with newline character to move the same line to different lines
============================================================================================
Example 5: Display only the IP address of ens33
# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3 | cut -d"/" -f1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ip a | grep ens33
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
inet 192.168.0.34/24 brd 192.168.0.255 scope global dynamic noprefixroute ens33
[root@sanchuang-linux ~]# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3
192.168.0.34/24
[root@sanchuang-linux ~]# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3 | cut -d"/" -f1
192.168.0.34
IX. awk specify delimiter -F#
awk specify delimiter -F
[root@sanchuang-linux ~]# awk -F":" '{print $1}' /etc/passwd
root
bin
daemon
adm
lp
sync
…………………………
X. grep#
grep command
The three text processing tools ==> awk grep sed
https://www.cnblogs.com/end/archive/2012/02/21/2360965.html
grep is a filter: a general-purpose program that matches lines against regular expressions
grep, egrep, fgrep
Used for matching to filter
Purpose: to search for and display lines containing a specified string in a file Format: grep [options]... pattern target file
Note: pattern --》 template#
Can accept a regular expression
-i: ignore case when searching
-v: reverse search, output lines that do not match the pattern
-n: display line numbers that meet the pattern requirements
-r: recursively search all files
-o: only display matching content
-E: supports more metacharacters (supports extended regex)
-A: find matching lines and a few following lines
-B: output matching lines and a few preceding lines
Pattern
^…. : starts with something, the whole line starts with something
…..$ : ends with something, the whole line ends with something
Note: grep is a text processing command that can directly operate on text
=====================================================================
Example 1: -v: reverse search, output lines that do not match the pattern
---------------------------------------------------------------------
[root@sanchuang-linux ~]# grep -v "#" /etc/yum.repos.d/centos.repo
# Note: do not output lines containing #
============================================================================================
Example 2: -i ignore case when searching
---------------------------------------------------------------------
[root@sanchuang-linux ~]# cd /etc/ssh/
[root@sanchuang-linux ssh]# pwd
/etc/ssh
[root@sanchuang-linux ssh]# grep -i "port" /etc/ssh/sshd_config # Note: ignore case when searching
# If you want to change the port on a SELinux system, you have to tell
# semanage port -a -t ssh_port_t -p tcp #PORTNUMBER
#Port 22
# WARNING: 'UsePAM no' is not supported in Fedora and may cause several
#GatewayPorts no
============================================================================================
Example 3: ignore case and display line numbers found
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ssh]# grep -i -n "port" /etc/ssh/sshd_config # Note: display line numbers that meet the pattern requirements
13:# If you want to change the port on a SELinux system, you have to tell
15:# semanage port -a -t ssh_port_t -p tcp #PORTNUMBER
17:#Port 22
102:# WARNING: 'UsePAM no' is not supported in Fedora and may cause several
108:#GatewayPorts no
============================================================================================
Example 4: -r recursively search all files
Note: search in all files under subdirectories
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ssh]# grep "xxxxx" * -r # Note: recursively search in all files and subdirectories in the current directory
[root@sanchuang-linux ssh]# grep "GET" /var/log/nginx -r # Note: recursively search in the nginx directory
/var/log/nginx/error.log:2020/10/29 12:01:02 [error] 12767#0: *2 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.0.42, server: _, request: "GET /favicon.ico HTTP/1.1", host: "192.168.0.34", referrer: "http://192.168.0.34/"
/var/log/nginx/error.log:2020/10/29 12:01:58 [error] 12767#0: *2 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.0.42, server: _, request: "GET /favicon.ico HTTP/1.1", host: "192.168.0.34"
--------------------------------------------------------------------------------------------
# Recursively search all files in /var/log/nginx (including files in subdirectories)
[root@mysql-binary nginx]# grep "GET" /var/log/nginx -r
XI. Regular Expressions#
Regular Expressions
^aa indicates lines starting with aa
aa$ indicates lines ending with aa
[] indicates a character set
[a-z] takes one from a-z
[^a-z] does not take a-z characters
grep ^[^a-zA-Z0-9_] grep_test.txt displays lines not starting with letters, numbers, or underscores
Example 1: do not output lines starting with #
--------------------------------------------------------------------------------------------
[root@sanchuang-linux yum.repos.d]# grep -v ^# centos.repo
[root@sanchuang-linux yum.repos.d]# grep -v ^# centos.repo | grep -v ^$ # Note: do not output blank lines
Note: do not output blank lines grep -v ^$
Do not output lines starting with # grep -v ^#
============================================================================================
Example 2: display the lines in grep_test.txt that do not start with # and are not blank
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# cat grep_test.txt
#aaa
aaa#bbb
456
#
789
[root@sanchuang-linux chenpeng]# grep -v ^# grep_test.txt # Note: keep only lines that do not start with #
aaa#bbb
456
789
# Method 1
[root@sanchuang-linux chenpeng]# grep -v ^# grep_test.txt | grep -v ^$ # Note: filter out blank lines
aaa#bbb # Note: a line containing only whitespace characters is not empty, so ^$ does not remove it
456
789
# Method 2
Display the lines in grep_test.txt that do not start with # and are not blank
[root@sanchuang-linux chenpeng]# grep -v -E "^#|^$" grep_test.txt
aaa#bbb # Note: -E regular expression, | or, -v does not display
456 # Note: -E: supports more metacharacters (supports extended regex)
789
XII. [] Indicates a Character Set (Regular Expression)#
[] Indicates a Character Set (Regular Expression)
[root@localhost chenpeng]# cat grep_test.txt
abc
adc
Abdc
ac
a1c
axy
axc
123
777
Example 1: match abc and adc
--------------------------------------------------------------------------------------------
[root@localhost chenpeng]# grep a[bd]c grep_test.txt # Note: take one from the character set [bd]
abc
adc
[root@sanchuang-linux chenpeng]# grep a[a-z]c grep_test.txt # Note: take one character from a-z
abc # Note: ac is not matched; there must be exactly one character between a and c
adc # Note: [a-z] indicates taking one from a-z
axc
[root@sanchuang-linux chenpeng]# grep a[0-9]c grep_test.txt # Note: take one character from 0-9
a1c
--------------------------------------------------------------------------------------------
Example 2: [^a-z] does not take a-z characters
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# grep a[^a-z]c grep_test.txt # Note: ^ takes the inverse, does not take a-z characters between a and c
a1c
Example 3: extract lines not starting with letters
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# grep ^[^a-zA-Z] grep_test.txt # Note: lines not starting with a-zA-Z
11c
123 # Note: space included
777
Note: grep ^[a-zA-Z] grep_test.txt lines starting with letters
============================================================================================
Example 4: display lines not starting with letters, numbers, or underscores
Writing 1
[root@sanchuang-linux chenpeng]# grep ^[^a-zA-Z0-9_] grep_test.txt
Writing 2
[root@sanchuang-linux chenpeng]# grep -v ^[a-zA-Z0-9_] grep_test.txt
# Note: display lines not starting with letters, numbers, or underscores
XIII. Wildcards (Regular Expressions)#
Wildcards (Regular Expressions)
* represents matching the previous item any number of times
? represents matching the previous item 0 or 1 time
+ represents matching the previous item one to many times
. placeholder for any character except \n
{n,m} matches the previous item n to m times
egrep is equivalent to grep -E
fgrep does not support any regex, ordinary text filtering
Example 1: * ? + .
--------------------------------------------------------------------------------------------
# Note: all matches are for the preceding character
[root@localhost ~]# cat grep_test.txt
alc
axxc
ac
[root@localhost ~]# grep -E a.?c grep_test.txt # Note: -E supports more extended regex
alc # Note: . placeholder (note = must have 1 character)
ac # Note: .? indicates that there can be 0 to 1 character between a and c
[root@localhost ~]# grep -E a.*c grep_test.txt # Note: .* indicates that there can be any number of characters between a and c
alc # Note: egrep is equivalent to grep -E
axxc
ac
[root@localhost ~]# grep -E a.c grep_test.txt # Note: . indicates that there is only 1 character between a and c
alc
[root@localhost ~]# grep -E a.+c grep_test.txt # Note: represents that the previous character . appears 1 to multiple times
alc
axxc
Example 2: { }
--------------------------------------------------------------------------------------------
# Note: all matches are for the preceding character
[root@localhost ~]# egrep "a.{1}c" grep_test.txt # Note: specify that . appears once
alc
[root@localhost ~]# egrep "a.{1,2}c" grep_test.txt # Note: specify that . appears 1 to 2 times
alc
axxc
Example 3: { }
--------------------------------------------------------------------------------------------
[root@localhost ~]# cat grep_test.txt
alc
axxc
ac
ayy1c
addddddc
[root@localhost ~]# egrep "a.{1,5}c" grep_test.txt # Note: the previous item . appears 1 to 5 times
alc
axxc
ayy1c
egrep is equivalent to grep -E
fgrep does not support regex, ordinary text filtering
XIV. Exercise: grep Regular Expressions#
grep Regular Expressions
- Enter the /lianxi directory, copy /etc/passwd to the current directory, and then operate on passwd
- Find lines in the current passwd file that start with ftp or mail, output to the screen
grep -E "^ftp|^mail" passwd
egrep "^ftp|^mail" passwd
- Find lines in the current passwd file that do not start with r, m, or f
grep -v -E "^r|^m|^f" passwd
grep "^[^rmf]" passwd
- Find lines in the current passwd that end with bash
grep bash$ passwd
- Find valid lines in the /etc/login.defs file (do not display blank lines and comment lines starting with #)
grep -v -E "^#|^$" /etc/login.defs
- Find words with 15 letters in the /var/log/messages document
grep -E "[^a-zA-Z][a-zA-Z]{15}[^a-zA-Z]" /var/log/messages # Note: a non-letter on each side, exactly 15 letters in the middle
grep -E "\b[a-zA-Z]{15}\b" /var/log/messages # Note: \b matches a word boundary
- Find users in the /etc/passwd file whose usernames contain 'liu' and use bash
grep liu /etc/passwd | grep bash$ | cut -d":" -f1
- Find valid lines in /etc/ssh/sshd_config
grep -v -E "^#|^$" /etc/ssh/sshd_config
- Find lines in /etc/ssh/sshd_config that contain two consecutive identical characters
grep -E "(.)\1" /etc/ssh/sshd_config
grep -E "(.)\1" # Note: . matches any character except newline; \1 is a backreference that must match the same text captured by group 1
- Find lines containing special characters
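grep -E "[^a-zA-Z0-9]" grep_test.txt # Note: matches any character that is not a letter or digit; the range 0-Z also covers :;<=>?@ and excludes lowercase letters in ASCII order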
- Find lines that do not contain numbers
grep -v "[0-9]" abc.txt
- Find IP addresses in /var/log/secure
cut -d " " -f11 /var/log/secure | grep -E "\.." | sort | uniq # Note: \. matches a literal dot; the second . matches any single character
--------------------------------------------------------------------------------------------
grep -E "((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))" ip_test.txt
============================================================================================
IP address matching:
IPv4: 4 groups separated by dots, each in the range 0-255, e.g. 192.168.1.0
Note: match IP addresses
[root@sanchuang-linux ~]# grep -E "((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))" ip_test.txt
192.168.0.1
192.168.1.255
172.0.0.1
((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))
# Parentheses indicate a group
Analysis
0-255 (valid range of each group)
[0-9] single digit (0-9)
[1-9][0-9] two digits (10-99)
(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]) three digits (100-199, 200-249, 250-255)