mycpen

Mycpen

记录学习历程与受益知识
github
telegram
bilibili

12_Linux Basics - SHELL Commands - sort - uniq - cut - grep - Regular Expressions - Wildcards

I. Review#

Review 1: Common Shell Commands

Get the output result of a command	
1.` `, backticks	 line=`ls`
2.$()			line=$(ls)
seq is similar to range in Python
Mainly generates a sequence of ordered numbers
-s specifies the delimiter
-w specifies equal width output

II. sort#

sort command

  • By default, lines are compared as whole strings, character by character starting from the first character (the exact order follows the current locale)
  • -n: sort by integer --> default is ascending
  • -r: descending sort (note: reverse order)
  • -u: remove duplicates

Specify sort key

  • Specify which column of data to sort by
  • -k: specify which column is the sort key
  • cat tt | sort -n -k4

Specify field delimiter

  • -t: specify field delimiter (default is whitespace)
  • sort -t: -n -k3 /etc/passwd # Note: specify delimiter as :

sort is a sorting command

  • By default, sorts by the first character of each line
    • English is sorted in the order of a-z; if the first letter is the same, compare the second letter, and so on
    • Chinese is sorted by the first letter of the pinyin of the first character
Example 1: cat sort_test.txt | sort
---------------------------------------------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat sort_test.txt 			# Note: display text content
Chinese 456 1xx  123
abc bcd  3yy dd
Aac XYZ  2zz
San Chuang xixi
[root@sanchuang-linux ~]# cat sort_test.txt | sort		# Note: sort the entire text
San Chuang xixi
Chinese 456 1xx  123
Aac XYZ  2zz
abc bcd  3yy dd
--------------------------------------------------------------------------------------------
>>> ord("三")							# Use ord() function in Python to check Unicode encoding
19977									# Note: sort does not sort by encoding
>>> ord("中")
20013
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# locale		# Note: check encoding format
LANG=zh_CN.UTF-8
LC_CTYPE="zh_CN.UTF-8"
LC_NUMERIC="zh_CN.UTF-8"
LC_TIME="zh_CN.UTF-8"
LC_COLLATE="zh_CN.UTF-8"
LC_MONETARY="zh_CN.UTF-8"
LC_MESSAGES="zh_CN.UTF-8"
LC_PAPER="zh_CN.UTF-8"
LC_NAME="zh_CN.UTF-8"
LC_ADDRESS="zh_CN.UTF-8"
LC_TELEPHONE="zh_CN.UTF-8"
LC_MEASUREMENT="zh_CN.UTF-8"
LC_IDENTIFICATION="zh_CN.UTF-8"
LC_ALL=
============================================================================================
Example 2: cat sort_test.txt | sort -k 2	Specify which column is the sort key
[root@sanchuang-linux ~]# cat sort_test.txt | sort -k2		# Note: specify the second column for sorting
Chinese 456 1xx  123
abc bcd  3yy dd
San Chuang xixi
Aac XYZ  2zz
[root@sanchuang-linux ~]# cat sort_test.txt | sort -k 3	  # Note: specify the third column for sorting
San Chuang xixi													# Note: whitespace comes first
Chinese 456 1xx  123											# Note: 1
Aac XYZ  2zz											 # Note: 2
abc bcd  3yy dd											 # Note: 3
============================================================================================
Example 3: English is sorted in the order of a-z; if the first letter is the same, compare the second letter
[root@sanchuang-linux ~]# cat sort_test.txt 
Chinese 456 1xx  123
aac bcd  3yy dd
Aac XYZ  2zz
San Chuang xixi
Xyz  cde
Bbc  Abc
bbc xxx
ABC
abc
[root@sanchuang-linux ~]# cat sort_test.txt | sort			# Note: not sorted by encoding
San Chuang xixi
Chinese 456 1xx  123
aac bcd  3yy dd
Aac XYZ  2zz
abc
ABC
Bbc  Abc
bbc xxx
Xyz  cde

sort -n#

sort -n sorts numerically

[root@sanchuang-linux ~]# a=123						# Note: the shell stores the value of a as the string "123", not as a number
[root@sanchuang-linux ~]# b=234
[root@sanchuang-linux ~]# echo $a+$b				# Note: string concatenation
123+234
[root@sanchuang-linux ~]# echo $(($a+$b))			# Note: need to use 2 parentheses for numerical addition
357
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat aa.txt 
123
23
4
234
[root@sanchuang-linux ~]# cat aa.txt | sort			# Note: by default lines are compared as strings, character by character, not as numbers
123
23
234
4
[root@sanchuang-linux ~]# cat aa.txt | sort -n		# Note: sort -n sorts by numerical value
4													# Note: default ascending
23													# Note: -n sorts numerically
123
234
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat aa.txt | sort -n -r	# Note: numerical, reverse order sorting, same effect
[root@sanchuang-linux ~]# cat aa.txt | sort -nr		# Note: numerical, reverse order sorting, same effect
234
123
23
4

-t Specify the column delimiter#

Specify the column delimiter # head -n7 /etc/passwd | sort -k6 -t :

Default delimiter is whitespace

Use -t to specify the column delimiter

[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort		# Note: take the first 7 lines and sort
adm:x:3:4:adm:/var/adm:/sbin/nologin
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
sync:x:5:0:sync:/sbin:/bin/sync
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k2	# Note: sort -k2 specifies the second column as the sort key
adm:x:3:4:adm:/var/adm:/sbin/nologin
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
sync:x:5:0:sync:/sbin:/bin/sync
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k6 -t :			# *Note: -t : specifies the delimiter as :
bin:x:1:1:bin:/bin:/sbin/nologin
root:x:0:0:root:/root:/bin/bash
sync:x:5:0:sync:/sbin:/bin/sync
daemon:x:2:2:daemon:/sbin:/sbin/nologin
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
[root@sanchuang-linux ~]# head -n7 /etc/passwd | sort -k6 -t : -r		# Note: -r reverse order

III. Exercise: Find the top 5 processes by memory usage#

Find the top 5 processes by memory usage

ps aux | sort -n -k4 -r | head -5		# Note: recommended
Note: Memory usage %MEM
[root@sanchuang-linux ~]# ps aux | tail -n +2 | sort -nr -k4 | head -5
root         960  0.0  2.0 221572 38096 ?        S    08:31   0:00 /usr/libexec/sssd/sssd_nss --uid 0 --gid 0 --logger=files
root         930  0.0  1.7 425416 31480 ?        Ssl  08:31   0:01 /usr/libexec/platform-python -Es /usr/sbin/tuned -l -P
polkitd      890  0.0  1.2 1625936 23856 ?       Ssl  08:31   0:00 /usr/lib/polkit-1/polkitd --no-debug
root         891  0.0  0.9 391216 18088 ?        Ssl  08:31   0:00 /usr/sbin/NetworkManager --no-daemon
root         954  0.0  0.8 219700 15416 ?        S    08:31   0:00 /usr/libexec/sssd/sssd_be --domain implicit_files --uid 0 --gid 0 --logger=files
--------------------------------------------------------------------------------------------
# ps aux | tail -n +2 | sort -nr -k4 | head -5
Note: tail -n +2 displays from the second line to the end (optional)
Note: sort -nr -k4   -n sorts numerically, -r reverse order, -k4 specifies the 4th column as the sort key
Note: head -5 takes the first 5 lines

IV. uniq#

Usage of the uniq command (remove duplicates)

uniq --> unique
Remove adjacent duplicate lines
Sort first, then remove duplicates
-c counts the number of occurrences
-u displays lines that appear only once
-d displays lines that are repeated

============================================================================================
Example 1: Remove adjacent duplicate lines
[root@sanchuang-linux ~]# cat uniq_test.txt
123 abc
abc 123
45
46
45
45
47
47
48
47
[root@sanchuang-linux ~]# cat uniq_test.txt | uniq		# Note: remove adjacent duplicate lines
123 abc
abc 123
45
46
45
47
48
47
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq	# Note: sort first, then remove duplicates
abc 123														# Note: generally, sort first, then remove duplicates
45
46
47
48
123 abc
--------------------------------------------------------------------------------------------
Example 1.1 # cat uniq_test.txt | sort -nu
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n -u		# Note: sort -u can also remove duplicates
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -nu		# Note: same effect
abc 123
45
46
47
48
123 abc
============================================================================================
Example 2: -c counts the number of occurrences
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -c    # Note: -c counts the number of occurrences
      1 abc 123
      3 45
      1 46
      3 47
      1 48
      1 123 abc
============================================================================================
Example 3: -u displays lines that appear only once
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -u	# Note: -u displays lines that appear only once
abc 123
46
48
123 abc
============================================================================================
Example 4: -d displays lines that are repeated
[root@sanchuang-linux ~]# cat uniq_test.txt | sort -n | uniq -d	# Note: -d displays lines that are repeated
45
47

V. Exercise: Count the top ten IPs in 120,000 lines#

Count the top ten IPs in 120,000 lines

[root@sanchuang-linux ~]# cat ips.txt | sort | uniq -c | sort -nr | head
# Note 1: The first sort puts the same IP segments together
# Note 2: uniq -c counts occurrences
# Note: The second sort -nr sorts the previous counts + IP in reverse order
# Note: head takes the first ten by default
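Note: uniq -c is what counts the number of accesses per IP address (sort only groups identical IPs together first; sort -c merely checks whether the input is already sorted)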
[root@localhost ~]# yum install nginx
[root@localhost ~]# nginx
[root@localhost ~]# lsof -i:80			# Note: nginx is up
COMMAND   PID  USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
nginx   12765  root    9u  IPv4  60060      0t0  TCP *:http (LISTEN)
nginx   12765  root   10u  IPv6  60061      0t0  TCP *:http (LISTEN)
nginx   12766 nginx    9u  IPv4  60060      0t0  TCP *:http (LISTEN)
nginx   12766 nginx   10u  IPv6  60061      0t0  TCP *:http (LISTEN)
nginx   12767 nginx    9u  IPv4  60060      0t0  TCP *:http (LISTEN)
nginx   12767 nginx   10u  IPv6  60061      0t0  TCP *:http (LISTEN)
[root@sanchuang-linux ~]# iptables -F			# Note: flush (delete) all iptables rules so the firewall does not block the test requests

[root@sanchuang-linux ~]# cd /var/log			# Note: /var/log stores logs
[root@sanchuang-linux log]# cd nginx
[root@sanchuang-linux nginx]# pwd
/var/log/nginx
[root@sanchuang-linux nginx]# ls
access.log  error.log							# Note: log files

VI. Exercise: Count the top ten users accessing the web server#

Count the top ten users accessing the web server (note: judged by IP)

# Note: nginx check the top 3 IPs with the most access (common exam question)
# cat access.log | awk '{print $1}' | sort | uniq -c | sort -nr | head -3
[root@sanchuang-linux nginx]# head access.log 
192.168.0.42 - - [29/Oct/2020:12:01:01 +0800] "GET / HTTP/1.1" 200 4057 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3823.400 QQBrowser/10.7.4307.400" "-"
………………						# Note: awk command extracts the first column data IP address
[root@sanchuang-linux nginx]# cat access.log | awk '{print $1}'
192.168.0.42				# Note: by default, space is the delimiter, print the first column $1
192.168.0.42
192.168.0.42
192.168.0.42
192.168.0.42
192.168.0.193
192.168.0.193
192.168.0.193
192.168.0.193				# Note: awk command extracts the first column data IP address
[root@sanchuang-linux nginx]# cat access.log | awk '{print $1}' | sort | uniq -c | sort -nr | head -3
      5 192.168.0.42		# Note: count the top 3 access volumes
      4 192.168.0.193

VII. cut#

cut command

  • Extract text columns from text files or text streams
  • Usage: cut [options] <extract range> <text file>

-----------------------------------

Common options

  • -c: extract characters from the specified range
  • -f: extract fields from the specified range
  • -d: specify the delimiter, default delimiter is tab

-----------------------------------

Extract range

n: the nth item

n-: from the nth item to the end of the line

-m: from the start of the line to the mth item

n,m: the nth item and the mth item

n-m: from the nth item to the mth item

Example
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w					# Note: w current user login status
 14:45:33 up  4:12,  5 users,  load average: 0.00, 0.00, 0.03
USER     TTY      FROM             LOGIN@   IDLE   JCPU   PCPU WHAT
root     tty1     -                San22   15:49   0.08s  0.08s -bash
root     pts/0    192.168.0.42     14:29    3.00s  0.02s  0.01s w
root     pts/1    192.168.0.42     14:29   15:35   0.00s  0.00s -bash
root     pts/3    192.168.0.42     09:48    4:56m  0.03s  0.03s -bash
root     pts/4    192.168.0.42     09:50    2:37m  0.52s  0.52s -bash
[root@sanchuang-linux ~]# who				# Note: w shows more details
root     tty1         2020-10-28 22:15
root     pts/0        2020-10-29 14:29 (192.168.0.42)
root     pts/1        2020-10-29 14:29 (192.168.0.42)
root     pts/3        2020-10-29 09:48 (192.168.0.42)
root     pts/4        2020-10-29 09:50 (192.168.0.42)
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w | tr -s " " | cut -d" " -f 1,2,4
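 14:50:31 4:17,									# Note: cut's default delimiter is tab, but w pads its columns with runs of spaces
USER TTY LOGIN@									# Note: so first squeeze repeated spaces into a single space with tr -s " "
root tty1 San22								   # Note: then tell cut to use a single space as the delimiter with -d" "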
root pts/0 14:29
root pts/1 14:29
root pts/3 09:48
root pts/4 09:50
Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# w | awk '{print $1,$2,$4}'		# Note: awk default delimiter is whitespace
14:51:28 up 5										    # Note: print the 1st, 2nd, and 4th columns
USER TTY LOGIN@
root tty1 San22
root pts/0 14:29
root pts/1 14:29
root pts/3 09:48
root pts/4 09:50

Use colon as a delimiter to extract username, user ID, user group#

Use colon as a delimiter to extract fields 1, 3 and 5 of /etc/passwd: username, user ID, and the user description (comment) field. Note: the group ID is field 4.

Writing 1
[root@sanchuang-linux ~]# cat /etc/passwd | cut -d":" -f 1,3,5	
Writing 2 (recommended)
[root@sanchuang-linux ~]# cut -d ":" -f 1,3,5 /etc/passwd	# Note: extract columns 1, 3, 5
# Note: text processing commands can directly operate on text without needing to use cat

Value range#

Value range

[root@sanchuang-linux ~]# cut -d ":" -f 1,3,5 /etc/passwd	# Note: extract columns 1, 3, 5
[root@sanchuang-linux ~]# cut -d ":" -f 1-5 /etc/passwd		# Note: extract columns 1-5
[root@sanchuang-linux ~]# cut -d ":" -f 3- /etc/passwd		# Note: extract from the 3rd column to the end
[root@sanchuang-linux ~]# cut -d ":" -f -3 /etc/passwd		# Note: extract the first 3 columns
============================================================================================
-c: extract characters from the specified range
Example
[root@sanchuang-linux ~]# echo abcdefg | cut -c 2		# Note: extract the 2nd character
b
[root@sanchuang-linux ~]# echo abcdefg | cut -c 2-5		# Note: extract characters 2-5
bcde
[root@sanchuang-linux ~]# echo abcdefg | cut -c 5-		# Note: extract from the 5th character to the end
efg

VIII. Exercise#

Exercise

1. Count the top three IPs in access.log
2. Display the sizes of all files in the /boot directory (including files in subdirectories), sorted from smallest to largest
3. Count the number of times each shell is used in /etc/passwd (sorted in descending order)
4. Count how many times the word 'sbin' appears in /etc/passwd
5. Display only the IP address of ens33
============================================================================================
Example 1: Count the top three IPs in access.log
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cut -d "-" -f 1 access.log | sort | uniq -c | sort -nr | head -n3
      7 192.168.0.42 
      6 192.168.0.193 
      5 192.168.0.38 
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c
      6 192.168.0.193
      5 192.168.0.21
      5 192.168.0.32
      5 192.168.0.37
      5 192.168.0.38
      7 192.168.0.42
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c | sort -nr
      7 192.168.0.42
      6 192.168.0.193
      5 192.168.0.38
      5 192.168.0.37
      5 192.168.0.32
      5 192.168.0.21
[root@sanchuang-linux ~]# cut -d " " -f1 access.log | sort | uniq -c | sort -nr | head -3
      7 192.168.0.42
      6 192.168.0.193
      5 192.168.0.38
============================================================================================
Example 2: Display the sizes of all files in the /boot directory (including files in subdirectories), sorted from smallest to largest
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# du -ak /boot | sort -n  		# Note: in k bytes
4	/boot/.bashrc
4	/boot/efi/EFI/centos
4	/boot/grub2/device.map
4	/boot/grub2/grubenv
4	/boot/grub2/i386-pc/adler32.mod
4	/boot/grub2/i386-pc/all_video.mod
4	/boot/grub2/i386-pc/aout.mod

Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ll -R | grep root | tr -s " " | cut -d " " -f 5,9 | sort -n
0 1214.txt
0 12244.txt
0 1224.txt
0 12456.txt
0 20
0 20
0 2020-09-24-18_25_03.txt

Method 3
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ll -R | grep root | awk '{print $5,$9}' | sort -n
0 1214.txt
0 12244.txt
0 1224.txt
0 12456.txt
0 20
0 20
0 2020-09-24-18_25_03.txt
0 abcd.txt
============================================================================================
Example 3: Count the number of times each shell is used in /etc/passwd (sorted in descending order)
[root@sanchuang-linux ~]# cut -d : -f7 /etc/passwd | sort | uniq -c | sort -nr
     31 /bin/bash
     19 /sbin/nologin
      1 /sbin/shutdown
      1 /sbin/halt
      1 /bin/sync
============================================================================================
Example 4: Count how many times the word 'sbin' appears in /etc/passwd
Method 1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# grep -o sbin /etc/passwd
sbin
sbin
………………
sbin
sbin
[root@sanchuang-linux ~]# grep -o sbin /etc/passwd | wc -l
25

Method 2
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# cat /etc/passwd | tr ":" "\n" | grep sbin | wc -l
25						# Note: tr replaces every : with a newline, so each field lands on its own line; grep and wc then count the fields that contain sbin
============================================================================================
Example 5: Display only the IP address of ens33
# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3 | cut -d"/" -f1
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ~]# ip a | grep ens33
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    inet 192.168.0.34/24 brd 192.168.0.255 scope global dynamic noprefixroute ens33
[root@sanchuang-linux ~]# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3
192.168.0.34/24
[root@sanchuang-linux ~]# ip a | grep ens33 | grep inet | tr -s " " | cut -d " " -f3 | cut -d"/" -f1
192.168.0.34

IX. awk specify delimiter -F#

awk specify delimiter -F

[root@sanchuang-linux ~]# awk -F":" '{print $1}' /etc/passwd
root
bin
daemon
adm
lp
sync
…………………………

X. grep#

grep command

The three text processing tools ==> awk grep sed
https://www.cnblogs.com/end/archive/2012/02/21/2360965.html
grep filters and is a general regular expression analysis program
grep, egrep, fgrep
Used for matching to filter

Purpose: search a file for lines that contain a specified string and display them.
Format: grep [options]... pattern file

Note: pattern --> template#

Can accept a regular expression

-i: ignore case when searching

-v: reverse search, output lines that do not match the pattern

-n: display line numbers that meet the pattern requirements

-r: recursively search all files

-o: only display matching content

-E: supports more metacharacters (supports extended regex)

-A N: output each matching line plus the N lines after it

-B N: output each matching line plus the N lines before it

Pattern

^…. : starts with something, the whole line starts with something

…..$ : ends with something, the whole line ends with something

Note: grep is a text processing command that can directly operate on text

=====================================================================
Example 1: -v: reverse search, output lines that do not match the pattern
---------------------------------------------------------------------
[root@sanchuang-linux ~]# grep -v "#" /etc/yum.repos.d/centos.repo   
# Note: do not output lines containing #
============================================================================================

Example 2: -i ignore case when searching
---------------------------------------------------------------------
[root@sanchuang-linux ~]# cd /etc/ssh/
[root@sanchuang-linux ssh]# pwd
/etc/ssh
[root@sanchuang-linux ssh]# grep -i "port" /etc/ssh/sshd_config 		# Note: ignore case when searching
# If you want to change the port on a SELinux system, you have to tell
# semanage port -a -t ssh_port_t -p tcp #PORTNUMBER
#Port 22
# WARNING: 'UsePAM no' is not supported in Fedora and may cause several
#GatewayPorts no
============================================================================================

Example 3: ignore case and display line numbers found
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ssh]# grep -i -n "port" /etc/ssh/sshd_config 		# Note: display line numbers that meet the pattern requirements
13:# If you want to change the port on a SELinux system, you have to tell
15:# semanage port -a -t ssh_port_t -p tcp #PORTNUMBER
17:#Port 22
102:# WARNING: 'UsePAM no' is not supported in Fedora and may cause several
108:#GatewayPorts no
============================================================================================

Example 4: -r recursively search all files
Note: search in all files under subdirectories
--------------------------------------------------------------------------------------------
[root@sanchuang-linux ssh]# grep "xxxxx" * -r  				# Note: recursively search in all files and subdirectories in the current directory
[root@sanchuang-linux ssh]# grep "GET" /var/log/nginx -r  	# Note: recursively search in the nginx directory
/var/log/nginx/error.log:2020/10/29 12:01:02 [error] 12767#0: *2 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.0.42, server: _, request: "GET /favicon.ico HTTP/1.1", host: "192.168.0.34", referrer: "http://192.168.0.34/"
/var/log/nginx/error.log:2020/10/29 12:01:58 [error] 12767#0: *2 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.0.42, server: _, request: "GET /favicon.ico HTTP/1.1", host: "192.168.0.34"
--------------------------------------------------------------------------------------------
# Recursively search all files in /var/log/nginx (including files in subdirectories)
[root@mysql-binary nginx]# grep "GET" /var/log/nginx -r

XI. Regular Expressions#

Regular Expressions

^aa indicates lines starting with aa
aa$ indicates lines ending with aa

[]      indicates a character set
[a-z]   takes one from a-z
[^a-z]  does not take a-z characters
grep ^[^a-zA-Z0-9_] grep_test.txt  displays lines not starting with letters, numbers, or underscores
Example 1: do not output lines starting with #
--------------------------------------------------------------------------------------------
[root@sanchuang-linux yum.repos.d]# grep -v ^# centos.repo
[root@sanchuang-linux yum.repos.d]# grep -v ^# centos.repo | grep -v ^$   # Note: do not output blank lines
Note: do not output blank lines       grep -v ^$
   Do not output lines starting with #   grep -v ^#
============================================================================================

Example 2: display only the lines in grep_test.txt that do not start with # and are not blank
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# cat grep_test.txt
#aaa
aaa#bbb

456
#
789
[root@sanchuang-linux chenpeng]# grep -v ^# grep_test.txt  # Note: keep only the lines that do not start with #
aaa#bbb

456
789

# Method 1
[root@sanchuang-linux chenpeng]# grep -v ^# grep_test.txt | grep -v ^$	# Note: filter out blank lines
aaa#bbb                                      # Note: ^$ matches only truly empty lines; a line that contains whitespace characters is not removed
456
789

# Method 2
Display only the lines in grep_test.txt that do not start with # and are not blank
[root@sanchuang-linux chenpeng]# grep -v -E "^#|^$" grep_test.txt 
aaa#bbb                         # Note: -E regular expression, | or, -v does not display
456                             # Note: -E: supports more metacharacters (supports extended regex)
789

XII. [] Indicates a Character Set (Regular Expression)#

[] Indicates a Character Set (Regular Expression)

[root@localhost chenpeng]# cat grep_test.txt 
abc
adc
Abdc
ac
a1c
axy
axc
123
777
Example 1: filter out abc adc
--------------------------------------------------------------------------------------------
[root@localhost chenpeng]# grep a[bd]c grep_test.txt        # Note: take one from the character set [bd]
abc
adc
[root@sanchuang-linux chenpeng]# grep a[a-z]c grep_test.txt # Note: take one character from a-z
abc                                        # Note: ac does not match, because exactly one character is required between a and c
adc                                        # Note: [a-z] indicates taking one from a-z
axc
[root@sanchuang-linux chenpeng]# grep a[0-9]c grep_test.txt     # Note: take one character from 0-9
a1c
--------------------------------------------------------------------------------------------

Example 2: [^a-z] does not take a-z characters
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# grep a[^a-z]c grep_test.txt    # Note: ^ takes the inverse, does not take a-z characters between a and c
a1c

Example 3: extract lines not starting with letters
--------------------------------------------------------------------------------------------
[root@sanchuang-linux chenpeng]# grep ^[^a-zA-Z] grep_test.txt  # Note: lines not starting with a-zA-Z
11c
123                                   # Note: space included
777
Note: grep ^[a-zA-Z] grep_test.txt  lines starting with letters
============================================================================================

Example 4: display lines not starting with letters, numbers, or underscores
Writing 1
[root@sanchuang-linux chenpeng]# grep ^[^a-zA-Z0-9_] grep_test.txt
Writing 2
[root@sanchuang-linux chenpeng]# grep -v ^[a-zA-Z0-9_] grep_test.txt
# Note: display lines not starting with letters, numbers, or underscores

XIII. Wildcards (Regular Expressions)#

Wildcards (Regular Expressions)

*     represents matching the previous item any number of times
?    represents matching the previous item 0 or 1 time
+     represents matching the previous item one to many times
.     placeholder for any character except \n
{n,m} matches the previous item n to m times
egrep is equivalent to grep -E
fgrep does not support any regex, ordinary text filtering
Example 1: * ? + .
--------------------------------------------------------------------------------------------
# Note: all matches are for the preceding character
[root@localhost ~]# cat grep_test.txt 
alc
axxc
ac
[root@localhost ~]# grep -E a.?c grep_test.txt    # Note: -E supports more extended regex
alc                                               # Note: .  placeholder (note = must have 1 character)
ac                                                # Note: .? indicates that there can be 0 to 1 character between a and c
[root@localhost ~]# grep -E a.*c grep_test.txt    # Note: .* indicates that there can be any number of characters between a and c
alc               # Note: egrep is equivalent to grep -E
axxc
ac
[root@localhost ~]# grep -E a.c grep_test.txt     # Note: . indicates that there is only 1 character between a and c
alc
[root@localhost ~]# grep -E a.+c grep_test.txt    # Note: represents that the previous character . appears 1 to multiple times
alc
axxc

Example 2: { }
--------------------------------------------------------------------------------------------
# Note: all matches are for the preceding character
[root@localhost ~]# egrep "a.{1}c" grep_test.txt    # Note: specify that . appears once
alc
[root@localhost ~]# egrep "a.{1,2}c" grep_test.txt  # Note: specify that . appears 1 to 2 times
alc
axxc

Example 3: { }
--------------------------------------------------------------------------------------------
[root@localhost ~]# cat grep_test.txt 
alc
axxc
ac
ayy1c
addddddc
[root@localhost ~]# egrep "a.{1,5}c" grep_test.txt           # Note: the previous item . appears 1 to 5 times
alc
axxc
ayy1c

egrep is equivalent to grep -E
fgrep does not support regex, ordinary text filtering

XIV. Exercise: grep Regular Expressions#

grep Regular Expressions

  1. Enter the /lianxi directory, copy /etc/passwd to the current directory, and then operate on passwd

  2. Find lines in the current passwd file that start with ftp or mail, output to the screen

grep -E "^ftp|^mail" passwd
egrep "^ftp|^mail" passwd
  1. Find lines in the current passwd file that do not start with r, m, or f
grep -v -E "^r|^m|^f" passwd
grep "^[^rmf]" passwd
  1. Find lines in the current passwd that end with bash
grep bash$ passwd
  1. Find valid lines in the /etc/login.defs file (do not display blank lines and comment lines starting with #)
grep -v -E "^#|^$" /etc/login.defs
  1. Find words with 15 letters in the /var/log/messages document
grep -E "[^a-zA-Z][a-zA-Z]{15}[^a-zA-Z]" /var/log/messages  # Note: a non-letter on each side, exactly 15 letters in the middle
grep -E "\b[a-zA-Z]{15}\b" /var/log/messages                # Note: \b matches the boundary of a word
  1. Find users in the /etc/passwd file whose usernames contain 'liu' and use bash
grep liu /etc/passwd | grep bash$ | cut -d":" -f1
  1. Find valid lines in /etc/ssh/sshd_config
grep -v -E "^#|^$" /etc/ssh/sshd_config
  1. Find lines in /etc/ssh/sshd_config that contain two consecutive identical characters
grep -E "(.)\1" /etc/ssh/sshd_config 
grep -E "(.)\1"  # Note: (.) captures any single character except newline; \1 is a back-reference that must match that same character again
  1. Find lines containing special characters
grep -E "[^0-9a-zA-Z]" grep_test.txt
  1. Find lines that do not contain numbers
grep -v "[0-9]" abc.txt
  1. Find IP addresses in /var/log/secure
cut -d " " -f11 /var/log/secure | grep -E "\.." | sort | uniq    Note: \. escape  the second . indicates any character
--------------------------------------------------------------------------------------------
grep -E "((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))" ip_test.txt
============================================================================================
IP address matching:
   IPv4: 4 groups separated by dots, each group in the range 0-255, e.g. 192.168.1.0
Note: match IP addresses
[root@sanchuang-linux ~]# grep -E "((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))" ip_test.txt
192.168.0.1
192.168.1.255
172.0.0.1

((([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))\.){3}(([0-9])|([1-9][0-9])|(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5]))
# Parentheses indicate a group

Analysis
0-255
0-9                                     single digit
[1-9][0-9]                              tens
(1[0-9][0-9])|(2[0-4][0-9])|(25[0-5])   hundreds

Loading...
Ownership of this post data is guaranteed by blockchain and smart contracts to the creator alone.