python regular expression re module

'''
regular expression
regular expression
A regular expression is a special character sequence that can be thought of as a string,
but one that is more powerful than an ordinary string
Regular expression usage scenario:
Validating input when registering or setting a password, e.g. rejecting special characters or checking the length of the password
When crawling, use regular to grab the specified data in the page
Usage principle of regular expression:
Regular expressions are a powerful tool for processing strings, with their own syntax and an independent processing engine
They are less efficient than the built-in string methods, but more general and more powerful
If the built-in string methods can solve the problem, prefer the built-in methods
Import module re
'''
'''
1.match: checks whether the string to be verified starts with content that matches the regular expression

First parameter: regular syntax string
Second parameter: the string to be verified
The third parameter: set the configuration attribute in the verification process
'''
 1 import re
 2 
 3 #If the return result is None, the match failed
 4 #If it is a Match object, the match succeeded
 5 res= re.match("La La La",'La La ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha')
 6 print(res)#<re.Match object; span=(0, 3), match='La La La'>
 7 '''
 8 ^x --Indicates that the string must start with x
 9 x$ --Indicates that the string must end with x
10 '''
11 res= re.match("^La La La $",'La La ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha ha')
12 print(res)#None
13 #If the result is returned None Indicates that the match failed
14 #If it's a match Object indicates that the match is successful
'''
2.search method
Searches the string to be verified for data matching the regular expression. If found, returns the match object for the first match
If not, returns None
In a regular expression, . is a wildcard that matches any character except \n
'''

 1 pattern = 'c.t' #Look for data with any single character between c and t
 2 src_str ="lll cat crt"
 3 res =re.search(pattern,src_str)
 4 print(res)#<re.Match object; span=(4, 7), match='cat'>
 5 
 6 #Get the matched text from the match object
 7 data = res.group()
 8 print(data)
 9 #Get the position (start, end) of this data in the original string
10 start_end = res.span()
11 print(start_end)#(4, 7)

'''
findall: find all strings that meet the regular syntax in the string to be verified,
If so, the matched content is stored in the list
If no data is matched, the result is an empty list
'''

1 pattern = 'c.t' #Look for data with any single character between c and t
2 src_str ="lll cat crt"
3 res =re.findall(pattern,src_str)
4 print(res)#['cat', 'crt']

'''
finditer:
Finds all strings in the string to be verified that meet the regular syntax
If there are matches, they are returned as an iterator of match objects
If no data is matched, the result is an empty iterator
'''

1 pattern = 'c.t' #Look for data with any single character between c and t
2 src_str ="lll cat crt"
3 res =re.finditer(pattern,src_str)
4 print(res)#<callable_iterator object at 0x...> (an iterator of match objects, not a list)
5 #Traverse the iterator
6 for data in res:
7     print(data)
8 #<re.Match object; span=(4, 7), match='cat'>
9 #<re.Match object; span=(8, 11), match='crt'>

'''
Replace operation sub
Python strings also have a built-in alternative method, replace
Its arguments are: the content to be replaced, the replacement content, and (optionally) the number of replacements
'''

 1 src_str ="Lala, is there a hole in your brain"
 2 #Replace the pit with water
 3 replace_str = src_str.replace('pit','water')
 4 print(replace_str)#Lala, is there water in your brain
 5 
 6 #Replace only one
 7 src_str ="Lala, cough"
 8 #Replace cough with
 9 replace_str = src_str.replace('cough','la',1)
10 print(replace_str)#Lala Lala cough
11 
12 src_str = 'Hello 00, I'm five seven 57'
13 #Remove numbers from characters
14 
15 #Traverse the original string
16 for ch in src_str:
17     #isdigit Judge whether it is a pure number
18     #Assign the replacement completed content to the variable src_str
19     if ch.isdigit():
20         src_str =src_str.replace(ch,'')
21 print(src_str)#Hello, I'm
22 
23 #Using regular expressions
24 #Regular syntax: [0-9] matches any digit from 0 to 9
25 # res = re.sub('[0-9]', '', src_str)
26 # print(res)#Hello, this is may seventh

'''
split cutting
Cuts the specified string, using the content matched by the regular syntax as the separator
'''

1 src_str = 'i26372hh387hdgddl'
2 #Get word by cutting string with numeric cutter
3 res = re.split('[0-9]+', src_str)
4 print(res)#['i', 'hh', 'hdgddl']
5 #Without the + (pattern '[0-9]') the result would be ['i', '', '', '', '', 'hh', '', '', 'hdgddl']; adding + removes the '' entries

1 '''
2 Syntax in regular expressions
3 '''
4 #(.) is the wildcard in regular expressions; it matches any character except \n
5 s = 'ggg\nhhh%%%-'
6 res = re.findall(".",s)
7 print(res)#['g', 'g', 'g', 'h', 'h', 'h', '%', '%', '%', '-']
8 
9 #Set matching[] Matches any of the characters listed in brackets
'''
[0-9] - match any number
[a-z] - match any lowercase letter
[A-Z] - match any uppercase letter
[0-9A-Za-z] - matches any digit or letter
[adhueur] - matches any one of the characters in adhueur
[^0-9] - matches any non-numeric character
'''
1 s = '3746873hdshdg$$#^$_'
2 res = re.findall("[0-9a-z_]",s)
3 print(res)#['3', '7', '4', '6', '8', '7', '3', 'h', 'd', 's', 'h', 'd', 'g', '_']
#The escape character in regular expressions is \
#Sometimes you want to match a literal dot (.), i.e. the dot itself rather than the wildcard
#In that case the dot must be escaped so that it keeps its literal meaning
#For example, when validating an IP address (0.0.0.0) or an email address (3673@qq.com)
1 res = re.match("\.","o")
2 print(res) #None
'''
\d -- equivalent to [0-9], matches any digit from 0 to 9
\D -- equivalent to [^0-9], matches any character that is not a digit
\w -- matches any digit, letter, underscore or Chinese character
\W -- the inverse of \w: matches any character that is not a digit, letter, underscore or Chinese character
'''
1 res = re.match('\d','m')
2 print(res)
3 
4 res =re.match('\D','O')
5 print(res)#<re.Match object; span=(0, 1), match='O'>
6 
7 res = re.match('\w','Ha')
8 print(res)#<re.Match object; span=(0, 1), match='H'>
'''
Quantifiers
* indicates that the preceding symbol appears any number of times in a row (including zero)
+ indicates that the preceding symbol appears at least once in a row
? indicates that the preceding symbol appears at most once (zero or one time)
{m} indicates that the preceding symbol appears exactly m times in a row; m and n are numeric values
{m,n} indicates that the preceding symbol appears in a row at least m times and at most n times
'''

 1 res = re.match('.*','hsgdhs')
 2 print(res)#<re.Match object; span=(0, 6), match='hsgdhs'>
 3 
 4 res = re.match('.+','')
 5 print(res)#None
 6 
 7 res = re.match('^.{$}','666')
 8 print(res)#None
 9 
10 res = re.match('^.{$}','888')
11 print(res)#None
12 
13 res = re.match('^.{3,6}','888')
14 print(res)#<re.Match object; span=(0, 3), match='888'>

#Grouping
'''
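() groups the enclosed content so that it is matched as a whole
For example, 163 qq sina
[163] would match a single character (1, 6 or 3), whereas (163) matches 163 as a whole
| indicates an "or" relationship
Email verification: the domain is any one of 163 qq sina
Email format:
Starts with 6 to 12 digits, letters or underscores (note: the example pattern below uses {6,8}, i.e. 6 to 8)
@(163|qq|sina).com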
'''

1 pattern ="^[0-9a-zA-Z_]{6,8}@(163|qq|sina)\.com$"
2 email = input("Please enter a mailbox:")
3 res =re.match(pattern,email)
4 if res != None:
5     print(f"{email}It's legal")
6 
7 else:
8     print(f"{email}wrongful")

 

 
 
 

 

 

Tags: Python

Posted by nomad9 on Sun, 22 May 2022 11:47:14 +0300