Go语言教程之边写边学:正则表达式 Regex
正则表达式是一个非常有用的工具,用于描述匹配文本的搜索模式。正则表达式只不过是定义搜索模式的一些字符序列。正则表达式用于解析、过滤、验证和从大文本中提取有意义的信息,例如从其他程序生成的日志和输出。
用于提取方括号之间的文本的正则表达式
package main
import (
"fmt"
"regexp"
"strings"
)
// main prints every piece of text that sits directly inside a pair of
// square brackets in a sample sentence.
func main() {
	const input = "this is a [sample] [[string]] with [SOME] special words"

	// The class [^\[\]] excludes both bracket characters, so for nested
	// brackets such as [[string]] only the innermost pair is matched.
	bracketRx := regexp.MustCompile(`\[([^\[\]]*)\]`)
	fmt.Printf("Pattern: %v\n", bracketRx.String())
	fmt.Println("Matched:", bracketRx.MatchString(input))

	fmt.Println("\nText between square brackets:")
	for _, m := range bracketRx.FindAllString(input, -1) {
		// FindAllString returns whole matches, brackets included, so the
		// surrounding brackets are stripped before printing.
		fmt.Println(strings.Trim(strings.Trim(m, "["), "]"))
	}
}
输出
Pattern: \[([^\[\]]*)\]
Matched: true
Text between square brackets:
sample
string
SOME
用于从字符串中提取所有非字母数字字符的正则表达式
package main
import (
"fmt"
"regexp"
)
// main lists every maximal run of characters that are neither ASCII letters
// nor ASCII digits in a sample string.
func main() {
	const text = "We @@@Love@@@@ #Go!$! ****Programming****Language^^^"

	nonAlnum := regexp.MustCompile(`[^a-zA-Z0-9]+`)
	fmt.Printf("Pattern: %v\n", nonAlnum.String())
	fmt.Println(nonAlnum.MatchString(text))

	// Spaces are not alphanumeric either, so they are part of the runs.
	runs := nonAlnum.FindAllString(text, -1)
	for i := 0; i < len(runs); i++ {
		fmt.Println(runs[i])
	}
}
输出
Pattern: [^a-zA-Z0-9]+
true
@@@
@@@@ #
!$! ****
****
^^^
用于从字符串中提取日期YYYY-MM-DD的正则表达式
package main
import (
"fmt"
"regexp"
)
// main pulls every YYYY-MM-DD shaped token out of a sentence and prints it.
func main() {
	sentence := "If I am 20 years 10 months and 14 days old as of August 17,2016 then my DOB would be 1995-10-03"

	// Four digits, a dash, two digits, a dash, two digits. Only the shape is
	// checked; the digits are not validated as a calendar date.
	isoDate := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
	fmt.Printf("Pattern: %v\n", isoDate.String())
	fmt.Println(isoDate.MatchString(sentence))

	for _, date := range isoDate.FindAllString(sentence, -1) {
		fmt.Println(date)
	}
}
输出
Pattern: \d{4}-\d{2}-\d{2}
true
1995-10-03
用于从字符串中提取IP地址(IPv4)的正则表达式
package main
import (
"fmt"
"regexp"
)
// main scans a block of proxy-list text and prints every dotted-quad IPv4
// address it contains, in order of appearance.
func main() {
	listing := `Proxy Port Last Check Proxy Speed Proxy Country Anonymity 118.99.81.204
118.99.81.204 8080 34 sec Indonesia - Tangerang Transparent 2.184.31.2 8080 58 sec
Iran Transparent 93.126.11.189 8080 1 min Iran - Esfahan Transparent 202.118.236.130
7777 1 min China - Harbin Transparent 62.201.207.9 8080 1 min Iraq Transparent`

	// One octet alternative (250-255, 200-249, or 0-199 with an optional
	// leading 0/1) followed by three more octets, each preceded by a dot.
	ipv4Rx := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)
	fmt.Printf("Pattern: %v\n", ipv4Rx.String())
	fmt.Println(ipv4Rx.MatchString(listing))

	addresses := ipv4Rx.FindAllString(listing, -1)
	for i := range addresses {
		fmt.Println(addresses[i])
	}
}
输出
Pattern: (25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}
true
118.99.81.204
118.99.81.204
2.184.31.2
93.126.11.189
202.118.236.130
62.201.207.9
用于从URL中提取域名的正则表达式
package main
import (
"fmt"
"regexp"
)
// main matches the leading scheme/user/host portion of a URL and prints it.
func main() {
	const rawURL = `http://www.suon.co.uk/product/1/7/3/`

	// Optional scheme, optional "user@" part, optional "www." prefix, then
	// the host up to the first ':', '/' or newline (capture group 1).
	hostRx := regexp.MustCompile(`^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/\n]+)`)
	fmt.Printf("Pattern: %v\n", hostRx.String())
	fmt.Println(hostRx.MatchString(rawURL))

	// FindAllString yields the whole match, so the scheme and "www." prefix
	// are part of what is printed; the bare host is only in capture group 1.
	for _, whole := range hostRx.FindAllString(rawURL, -1) {
		fmt.Println(whole)
	}
}
输出
Pattern: ^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/\n]+)
true
http://www.suon.co.uk
用于验证电子邮件地址的正则表达式
package main
import (
"fmt"
"regexp"
)
// main checks a handful of sample addresses against an e-mail pattern and
// prints whether each one matches.
func main() {
	candidates := []string{
		"ç$€§/az@gmail.com",
		"abcd@gmail_yahoo.com",
		"abcd@gmail-yahoo.com",
		"abcd@gmailyahoo",
		"abcd@gmail.yahoo",
	}

	// The pattern is an interpreted string because it contains a backtick.
	// It is anchored at both ends, so the whole address must match.
	emailRx := regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
	fmt.Printf("Pattern: %v\n", emailRx.String())

	// Blank line between the pattern and the results.
	fmt.Println()
	for _, addr := range candidates {
		fmt.Printf("Email: %v :%v\n", addr, emailRx.MatchString(addr))
	}
}
输出
Pattern: ^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$
Email: ç$€§/az@gmail.com :false
Email: abcd@gmail_yahoo.com :false
Email: abcd@gmail-yahoo.com :true
Email: abcd@gmailyahoo :true
Email: abcd@gmail.yahoo :true
用于验证电话号码的正则表达式
package main
import (
"fmt"
"regexp"
)
// main runs a set of sample phone numbers through a permissive phone-number
// pattern and prints the match result for each.
func main() {
	// tabs is the run of tab characters printed between the number and its
	// result, chosen per sample so the results line up.
	samples := []struct {
		number string
		tabs   string
	}{
		{"1(234)5678901x1234", "\t"},
		{"(+351) 282 43 50 50", "\t"},
		{"90191919908", "\t\t"},
		{"555-8909", "\t\t\t"},
		{"001 6867684", "\t\t"},
		{"001 6867684x1", "\t\t"},
		{"1 (234) 567-8901", "\t\t"},
		{"1-234-567-8901 ext1234", "\t"},
	}

	// Optional country code, then digit groups with optional parentheses and
	// separators, then an optional extension (#, ext, extension or x).
	phoneRx := regexp.MustCompile(`^(?:(?:\(?(?:00|\+)([1-4]\d\d|[1-9]\d?)\)?)?[\-\.\ \\\/]?)?((?:\(?\d{1,}\)?[\-\.\ \\\/]?){0,})(?:[\-\.\ \\\/]?(?:#|ext\.?|extension|x)[\-\.\ \\\/]?(\d+))?$`)
	fmt.Printf("Pattern: %v\n", phoneRx.String())

	// Blank line between the pattern and the results.
	fmt.Println()
	for _, s := range samples {
		fmt.Printf("Phone: %v%v:%v\n", s.number, s.tabs, phoneRx.MatchString(s.number))
	}
}
输出
Pattern: ^(?:(?:\(?(?:00|\+)([1-4]\d\d|[1-9]\d?)\)?)?[\-\.\ \\\/]?)?((?:\(?\d{1,}\)?[\-\.\ \\\/]?){0,})(?:[\-\.\ \\\/]?(?:#|ext\.?|extension|x)[\-\.\ \\\/]?(\d+))?$
Phone: 1(234)5678901x1234 :true
Phone: (+351) 282 43 50 50 :true
Phone: 90191919908 :true
Phone: 555-8909 :true
Phone: 001 6867684 :true
Phone: 001 6867684x1 :true
Phone: 1 (234) 567-8901 :true
Phone: 1-234-567-8901 ext1234 :true
用于验证正确的日期格式的正则表达式
package main
import (
"fmt"
"regexp"
)
// main tests sample strings against a DD/MM/YYYY pattern (years 1900-2099)
// and prints the match result for each.
func main() {
	samples := []string{
		"31/07/2010",
		"1/13/2010",
		"29/2/2007",
		"31/08/2010",
		"29/02/200a",
		"29/02/200a",
		"55/02/200a",
		"2_/02/2009",
	}

	// Day 1-31, month 1-12, year 19xx or 20xx, each with an optional leading
	// zero on day and month. The pattern has no ^ or $ anchors, so
	// MatchString is true whenever such a run occurs anywhere in the input.
	// Day/month combinations are not checked (e.g. 31/02 is accepted).
	dateRx := regexp.MustCompile("(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/((19|20)\\d\\d)")
	fmt.Printf("Pattern: %v\n", dateRx.String())

	// Blank line between the pattern and the results.
	fmt.Println()
	for _, s := range samples {
		fmt.Printf("Date: %v :%v\n", s, dateRx.MatchString(s))
	}
}
输出
Pattern: (0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/((19|20)\d\d)
Date: 31/07/2010 :true
Date: 1/13/2010 :false
Date: 29/2/2007 :true
Date: 31/08/2010 :true
Date: 29/02/200a :false
Date: 29/02/200a :false
Date: 55/02/200a :false
Date: 2_/02/2009 :false
用于验证常用信用卡号的正则表达式
package main
import (
"fmt"
"regexp"
)
// main checks sample card numbers against a pattern built from several
// prefix/length alternatives and prints the match result for each.
func main() {
	cards := []string{
		"4111111111111111",
		"346823285239073",
		"370750517718351",
		"4556229836495866",
		"5019717010103742",
		"76009244561",
		"4111-1111-1111-1111",
		"5610591081018250",
		"30569309025904",
		"6011111111111117",
	}

	// Anchored at both ends and digits only, so separators such as '-' make a
	// number fail. Only prefix and length are tested; no checksum is applied.
	cardRx := regexp.MustCompile(`^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$`)
	fmt.Printf("Pattern: %v\n", cardRx.String())

	// Blank line between the pattern and the results.
	fmt.Println()
	for _, number := range cards {
		fmt.Printf("CC : %v :%v\n", number, cardRx.MatchString(number))
	}
}
输出
Pattern: ^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$
CC : 4111111111111111 :true
CC : 346823285239073 :true
CC : 370750517718351 :true
CC : 4556229836495866 :true
CC : 5019717010103742 :false
CC : 76009244561 :false
CC : 4111-1111-1111-1111 :false
CC : 5610591081018250 :true
CC : 30569309025904 :true
CC : 6011111111111117 :true
使用正则表达式将任何非字母数字字符序列替换为短划线
package main
import (
"fmt"
"log"
"regexp"
)
// main collapses every run of non-alphanumeric characters in a sample string
// into a single dash and prints the result.
func main() {
	const raw = "#Golang#Python$Php&Kotlin@@"

	nonAlnum, compileErr := regexp.Compile("[^A-Za-z0-9]+")
	if compileErr != nil {
		log.Fatal(compileErr)
	}

	// The '+' makes each whole run one match, so "@@" becomes a single "-".
	fmt.Println(nonAlnum.ReplaceAllString(raw, "-"))
}
输出
-Golang-Python-Php-Kotlin-
使用正则表达式替换第一次出现的匹配字符串
package main
import (
"fmt"
"regexp"
)
// main replaces only the first "Php" in a sample string with "Java" and
// prints the result.
func main() {
	const (
		input    = "Php-Golang-Php-Python-Php-Kotlin"
		template = "${1}Java$2"
	)

	// The lazy (.*?) stops at the first "Php"; the greedy (.*) then takes the
	// rest of the string, so the anchored pattern matches once and later
	// occurrences are copied through unchanged inside group 2.
	firstPhp := regexp.MustCompile("^(.*?)Php(.*)$")
	fmt.Println(firstPhp.ReplaceAllString(input, template))
}
输出
Java-Golang-Php-Python-Php-Kotlin
以空格拆分字符串的正则表达式
package main
import (
"fmt"
"regexp"
)
// main breaks a sample string into its whitespace-separated words and prints
// one word per line.
func main() {
	const text = "Split String on \nwhite \tspaces."

	// \S+ matches each maximal run of non-whitespace characters, so spaces,
	// tabs and newlines all act as separators.
	wordRx := regexp.MustCompile(`\S+`)
	fmt.Printf("Pattern: %v\n", wordRx.String())
	fmt.Printf("String contains any match: %v\n", wordRx.MatchString(text))

	words := wordRx.FindAllString(text, -1)
	for i := range words {
		fmt.Println(words[i])
	}
}
输出
Pattern: \S+
String contains any match: true
Split
String
on
white
spaces.
从字符串中提取数字的正则表达式
package main
import (
"fmt"
"regexp"
)
// main extracts the numbers (optionally signed, optionally with a decimal
// part) embedded in a sample string and prints one per line.
func main() {
	const text = "Hello X42 I'm a Y-32.35 string Z30"

	// Optional '-', a digit, further digits or commas, an optional '.', then
	// a trailing run. Note that [\d{2}] is a character class (a digit, '{',
	// '2' or '}'), not a "two digits" repetition.
	numberRx := regexp.MustCompile(`[-]?\d[\d,]*[\.]?[\d{2}]*`)
	fmt.Printf("Pattern: %v\n", numberRx.String())
	fmt.Printf("String contains any match: %v\n", numberRx.MatchString(text))

	for _, num := range numberRx.FindAllString(text, -1) {
		fmt.Println(num)
	}
}
输出
Pattern: [-]?\d[\d,]*[\.]?[\d{2}]*
String contains any match: true
42
-32.35
30
从给定路径中提取文件名的正则表达式
package main
import (
"fmt"
"regexp"
)
// stemRx splits a slash-separated path into an optional directory prefix
// (group 1), the file name without its last extension (group 2) and the last
// extension including its dot (group 3).
var stemRx = regexp.MustCompile(`^(.*/)?(?:$|(.+?)(?:(\.[^.]*$)|$))`)

// fileStem returns the last path element of p with its final extension
// removed. It returns "" when p ends in a slash, is empty, or does not match
// the pattern at all (for example because it contains a newline, which '.'
// does not match), instead of panicking on a nil submatch slice.
func fileStem(p string) string {
	groups := stemRx.FindStringSubmatch(p)
	if groups == nil {
		return ""
	}
	return groups[2]
}

// main prints the extension-less file name of a URL and of a file-system path.
func main() {
	fmt.Println(fileStem(`http://www.golangprograms.com/regular-expressions.html`))
	fmt.Println(fileStem(`/home/me/dir3/dir3a/dir3ac/filepat.png`))
}
输出
regular-expressions
filepat
使用正则表达式将字符串以大写字母拆分
package main
import (
"fmt"
"regexp"
)
// main cuts a sample string into pieces that each begin with an ASCII
// capital letter and prints one piece per line.
func main() {
	const text = "Hello X42 I'm a Y-32.35 string Z30"

	// A capital letter followed by everything up to (not including) the next
	// capital; spaces before the next capital stay attached to the piece.
	capitalRx := regexp.MustCompile(`[A-Z][^A-Z]*`)
	fmt.Printf("Pattern: %v\n", capitalRx.String())

	pieces := capitalRx.FindAllString(text, -1)
	for i := 0; i < len(pieces); i++ {
		fmt.Println(pieces[i])
	}
}
输出
Pattern: [A-Z][^A-Z]*
Hello
X42
I'm a
Y-32.35 string
Z30
用于获取括号之间的字符串的正则表达式
package main
import (
"fmt"
"regexp"
"strings"
)
// main prints the text found between parentheses in a sample sentence.
func main() {
	const input = "This is a (sample) ((string)) with (SOME) special words"

	parenRx := regexp.MustCompile(`\((.*?)\)`)
	fmt.Printf("Pattern: %v\n", parenRx.String())

	fmt.Println("\nText between parentheses:")
	for _, m := range parenRx.FindAllString(input, -1) {
		// For "((string))" the lazy match is "((string)", so every leading
		// '(' and trailing ')' is trimmed rather than relying on group 1.
		fmt.Println(strings.Trim(strings.Trim(m, "("), ")"))
	}
}
输出
Pattern: \((.*?)\)
Text between parentheses:
sample
string
SOME
将字符串中的符号替换为空格
package main
import (
"fmt"
"log"
"regexp"
)
// main replaces every character that is not a word character with a space
// and prints the result.
func main() {
	sentence := "how much for the maple syrup? $20.99? That's ridiculous!!!"

	nonWord, compileErr := regexp.Compile(`[^\w]`)
	if compileErr != nil {
		log.Fatal(compileErr)
	}

	// The class has no '+', so each symbol is replaced one-for-one and runs
	// of symbols become runs of spaces; existing spaces map to themselves.
	fmt.Println(nonWord.ReplaceAllString(sentence, " "))
}
输出
how much for the maple syrup 20 99 That s ridiculous
使用正则表达式替换字符串中的表情符号字符
package main
import (
"fmt"
"regexp"
)
// emojiRx matches a single code point in U+1F600-U+1F6FF or U+2600-U+26FF.
// Both ranges sit in ONE character class: joining two bracket expressions
// with '|' inside a single pair of brackets would not create an alternation,
// it would add the literal characters '|' and '[' to the class and cause
// them to be replaced as well.
var emojiRx = regexp.MustCompile(`[\x{1F600}-\x{1F6FF}\x{2600}-\x{26FF}]`)

// main replaces each emoji in a sample sentence with the marker "[e]" and
// prints the result.
func main() {
	str := emojiRx.ReplaceAllString("Thats a nice joke 😆😆😆 😛", `[e]`)
	fmt.Println(str)
}
输出
Thats a nice joke [e][e][e] [e]
获取textarea标记之间的文本的正则表达式
package main
import (
"fmt"
"regexp"
)
// textareaRx captures the body of each <textarea> element in group 1.
// (?s) lets '.' match newlines so multi-line bodies are found, and the lazy
// (.*?) stops at the nearest closing tag so that several textareas in one
// document are reported separately instead of being merged into one match.
var textareaRx = regexp.MustCompile(`(?s)<textarea[^>]*>(.*?)</textarea>`)

// textareaContents returns the text between every <textarea ...> and
// </textarea> pair in html, in document order. The result is empty when the
// document contains no textarea.
func textareaContents(html string) []string {
	matches := textareaRx.FindAllStringSubmatch(html, -1)
	contents := make([]string, 0, len(matches))
	for _, m := range matches {
		contents = append(contents, m[1])
	}
	return contents
}

// main prints the body of each textarea found in a sample HTML form.
func main() {
	str1 := `<html><body>
<form name="query" action="http://www.example.net/action.php" method="post">
<textarea type="text" name="nameiknow">The text I want</textarea>
<div id="button">
<input type="submit" value="Submit" />
</div>
</form>
</body></html>`

	for _, body := range textareaContents(str1) {
		fmt.Println(body)
	}
}
输出
The text I want
用于匹配HH:MM时间格式的正则表达式
package main
import (
"fmt"
"regexp"
)
// main checks sample strings against an H:M / HH:MM 24-hour time pattern and
// prints the match result for each.
func main() {
	samples := []string{"8:2", "9:9", "12:29", "02:5", "23:59", "55:59", "0:01"}

	// Hours 0-23 and minutes 0-59, each accepted with or without a leading
	// zero; the anchors require the whole string to be a time.
	timeRx := regexp.MustCompile(`^([0-9]|0[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9])$`)
	fmt.Printf("Pattern: %v\n", timeRx.String())

	for _, s := range samples {
		fmt.Printf("Time: %v\t:%v\n", s, timeRx.MatchString(s))
	}
}
输出
Pattern: ^([0-9]|0[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9])$
Time: 8:2 :true
Time: 9:9 :true
Time: 12:29 :true
Time: 02:5 :true
Time: 23:59 :true
Time: 55:59 :false
Time: 0:01 :true