Go语言教程之边写边学:正则表达式 Regex

正则表达式是一个非常有用的工具,用于描述匹配文本的搜索模式。正则表达式只不过是定义搜索模式的一些字符序列。正则表达式用于解析、过滤、验证和从大文本中提取有意义的信息,例如从其他程序生成的日志和输出。

 

用于提取方括号之间的文本的正则表达式

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	str1 := "this is a [sample] [[string]] with [SOME] special words"

	re := regexp.MustCompile(`\[([^\[\]]*)\]`)
	fmt.Printf("Pattern: %v\n", re.String())      // print pattern
	fmt.Println("Matched:", re.MatchString(str1)) // true

	fmt.Println("\nText between square brackets:")
	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		element = strings.Trim(element, "[")
		element = strings.Trim(element, "]")
		fmt.Println(element)
	}
}

输出

Pattern: \[([^\[\]]*)\]
Matched: true
Text between square brackets:
sample
string
SOME

 

用于从字符串中提取所有非字母数字字符

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "We @@@Love@@@@ #Go!$! ****Programming****Language^^^"

	re := regexp.MustCompile(`[^a-zA-Z0-9]+`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Println(re.MatchString(str1))        // true

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: [^a-zA-Z0-9]+
true
 @@@
@@@@ #
!$! ****
****
^^^

 

用于从字符串中提取日期YYYY-MM-DD的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "If I am 20 years 10 months and 14 days old as of August 17,2016 then my DOB would be 1995-10-03"

	re := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern

	fmt.Println(re.MatchString(str1)) // true

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: \d{4}-\d{2}-\d{2}
true
1995-10-03

 

用于从字符串中提取DNS主机名或IP地址的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := `Proxy Port Last Check Proxy Speed Proxy Country Anonymity 118.99.81.204
	118.99.81.204 8080 34 sec Indonesia - Tangerang Transparent 2.184.31.2 8080 58 sec 
	Iran Transparent 93.126.11.189 8080 1 min Iran - Esfahan Transparent 202.118.236.130 
	7777 1 min China - Harbin Transparent 62.201.207.9 8080 1 min Iraq Transparent`

	re := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Println(re.MatchString(str1)) // true

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: (25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}
true
118.99.81.204
118.99.81.204
2.184.31.2
93.126.11.189
202.118.236.130
62.201.207.9

 

用于从URL中提取域名的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := `http://www.suon.co.uk/product/1/7/3/`

	re := regexp.MustCompile(`^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/\n]+)`)
	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Println(re.MatchString(str1)) // true

	submatchall := re.FindAllString(str1,-1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: ^(?:https?:\/\/)?(?:[^@\/\n]+@)?(?:www\.)?([^:\/\n]+)
true
http://www.suon.co.uk

 

用于验证电子邮件地址的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "ç$€§/az@gmail.com"
	str2 := "abcd@gmail_yahoo.com"
	str3 := "abcd@gmail-yahoo.com"
	str4 := "abcd@gmailyahoo"
	str5 := "abcd@gmail.yahoo"

	re := regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern	
	fmt.Printf("\nEmail: %v :%v\n", str1, re.MatchString(str1))
	fmt.Printf("Email: %v :%v\n", str2, re.MatchString(str2))
	fmt.Printf("Email: %v :%v\n", str3, re.MatchString(str3))
	fmt.Printf("Email: %v :%v\n", str4, re.MatchString(str4))
	fmt.Printf("Email: %v :%v\n", str5, re.MatchString(str5))
}

输出

Pattern: ^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$

Email: ç$?§/az@gmail.com :false
Email: abcd@gmail_yahoo.com :false
Email: abcd@gmail-yahoo.com :true
Email: abcd@gmailyahoo :true
Email: abcd@gmail.yahoo :true

 

用于验证电话号码的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "1(234)5678901x1234"
	str2 := "(+351) 282 43 50 50"
	str3 := "90191919908"
	str4 := "555-8909"
	str5 := "001 6867684"
	str6 := "001 6867684x1"
	str7 := "1 (234) 567-8901"
	str8 := "1-234-567-8901 ext1234"

	re := regexp.MustCompile(`^(?:(?:\(?(?:00|\+)([1-4]\d\d|[1-9]\d?)\)?)?[\-\.\ \\\/]?)?((?:\(?\d{1,}\)?[\-\.\ \\\/]?){0,})(?:[\-\.\ \\\/]?(?:#|ext\.?|extension|x)[\-\.\ \\\/]?(\d+))?$`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Printf("\nPhone: %v\t:%v\n", str1, re.MatchString(str1))
	fmt.Printf("Phone: %v\t:%v\n", str2, re.MatchString(str2))
	fmt.Printf("Phone: %v\t\t:%v\n", str3, re.MatchString(str3))
	fmt.Printf("Phone: %v\t\t\t:%v\n", str4, re.MatchString(str4))
	fmt.Printf("Phone: %v\t\t:%v\n", str5, re.MatchString(str5))
	fmt.Printf("Phone: %v\t\t:%v\n", str6, re.MatchString(str6))
	fmt.Printf("Phone: %v\t\t:%v\n", str7, re.MatchString(str7))
	fmt.Printf("Phone: %v\t:%v\n", str8, re.MatchString(str8))
}

输出

Pattern: ^(?:(?:\(?(?:00|\+)([1-4]\d\d|[1-9]\d?)\)?)?[\-\.\ \\\/]?)?((?:\(?\d{1,}\)?[\-\.\ \\\/]?){0,})(?:[\-\.\ \\\/]?(?:#|ext\.?|extension|x
)[\-\.\ \\\/]?(\d+))?$

Phone: 1(234)5678901x1234       :true
Phone: (+351) 282 43 50 50      :true
Phone: 90191919908              :true
Phone: 555-8909                 :true
Phone: 001 6867684              :true
Phone: 001 6867684x1            :true
Phone: 1 (234) 567-8901         :true
Phone: 1-234-567-8901 ext1234   :true

 

用于验证正确的日期格式的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "31/07/2010"
	str2 := "1/13/2010"
	str3 := "29/2/2007"
	str4 := "31/08/2010"
	str5 := "29/02/200a"
	str6 := "29/02/200a"
	str7 := "55/02/200a"
	str8 := "2_/02/2009"

	re := regexp.MustCompile("(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/((19|20)\\d\\d)")

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Printf("\nDate: %v :%v\n", str1, re.MatchString(str1))
	fmt.Printf("Date: %v :%v\n", str2, re.MatchString(str2))
	fmt.Printf("Date: %v :%v\n", str3, re.MatchString(str3))
	fmt.Printf("Date: %v :%v\n", str4, re.MatchString(str4))
	fmt.Printf("Date: %v :%v\n", str5, re.MatchString(str5))
	fmt.Printf("Date: %v :%v\n", str6, re.MatchString(str6))
	fmt.Printf("Date: %v :%v\n", str7, re.MatchString(str7))
	fmt.Printf("Date: %v :%v\n", str8, re.MatchString(str8))
}

输出

Pattern: (0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/((19|20)\d\d)
Date: 31/07/2010 :true
Date: 1/13/2010 :false
Date: 29/2/2007 :true
Date: 31/08/2010 :true
Date: 29/02/200a :false
Date: 29/02/200a :false
Date: 55/02/200a :false
Date: 2_/02/2009 :false

 

用于验证常用信用卡号的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "4111111111111111"
	str2 := "346823285239073"
	str3 := "370750517718351"
	str4 := "4556229836495866"
	str5 := "5019717010103742"
	str6 := "76009244561"
	str7 := "4111-1111-1111-1111"
	str8 := "5610591081018250"
	str9 := "30569309025904"
	str10 := "6011111111111117"

	re := regexp.MustCompile(`^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Printf("\nCC : %v :%v\n", str1, re.MatchString(str1))
	fmt.Printf("CC : %v :%v\n", str2, re.MatchString(str2))
	fmt.Printf("CC : %v :%v\n", str3, re.MatchString(str3))
	fmt.Printf("CC : %v :%v\n", str4, re.MatchString(str4))
	fmt.Printf("CC : %v :%v\n", str5, re.MatchString(str5))
	fmt.Printf("CC : %v :%v\n", str6, re.MatchString(str6))
	fmt.Printf("CC : %v :%v\n", str7, re.MatchString(str7))
	fmt.Printf("CC : %v :%v\n", str8, re.MatchString(str8))
	fmt.Printf("CC : %v :%v\n", str9, re.MatchString(str9))
	fmt.Printf("CC : %v :%v\n", str10, re.MatchString(str10))
}

输出

Pattern: ^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|18
00|35\d{3})\d{11})$
CC : 4111111111111111 :true
CC : 346823285239073 :true
CC : 370750517718351 :true
CC : 4556229836495866 :true
CC : 5019717010103742 :false
CC : 76009244561 :false
CC : 4111-1111-1111-1111 :false
CC : 5610591081018250 :true
CC : 30569309025904 :true
CC : 6011111111111117 :true

 

使用正则表达式将任何非字母数字字符序列替换为短划线

package main

import (
	"fmt"
	"log"
	"regexp"
)

func main() {
	reg, err := regexp.Compile("[^A-Za-z0-9]+")
	if err != nil {
		log.Fatal(err)
	}
	newStr := reg.ReplaceAllString("#Golang#Python$Php&Kotlin@@", "-")
	fmt.Println(newStr)
}

输出

-Golang-Python-Php-Kotlin-

 

使用替换第一次出现的匹配的字符串正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	strEx := "Php-Golang-Php-Python-Php-Kotlin"
	reStr := regexp.MustCompile("^(.*?)Php(.*)$")
	repStr := "${1}Java$2"
	output := reStr.ReplaceAllString(strEx, repStr)
	fmt.Println(output)
}

输出

Java-Golang-Php-Python-Php-Kotlin

 

以空格拆分字符串的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "Split   String on \nwhite    \tspaces."

	re := regexp.MustCompile(`\S+`)

	fmt.Printf("Pattern: %v\n", re.String()) // Print Pattern

	fmt.Printf("String contains any match: %v\n", re.MatchString(str1)) // True

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: \S+
String contains any match: true
Split
String
on
white
spaces.

 

从字符串中提取数字的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "Hello X42 I'm a Y-32.35 string Z30"

	re := regexp.MustCompile(`[-]?\d[\d,]*[\.]?[\d{2}]*`)

	fmt.Printf("Pattern: %v\n", re.String()) // Print Pattern

	fmt.Printf("String contains any match: %v\n", re.MatchString(str1)) // True

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: [-]?\d[\d,]*[\.]?[\d{2}]*
String contains any match: true
42
-32.35
30

 

从给定路径中提取文件名的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`^(.*/)?(?:$|(.+?)(?:(\.[^.]*$)|$))`)

	str1 := `http://www.golangprograms.com/regular-expressions.html`
	match1 := re.FindStringSubmatch(str1)
	fmt.Println(match1[2])

	str2 := `/home/me/dir3/dir3a/dir3ac/filepat.png`
	match2 := re.FindStringSubmatch(str2)
	fmt.Println(match2[2])
}

输出

regular-expressions
filepat

 

使用正则表达式将字符串以大写字母拆分

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "Hello X42 I'm a Y-32.35 string Z30"

	re := regexp.MustCompile(`[A-Z][^A-Z]*`)

	fmt.Printf("Pattern: %v\n", re.String()) // Print Pattern

	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element)
	}
}

输出

Pattern: [A-Z][^A-Z]*
Hello
X42
I'm a
Y-32.35 string
Z30

 

用于获取括号之间的字符串的正则表达式

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	str1 := "This is a (sample) ((string)) with (SOME) special words"

	re := regexp.MustCompile(`\((.*?)\)`)
	fmt.Printf("Pattern: %v\n", re.String()) // print pattern

	fmt.Println("\nText between parentheses:")
	submatchall := re.FindAllString(str1, -1)
	for _, element := range submatchall {
		element = strings.Trim(element, "(")
		element = strings.Trim(element, ")")
		fmt.Println(element)
	}
}

输出

Pattern: \((.*?)\)
Text between parentheses:
sample
string
SOME

 

将字符串中的符号替换为空格

package main 

import ( 
	"fmt" 
	"log" 
	"regexp" 
)

func main() { 
	str1 := "how much for the maple syrup? $20.99? That's ridiculous!!!" 
	re, err := regexp.Compile(`[^\w]`) 
	if err != nil { 
		log.Fatal(err) 
	} 
	str1 = re.ReplaceAllString(str1, " ") 
	fmt.Println(str1) 
}

输出

how much for the maple syrup   20 99  That s ridiculous

 

使用正则表达式替换字符串中的表情符号字符

package main 
import (
	"fmt" 
	"regexp" 
) 

func main() { 
	var emojiRx = regexp.MustCompile(`[\x{1F600}-\x{1F6FF}|[\x{2600}-\x{26FF}]`) 
	var str = emojiRx.ReplaceAllString("Thats a nice joke 😆😆😆 😛", `[e]`) 
	fmt.Println(str) 
}

输出

Thats a nice joke [e][e][e] [e]

 

获取textarea标记之间的文本的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := `<html><body>
			<form name="query" action="http://www.example.net/action.php" method="post">
				<textarea type="text" name="nameiknow">The text I want</textarea>
				<div id="button">
					<input type="submit" value="Submit" />
				</div>
			</form>
			</body></html>`

	re := regexp.MustCompile(`<textarea.*?>(.*)</textarea>`)

	submatchall := re.FindAllStringSubmatch(str1, -1)
	for _, element := range submatchall {
		fmt.Println(element[1])
	}
}

输出

The text I want

 

用于匹配HH:MM时间格式的正则表达式

package main

import (
	"fmt"
	"regexp"
)

func main() {
	str1 := "8:2"
	str2 := "9:9"
	str3 := "12:29"
	str4 := "02:5"
	str5 := "23:59"
	str6 := "55:59"
	str7 := "0:01"

	re := regexp.MustCompile(`^([0-9]|0[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9])$`)

	fmt.Printf("Pattern: %v\n", re.String()) // print pattern
	fmt.Printf("Time: %v\t:%v\n", str1, re.MatchString(str1))
	fmt.Printf("Time: %v\t:%v\n", str2, re.MatchString(str2))
	fmt.Printf("Time: %v\t:%v\n", str3, re.MatchString(str3))
	fmt.Printf("Time: %v\t:%v\n", str4, re.MatchString(str4))
	fmt.Printf("Time: %v\t:%v\n", str5, re.MatchString(str5))
	fmt.Printf("Time: %v\t:%v\n", str6, re.MatchString(str6))
	fmt.Printf("Time: %v\t:%v\n", str7, re.MatchString(str7))
}

输出

Pattern: ^([0-9]|0[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9])$
Time: 8:2       :true
Time: 9:9       :true
Time: 12:29     :true
Time: 02:5      :true
Time: 23:59     :true
Time: 55:59     :false
Time: 0:01      :true