Python Notes

Ruitian Zhong

Aug 7, 2023 2 min read Python

Regular Expression

"""
Regular Expression Playground
"""

import re

# match() only succeeds at the very start of the string, while search()
# scans forward; 'www' sits at offset 0 here, so both report the same span.
print(re.compile('www').match('www.google.com').span())

print(re.compile('www').search('www.google.com').span())

# A compiled pattern can be reused for several operations.
pattern = re.compile(r'\d+')
print(pattern.match('123456 12345').group(0))  # first run of digits only
print(pattern.findall('123456 12345'))  # every run of digits
# Remove every non-digit character from the date string.
num = re.compile(r'\D').sub("", "2000-01-01")
print(num)


def double(matched):
    """Return, as a string, twice the integer captured by the 'value' group.

    Intended as a replacement callback for ``re.sub``: ``matched`` is the
    match object, and its named group ``value`` must hold decimal digits.
    """
    return str(2 * int(matched.group('value')))


# Use a callback as the replacement: each match of the named group 'value'
# is handed to double() and replaced by its return value.
print(re.compile(r'(?P<value>\d+)').sub(double, '2222'))
s = "1102231990xxxxxx"
res = re.compile(
    r'(?P<province>\d{3})(?P<city>\d{3})(?P<born_year>\d{4})'
).search(s)
print(res.groups())  # captured values as a tuple, in group order
print(res.groupdict())  # captured values keyed by group name
# Group matching
r"""
re.I 忽略大小写
re.M 多行模式
re.U 和Unicode字符属性数据库相关
^ 开头
$ 结尾
.匹配任意字符，除了换行符
[...]
[^...]
re*
re+
re?
re{n}
re{n,}
re{n,m}
a | b
(re) group
\w 字母数字+下划线
\W 非(字母数字+下划线)
\d
\D
\A
\Z
\b
\B
\S 任意非空字符
\s
\1 \9
\n \t
"""

Loop

while
for

l1 = [1, 2, 3, 4, 5]

# Iterate directly over the elements.
for i in l1:
    print(i)

# When the position is needed as well, enumerate() yields (index, element)
# pairs, which avoids the range(len(...)) + subscript pattern.
for index, item in enumerate(l1):
    print(item)
else:
    # A loop's else clause runs only when the loop ends without `break`.
    print("no break")

is & ==

# Bind both names to one and the same int object; `is` compares object
# identity, whereas `==` would compare values.
a = 100
b = a
if b is a:
    print('same memory area')

Basic Data Structure

# Complex number: real part 10, imaginary part 2.
complex_v = complex(10, 2)
print(complex_v)

# List: elements may have different types.
l1 = ['Tim Cook', 100, 2.33, 'john']
# Dictionary: values may have different types and can be reassigned.
dict1 = dict(one=1, two='two')
print(dict1['one'])
dict1.update(two=2)
print(dict1.keys())
print(dict1.values())

Pass

No operation

Exception

def raiseException():
    """Unconditionally raise a plain ``Exception`` whose only argument is 100."""
    error = Exception(100)
    raise error


class NetworkError(RuntimeError):
    """Custom exception signalling a network-related failure.

    Args:
        arg: The error detail (typically a message string); it becomes the
            sole element of ``args`` and is what ``str(exc)`` shows.
    """

    def __init__(self, arg):
        # Delegate to RuntimeError so ``args`` is the 1-tuple ``(arg,)``.
        # Assigning a bare string to ``self.args`` would instead split it
        # into a tuple of single characters.
        super().__init__(arg)


# Raise the custom exception and print its message when it is caught.
try:
    raise NetworkError("Bad hostname")
except NetworkError as e:
    print(str(e))

# Full try / except / else / finally flow around a file write.
try:
    # The context manager closes the file even when write() raises, and
    # nothing tries to close a handle that open() never produced.
    with open('test.txt', 'w', encoding='utf-8') as fh:
        fh.write("hello world")
except (SystemError, SystemExit):
    print("")
except IOError:
    print("IO error")
else:
    # Runs only when the try body raised nothing.
    print("succeed")
finally:
    # Runs in every case, with or without an exception.
    print("finally")

 1class Animal:
 2    """This is a animal class"""
 3    count = 0  # shared
 4    __private_var = 0
 5    _protected = 0
 6
 7    def __init__(self, name, age):
 8        self.name = name
 9        self.age = age
10        Animal.count += 1
11
12    def displayName(self):
13        self._protected += 1
14        print(self.name)
15
16    def displayAge(self):
17        print(self.age)
18
19    def getName(self):
20        return self.name
21
22    def __del__(self):
23        print('delete ' + self.name)
24
25
dog = Animal("Dog", 1)
cat = Animal("Cat", 2)
# Every construction increments the counter stored on the class.
assert Animal.count == 2
assert dog.getName() == "Dog"
# Double-underscore class attributes are reachable through the mangled name.
assert dog._Animal__private_var == 0
print(dog.__doc__)
print(dog.__dict__.keys())
# Removing the last reference lets the object be destroyed, which runs __del__.
del cat


class Dog(Animal):
    """Animal subclass that adds ``run`` and overrides ``getName``."""

    def run(self):
        """Print the dog's name, then a line saying that it is running."""
        super().displayName()
        print("Dog " + self.name + " is running")

    def getName(self):
        """Print a marker line and return the dog's name.

        Overrides ``Animal.getName``; the name is still returned so the
        override honours the base-class contract.
        """
        self._protected += 2
        print("this is dog")
        return self.name


# Python always looks a method up on the object's own class first;
# only when the derived class does not define it does the search
# continue through the base classes, one by one.
# (Look in the class itself first, then fall back to the base classes.)
dog = Dog("dog 1", 100)


class LittleDog(Dog):
    """Dog subclass whose name and age are fixed to "Little" and 0."""

    def __init__(self):
        """Announce the construction, then initialise the inherited state."""
        print("Little Dog")
        # Go through the base initialiser so the shared Animal.count also
        # accounts for LittleDog instances.
        super().__init__("Little", 0)


dog = LittleDog()
# LittleDog defines no getName, so the lookup finds Dog.getName.
dog.getName()

Regular Expression Exercise

import re

# Regular Expression: a YYYY-MM-DD date with one named group per field.
s1, s2 = "2023-08-08", "2022-12-15"
pattern1 = r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$'
p = re.compile(pattern1)

# Replace every dash with a dot.
print(re.compile('-').sub(".", s1))

result = p.search(s1)
print(result.groupdict())
# Captured groups are strings; convert before using them as numbers.
print(int(result.group('month')))
result = p.search(s2)
print(result.groupdict())
# None: dict.get() returns None for a missing key instead of raising.
a = dict(key1=1)

if a.get('key2', None) is None:
    print('None')
# Difference between search & match: match() only tries at the start of
# the string, search() tries at every position.
print(re.compile('func').match('abc_func') is None)
print(re.compile('func').search('abc_func') is None)

# (...)          capturing group
# (?:...)        non-capturing group; its text is not saved
# (?=pattern)    positive lookahead: what follows must match pattern
# (?!pattern)    negative lookahead: what follows must not match pattern
# (?<=pattern)   positive lookbehind: what precedes must match pattern
# (?<!pattern)   negative lookbehind: what precedes must not match pattern
# Lookarounds consume no characters, so only 'Windows' is part of the match.
print(re.compile(r'Windows(?=95|NT|7|10|11)').search('Windows10').group())
assert not re.compile(r'Windows(?=95)').search('Windows10')

# Dots are escaped so they match a literal '.', not any character, and
# both patterns use the same alternatives (no stray space after 'example').
capturing_site = re.compile(r'www\.(zrt|example)\.ink')
non_capturing_site = re.compile(r'www\.(?:zrt|example)\.ink')

print(capturing_site.search('www.zrt.ink').group(0))
# With a capturing group, findall() returns the group's text ...
print(capturing_site.findall('www.zrt.ink'))
# ... with a non-capturing group it returns the whole match.
print(non_capturing_site.findall('www.zrt.ink'))

# split() drops the separators unless the pattern captures them.
print(re.split(r'\d+', 'abc23cde3efg'))
print(re.split(r'(\d+)', 'abc23cde3efg'))