前言
字符串相关的算法种类相当多,常见的涉及的数据结构/算法有:
- 递归
- 栈
- DP
- 双指针
经典题型
常见运算
解法说明:最经典的题型为:通过字符串输入实现加减乘除运算;该类题型的解法通常是将原始的数学解法进行拆分,从而实现分步实现。常用的数据结构是 栈
。
真题:
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
class Solution(object): def multiply(self, num1, num2): """ :type num1: str :type num2: str :rtype: str """ # 关键:抽象为两步: # 1. 单位乘法 # 2. 各个位数相加 res = [0] * (len(num1) + len(num2) + 1) for _i in range(len(num1)): i = len(num1) - 1 - _i # 单位乘法 single_re = [0] * (len(num2) + 1) single_jinwei = 0 for _j in range(len(num2)): j = len(num2) - 1 - _j n1 = int(num1[i]) n2 = int(num2[j]) single_re[_j] = (n1 * n2 + single_jinwei) % 10 single_jinwei = (n1 * n2 + single_jinwei) // 10 if single_jinwei != 0: single_re[-1] = single_jinwei # 与总的结果相加 # TODO: 和第一步合并: 压缩时间复杂度 top_jinwei = 0 for _j, n in enumerate(single_re): ori_value = res[_i + _j] res[_i + _j] = (ori_value + n + top_jinwei) % 10 top_jinwei = (ori_value + n + top_jinwei) // 10 # 去掉最后的0 for _ in range(len(res)): idx = len(res) - 1 - _ if res[idx] != 0: break res = res[0:idx + 1] res = list(map(str, res)) res = ''.join(res) return res[::-1]
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
class Solution(object): def calculate(self, s): """ :type s: str :rtype: int """ # 栈存储:数字,"*,/"先做处理 num_stack = [] # 先清理' ' s = s.replace(' ', '') num = 0 # 记录前置op: 很关键 op = '+' for i in range(len(s)): if s[i] not in ['+', '-', '*', '/']: num = num * 10 + int(s[i]) if s[i] in ['+', '-', '*', '/'] or i == len(s) - 1: if op == '+': num_stack.append(num) elif op == '-': num_stack.append(-num) elif op == '*': num_stack.append(num_stack.pop() * num) else: num_pre = num_stack.pop() if num_pre < 0: n = -int(-num_pre // num) else: n = int(num_pre / num) num_stack.append(n) op = s[i] num = 0 return sum(num_stack)
判断合理字符串
解法说明:该类题型比较多变,通常是根据匹配规则来进行判断。常有的解法为回溯/栈,细节的处理很关键。
真题:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
class Solution: def restoreIpAddresses(self, s: str) -> List[str]: SEG_COUNT = 4 ans = list() segments = [0] * SEG_COUNT # 回溯:segId->ipAdress第几段,segStart->字符串开始search的位置 def dfs(segId: int, segStart: int): # 如果找到了 4 段 IP 地址并且遍历完了字符串,那么就是一种答案 if segId == SEG_COUNT: if segStart == len(s): ipAddr = ".".join(str(seg) for seg in segments) ans.append(ipAddr) return # 如果还没有找到 4 段 IP 地址就已经遍历完了字符串,那么提前回溯 if segStart == len(s): return # 由于不能有前导零,如果当前数字为 0,那么这一段 IP 地址只能为 0 if s[segStart] == "0": segments[segId] = 0 dfs(segId + 1, segStart + 1) # 一般情况,枚举每一种可能性并递归 addr = 0 # segEnd->当前字符截取终点(下个字符的起点) for segEnd in range(segStart, len(s)): addr = addr * 10 + (ord(s[segEnd]) - ord("0")) if 0 < addr <= 0xFF: segments[segId] = addr dfs(segId + 1, segEnd + 1) else: break dfs(0, 0) return ans
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
class Solution: def validUtf8(self, data): """ :type data: List[int] :rtype: bool """ # 记录当前UTF-8的字节数 n_bytes = 0 # 对每个字节进行判断 for num in data: # 可直接通过format防范进行二进制转换 bin_rep = format(num, '#010b')[-8:] # 如n_bytes==0,说明开始处理新的UTF-8码 if n_bytes == 0: # 1位判断字节数 for bit in bin_rep: if bit == '0': break n_bytes += 1 # 说明首位为0,为单字节 if n_bytes == 0: continue # 错误格式:不能为1或者超过4个字节 if n_bytes == 1 or n_bytes > 4: return False else: # 如n_bytes非0,则后续字节格式为:`10xxxxxx` if not (bin_rep[0] == '1' and bin_rep[1] == '0'): return False # `10xxxxxx`格式则字节数-1 n_bytes -= 1 # 如字节数归0则为正确格式 return n_bytes == 0
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
class Solution: def longestValidParentheses(self, s: str) -> int: if len(s) < 2: return 0 # 考虑用栈 idx_stack = [] final_re = 0 idx_stack.append(-1) # 关键点:哨兵;代表前置最近的未匹配)位置 for idx, w in enumerate(s): if w == '(': idx_stack.append(idx) else: # 有可匹配的):计算匹配前后的idx差即可 # 注意这里减去的是前一个元素的idx idx_stack.pop() if not len(idx_stack): # 意味着前面没有(match,那么只更新最新的不match的)的idx idx_stack.append(idx) else: # 说明前面有可match的idx,这里存的是上一个不match的)的idx final_re = max(idx - idx_stack[-1], final_re) return final_re
回文子串
解法说明:回文字符的判断思路可采用双指针,即从中心字符(或者两个相同的字符对)两侧向左右两侧移动:
- 如对应位置的左右字符相同,则继续移动
- 如不相同,则可跳出判断
真题:
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
class Solution(object): @staticmethod def judgePalindrome(idx_left, idx_right, s): # 通过双指针左右移动判断回文子串 while idx_left >=0 and idx_right < len(s): if s[idx_left] != s[idx_right]: return idx_left, idx_right idx_left = idx_left - 1 idx_right = idx_right + 1 return idx_left, idx_right def longestPalindrome(self, s): """ :type s: str :rtype: str """ # 回文子串时间复杂度:M^2 max_length = 1 res_word = s[0] num_s = len(s) for idx in range(num_s - 1): idx_left, idx_right = Solution.judgePalindrome(idx - 1, idx + 1, s) cur_length = idx_right - idx_left - 1 if s[idx] == s[idx + 1]: d_left, d_right = Solution.judgePalindrome(idx - 1, idx + 2, s) if d_right - d_left - 1 > cur_length: idx_left = d_left idx_right = d_right cur_length = d_right - d_left - 1 if cur_length > max_length: max_length = cur_length res_word = s[idx_left + 1 : idx_right] return res_word
DP算法
解法说明:DP算法常应用各类字符串匹配问题,常见问题:最长公共子序列等,其时间复杂度一般为O(n2)
真题:
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13
class Solution: def wordBreak(self, s, wordDict) -> bool: # 简单的一维DP问题: # dp[i] = dp[j] && (s[j+1:i] in wordDict) dp = [True] + [False] * len(s) for i in range(1, len(s) + 1): for j in range(i): if dp[j] and s[j:i] in wordDict: dp[i] = True break return dp[len(s)]
题解:
1 2 3 4 5 6 7 8 9 10 11 12
class Solution: def lengthOfLIS(self, nums: List[int]) -> int: # 经典一维DP if not nums: return 0 dp = [] for i in range(len(nums)): dp.append(1) for j in range(i): if nums[i] > nums[j]: dp[i] = max(dp[i], dp[j] + 1) return max(dp)
题解:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
class Solution: def longestCommonSubsequence(self, text1: str, text2: str) -> int: # 经典二维DP m, n = len(text1), len(text2) dp = [[0] * (n + 1) for _ in range(m + 1)] for i in range(1, m + 1): for j in range(1, n + 1): if text1[i - 1] == text2[j - 1]: dp[i][j] = dp[i - 1][j - 1] + 1 else: dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]) return dp[m][n]
Tricks
re模块
re
模块可用于字符串的正则匹配,非常方便,常见的一些用法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import re
pattern1 = 'bc'
pattern2 = 'abc'
pattern3 = 'BC'
input_s = 'bcabcabcabcbc'
# match方法:一次匹配,且默认从头开始匹配
print("re.match方法:")
print(re.match(pattern1, input_s))
print(re.match(pattern2, input_s))
print(re.match(pattern3, input_s))
# 常用的参数:re.I(不区分大小写)/re.M(多行模式)
print(re.match(pattern3, input_s, flags=re.I))
# search方法:一次匹配,且从任意位置开始匹配
print("re.search方法:")
print(re.search(pattern1, input_s))
print(re.search(pattern2, input_s))
print(re.search(pattern3, input_s))
print(re.search(pattern3, input_s, flags=re.I))
# findall/finditer方法:获取所有匹配结果,两个方法区别:
# findall:返回匹配到的字符串
# finditer:返回是匹配结果,一般用这个
print("re.finditer方法:")
print('/'.join(str(_) for _ in re.finditer(pattern1, input_s)))
print('/'.join(str(_) for _ in re.finditer(pattern3, input_s, flags=re.I)))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#+RESULTS:
#+begin_example
re.match方法:
<re.Match object; span=(0, 2), match='bc'>
None
None
<re.Match object; span=(0, 2), match='bc'>
re.search方法:
<re.Match object; span=(0, 2), match='bc'>
<re.Match object; span=(2, 5), match='abc'>
None
<re.Match object; span=(0, 2), match='bc'>
re.finditer方法:
<re.Match object; span=(0, 2), match='bc'>/<re.Match object; span=(3, 5), match='bc'>/<re.Match object; span=(6, 8), match='bc'>/<re.Match object; span=(9, 11), match='bc'>/<re.Match object; span=(11, 13), match='bc'>
<re.Match object; span=(0, 2), match='bc'>/<re.Match object; span=(3, 5), match='bc'>/<re.Match object; span=(6, 8), match='bc'>/<re.Match object; span=(9, 11), match='bc'>/<re.Match object; span=(11, 13), match='bc'>
#+end_example
format使用
format常用于字符串的规范输出:
字符串格式化
1 2 3 4 5
print("{} {}".format("hello", "world")) print("{0} {1}".format("hello", "world")) print("{1} {0} {1}".format("hello", "world")) personal_info = {"name": "wgzheng"} print("name:{name}".format(**personal_info))
1 2 3 4 5 6 7
#+RESULTS: #+begin_example hello world hello world world hello world name:wgzheng #+end_example
数字格式化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# 格式化数字 print("{:.2f}".format(3.1415926)) print("{:+.2f}".format(-3.1415926)) # 补零,左补齐 print("{:0>3d}".format(3)) # 补零,右补齐 print("{:0<3d}".format(3)) # 中间对齐 print("{:^3d}".format(3)) # 百分比格式 print("{:.2%}".format(3)) # 指数格式 print("{:.2e}".format(3)) # 逗号分隔 print("{:,}".format(100000))
1 2 3 4 5 6 7 8 9 10 11
#+RESULTS: #+begin_example 3.14 -3.14 003 300 3 300.00% 3.00e+00 100,000 #+end_example
进制转化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
num = 10 print("10进制:{0:d}".format(num)) print("2进制:{0:b}".format(num)) print("8进制:{0:o}".format(num)) print("16进制:{0:x}".format(num)) # 保留进制标志 print("10进制:{0:#d}".format(num)) print("2进制:{0:#b}".format(num)) print("8进制:{0:#o}".format(num)) print("16进制:{0:#x}".format(num)) # 直接format num_b = format(num, 'b') num_o = format(num, 'o') num_x = format(num, 'x') print("2进制:", num_b) print("8进制:", num_o) print("16进制:", num_x)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#+RESULTS: #+begin_example 10进制:10 2进制:1010 8进制:12 16进制:a 10进制:10 2进制:0b1010 8进制:0o12 16进制:0xa 2进制: 1010 8进制: 12 16进制: a #+end_example