# Turn the US spelling into the UK one by putting a 'u' in front of the
# first 'r'.
s = 'colorless'
s = s.replace('r', 'ur', 1)
# Everything before the first '-' in s.  partition() yields the whole string
# when s has no hyphen, where s.index('-') would raise ValueError.
s.partition('-')[0]
monty[::-1] 返回 monty 的逆序副本(对列表和字符串都适用),不会修改 monty 本身
# One or more consecutive ASCII letters; nltk.re_show displays where the
# pattern matches inside each sample string.
p = r'[a-zA-Z]+'
for sample in ('123asd456', '123asd456asd'):
    nltk.re_show(p, sample)
# One uppercase ASCII letter followed by any number of lowercase letters.
# The first sample contains no uppercase letter, so nothing in it matches.
p = r'[A-Z][a-z]*'
for sample in ('123asd456asd', 'Aadsds123asd456asd'):
    nltk.re_show(p, sample)
# 'p', then at most two vowels, then 't'.
p = r'p[aeiou]{,2}t'
# 'paat' matches (two vowels between 'p' and 't'); 'padst' and 'padsst' do
# not, because consonants sit between the vowel and the 't'.
nltk.re_show(p, 'paat')
nltk.re_show(p, 'padst')
nltk.re_show(p, 'padsst')
# Integer with an optional fractional part.  Written as a raw string so that
# the regex escapes \d and \. are not treated as (invalid) string escapes.
p = r'\d+(\.\d+)?'
nltk.re_show(p, '2312.12345dsa')
a.
# Punctuation tokenizer: every character listed in the class is a token on
# its own.  The hyphen is placed last so that it is a literal '-'; between
# ':' and '_' it would define a character range that covers the uppercase
# letters and leaves '-' itself unmatched.
pattern = r'''(?x) # set flag to allow verbose regexps
[][.,;"'?():_`-] # these are separate tokens
'''
# Tokenize `text` with the verbose pattern assigned just above.
# NOTE(review): `text` is not assigned before this call in the visible
# snippet -- confirm it is defined earlier.
nltk.regexp_tokenize(text, pattern)
b.
# Tokenizer pattern for abbreviations, two-word capitalized names, dates and
# numbers.  All groups are non-capturing: with findall-style matching (which
# nltk.regexp_tokenize relies on) a capturing group makes the result contain
# the group contents instead of the whole match.
pattern = r'''(?x) # set flag to allow verbose regexps
(?:[A-Z]\.)+ # abbreviations, e.g. U.S.A.
| [A-Z][a-z]*\s[A-Z][a-z]* # two capitalized words separated by one whitespace character, e.g. New York
| \d+-\d+-\d+ # three hyphen-separated numbers, e.g. 2009-12-31 (must come before the plain-number branch)
| \$?\d+(?:\.\d+)?%? # currency and percentages, e.g. $12.40, 82%
'''
S.split(sep=None, maxsplit=-1) -> list of strings
Return a list of the words in S, using sep as the
delimiter string. If maxsplit is given, at most maxsplit
splits are done. If sep is not specified or is None, any
whitespace string is a separator and empty strings are
removed from the result.
list 的 sort() 方法是原地(in place)排序,会改变列表自身并返回 None;内置函数 sorted() 返回排序后的新 list,不影响原对象
# Sorted tokens of `text` that start with "wh", compared case-insensitively.
sorted(w for w in text if w.lower().startswith('wh'))
# Split every "<word> <number>" line into a (word, number) pair.
# Both fields stay strings; no numeric conversion is done here.
result = []
text = ['a 10', 'b 20', 'c 30']
for line in text:
    # Two-target unpacking raises ValueError unless the line has exactly
    # two whitespace-separated fields.
    w, x = line.split()
    result.append((w, x))
def unknown(url):
    """Return the tokens of the page at ``url`` that are not in ``wn.words()``.

    The page is downloaded, decoded as UTF-8 and split into tokens with
    ``nltk.word_tokenize``.  Tokens are returned in page order, duplicates
    included.

    Raises whatever ``urllib.request.urlopen`` raises on a failed download,
    and ``UnicodeDecodeError`` if the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as resp:
        raw = resp.read().decode('utf-8')
    # Build the vocabulary once instead of re-evaluating wn.words() for
    # every token.
    # NOTE(review): `wn` is presumably nltk.corpus.wordnet -- confirm import.
    known = set(wn.words())
    return [w for w in nltk.word_tokenize(raw) if w not in known]


unknown('http://www.gutenberg.org/files/11/11-h/11-h.htm')
# Patterns used by f() for its character substitutions.
p1 = r'e'
p2 = r'i'
p3 = r'o'
p4 = r'[.]'   # a literal full stop
p5 = r'ate'
p6 = r'^s'    # an 's' at the very start of the string
p7 = r's'     # every remaining 's'
p8 = r'l'     # lowercase letter L (not the digit 1, which would also rewrite the '1' that replaces 'i')


def f(s):
    """Return ``s`` with the p1..p8 substitutions applied.

    Mapping: 'ate' -> '8', 'e' -> '3', 'i' -> '1', 'o' -> '0',
    '.' -> '5w33t!', leading 's' -> '$', other 's' -> '5', 'l' -> '|'.
    The input is not case-normalized; only lowercase letters are replaced.
    """
    substitutions = (
        (p5, '8'),       # before p1, which would otherwise consume the 'e' of 'ate'
        (p1, '3'),
        (p2, '1'),
        (p3, '0'),
        (p4, '5w33t!'),
        (p6, '$'),       # before p7, so the leading 's' is still present
        (p7, '5'),
        (p8, '|'),
    )
    for pat, repl in substitutions:
        s = re.sub(pat, repl, s)
    return s
saying = ['After', 'all', 'is', 'said', 'and', 'done', ',', 'more', 'is', 'said', 'than', 'done', '.']

# Loop form: collect the items of `saying` one by one.
# NOTE(review): despite the name, `lengths` receives the words themselves;
# if word lengths were intended, append len(w) instead -- confirm.
lengths = []
for w in saying:
    lengths.append(w)

# The same list built with a list comprehension.
lengths = [w for w in saying]
silly = 'newly formed bland ideas are inexpressible in an infuriating way'
# Whitespace-separated words of the sentence.
bland = silly.split()
from functools import reduce
# Second character of every word, concatenated into one string.
s = ''.join(w[1] for w in bland)
# Rebuild the sentence from its words, then list the words in sorted order.
' '.join(bland)
sorted(bland)
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有