大家好,又见面了,我是你们的朋友全栈君。
项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。
unit Unit1;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, IdBaseComponent,
IdComponent, IdTCPConnection, IdTCPClient, IdHTTP, Vcl.ComCtrls;
type
TForm1 = class(TForm)
IdHTTP1: TIdHTTP;
Button1: TButton;
Label1: TLabel;
Edit1: TEdit;
ProgressBar1: TProgressBar;
Memo1: TMemo;
Button2: TButton;
Memo2: TMemo;
procedure Button1Click(Sender: TObject);
procedure FormCreate(Sender: TObject);
procedure Button2Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
uses StrUtils,HttpApp;
{$R *.dfm}
type
TDelFlags = set of (dfDelBefore, dfDelAfter);
function Delstr(var ms: String; endstr: String; Flags: TDelFlags;
bself: Boolean = True): String;
var
l: Integer;
begin
l := length(endstr);
if dfDelBefore in Flags then
begin
if bself then
begin
Result := copy(ms, 1, pos(endstr, ms) + l - 1);
Delete(ms, 1, pos(endstr, ms) + l - 1);
end
else
begin
Result := copy(ms, 1, pos(endstr, ms) - 1);
Delete(ms, 1, pos(endstr, ms) - 1);
end;
end
else
begin
if bself then
begin
Result := copy(ms, pos(endstr, ms), length(ms));
Delete(ms, pos(endstr, ms), length(ms));
end
else
begin
Result := copy(ms, pos(endstr, ms) + l, length(ms));
Delete(ms, pos(endstr, ms) + l, length(ms));
end;
end;
end;
procedure DelstrEx(var ms: String; endstr: String;
var DelData: String; Flags: TDelFlags; bself: Boolean = True);
var
l: Integer;
begin
l := length(endstr);
if dfDelBefore in Flags then
begin //删除字符串的前半部分
if bself then //连同自己一起删除
begin
DelData := copy(ms, 1, pos(endstr, ms) + l - 1);
Delete(ms, 1, pos(endstr, ms) + l - 1);
end
else
begin
DelData := copy(ms, pos(endstr, ms) - 1, length(ms));
Delete(ms, 1, pos(endstr, ms) - 1);
end;
end
else
begin
if bself then
begin
DelData := copy(ms, pos(endstr, ms), length(ms));
Delete(ms, pos(endstr, ms), length(ms)); //连同自己一起删除
end
else
begin
DelData := copy(ms, pos(endstr, ms) + l, length(ms));
Delete(ms, pos(endstr, ms) + l, length(ms));
end;
end;
end; {DelstrEx}
function GetCenterStr(src, str1, str2: String): String;
var
i, i2, i3: Integer;
begin
i := 0;
i2 := 0;
i3 := 0;
Delstr(src, str1, [dfDelBefore]);
i := pos(AnsiLowercase(str1), AnsiLowercase(src));
i3 := pos(AnsiLowercase(str2), AnsiLowercase(src));
Result := copy(src, i2 + 1, i3 - i2 - 1);
end;
function delstrByNum(ss:string;uniqueFlag:string;disapperNum:integer;FromFlags: TDelFlags;bReturnDeletedPart:boolean):string;
var _num:integer;
_Str:string;
begin
_num:=0;
_Str:=ss;
result:='';
while _num<disapperNum do
begin
if dfDelBefore in FromFlags then //从字符串左端开始删除
begin
delstr(_Str,uniqueFlag,FromFlags);
end
else
begin //从字符串右端开始删除
_Str:= StrUtils.ReverseString(_Str) ;
if bReturnDeletedPart then
delstrEx(_Str,StrUtils.ReverseString(uniqueFlag),result,[dfdelbefore])
else
delstr(_Str,StrUtils.ReverseString(uniqueFlag),[dfdelbefore]);
_Str:= StrUtils.ReverseString(_Str) ;
end;
inc(_num);
end;
if result='' then result:=_Str
else result:= StrUtils.ReverseString(result) ;
end;
function Matchstrings(Source, pattern: String): Boolean;
var
pSource: array[0..255] of Char;
pPattern: array[0..255] of Char;
function MatchPattern(element, pattern: PChar): Boolean;
function IsPatternWild(pattern: PChar): Boolean;
begin
Result := StrScan(pattern, '*') <> nil;
if not Result then
Result := StrScan(pattern, '?') <> nil;
end;
begin
if 0 = StrComp(pattern, '*') then
Result := True
else if (element^ = Chr(0)) and (pattern^ <> Chr(0)) then
Result := False
else if element^ = Chr(0) then
Result := True
else
begin
case pattern^ of
'*':
if MatchPattern(element, @pattern[1]) then
Result := True
else
Result := MatchPattern(@element[1], pattern);
'?':
Result := MatchPattern(@element[1], @pattern[1]);
else
if element^ = pattern^ then
Result := MatchPattern(@element[1], @pattern[1])
else
Result := False;
end;
end;
end;
begin
StrPCopy(pSource, Source);
StrPCopy(pPattern, pattern);
Result := MatchPattern(pSource, pPattern);
end; {匹配字符串函数}
{从磁盘中搜索指定类型的所有文件}
procedure FindFiles(ASourceDir, SearchFileType: String; var List: TStrings);
var
FileRec: TSearchrec;
Sour, OldFileName, NewFileName: String;
fs: TFileStream;
begin
Sour := ASourceDir;
if Sour[length(Sour)] <> '\' then
Sour := Sour + '\';
if FindFirst(Sour + '*.*', faAnyfile, FileRec) = 0 then
{循环}
repeat
if ((FileRec.Attr and faDirectory) <> 0) then
begin
if (FileRec.Name <> '.') and (FileRec.Name <> '..') then //找到目录
begin
FindFiles(Sour + FileRec.Name, SearchFileType, List);
end;
end
else //找到文件
begin
if Matchstrings(LowerCase(FileRec.Name), SearchFileType) then
begin
List.Add(Sour + FileRec.Name);
end; {拷贝所有类型的文件}
end;
until FindNext(FileRec) <> 0;
system.SysUtils.FindClose(FileRec);
end; {从磁盘中搜索指定类型的所有文件}
procedure RmHtmlTags(var src: string);
function DelTag(var src: string): boolean;
var
iPosS, iPosE: integer;
begin
result := False;
if pos('<script', AnsiLowerCase(src)) > 0 then
begin
iPosS := pos('<script', AnsiLowerCase(src));
if iPosS > 0 then
begin
iPosE := pos('</script>', AnsiLowerCase(src));
result := iPosE > iPosS;
if result then
Delete(src, iPosS, iPosE - iPosS + 9);
end;
end
else
begin
iPosS := pos('<', src);
if iPosS > 0 then
begin
iPosE := pos('>', src);
result := iPosE > iPosS;
if result then
Delete(src, iPosS, iPosE - iPosS + 1);
end;
end;
end;
begin
//src := LowerCase(src);
src := src;
repeat
until not DelTag(src);
end;
procedure RmHtmlTagsEx(var src: string);
function DelTag(var src: string): boolean;
var
iPosS, iPosE: integer;
begin
result := False;
if pos('<script', AnsiLowerCase(src)) > 0 then
begin
iPosS := pos('<script', AnsiLowerCase(src));
if iPosS > 0 then
begin
iPosE := pos('</script>', AnsiLowerCase(src));
result := iPosE > iPosS;
if result then
Delete(src, iPosS, iPosE - iPosS + 9);
end;
end
else
if pos('<style', AnsiLowerCase(src)) > 0 then
begin
iPosS := pos('<style', AnsiLowerCase(src));
if iPosS > 0 then
begin
iPosE := pos('</style>', AnsiLowerCase(src));
result := iPosE > iPosS;
if result then
Delete(src, iPosS, iPosE - iPosS + 9);
end;
end
else
begin
{ iPosS := pos('<', src);
if iPosS > 0 then
begin
iPosE := pos('>', src);
result := iPosE > iPosS;
if result then
Delete(src, iPosS, iPosE - iPosS + 1);
end; }
end;
end;
begin
//src := LowerCase(src);
src := src;
repeat
until not DelTag(src);
end;
function UrlDecoder(const AUrl:string):string;
begin
result:= UTF8Decode(HttpDecode(AUrl));
end;
function UrlEncoder(const AUrl:string):string;
begin
//URL编码通常使用“+”来替换空格。
result:=HttpEncode(UTF8Encode(AUrl));
end;
function getResURL(http:TIdHttp;searchWord:string):string;
var info:tstringlist;
res:tstringstream;
tURL:string;
MemoText: string;
begin
http.HandleRedirects:=true;
http.Request.UserAgent:='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.2)';
http.Request.Host:='search.cyol.com';
http.Request.ContentType:='application/x-www-form-urlencoded';
http.Request.Referer:='http://search.cyol.com/index.htm';
http.request.CacheControl:='no-cache';
http.HTTPOptions:=http.HTTPOptions+[hoKeepOrigProtocol];
try
info:=tstringlist.Create;
res:=tstringstream.Create('',TEncoding.UTF8);
{
info.Add('op=new');
info.Add('searchBtn=搜索');
info.Add('searchText='+searchWord); //全站内模糊搜索
// info.Add('searchText=一日为师 终身挨骂?');
}
info.Add('ak=');
info.Add('ck=');
info.Add('df=');
info.Add('dt=');
info.Add('nk=4');
info.Add('od=date');
info.Add('op=adv');
info.Add('tk='+searchWord);
tURL:='http://search.cyol.com/searchh.jsp';
http.Post(tURL,info,res);
MemoText:= res.DataString;
delstr(MemoText,'resultdiv',[dfdelbefore]);
//showmessage(MemoText);
if pos('color:red',ansilowercase(MemoText))=0 then
begin
result:='';
Exit;
end;
delstr(MemoText,'>',[dfdelbefore]);
delstr(MemoText,'<a',[dfdelbefore]);
delstr(MemoText,'http:',[dfdelbefore],false);
delstr(MemoText,'.htm',[dfdelafter],false);
result:=MemoText;
finally
freeandnil(info);
freeandnil(res);
//http.Free;
end;
end;
function getHtmlStr(http:TIdHttp;fURL:string):string;
begin
if assigned(http) and (http is TIdHttp) and (http<>nil) then
result:= http.Get(fURL);
end;
procedure TForm1.Button1Click(Sender: TObject);
var htmlText:string;
biaoti: string;
Author: string;
yinti: string;
table_Pos: Integer;
ss: string;
outdata: string;
neirong: string;
zhenwen: string;
frontPart: string;
subtitle: string;
txtList: TStrings;
i: Integer;
readtxt: TStringList;
zhenti: string;
resURL: string;
begin
button1.Caption:='正在处理'; button1.Enabled:=false;
{ htmlText:= getHtmlStr(idHTTP1, getResURL(idHTTP1,'一日为师 终身挨骂?') );
frontPart:=htmlText;
delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
delstr(frontPart,'/enpproperty',[dfdelafter]);
Author:= GetCenterStr(frontPart,'<author>','</author>'); //作者
subtitle:= GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
yinti:= GetCenterStr(frontPart,'<introtitle>','</introtitle>'); //引题
//取正文
zhenwen:=htmlText;
delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
Memo1.Text:=zhenwen;
}
if not directoryExists(edit1.Text) then
begin
showmessage('请输入标引txt的路径!');
exit;
end;
txtList:=tstringlist.Create ;
readtxt:=TStringlist.Create ;
findfiles(edit1.Text,'*.txt',txtList);
ProgressBar1.Position:=0;
ProgressBar1.Max:=txtlist.Count;
try
for i := 0 to txtList.Count-1 do
begin
application.ProcessMessages ;
ProgressBar1.Position:=i+1;
readtxt.LoadFromFile(txtList[i]);
zhenti:=readtxt.Values['<主题>'];
htmlText:=''; zhenwen:='';
author:='';subtitle:=''; yinti:='';
resURL:=getResURL(idHTTP1,trim(zhenti));
if ''<>trim(resURL) then
begin
htmlText:= getHtmlStr(idHTTP1, resURL);
frontPart:=htmlText;
delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
delstr(frontPart,'/enpproperty',[dfdelafter]);
Author:= GetCenterStr(frontPart,'<author>','</author>'); //作者
subtitle:= GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
yinti:= GetCenterStr(frontPart,'<introtitle>','</introtitle>'); //引题
//取正文
zhenwen:=htmlText;
delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
RmHtmlTagsEx(zhenwen);
if ''<>trim(yinti) then readtxt.Values['<引题>']:=yinti;
if ''<>trim(subtitle) then readtxt.Values['<副题>']:=subtitle;
if ''<>trim(author) then readtxt.Values['<作者>']:=author;
if ''<>trim(zhenwen) then readtxt.Values['<正文>']:=slinebreak+trim(zhenwen);
readtxt.SaveToFile(txtList[i]);
readtxt.Clear ;
end
else
begin
Memo2.Lines.Add('未找到对应数据:'+txtList[i]);
end;
end; // for i end
if ProgressBar1.Max=ProgressBar1.Position then
begin
showmessage('处理完成!');
end;
finally
button1.Caption:='开始处理'; button1.Enabled:=true;
freeandnil(readtxt);
freeandnil(txtlist);
end;
{ delstr(htmlText,'<body',[dfdelbefore]);
biaoti:='biaoti';
//取作者
Author:=htmlText;
delstr(Author,biaoti,[dfdelbefore]);
delstr(Author,'rc-writer',[dfdelbefore]);
delstr(Author,'>',[dfdelbefore]);
delstr(Author,'<',[dfdelafter]);
showmessage(Author);
//取引题
yinti:=htmlText;
delstr(yinti,biaoti,[dfdelafter]);
table_Pos:=0;
//example: ss:='<table>ccc</table><table>ddd</table>';
yinti:=delstrByNum(yinti,'<table',1,[dfdelafter],true)+'>';
RmHtmlTags(yinti);
showmessage(yinti );
//取正文内容
neirong:='neirong';
zhenwen:=htmlText;
delstr(zhenwen,neirong,[dfdelbefore]);
delstr(zhenwen,'<P',[dfdelbefore],false);
delstr(zhenwen,'<script',[dfdelafter]);
Memo1.Text:=zhenwen;
}
end;
procedure TForm1.Button2Click(Sender: TObject);
var
ss: string;
begin
ss:=Memo1.Text;
RmHtmlTagsEx(ss);
memo1.Text:=ss;
end;
procedure TForm1.FormCreate(Sender: TObject);
begin
edit1.Clear ;
memo2.Clear ;
end;
end.
发布者:全栈程序员栈长,转载请注明出处:https://javaforall.cn/154654.html原文链接:https://javaforall.cn