首页
社区
课程
招聘
[旧帖] 一次性统统提取——提取文本里的数字文本 0.00雪花
发表于: 2010-9-30 17:54 997

[旧帖] 一次性统统提取——提取文本里的数字文本 0.00雪花

2010-9-30 17:54
997
所谓数字文本,就是1、2、3、4这样的单个或连续数字(包括0.8、1.2这样的数字),但不包括IP地址那种有多个小数点的文字(如有需要,请自行修改)。这些数字通常隐藏在众多非数字的文本文件或者字符串中。这里可以一次性把他们全部提取出来放入列表里,列表里的Strings部分保存原始文本,Values里保存转化成数字的变量。使用起来与TStringList风格相似,留给有这种需要的人。

有一个特例:对于单一的“.”符号,也会被收录(我在类里没有处理,因为这种写法,有可能就是懒人0.0的简写,练手的代码,不考虑这么多),如果不喜欢这样的处理,可以自行修改。

unit NumFromStr;

interface

uses
  Classes, SysUtils;

type
  PSeriesItem = ^TSeriesItem;
  TSeriesItem = record
    FNumString: string;
    FNumValue : Extended;
  end;

  PSeriesItemList = ^TSeriesItemList;
  TSeriesItemList = array[0..Maxint div 64] of TSeriesItem;

  TPickNumStringHelper = class(TObject)
  private
    FList: PSeriesItemList;
    FCount: Integer;
    FCapacity: Integer;
    procedure Error(Index: Integer);
    procedure Grow;
    function GetIntegerValue(Index: Integer): Integer;
    function StartNumber(const S: string):Integer;
    function StopNumber(const S: string):Integer;
  protected
    function Get(Index: Integer): string;virtual;
    function GetCapacity: Integer;virtual;
    function GetCount: Integer;virtual;
    function GetValue(Index: Integer): Extended; virtual;
    procedure ParseTextStr(const AnyString: string); virtual;
    procedure Put(Index: Integer; const NumString: string); virtual;
    function SepValue(const S: string;const lPos:integer;  var VarStr: string):Integer;virtual;
    procedure SetValue(Index: Integer; NumValue: Extended); virtual;
    procedure SetCapacity(NewCapacity: Integer);virtual;
    procedure InsertItem(Index: Integer;const NumString: string; NumValue: Extended); virtual;
  public
    destructor Destroy; override;
    procedure Clear;
    function Add(const AnyString: string): Integer;
    function AddNumValue(const NumString: string; NumValue: Extended): Integer;
    procedure LoadFromFile(const FileName: string);
    procedure LoadFromStream(Stream: TStream);
    property Count: Integer read GetCount;
    property ValsToInt[Index: Integer]: Integer read GetIntegerValue;
    property Values[Index: Integer]: Extended read GetValue write SetValue;
    property Strings[Index: Integer]: string read Get write Put; default;
  end;     
  
implementation

// ##############################################################
// #                                                            #
// #   <Delphi Pickup Number from String> ++ Write by 小雨哥    #
// #                                                            #
// ##############################################################

{ TPickNumStringHelper }

destructor TPickNumStringHelper.Destroy;
begin
  if FCount <> 0 then Finalize(FList^[0], FCount);
  FCount := 0;
  SetCapacity(0);
  inherited Destroy;
end;

function TPickNumStringHelper.Add(const AnyString: string): Integer;
var
I, J: Integer;
VarStr,temp:string;
FNumValue: Extended;
begin
  Result := 1;
  temp := AnyString;

  while true do
  begin
    I := StartNumber(temp);
    if I < Length(AnyString) then
    begin
      J := SepValue(temp, I, VarStr);
      if J > 1 then
      begin
        /// FNumValue: "." value auto to 0
        if TryStrToFloat(VarStr, FNumValue) then
          Result := AddNumValue(VarStr, FNumValue);
      end else Break;
      temp := Copy(temp, I + j - 1, MaxInt);
    end
    else Break;   
  end;
end;

function TPickNumStringHelper.AddNumValue(const NumString: string; NumValue: Extended): Integer;
begin
  Result := FCount;
  ///  This is a faster add action, not test it!!! add is free.
  InsertItem(Result, NumString, NumValue);
end;

procedure TPickNumStringHelper.Clear;
begin
  if FCount <> 0 then
  begin
    Finalize(FList^[0], FCount);
    FCount := 0;
    SetCapacity(0);
  end;
end;

procedure TPickNumStringHelper.Error(Index: Integer);
  function ReturnAddr: Pointer;
  asm
     MOV     EAX,[EBP+4]
  end;
const
  IndexError = 'index out of bounds (%d)';
begin
  raise Exception.CreateFmt(IndexError, [Index]) at ReturnAddr;
end;

function TPickNumStringHelper.Get(Index: Integer): string;
begin
  if (Index < 0) or (Index >= FCount) then
   Error(Index)
  else
   Result := FList^[Index].FNumString;
end;

function TPickNumStringHelper.GetCapacity: Integer;
begin
  Result := FCapacity;
end;

function TPickNumStringHelper.GetCount: Integer;
begin
  Result := FCount;
end;

function TPickNumStringHelper.GetIntegerValue(Index: Integer): Integer;
begin
  Result := Round(GetValue(Index));
end;

function TPickNumStringHelper.GetValue(Index: Integer): Extended;
begin
  Result := 0.0;
  if (Index < 0) or (Index >= FCount) then
    Error(Index)
  else
    Result := FList^[Index].FNumValue;
end;

procedure TPickNumStringHelper.Grow;
var
  Delta: Integer;
begin
  if FCapacity > 128 then Delta := FCapacity div 4 else
    if FCapacity > 32 then Delta := 16 else Delta := 4;
  SetCapacity(FCapacity + Delta);
end;

procedure TPickNumStringHelper.InsertItem(Index: Integer;const NumString: string; NumValue: Extended);
begin
  if FCount = FCapacity then Grow;
  if Index < FCount then
    System.Move(FList^[Index], FList^[Index + 1], (FCount - Index) * SizeOf(TSeriesItem));
  with FList^[Index] do
  begin
    Pointer(FNumString) := nil;
    FNumValue := NumValue;
    FNumString := NumString;
  end;
  Inc(FCount);
end;

procedure TPickNumStringHelper.LoadFromFile(const FileName: string);
var
  Stream: TStream;
begin
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    LoadFromStream(Stream);
  finally
    Stream.Free;
  end;
end;

procedure TPickNumStringHelper.LoadFromStream(Stream: TStream);
var
  Size: Integer;
  S: string;
begin
  Size := Stream.Size - Stream.Position;
  SetString(S, nil, Size);
  Stream.Read(Pointer(S)^, Size);
  ParseTextStr(S);
end;

procedure TPickNumStringHelper.Put(Index: Integer; const NumString: string);
var
  NumValue: Extended;
begin
  if (Index < 0) or (Index >= FCount) then
    Error(Index)
  else
  begin
    if TryStrToFloat(NumString, NumValue) then
    begin
      FList^[Index].FNumString := NumString;
      FList^[Index].FNumValue  := NumValue;
    end;
  end;
end;

function TPickNumStringHelper.StartNumber(const S: string): Integer;
begin
  Result := 1;
  while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.'])) do
    Inc(Result);
end;

function TPickNumStringHelper.StopNumber(const S: string): Integer;
begin
  Result := 1;
  while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.']) do
    Inc(Result);
end;

function TPickNumStringHelper.SepValue(const S: string; const lPos: integer; var VarStr: string): Integer;
begin
  VarStr := Copy(S, lPos, MaxInt);
  Result := StopNumber(VarStr);
  if Result > 1 then
  begin
    VarStr := Copy(S,  lPos, Result - 1);

  end
  else VarStr := '';
end;

procedure TPickNumStringHelper.SetCapacity(NewCapacity: Integer);
begin
  ReallocMem(FList, NewCapacity * SizeOf(TSeriesItem));
  FCapacity := NewCapacity;
end;

procedure TPickNumStringHelper.ParseTextStr(const AnyString: string);
var
  P, Start: PChar;
  S: string;
begin
  Clear;
  P := Pointer(AnyString);
  if P <> nil then
  begin
    while P^ <> #0 do
    begin
      Start := P;
      while not (P^ in [#0, #10, #13]) do Inc(P);
      SetString(S, Start, P - Start);
      Add(S);
      if P^ = #13 then Inc(P);
      if P^ = #10 then Inc(P);
    end;
  end;
end;

procedure TPickNumStringHelper.SetValue(Index: Integer; NumValue: Extended);
begin
  if (Index < 0) or (Index >= FCount) then
    Error(Index)
  else
  begin
    FList^[Index].FNumString := FloatToStr(NumValue);
    FList^[Index].FNumValue := NumValue;
  end;
end;

end.

代码没有充分测试,请有心人斧正。  

来自:小雨哥, 时间:2010-5-6 23:57:50, ID:3992728
上面代码的提取核心代码如下,如果只是很少量的提取,可以直接使用:

///  定位到数字文本开头
function ToNumberHelper(const S: string):Integer;
begin
  Result := 1;
  while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.'])) do
    Inc(Result);
end;

///  定位到数字文本结尾
function ToStringHelper(const S: string):Integer;
begin
  Result := 1;
  while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.']) do
    Inc(Result);
end;

///  找到一段提取一段
function GetNumValueHelper(const S: string;const lPos:integer;  var VarStr: string):Integer;
begin
  VarStr := Copy(S, lPos, MaxInt);
  Result := ToStringHelper(VarStr);
  if Result > 1 then
    VarStr := Copy(S,  lPos, Result - 1);
end;

///  主函数,把文本的数字文本一个个取出来,中间用“|”符号分隔
function ParseStrToSeparatorValue(const S: string; FDelimiter: Char = '|'): string;
var
I, J: Integer;
NumStr,temp:string;
begin
  temp := s;
  Result := '';

  while true do
  begin
    I := ToNumberHelper(temp);
    if I < Length(S) then
    begin
      J := GetNumValueHelper(temp, I, NumStr);
      if J > 1 then
      begin
        if Result ='' then
           Result := NumStr
        else
         Result := Result + FDelimiter + NumStr;
      end else Break;
      temp := Copy(temp, I + j - 1, MaxInt);
    end
    else Break;
  end;
end;

[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课

收藏
免费 0
支持
分享
最新回复 (0)
游客
登录 | 注册 方可回帖
返回
//