Reprinted with Permission by Quest Software June  2002

PL/SQL Scan Function
By: Fuping Peng

In my work I use SAS as well as PL SQL. SAS has a string parse function: scan(), which returns the nth word using the delimiter you choose. For example:

data _null_;
    length source source2 target1  target2 $30;
   source='Can you scan for the third word?';
    target1=scan(sourse,3,' ');      *or: "target1=scan(sourse,3);" as ' '
is the defaul delimiter;;
    put 'target1=' target1;

   source2='Can* you* scan* for* the* third* word?';
    target2=scan(sourse2,3,'* ');
    put 'target2='  target2;
run;

I have written my own PLSQL scan function to mimic the SAS function.  Here is the code:

--pkgScan
--two versions of scan function, which return the nth word using default or
--passed delimiter.
--by Fuping Peng
 
  CREATE OR REPLACE PACKAGE pkgscan
AS
   FUNCTION SCAN (itxt IN VARCHAR2, tknum IN INT, splitchar IN VARCHAR2)
      RETURN VARCHAR2;

   FUNCTION SCAN (itxt IN VARCHAR2, tknum IN INT)
      RETURN VARCHAR2;
END pkgscan;
/

CREATE OR REPLACE PACKAGE BODY pkgscan
AS
   --scan require delimiter
   FUNCTION SCAN (itxt IN VARCHAR2, tknum IN INT, splitchar IN VARCHAR2)
      RETURN VARCHAR2
   AS
      i        INTEGER         := 1;
      txt      VARCHAR2 (1000);
      currtk   INT             := 1;
      stpos    INT;
      endpos   INT;
      tkpos    INT             := 0;
      SPLIT    VARCHAR2 (1);
   BEGIN
      SPLIT := SUBSTR (splitchar, 1, 1);
      txt := RTRIM (LTRIM (itxt));

      WHILE SUBSTR (txt, i, 1) IS NOT NULL
      LOOP
         IF SUBSTR (txt, i, 1) = SPLIT
         THEN
            IF i > tkpos + 1
            THEN                         /*use only the last of consecutive splitchars to tokenize*/
               currtk := currtk + 1;
               tkpos := i;
            END IF;

            tkpos := i;

            IF tknum = currtk
            THEN
               stpos := i + 1;
            END IF;

            IF tknum + 1 = currtk
            THEN
               endpos := i;
            END IF;
         END IF;

         i := i + 1;
      END LOOP;

      RETURN SUBSTR (txt, stpos, endpos - stpos);
   END SCAN;
   --scan use ' ' as default delimiter
   FUNCTION SCAN (itxt IN VARCHAR2, tknum IN INT)
      RETURN VARCHAR2
   AS
      i        INTEGER         := 1;
      txt      VARCHAR2 (1000);
      currtk   INT             := 1;
      stpos    INT;
      endpos   INT;
      tkpos    INT             := 0;
      SPLIT    VARCHAR2 (1)    := ' ';
   BEGIN
      txt := RTRIM (LTRIM (itxt));

      WHILE SUBSTR (txt, i, 1) IS NOT NULL
      LOOP
         IF SUBSTR (txt, i, 1) = SPLIT
         THEN
            IF i > tkpos + 1
            THEN                         /*use only the last of consecutive splitchars to tokenize*/
               currtk := currtk + 1;
               tkpos := i;
            END IF;

            tkpos := i;

            IF tknum = currtk
            THEN
               stpos := i + 1;
            END IF;

            IF tknum + 1 = currtk
            THEN
               endpos := i;
            END IF;
         END IF;

         i := i + 1;
      END LOOP;

      RETURN SUBSTR (txt, stpos, endpos - stpos);
   END SCAN;
END pkgscan;
/

SHOW err
--call the scan function. note the consecutive delimiters did not prevent
--the scan function
--to pick up the third word.

EXECUTE dbms_output.put_line(pkgScan.scan('how     old are you?',3, ' '));
EXECUTE dbms_output.put_line(pkgScan.scan('how     old are you?',3));
EXECUTE dbms_output.put_line(pkgScan.scan('how*****old*are*you?',3,'*'));