CSVReader PL/SQL + java

  • support on Oracle 10, 11, 12

1. Create java source with name CSVReader.

CREATE OR REPLACE AND RESOLVE JAVA SOURCE NAMED "CSVReader" AS
import java.lang.*;
import java.io.File;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

public class CSVReader {

// Procedure set encoding for Java = cp1251 (for situations where database character set is AL32UTF8), otherwise there not will be read Russian characters
public static void encod()
{
System.setProperty("file.encoding","cp1251");
}

// Procedure for filling the object type t_line (to display the data from csv file)
static void toLine (int pRowNum, String pValue, int pDelQoutes)
throws Exception {
encod();
#sql {
begin
csvreader.private_add_line (
p_row_nr        => :pRowNum,
p_value         => :pValue,
p_delqoutes     => :pDelQoutes );
end;
};
}

// Function replace all separator between double quotes
public static String replace_between_qoutes(String p_text, String p_sep)
throws Exception {
char sep = p_sep.charAt(0);
String copy = new String();
boolean inQuotes = false;
int code = (int) sep;
char dblq = '"';

for(int i=0; i<p_text.length();++i) {
if (p_text.charAt(i)=='"')
inQuotes = !inQuotes;

if (p_text.charAt(i)==sep && inQuotes)
copy += "chr("+code+")";

else
copy += p_text.charAt(i);
}
return copy;
}

// Procedure reading csv file from disk
public static void read(String p_file, Integer p_skip, Integer p_delqoutes)
throws Exception {
encod();
int             line_counter     = 1;
int             col_counter      = 0;
File            file        = new File(p_file);
BufferedReader  br          = null;
String          line        = "";
String          text        = "";

br = new BufferedReader(new FileReader(file));
// read line by line
while ((line = br.readLine()) != null) {
// skip lines when set to skiplines
if (line_counter > p_skip) {
// filling oracle type for display values
CSVReader.toLine(
line_counter,
line,
p_delqoutes
);
}
line_counter++;
}
}

};
/

2. Then create a package of “csvreader” for publication the Java call specification, so we can access it via PL/SQL.

CREATE OR REPLACE package csvreader as

/* Package for parse CSV file in Oracle */

g_sep       varchar2(10) := ',';

type t_line is record (
row_nr      number,
col_nr      number,
value       varchar2(4000));
type t_lines is table of t_line;

/* pipelined table function to return all cells of an CSV file */
function get_lines (
p_file         in varchar2,
p_sep          in varchar2 := ',',
p_skip         in number   := 0,
p_delqoutes    in number   := 1)
return t_lines
pipelined;

/* for filling type t_line from java code */
procedure private_add_line (
p_row_nr        in number,
p_value         in varchar2,
p_delqoutes     in number);

function replace_between_qoutes(
p_text          in varchar2,
p_sep           in varchar2
)
return Varchar2
is language java
name 'CSVReader.replace_between_qoutes(java.lang.String, java.lang.String) return java.lang.String';

end csvreader;
/

CREATE OR REPLACE package body csvreader as

g_lines t_lines;

procedure read (
    p_file          in varchar2,
    p_skip          in number,
    p_delqoutes     in number)
is language java
name 'CSVReader.read(java.lang.String, java.lang.Integer, java.lang.Integer)';

/* parse line by line of csv file */
function get_lines (
    p_file          in varchar2,
    p_sep           in varchar2 := ',',
    p_skip          in number   := 0,
    p_delqoutes     in number   := 1)
    return t_lines
    pipelined
is
begin
    g_lines := new t_lines();

    if p_sep != ',' then
        g_sep := p_sep;
    end if;

    if p_file is not null then
        read (
            p_file      => p_file,
            p_skip      => p_skip,
            p_delqoutes => p_delqoutes);
    end if;

    for i in 1 .. g_lines.count loop
        pipe row (g_lines(i));
    end loop;

    g_lines := null;
    return;
end get_lines;

/* filling type t_line */
procedure private_add_line (
    p_row_nr        in number,
    p_value         in varchar2,
    p_delqoutes     in number)
is
    l_row t_line;
    val varchar2(4000);
begin

-- divided line on the part depending of the separator
for rec in ( select replace(
                    regexp_substr(newstr,'[^'||g_sep||']+', 1, level),
                        'chr('||ascii(g_sep)||')', g_sep) text, level lvl
             from
                ( select str, listagg(rep,'') within group (order by n) newstr
                 from (
                     select str
                     ,n, case
                            when (mod(n,2)=q)
                                then '"'||replace(sub,g_sep,'chr('||ascii(g_sep)||')')||'"'
                            else sub
                         end rep
                     from (
                         select str,
                         m.column_value n,
                         case when regexp_like(str,'^"') then 1 else 0 end q,
                         regexp_substr(str,'[^"]+',1,m.column_value) sub
                         from  (select p_value str from dual) t,
                                table(cast(multiset(
                                    select level from dual
                                    connect by level <= regexp_count(str,'[^"]+')
                                ) as sys.odciNumberList)) m
                     )
                 )
                 group by str
                )
            connect by regexp_substr(newstr, '[^'||g_sep||']+', 1, level) is not null
           )
loop
    -- remove the double quotation marks at the beginning and end of the line
    val := case
                when p_delqoutes = 0 then rec.text
                else regexp_replace(rec.text, '^([^"]*"(.*)"[^"]*|(.*))$', '\2\3')
           end;

    l_row.row_nr        := p_row_nr;
    l_row.col_nr        := rec.lvl;
    l_row.value         := val;
    g_lines.extend;
    g_lines(g_lines.count) := l_row;
    
end loop;

end private_add_line;

end csvreader;
/

3. Grant the Oracle JVM the relevant filesystem permissions.

DECLARE
l_user VARCHAR2(30) := :user_name;
BEGIN
DBMS_JAVA.grant_permission(l_user, 'java.io.FilePermission','<<ALL FILES>>', 'read ,write, execute, delete');
DBMS_JAVA.grant_permission(l_user, 'SYS:java.lang.RuntimePermission','writeFileDescriptor', '');
DBMS_JAVA.grant_permission(l_user, 'SYS:java.lang.RuntimePermission','readFileDescriptor', '');
DBMS_JAVA.grant_permission(l_user, 'SYS:java.util.PropertyPermission','file.encoding', 'write' );
EXECUTE IMMEDIATE 'GRANT EXECUTE ON sys.odciNumberList to '||l_user;
END;
/

4. How use:

/*
Input parameters:
  p_file        - fullpath to file. The file must be located on the computer
                  where the database Oracle or a network path,
                  where access to the computer issued to Oracle;
  p_sep         - separator, default ",";
  p_skip        - skip n-lines before read, deafult 0;
  p_delqoutes   - remove the double quotation marks at the beginning
                  and end of the line (0 - no, 1 - yes), default 1.

Output:
  row_nr      number,         - number of line in file
  col_nr      number,         - number of column
  value       varchar2(4000)  - text

Example:
create file "c:\test.txt" with this content:
"1.0.0.0","1.0.0.255","16777216","16777471","AU","Australia, "Montenegro LCD", USA"
"1.0.1.0","1.0.3.255","16777472","16778239","CN","China"
"1.0.4.0","1.0.7.255","16778240","16779263","AU","Australia, USA"
"1.0.8.0","1.0.15.255","16779264","16781311","CN","China"
"1.0.16.0","1.0.31.255","16781312","16785407","JP","Japan"
"1.0.32.0","1.0.63.255","16785408","16793599","CN","China"
"1.0.64.0","1.0.127.255","16793600","16809983","JP","Japan"
"1.0.128.0","1.0.255.255","16809984","16842751","TH","Thailand, Siria"
*/

-- run this select:
select * from table(csvreader.get_lines(
p_file      => 'c:\test.txt',
p_sep       => ',',
p_skip      => 0,
p_delqoutes => 1));
---------------------------------------------
1    1    1.0.0.0
1    2    1.0.0.255
1    3    16777216
1    4    16777471
1    5    AU
1    6    Australia, "Montenegro LCD", USA
2    1    1.0.1.0
2    2    1.0.3.255
............................................

Oracle regular expressions examples (REGEXP)

  • Oracle 10 and above

Примеры использования регулярных выражений.

REGEXP_REPLACE
1. Замена всех пробелов в строке на двойной пробел.

with t(fname) as
(select 'Hello, dear   friend! Nice to  see   you!' from dual)
select regexp_replace(fname, '[ ]+', '  ')  from t;</pre>
<pre>fname
--------------------------------------------------
Hello,  dear  friend!  Nice  to  see  you!

2. Замена первых n-символов в строке на другие.

with t(fname) as
(select 'Regular expressions is very easy' from dual)
select regexp_replace(fname, lpad(fname, 7), 'Arithmetic')  from t;

FNAME                              
-----------------------------------
Arithmetic expressions is very easy

3. Замена n-ого символа(ов) в строке, начиная с позиции k

with t(fname) as
(select 1000340082145076 from dual)
select regexp_replace(fname, '..', '21', 5, 1) fname  from t;

FNAME           
----------------
1000210082145076

4. Получить часть строки после первой запятой

with t(fname) as
(select 'gfdgh  ввап1; 567, 23425' from dual)
select regexp_replace(fname,  '^[^,]*,') fname  from t;

FNAME
------
23425

5. Удаляем из строки html tags

with t(fname) as
(select '<span style="color: rgb(255, 255, 153);">Hello World!</span><br style="color: rgb(255, 255, 153);"><span style="color: rgb(255, 255, 153);">Listen to your heart!</span><br>' from dual)
select regexp_replace(fname, '<[^>]*>', ' ') fname  from t;

FNAME
---------------------------------------
Hello World!   Listen to your heart!

6. Вырезать значение из двойных ковычек

with t(fname) as
(select 'LTD  "GROUP  "BIG WORLD" Las Vegas' from dual)
select replace(regexp_replace(fname, '^([^"]*"(.*)"[^"]*|(.*))$', '\2\3'), '"') fname  from t;

FNAME
----------------
GROUP  BIG WORLD

REGEXP_SUBSTR
1. Вырезать часть строки по заданному шаблону

with t(fname) as
(select '(KERAMIN."user_role_auth"(p_username=>:USER_NAME, p_groupname=>'||chr(39)||'public'||chr(39)||', p_level=>5, p_app_id=>:APP_ID) or KERAMIN."user_role_auth"(p_username=>:USER_NAME, p_groupname=>'||chr(39)||'ADMINISTRATORS'||chr(39)||', p_level=>10, p_app_id=>:APP_ID) or KERAMIN."user_role_auth"(p_username=>:USER_NAME, p_groupname=>'||chr(39)||'users'||chr(39)||', p_level=>15, p_app_id=>:APP_ID))' fname from dual)
select regexp_substr(fname,'KERAMIN."user_role_auth"\(p_username=>:USER_NAME, p_groupname=>[^,]+, p_level=>10, p_app_id=>:APP_ID\)', 1, 1) fname  from t;

FNAME
--------------------------------------------------------------------------------
KERAMIN."user_role_auth"(p_username=>:USER_NAME, p_groupname=>'ADMINISTRATORS',
p_level=>10, p_app_id=>:APP_ID)

2. Делим одну строку на несколько строк с использованием разделителя (например, <,>)

with t as
(select 'O,r,a,c,l,e' fname from dual)
select regexp_substr(fname,'[^,]+', 1, level) fname from t
connect by regexp_substr(fname, '[^,]+', 1, level) is not null;

FNAME
-----------
O
r
a
c
l
e

REGEXP_COUNT
1. Подсчет количества цифр в строке

with t(fname) as
(select '12 mgf45, fhfh-565gh hgh56 66' fname from dual UNION ALL
select 'fd8g78df7g8ghorhegy g rgtueriogti' fname from dual UNION ALL
select '5768hf hgh565 g-+g 676 454ggf55fhgfh' fname from dual)
select regexp_count(fname, '\d') fname from t;

FNAME
----------
11
5
15

2. Подсчет количества символов в строке без учета регистра

with t(fname) as
(select 'A fox is not taken twice in the same snare' fname from dual)
select regexp_count(fname, 'a', 1, 'i') fname from t;

FNAME
----------
4

Как получить инициалы (INITIALS) из ФИО (NAME)

  • Oracle 10, 11

Данная конструкция позволяет из ФИО (NAME) сотрудника получить фамилию (LASTNAME), имя (FIRSTNAME), отчество (FATHERNAME) и иницалы (INITIALS).

with t as
(select 'Иванов Петр Васильевич' as name from dual)
select t.name,
regexp_replace(t.name, ' (.*)') LASTNAME,
regexp_replace(t.name, ' (.*)|^[^ ]* ') FIRSTNAME,
regexp_replace(t.name, '(.*) ') FATHERNAME,
regexp_replace(regexp_replace(t.name, ' (.*)|^[^ ]* '),'.*','.',2,1)||regexp_replace(regexp_replace(t.name, '(.*) '),'.*','.',2,1) INITIALS
from t;

NAME                    LASTNAME  FIRSTNAME  FATHERNAME  INITIALS
----------------------  ------    ----       ----------  ----                                                            
Иванов Петр Васильевич  Иванов    Петр       Васильевич  П.В.