/*Левенштейн*/ CREATE OR REPLACE FUNCTION ld ( as_src_i IN VARCHAR2 , as_trg_i IN VARCHAR2 ) RETURN NUMBER DETERMINISTIC AS ln_src_len PLS_INTEGER := NVL(LENGTH(as_src_i), 0); ln_trg_len PLS_INTEGER := NVL(LENGTH(as_trg_i), 0); ln_hlen PLS_INTEGER; ln_cost PLS_INTEGER; TYPE t_numtbl IS TABLE OF PLS_INTEGER INDEX BY BINARY_INTEGER; la_ldmatrix t_numtbl; BEGIN IF (ln_src_len = 0) THEN RETURN ln_trg_len; ELSIF (ln_trg_len = 0) THEN RETURN ln_src_len; END IF; ln_hlen := ln_src_len + 1; FOR h IN 0 .. ln_src_len LOOP la_ldmatrix(h) := h; END LOOP; FOR v IN 0 .. ln_trg_len LOOP la_ldmatrix(v * ln_hlen) := v; END LOOP; FOR h IN 1 .. ln_src_len LOOP FOR v IN 1 .. ln_trg_len LOOP IF (SUBSTR(as_src_i, h, 1) = SUBSTR(as_trg_i, v, 1)) THEN ln_cost := 0; ELSE ln_cost := 1; END IF; la_ldmatrix(v * ln_hlen + h) := LEAST( la_ldmatrix((v - 1) * ln_hlen + h ) + 1 , la_ldmatrix( v * ln_hlen + h - 1) + 1 , la_ldmatrix((v - 1) * ln_hlen + h - 1) + ln_cost ) ; END LOOP; END LOOP; RETURN la_ldmatrix(ln_trg_len * ln_hlen + ln_src_len); END ld; / DECLARE A NUMBER; BEGIN A:=ld('triim','troom'); DBMS_OUTPUT.ENABLE; DBMS_OUTPUT.PUT_LINE(A); END; / SELECT ld('Россия','Роисся') FROM DUAL; /*Джаро-Винклер*/ CREATE OR REPLACE FUNCTION jws (p_string1 IN VARCHAR2, p_string2 IN VARCHAR2) RETURN NUMBER DETERMINISTIC AS v_closeness NUMBER := 0; v_temp VARCHAR2 (32767); v_comp1 VARCHAR2 (32767); v_comp2 VARCHAR2 (32767); v_matches NUMBER := 0; v_char VARCHAR2 (1); v_transpositions NUMBER := 0; v_d_jaro NUMBER := 0; v_leading NUMBER := 0; v_d_winkler NUMBER := 0; v_jws NUMBER := 0; BEGIN -- check for null strings: IF p_string1 IS NULL OR p_string2 IS NULL THEN RETURN 0; END IF; -- closeness: v_closeness := (GREATEST (LENGTH (p_string1), LENGTH (p_string2)) / 2) - 1; -- find matching characters and transpositions within closeness: v_temp := p_string2; FOR i IN 1 .. LENGTH (p_string1) LOOP IF INSTR (v_temp, SUBSTR (p_string1, i, 1)) > 0 THEN v_char := SUBSTR (p_string1, i, 1); IF ABS (INSTR (p_string1, v_char) - INSTR (p_string2, v_char)) <= v_closeness THEN v_comp1 := v_comp1 || SUBSTR (p_string1, i, 1); v_temp := SUBSTR (v_temp, 1, INSTR (v_temp, SUBSTR (p_string1, i, 1)) - 1) || SUBSTR (v_temp, INSTR (v_temp, SUBSTR (p_string1, i, 1)) + 1); END IF; END IF; END LOOP; v_temp := p_string1; FOR i IN 1 .. LENGTH (p_string2) LOOP IF INSTR (v_temp, SUBSTR (p_string2, i, 1)) > 0 THEN v_char := SUBSTR (p_string2, i, 1); IF ABS (INSTR (p_string2, v_char) - INSTR (p_string1, v_char)) <= v_closeness THEN v_comp2 := v_comp2 || SUBSTR (p_string2, i, 1); v_temp := SUBSTR (v_temp, 1, INSTR (v_temp, SUBSTR (p_string2, i, 1)) - 1) || SUBSTR (v_temp, INSTR (v_temp, SUBSTR (p_string2, i, 1)) + 1); END IF; END IF; END LOOP; -- check for null strings: IF v_comp1 IS NULL OR v_comp2 IS NULL THEN RETURN 0; END IF; -- count matches and transpositions within closeness: FOR i IN 1 .. LEAST (LENGTH (v_comp1), LENGTH (v_comp2)) LOOP IF SUBSTR (v_comp1, i, 1) = SUBSTR (v_comp2, i, 1) THEN v_matches := v_matches + 1; ELSE v_char := SUBSTR (v_comp1, i, 1); IF ABS (INSTR (p_string1, v_char) - INSTR (p_string2, v_char)) <= v_closeness THEN v_transpositions := v_transpositions + 1; v_matches := v_matches + 1; END IF; END IF; END LOOP; v_transpositions := v_transpositions / 2; -- check for no matches: IF v_matches = 0 THEN RETURN 0; END IF; -- Jaro: v_d_jaro := ((v_matches / LENGTH (p_string1)) + (v_matches / LENGTH (p_string2)) + ((v_matches - v_transpositions) / v_matches)) / 3; -- count matching leading characters (up to 4): FOR i IN 1 .. LEAST (LENGTH (p_string1), LENGTH (p_string2), 4) LOOP IF SUBSTR (p_string1, i, 1) = SUBSTR (p_string2, i, 1) THEN v_leading := v_leading + 1; ELSE EXIT; END IF; END LOOP; -- Winkler: v_d_winkler := v_d_jaro + ((v_leading * .1) * (1 - v_d_jaro)); -- Jaro-Winkler similarity rounded: v_jws := ROUND (v_d_winkler * 100); RETURN v_jws; END jws; / WITH strings AS (SELECT NULL string1, NULL string2 FROM DUAL UNION ALL SELECT 'test' string1, NULL string2 FROM DUAL UNION ALL SELECT NULL string1, 'test' string2 FROM DUAL UNION ALL SELECT 'CRATE' string1, 'TRACE' string2 FROM DUAL UNION ALL SELECT 'MARTHA' string1, 'MARHTA' string2 FROM DUAL UNION ALL SELECT 'DWAYNE' string1, 'DUANE' string2 FROM DUAL UNION ALL SELECT 'DIXON' string1, 'DICKSONX' string2 FROM DUAL UNION ALL SELECT 'Dunningham' string1, 'Cunningham' string2 FROM DUAL UNION ALL SELECT 'Abroms' string1, 'Abrams' string2 FROM DUAL UNION ALL SELECT 'Lampley' string1, 'Campley' string2 FROM DUAL UNION ALL SELECT 'Jonathon' string1, 'Jonathan' string2 FROM DUAL UNION ALL SELECT 'Jeraldine' string1, 'Gerladine' string2 FROM DUAL UNION ALL SELECT 'test' string1, 'blank' string2 FROM DUAL UNION ALL SELECT 'everybody' string1, 'every' string2 FROM DUAL UNION ALL SELECT 'a' string1, 'aaa' string2 FROM DUAL) SELECT string1, string2, UTL_MATCH.JARO_WINKLER_SIMILARITY (string1, string2) oracle_jws, jws (string1, string2) my_jws FROM strings ORDER BY my_jws DESC / SELECT jws ('Барак Абама','Парап Апама') FROM DUAL; -- Hamming Distance CREATE OR REPLACE FUNCTION hd ( as_src_i IN VARCHAR2 , as_trg_i IN VARCHAR2 ) RETURN NUMBER DETERMINISTIC AS ln_src_len PLS_INTEGER := NVL(LENGTH(as_src_i), 0); ln_trg_len PLS_INTEGER := NVL(LENGTH(as_trg_i), 0); ln_distance PLS_INTEGER := 0; BEGIN IF (ln_src_len <> ln_trg_len) THEN RETURN NULL; END IF; IF (ln_src_len = 0) THEN RETURN ln_src_len; END IF; FOR i IN 1..ln_src_len LOOP IF (SUBSTR(as_src_i, i, 1) <> SUBSTR(as_trg_i, i, 1)) THEN ln_distance := ln_distance + 1; END IF; END LOOP; RETURN ln_distance; END hd; / DECLARE A NUMBER; BEGIN A:=hd('triim','troom'); DBMS_OUTPUT.ENABLE; DBMS_OUTPUT.PUT_LINE(A); END; / SELECT hd('Россия','Роисся') FROM DUAL; -- ЗАПРОС ** SELECT * FROM( WITH COUNTRY_NAMES AS (SELECT NAME FROM CITIZENSHIP) SELECT COUNTRY_NAMES.NAME, jws(COUNTRY_NAMES.NAME,'Rassiya') mjws FROM COUNTRY_NAMES ORDER BY mjws DESC ) WHERE ROWNUM = 1; SELECT SUR, EN_PR, CIT FROM ( SELECT SUR, EN_PR, C.NAME CIT FROM CITIZENSHIP C JOIN ( SELECT H.SURNAME SUR, EN_PR, H.CITIZENSHIP_ID CIT_ID FROM HUMANS H JOIN ( SELECT HUMAN_ID H_ID, EN_PR FROM ENTRANTS E JOIN ( SELECT P.ENTRANT_ID ENT_ID, LISTAGG(P.PROGRAM_ID,', ') WITHIN GROUP (ORDER BY P.ENTRANT_ID) AS EN_PR FROM PRIORITIES P GROUP BY P.ENTRANT_ID ) ON E.ID = ENT_ID ) ON H.ID = H_ID ) ON C.ID = CIT_ID ) WHERE CIT IN ( SELECT NAME FROM( WITH COUNTRY_NAMES AS (SELECT NAME FROM CITIZENSHIP) SELECT COUNTRY_NAMES.NAME, jws(COUNTRY_NAMES.NAME,'Rassiya') mjws FROM COUNTRY_NAMES ORDER BY mjws DESC ) WHERE ROWNUM = 1 );