Implementing C/C++ Dynamic Shared Libraries with Teradata UDFs

Build the Boost C++ Regex source into a Dynamic Shared C++ Library (.so) and implement a Scalar UDF that utilises this library to parse URLs.

Setting up C/C++ Programming Environment

/home/tdat/.bashrcumask 022
Neon.3 Release (4.6.3)
Build id: 20170314–1500
tdat@TDExpress1620_Sles11:~> mkdir -p /home/tdat/apps
tdat@TDExpress1620_Sles11:~/apps> tar -xvf eclipse-cpp-neon-3-linux-gtk-x86_64.tar.gz
/home/tdat/.bashrcPATH=/opt/teradata/jvm64/jdk8/bin:${PATH}
export PATH
/home/tdat/apps/eclipse/eclipse.ini-Xmx512m
/home/tdat/apps/eclipse/eclipse
tdat@TDExpress1620_Sles11:~/apps> tar -xvf boost_1_72_0.tar.gz
/home/tdat/apps/boost_1_72_0
cd /home/tdat/apps/boost_1_72_0/libs/regex/src
g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c c_regex_traits.cppg++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c cpp_regex_traits.cpp            g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c cregex.cpp                      g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c fileiter.cpp                    g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c icu.cpp                         g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c instances.cpp                   g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c internals.hpp                   g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c posix_api.cpp                   g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c regex.cpp                       g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c regex_debug.cpp                 g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c regex_raw_buffer.cpp            g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c regex_traits_defaults.cpp       g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c static_mutex.cpp                g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c usinstances.cpp                 g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c w32_regex_traits.cpp            g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c wc_regex_traits.cpp             g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c wide_posix_api.cpp              g++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -fPIC -c winstances.cpp
g++ -shared -fPIC -o libboost.so \
c_regex_traits.o \
cpp_regex_traits.o \
cregex.o \
fileiter.o \
icu.o \
instances.o \
internals.hpp \
posix_api.o \
regex.o \
regex_debug.o \
regex_raw_buffer.o \
regex_traits_defaults.o \
static_mutex.o \
usinstances.o \
w32_regex_traits.o \
wc_regex_traits.o \
wide_posix_api.o \
winstances.o
/home/tdat/apps/boost_1_72_0/libs/regex/src/libboost.so

Create UDF Test code using Shared Library

/home/tdat/apps/eclipse/eclipse
#include <string>
#include <iostream>
#include <boost/regex.hpp>
#include "cstring"
using std::string;
using std::cout;
using std::endl;
using std::stringstream;
void parse_url(const string& url_string) //with boost
{
boost::regex ex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");
boost::cmatch what;
if(regex_match(url_string.c_str(), what, ex))
{
string scheme = string(what[1].first, what[1].second);
string host = string(what[2].first, what[2].second);
string port = string(what[3].first, what[3].second);
string path = string(what[4].first, what[4].second);
string query = string(what[5].first, what[5].second);
string jsonify = "{\"scheme\": \"" + scheme + "\","
" \"host\": \"" + host + "\","
" \"port\": \"" + port + "\","
" \"path\": \"" + path + "\","
" \"query\": \"" + query + "\"}"
;
cout << jsonify << endl;
}
}
int main(int argc, char* argv[])
{
parse_url("https://www.samp.com:8080/samp/catg/path?parm1=sampparm");
return 0;}
{
"scheme": "https",
"host": "www.samp.com",
"port": "8080",
"path": "/samp/catg/path",
"query": "parm1=sampparm"
}

Preparing C++ Code for UDF Format

#define  SQL_TEXT Latin_Text
#include "sqltypes_td.h"
#include <string>
#include <iostream>
#include <boost/regex.hpp>
#include "cstring"
using std::string;
using std::cout;
using std::endl;
using std::stringstream;
extern "C"
void parse_url(const char *url_string, char *result, char sqlstate[6])
{
boost::regex ex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");
boost::cmatch what;
if(regex_match(url_string, what, ex))
{
string scheme = string(what[1].first, what[1].second);
string host = string(what[2].first, what[2].second);
string port = string(what[3].first, what[3].second);
string path = string(what[4].first, what[4].second);
string query = string(what[5].first, what[5].second);
string jsonify = "{\"scheme\": \"" + scheme + "\","
" \"host\": \"" + host + "\","
" \"port\": \"" + port + "\","
" \"path\": \"" + path + "\","
" \"query\": \"" + query + "\"}";
strcpy(result, jsonify.c_str());

}

return;
}

Install parse_url UDF

cd /home/tdat/apps/boost_1_72_0/libs/regex/srcg++ -I /home/tdat/apps/boost_1_72_0 -I /home/tdat/apps/boost_1_72_0/libs/regex/src -I /usr/tdbms/etc -fPIC -c parse_url.cpp
DATABASE MYPROJ;DROP FUNCTION parse_url;REPLACE FUNCTION parse_url (url_string  VARCHAR(2500) )
RETURNS VARCHAR(2500)
LANGUAGE CPP
NO SQL
EXTERNAL NAME 'SL!boost!CO!parse_url!/home/tdat/apps/boost_1_72_0/libs/regex/src/parse_url.o'
PARAMETER STYLE TD_GENERAL;
tdat@TDExpress1620_Sles11:/> cd /home/tdat/apps/boost_1_72_0/libs/regex/src
tdat@TDExpress1620_Sles11:~/apps/boost_1_72_0/libs/regex/src> bteq
Teradata BTEQ 16.20.00.04 for LINUX. PID: 11459
Copyright 1984-2018, Teradata Corporation. ALL RIGHTS RESERVED.
Enter your logon or BTEQ command:
.logon dbc;.logon dbc
Password:
*** Logon successfully completed.
*** Teradata Database Release is 16.20.32.01
*** Teradata Database Version is 16.20.32.01
*** Transaction Semantics are BTET.
*** Session Character Set Name is 'ASCII
Teradata BTEQ 16.20.00.04 for LINUX
Copyright 1984-2018, Teradata Corporation. ALL RIGHTS RESERVED.
Enter your logon or BTEQ command:
.logon dbc;
*** Total elapsed time was 1 second.

BTEQ -- Enter your SQL request or BTEQ command:
DATABASE MYPROJ;
*** New default database accepted.
*** Total elapsed time was 1 second.
BTEQ -- Enter your SQL request or BTEQ command:
DROP FUNCTION parse_url;
*** Failure 5589 Function 'parse_url' does not exist.
Statement# 1, Info =0
*** Total elapsed time was 1 second.
BTEQ -- Enter your SQL request or BTEQ command:
REPLACE FUNCTION parse_url (url_string VARCHAR(2500) )
RETURNS VARCHAR(2500)
LANGUAGE CPP
NO SQL
EXTERNAL NAME 'SL!boost!CO!parse_url!/home/tdat/apps/boost_1_72_0/libs/reg
ex/src/parse_url.o!F!parse_url'
PARAMETER STYLE TD_GENERAL;
*** Create function completed with error(s).
*** Warning: 5603 Errors encountered in compiling UDF/XSP/UDM/UDT/JAR.
*** Total elapsed time was 3 seconds.
Errors/Warnings reported during compilation
---------------------------------------------------------------------------
/usr/bin/gcc -D_REENTRANT -D_LIBC_REENTRANT -I/usr/tdbms/etc -L/usr/td
bms/lib -L/usr/tdbms/lib -fpic -c Teradata_new_delete.cpp
/usr/bin/gcc -D_REENTRANT -D_LIBC_REENTRANT -I/usr/tdbms/etc -L/usr/td
bms/lib -L/usr/tdbms/lib -fpic -c pre_parse_url.c
/usr/bin/gcc -shared -fpic -Xlinker -rpath -Xlinker /usr/tdbms/lib -Xlinke
r -rpath -Xlinker /usr/tdbms/lib -Wl,--version-script=/var/opt/teradata/tdt
emp/UDFTemp/0408.30719.25720/UserUdf_versions.scr -D_REENTRANT -D_LIBC_REEN
TRANT -I/usr/tdbms/etc -L/usr/tdbms/lib -L/usr/tdbms/lib -o @FileList
-lboost -ludf -lm -ljil -lstdc++
/usr/lib64/gcc/x86_64-suse-linux/4.3/../../../../x86_64-suse-linux/bin/ld:
cannot find -lboost
collect2: ld returned 1 exit status
make: *** [libudf_0408_43.so] Error 1
cannot find -lboost
collect2: ld returned 1 exit status
/home/tdat/apps/boost_1_72_0/libs/regex/src/libboost.so 
/usr/tdbms/lib 
cd /home/tdat/apps/boost_1_72_0/libs/regex/src
cp libboost.so /usr/tdbms/lib
cd /usr/tdbms/lib
chmod 555 libboost.so
chown teradata libboost.so
chgrp tdtrusted libboost.so
BTEQ -- Enter your SQL request or BTEQ command: 
SELECT MYPROJ.parse_url(
'https://www.samp.com:8080/samp/catg/path?parm1=sampparm'
);
*** Query completed. One row found. One column returned.
*** Total elapsed time was 1 second.
parse_url('https:www.samp.com:8080/samp/catg/path?parm1=sampparm')--------------------------------------------------------------------
{"scheme": "https", "host": "www.samp.com", "port": "8080", "path": "/samp/catg/path", "query": "parm1=sampparm"}
TDExpress1620_Sles11:~ # cufconfig -o

UDFLibPath

Purpose

Specifies a library path in addition to the standard library path for use by UDFs, external stored procedures, or UDMs that specify a data access clause of NO SQL in the CREATE PROCEDURE or REPLACE PROCEDURE statement.

Default

This field has no default value.

UDFLibPath: /usr/local/lib
UDFLibPath: /usr/local/lib
cufconfig -f /root/cufedit.txt
cufconfig -o | grep UDFLibPathOutput:UDFLibPath: /usr/local/lib/
cd /usr/local/lib
chmod 555 libboost.so
chown teradata libboost.so
chgrp tdtrusted libboost.so
/usr/bin/gcc -D_REENTRANT -D_LIBC_REENTRANT -I/usr/tdbms/etc -L /usr/local/lib -L/usr/tdbms/lib......
BTEQ — Enter your SQL request or BTEQ command: 
SELECT
MYPROJ.parse_url
(
'https://www.samp.com:8080/samp/catg/path?parm1=sampparm'
);
*** Query completed. One row found. One column returned.
*** Total elapsed time was 1 second.
{"scheme": "https", "host": "www.samp.com", "port": "8080", "path": "/samp/catg/path", "query": "parm1=sampparm"}

Closing Remarks

Appendix/References

#define  SQL_TEXT Latin_Text
#include "sqltypes_td.h"
extern "C" 
void f1( char *a, char *result, char sqlstate[6])
{
...
}
void function_name ( type_1      *input_parameter_1,
...,
type_n *input_parameter_n,
result_type *result,
char sqlstate[6] )
{
...
}
void function_name ( type_1      *input_parameter_1,
...,
type_n *input_parameter_n,
result_type *result,
int *indicator_parameter_1,
...,
int *indicator_parameter_n,
int *indicator_result,
char sqlstate[6],
SQL_TEXT function_name[m],
SQL_TEXT specific_function_name[l],
SQL_TEXT error_message[p] )
{
...
}

Initial Code

F!function_entry_point_name

Path Specification

I!name_on_server!include_name
L!library_name
O!name_on_server!object_name
P!package_name
S!name_on_server!source_name
NS!source_file!include_file

delimiter

References

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store