Below are the steps to configure Polybase environment in SQL Server 2016, helps in reading files in BlobStorage with SQL queries.
STEP 1: CHECK for existing configuration
EXEC sp_configure;
STEP 2: CHECK for HADOOP Connectivity, if value = 0
EXEC sp_configure @configname='hadoop connectivity';
STEP 3: Configure external tables to reference data on Hortonworks 2.1, 2.2, and 2.3 on Linux, and Azure blob storage
sp_configure @configname = 'hadoop connectivity', @configvalue = 7;
GO
RECONFIGURE
GO
STEP 4: Install JAVA and add JAVA_HOME environment variable
CONNECT TO SQL SERVER
STEP 1: Create a master key on the database. This is required to encrypt the credential secret.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '**********';
STEP 2: Create a database scoped credential for Azure blob storage.
-- IDENTITY: any string (this is not used for authentication to Azure storage).
-- SECRET: your Azure storage account key.
CREATE DATABASE SCOPED CREDENTIAL GeAzureBlobStorageCredential
WITH IDENTITY = 'mypolybase', Secret = '******************';
STEP 3: Create an external data source with CREATE EXTERNAL DATA SOURCE..
-- LOCATION: Azure account storage account name and blob container name.
-- CREDENTIAL: The database scoped credential created above.
CREATE EXTERNAL DATA SOURCE MyAzureStorage with (
TYPE = HADOOP,
LOCATION ='wasbs://polybase@getestdatawarehouse.blob.core.windows.net',
CREDENTIAL = GeAzureBlobStorageCredential
);
STEP 4: Create an external file format with CREATE EXTERNAL FILE FORMAT.
-- FORMAT TYPE: Type of format in Hadoop (DELIMITEDTEXT, RCFILE, ORC, PARQUET).
CREATE EXTERNAL FILE FORMAT TextFileFormat
WITH
( FORMAT_TYPE = DELIMITEDTEXT
, FORMAT_OPTIONS ( FIELD_TERMINATOR = '|'
, STRING_DELIMITER = ''
, DATE_FORMAT = 'yyyy-MM-dd HH:mm:ss.fff'
, USE_TYPE_DEFAULT = FALSE
)
);
GO
STEP 5: Create an external table.
CREATE EXTERNAL TABLE asb.my_Table( [UserID] [varchar](50) NULL, [EndDate] [varchar](50) NULL, [StartDate] [varchar](50) NULL, [ModifiedDate][varchar](50) NULL ) WITH (LOCATION='/my_blob_folder/', DATA_SOURCE =
MyAzureStorage
, FILE_FORMAT = TextFileFormat , REJECT_TYPE = VALUE, REJECT_VALUE = 0 );