Documentation Index

Fetch the complete documentation index at: https://api-fin.ncloud-docs.com/llms.txt

Use this file to discover all available pages before exploring further.

장문 인식

Prev Next

CLOVA Speech 서비스의 장문 인식 예제를 소개합니다.

Java

Java 기반의 API 예제 코드는 다음과 같습니다. 먼저 아래의 Maven 의존성을 pom.xml에 추가한 후 클라이언트 코드를 작성해 주십시오.

<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.12</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpmime</artifactId>
    <version>4.3.1</version>
</dependency>
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>
package org.example.clovaspeech.client;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;

import com.google.gson.Gson;

/**
 * Sample client for the CLOVA Speech long-form recognition API.
 *
 * <p>Three entry points are offered, one per media source: {@link #url}, {@link #objectStorage}
 * and {@link #upload}. Each one POSTs to a path below {@code INVOKE_URL}, authenticates with the
 * {@code X-CLOVASPEECH-API-KEY} header and returns the raw response body as a string.
 */
public class ClovaSpeechClient {

	// Clova Speech secret key
	private static final String SECRET = "";
	// Clova Speech invoke URL
	private static final String INVOKE_URL = "";

	private CloseableHttpClient httpClient = HttpClients.createDefault();
	private Gson gson = new Gson();

	// Headers attached to every request issued by this client.
	private static final Header[] HEADERS = new Header[] {
		new BasicHeader("Accept", "application/json"),
		new BasicHeader("X-CLOVASPEECH-API-KEY", SECRET),
	};

	/** One element of the {@code boostings} request array. */
	public static class Boosting {
		private String words;

		public String getWords() {
			return words;
		}

		public void setWords(String words) {
			this.words = words;
		}
	}

	/** Settings sent under the {@code diarization} request key; disabled by default. */
	public static class Diarization {
		private Boolean enable = Boolean.FALSE;
		private Integer speakerCountMin;
		private Integer speakerCountMax;

		public Boolean getEnable() {
			return enable;
		}

		public void setEnable(Boolean enable) {
			this.enable = enable;
		}

		public Integer getSpeakerCountMin() {
			return speakerCountMin;
		}

		public void setSpeakerCountMin(Integer speakerCountMin) {
			this.speakerCountMin = speakerCountMin;
		}

		public Integer getSpeakerCountMax() {
			return speakerCountMax;
		}

		public void setSpeakerCountMax(Integer speakerCountMax) {
			this.speakerCountMax = speakerCountMax;
		}
	}

	/**
	 * Recognition options shared by all three endpoints.
	 *
	 * <p>{@link #upload} serializes this object directly with Gson, so the field names
	 * are the JSON keys sent to the service and must not be renamed.
	 */
	public static class NestRequestEntity {
		private String language = "ko-KR";
		//completion optional, sync/async
		private String completion = "sync";
		//optional, used to receive the analyzed results
		private String callback;
		//optional, any data
		private Map<String, Object> userdata;
		private Boolean wordAlignment = Boolean.TRUE;
		private Boolean fullText = Boolean.TRUE;
		//boosting object array
		private List<Boosting> boostings;
		//comma separated words
		private String forbiddens;
		private Diarization diarization;

		public String getLanguage() {
			return language;
		}

		public void setLanguage(String language) {
			this.language = language;
		}

		public String getCompletion() {
			return completion;
		}

		public void setCompletion(String completion) {
			this.completion = completion;
		}

		public String getCallback() {
			return callback;
		}

		public void setCallback(String callback) {
			this.callback = callback;
		}

		public Map<String, Object> getUserdata() {
			return userdata;
		}

		public void setUserdata(Map<String, Object> userdata) {
			this.userdata = userdata;
		}

		public Boolean getWordAlignment() {
			return wordAlignment;
		}

		public void setWordAlignment(Boolean wordAlignment) {
			this.wordAlignment = wordAlignment;
		}

		public Boolean getFullText() {
			return fullText;
		}

		public void setFullText(Boolean fullText) {
			this.fullText = fullText;
		}

		public String getForbiddens() {
			return forbiddens;
		}

		public void setForbiddens(String forbiddens) {
			this.forbiddens = forbiddens;
		}

		public List<Boosting> getBoostings() {
			return boostings;
		}

		public void setBoostings(List<Boosting> boostings) {
			this.boostings = boostings;
		}

		public Diarization getDiarization() {
			return diarization;
		}

		public void setDiarization(Diarization diarization) {
			this.diarization = diarization;
		}
	}

	/**
	 * recognize media using URL
	 * @param url required, the media URL
	 * @param nestRequestEntity optional, defaults are used when null
	 * @return the response body as a string
	 */
	public String url(String url, NestRequestEntity nestRequestEntity) {
		return postJson("/recognizer/url", buildJsonBody("url", url, nestRequestEntity));
	}

	/**
	 * recognize media using Object Storage
	 * @param dataKey required, the Object Storage key
	 * @param nestRequestEntity optional, defaults are used when null
	 * @return the response body as a string
	 */
	public String objectStorage(String dataKey, NestRequestEntity nestRequestEntity) {
		return postJson("/recognizer/object-storage", buildJsonBody("dataKey", dataKey, nestRequestEntity));
	}

	/**
	 * recognize media using a file
	 * @param file required, the media file
	 * @param nestRequestEntity optional, defaults are used when null
	 * @return the response body as a string
	 */
	public String upload(File file, NestRequestEntity nestRequestEntity) {
		HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/upload");
		httpPost.setHeaders(HEADERS);
		HttpEntity httpEntity = MultipartEntityBuilder.create()
			.addTextBody("params", gson.toJson(orDefault(nestRequestEntity)), ContentType.APPLICATION_JSON)
			.addBinaryBody("media", file, ContentType.MULTIPART_FORM_DATA, file.getName())
			.build();
		httpPost.setEntity(httpEntity);
		return execute(httpPost);
	}

	/** Returns the given options, or a default-initialized instance when the caller passed null. */
	private static NestRequestEntity orDefault(NestRequestEntity nestRequestEntity) {
		return nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();
	}

	/**
	 * Builds the JSON request map used by the URL and Object Storage endpoints.
	 * @param sourceKey name of the key identifying the media ("url" or "dataKey")
	 * @param sourceValue value stored under {@code sourceKey}
	 * @param nestRequestEntity recognition options, may be null
	 * @return a mutable map ready to be serialized with Gson
	 */
	private Map<String, Object> buildJsonBody(String sourceKey, String sourceValue,
			NestRequestEntity nestRequestEntity) {
		NestRequestEntity options = orDefault(nestRequestEntity);
		Map<String, Object> body = new HashMap<>();
		body.put(sourceKey, sourceValue);
		body.put("language", options.getLanguage());
		body.put("completion", options.getCompletion());
		body.put("callback", options.getCallback());
		// "userdata" must carry the user data map, not the callback URL.
		body.put("userdata", options.getUserdata());
		body.put("wordAlignment", options.getWordAlignment());
		body.put("fullText", options.getFullText());
		body.put("forbiddens", options.getForbiddens());
		body.put("boostings", options.getBoostings());
		body.put("diarization", options.getDiarization());
		return body;
	}

	/** Serializes {@code body} as JSON and POSTs it to {@code INVOKE_URL + path}. */
	private String postJson(String path, Map<String, Object> body) {
		HttpPost httpPost = new HttpPost(INVOKE_URL + path);
		httpPost.setHeaders(HEADERS);
		httpPost.setEntity(new StringEntity(gson.toJson(body), ContentType.APPLICATION_JSON));
		return execute(httpPost);
	}

	/**
	 * Executes the request and returns the response body decoded as UTF-8.
	 * Any failure is rethrown wrapped in a {@link RuntimeException}.
	 */
	private String execute(HttpPost httpPost) {
		try (final CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
			final HttpEntity entity = httpResponse.getEntity();
			return EntityUtils.toString(entity, StandardCharsets.UTF_8);
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	public static void main(String[] args) {
		final ClovaSpeechClient clovaSpeechClient = new ClovaSpeechClient();
		NestRequestEntity requestEntity = new NestRequestEntity();
		final String result =
			clovaSpeechClient.upload(new File("/data/sample.mp4"), requestEntity);
		//final String result = clovaSpeechClient.url("file URL", requestEntity);
		//final String result = clovaSpeechClient.objectStorage("Object Storage key", requestEntity);
		System.out.println(result);
	}
}

Python

Python 기반의 API 예제 코드는 다음과 같습니다.

import requests
import json


class ClovaSpeechClient:
    """Sample client for the CLOVA Speech long-form recognition endpoints.

    Fill in ``invoke_url`` and ``secret`` before use. Every ``req_*`` method
    issues one POST request and returns the ``requests`` response unchanged.
    """
    # Clova Speech invoke URL
    invoke_url = ''
    # Clova Speech secret key
    secret = ''

    def _build_params(self, completion, callback, userdata, forbiddens, boostings,
                      wordAlignment, fullText, diarization, language):
        """Return the recognition options shared by all three endpoints."""
        return {
            'language': language,
            'completion': completion,
            'callback': callback,
            'userdata': userdata,
            'wordAlignment': wordAlignment,
            'fullText': fullText,
            'forbiddens': forbiddens,
            'boostings': boostings,
            'diarization': diarization,
        }

    def _build_headers(self, json_body):
        """Return the request headers; ``json_body`` adds a JSON Content-Type."""
        headers = {
            'Accept': 'application/json',
            'X-CLOVASPEECH-API-KEY': self.secret
        }
        if json_body:
            # A media-type parameter must be written as name=value.
            headers['Content-Type'] = 'application/json; charset=UTF-8'
        return headers

    def _post_json(self, path, request_body):
        """POST ``request_body`` as UTF-8 encoded JSON to ``invoke_url + path``."""
        return requests.post(headers=self._build_headers(json_body=True),
                             url=self.invoke_url + path,
                             data=json.dumps(request_body).encode('UTF-8'))

    def req_url(self, url, completion, callback=None, userdata=None, forbiddens=None, boostings=None, wordAlignment=True, fullText=True, diarization=None, language='ko-KR'):
        """Recognize media that the service fetches from ``url``.

        ``completion`` is forwarded as-is (the samples use ``'sync'``); the
        remaining keyword arguments are optional recognition settings.
        Returns the ``requests`` response object.
        """
        request_body = {'url': url}
        request_body.update(self._build_params(completion, callback, userdata, forbiddens, boostings,
                                               wordAlignment, fullText, diarization, language))
        return self._post_json('/recognizer/url', request_body)

    def req_object_storage(self, data_key, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                           wordAlignment=True, fullText=True, diarization=None, language='ko-KR'):
        """Recognize media stored in Object Storage under ``data_key``.

        Accepts the same optional settings as ``req_url`` and returns the
        ``requests`` response object.
        """
        request_body = {'dataKey': data_key}
        request_body.update(self._build_params(completion, callback, userdata, forbiddens, boostings,
                                               wordAlignment, fullText, diarization, language))
        return self._post_json('/recognizer/object-storage', request_body)

    def req_upload(self, file, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                   wordAlignment=True, fullText=True, diarization=None, language='ko-KR'):
        """Upload the local media file at path ``file`` and recognize it.

        The options are sent as a JSON ``params`` form part next to the
        ``media`` part. Returns the ``requests`` response object.
        """
        request_body = self._build_params(completion, callback, userdata, forbiddens, boostings,
                                          wordAlignment, fullText, diarization, language)
        params = json.dumps(request_body, ensure_ascii=False).encode('UTF-8')
        print(params)
        # Close the media file as soon as the request finishes, even on error.
        with open(file, 'rb') as media:
            files = {
                'media': media,
                'params': (None, params, 'application/json')
            }
            response = requests.post(headers=self._build_headers(json_body=False),
                                     url=self.invoke_url + '/recognizer/upload',
                                     files=files)
        return response

if __name__ == '__main__':
    # Alternative entry points for the other two media sources:
    # res = ClovaSpeechClient().req_url(url='http://example.com/media.mp3', completion='sync')
    # res = ClovaSpeechClient().req_object_storage(data_key='data/media.mp3', completion='sync')
    client = ClovaSpeechClient()
    res = client.req_upload(file='/data/media.mp3', completion='sync')
    # Print the raw response body returned by the service.
    print(res.text)

PHP

PHP 기반의 장문 인식 API 예제 코드는 다음과 같습니다.

<?php

// Clova Speech secret key; execute() sends it in the X-CLOVASPEECH-API-KEY header.
$secret = '';
// Clova Speech invoke URL; execute() prepends it to every request path.
$invoke_url = '';

/**
 * Recognize media that the service fetches from a URL.
 *
 * All arguments after $completion are optional; their defaults mirror the
 * Python sample so callers no longer have to pass a run of explicit nulls.
 *
 * @param string      $url           required, the media URL
 * @param string      $completion    forwarded as-is (the samples use 'sync')
 * @param string|null $callback      optional callback value
 * @param mixed       $userdata      optional, any data
 * @param mixed       $forbiddens    optional
 * @param mixed       $boostings     optional
 * @param bool|null   $wordAlignment optional
 * @param bool|null   $fullText      optional
 * @param mixed       $diarization   optional
 * @return mixed whatever execute() returns (response body or error text)
 */
function req_url($url, $completion, $callback = null, $userdata = null, $forbiddens = null, $boostings = null,
                 $wordAlignment = true, $fullText = true, $diarization = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'url' => $url,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
    ];
    return execute('/recognizer/url', json_encode($object), array('Content-Type: application/json'));
}

/**
 * Recognize media stored in Object Storage.
 *
 * All arguments after $completion are optional; their defaults mirror the
 * Python sample so callers no longer have to pass a run of explicit nulls.
 *
 * @param string      $dataKey       required, the Object Storage key
 * @param string      $completion    forwarded as-is (the samples use 'sync')
 * @param string|null $callback      optional callback value
 * @param mixed       $userdata      optional, any data
 * @param mixed       $forbiddens    optional
 * @param mixed       $boostings     optional
 * @param bool|null   $wordAlignment optional
 * @param bool|null   $fullText      optional
 * @param mixed       $diarization   optional
 * @return mixed whatever execute() returns (response body or error text)
 */
function req_object_storage($dataKey, $completion, $callback = null, $userdata = null, $forbiddens = null,
                            $boostings = null, $wordAlignment = true, $fullText = true, $diarization = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'dataKey' => $dataKey,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
    ];
    return execute('/recognizer/object-storage', json_encode($object), array('Content-Type: application/json'));
}

/**
 * Upload a local media file and recognize it.
 *
 * The options are JSON-encoded into the "params" form field and the file is
 * attached as the "media" field. All arguments after $completion are
 * optional; their defaults mirror the Python sample.
 *
 * @param string      $filePath      required, path of the media file
 * @param string      $completion    forwarded as-is (the samples use 'sync')
 * @param string|null $callback      optional callback value
 * @param mixed       $userdata      optional, any data
 * @param mixed       $forbiddens    optional
 * @param mixed       $boostings     optional
 * @param bool|null   $wordAlignment optional
 * @param bool|null   $fullText      optional
 * @param mixed       $diarization   optional
 * @return mixed whatever execute() returns (response body or error text)
 */
function req_upload($filePath, $completion, $callback = null, $userdata = null, $forbiddens = null,
                    $boostings = null, $wordAlignment = true, $fullText = true, $diarization = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
    ];
    $fields = array(
        'media' => new CURLFile($filePath),
        'params' => json_encode($object),
    );
    // No custom headers: cURL builds the multipart Content-Type itself.
    return execute('/recognizer/upload', $fields, null);
}

/**
 * POST a request to the invoke URL and return the response body.
 *
 * @param string            $uri           path appended to $GLOBALS['invoke_url']
 * @param string|array      $postFields    JSON string or multipart field array
 * @param array|null        $customHeaders extra header lines, or null for none
 * @return string the response body, or the error text when the transfer fails
 */
function execute($uri, $postFields, $customHeaders)
{
    try {
        $ch = curl_init($GLOBALS['invoke_url'] . $uri);
        if ($ch === false) {
            $err = 'failed to initialize cURL';
            echo 'cURL Error #:' . $err;
            return $err;
        }
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Keep certificate verification on: the request carries the API secret.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
        curl_setopt($ch, CURLOPT_VERBOSE, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 600);
        $headers = array();
        $headers[] = 'X-CLOVASPEECH-API-KEY: ' . $GLOBALS['secret'];
        if (!is_null($customHeaders)) {
            $headers = array_merge($headers, $customHeaders);
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        $response = curl_exec($ch);
        $err = curl_error($ch);
        curl_close($ch);
        if ($err) {
            echo 'cURL Error #:' . $err;
            return $err;
        }
        return $response;
    } catch (Exception $E) {
        // Double quotes so that "\n" is emitted as a real line break.
        echo 'Response: ' . $E . "\n";
        // Exception has no lastResponse property; return its message instead.
        return $E->getMessage();
    }
}

// Alternative entry points for the other two media sources:
//$response = req_url('https://example.com/sample.mp4', 'sync', null, null, null, null, null, null, null);
//$response = req_object_storage('data/sample.mp4', 'sync', null, null, null, null, null, null, null);
$mediaPath = '/data/sample.mp4';
$completion = 'sync';
$response = req_upload($mediaPath, $completion, null, null, null, null, null, null, null);
// Print whatever the request returned.
echo $response;
?>

C#

C# 기반의 API 예제 코드는 다음과 같습니다.

using System;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;
using System.Threading.Channels;
using System.Threading.Tasks;

namespace HttpClientStatus
{
    /// <summary>
    /// Recognition options that are serialized to JSON for the request.
    /// </summary>
    /// <remarks>
    /// The property names are deliberately lower-case: they are serialized
    /// without a naming policy, so they become the JSON keys as written.
    /// </remarks>
    public class ClovaSpeechRequest
    {
        // Other fields are omitted, please refer to: https://api.ncloud-docs.com/release-20230525/docs/ai-application-service-clovaspeech-clovaspeech for available fields

        /// <summary>Value of the "language" key, e.g. "ko-KR".</summary>
        public string language { get; set; }

        /// <summary>Value of the "completion" key, e.g. "sync".</summary>
        public string completion { get; set; }
    }
    /// <summary>
    /// Console sample that uploads a local media file for long-form recognition.
    /// </summary>
    public class Program
    {
        private static readonly string secretKey = "";
        private static readonly string invokeUrl = "";

        // One client for the whole process instead of a new instance per call.
        private static readonly HttpClient httpClient = new HttpClient();

        /// <summary>
        /// Posts the file at <paramref name="path"/> together with the JSON-encoded
        /// <paramref name="clovaSpeechRequest"/> to the upload endpoint.
        /// </summary>
        /// <param name="clovaSpeechRequest">Options serialized into the "params" form part.</param>
        /// <param name="path">Path of the media file sent as the "media" form part.</param>
        /// <returns>The response body as a string.</returns>
        public static async Task<string> Upload(ClovaSpeechRequest clovaSpeechRequest, string path)
        {
            using (var request = new HttpRequestMessage(HttpMethod.Post, invokeUrl + "/recognizer/upload"))
            using (var multiForm = new MultipartFormDataContent())
            {
                // The API key is a request header, not a header of the multipart body.
                request.Headers.Add("X-CLOVASPEECH-API-KEY", secretKey);

                // Label the options part as JSON, as the Java and Python samples do.
                var parameters = new StringContent(
                    JsonSerializer.Serialize(clovaSpeechRequest), Encoding.UTF8, "application/json");
                multiForm.Add(parameters, "params");

                // The stream is owned by the StreamContent and is released when
                // the multipart content is disposed.
                FileStream fs = File.OpenRead(path);
                Console.WriteLine(Path.GetFileName(path));
                multiForm.Add(new StreamContent(fs), "media", Path.GetFileName(path));

                request.Content = multiForm;
                using (var message = await httpClient.SendAsync(request))
                {
                    return await message.Content.ReadAsStringAsync();
                }
            }
        }

        static async Task Main(string[] args)
        {
            var clovaSpeechRequest = new ClovaSpeechRequest
            {
                language = "ko-KR",
                completion = "sync"
            };

            // Verbatim string: a single backslash per separator is enough.
            var result = await Upload(clovaSpeechRequest, @"D:\media\video\sample.mp3");
            Console.WriteLine(result);
        }
    }
}