Solr Extract Request (php7)

This commit is contained in:
Omar Shaban
2016-05-17 22:11:31 +02:00
parent a00cea3f92
commit d5fda95391
18 changed files with 833 additions and 77 deletions

View File

@@ -143,6 +143,7 @@ if test "$PHP_SOLR" != "no"; then
$subdir/php_solr_utils.c \
$subdir/php_solr_dismax_query.c \
$subdir/php_solr_collapse_function.c \
$subdir/php_solr_extract.c \
$subdir/solr_string.c \
$subdir/solr_functions_document.c \
$subdir/solr_functions_client.c \

View File

@@ -856,6 +856,161 @@ final class SolrClient
public function system() {}
}
/**
* @author Omar Shaban <omars@php.net>
*
* @link https://cwiki.apache.org/confluence/display/solr/Uploading+Data+with+Solr+Cell+using+Apache+Tika
*/
final class SolrExtractRequest
{
    /**
     * Capture the specified fields (and everything included below it that isn't captured by some other capture
     * field) separately from the default. This is different from the case of passing in an XPath expression.
     * <p>
     * The Capture field is based on the localName returned to the SolrContentHandler
     * by Tika, not to be confused by the mapped field. The field name can then
     * be mapped into the index schema.
     * <p>
     * For instance, a Tika document may look like:
     * <pre>
     * &lt;html&gt;
     * ...
     * &lt;body&gt;
     * &lt;p&gt;some text here. &lt;div&gt;more text&lt;/div&gt;&lt;/p&gt;
     * Some more text
     * &lt;/body&gt;
     * </pre>
     * By passing in the p tag, you could capture all P tags separately from the rest of the text.
     * Thus, in the example, the capture of the P tag would be: "some text here. more text"
     */
    const CAPTURE_ELEMENTS = 'capture';

    /**
     * Capture attributes separately according to the name of the element, instead of just adding them to the
     * string buffer.
     */
    const CAPTURE_ATTRIBUTES = 'captureAttr';

    /**
     * Commit document within X number of milliseconds.
     */
    const COMMIT_WITHIN = 'commitWithin';

    /**
     * Defines the date format patterns to identify in the documents.
     */
    const DATE_FORMATS = 'date.formats';

    /**
     * If specified and the name of a potential field cannot be determined, the default Field specified will be
     * used instead.
     */
    const DEFAULT_FIELD = 'defaultField';

    /**
     * Only extract and return the content, do not index it.
     */
    const EXTRACT_ONLY = 'extractOnly';

    /**
     * Content output format if extractOnly is true.
     */
    const EXTRACT_FORMAT = 'extractFormat';

    /**
     * If true, exceptions found during processing will be skipped. Any metadata available, however, will be
     * indexed.
     */
    const IGNORE_TIKA_EXCEPTION = 'ignoreTikaException';

    /**
     * Literal field values will by default override other values such as metadata and content.
     */
    const LITERALS_OVERRIDE = 'literalsOverride';

    /**
     * Map all generated attribute names to field names with lowercase and underscores.
     */
    const LOWERNAMES = 'lowernames';

    /**
     * Useful if uploading very large documents, this defines the KB size of documents to allow.
     */
    const MULTIPART_UPLOAD_LIMIT = 'multipartUploadLimitInKB';

    /**
     * If specified, loads the file as a source for password lookups for Tika encrypted documents.
     *
     * File format is Java properties format with one key=value per line. The key is evaluated as a regex
     * against the file name, and the value is the password. The rules are evaluated top-bottom,
     * i.e. the first match will be used. If you want a fallback password to be always used,
     * supply a .*=<defaultmypassword> at the end.
     */
    const PASSWORD_MAP_FILE = 'passwordsFile';

    /**
     * The file name. If specified, Tika can take this into account while guessing the MIME type.
     */
    const RESOURCE_NAME = 'resource.name';

    /**
     * The password for this resource. Will be used instead of the rule based password lookup mechanisms.
     */
    const RESOURCE_PASSWORD = 'resource.password';

    /**
     * Tika config path.
     *
     * NOTE(review): the constant name looks like a typo of TIKA_CONFIG; it is kept as-is here because the
     * name must match whatever the extension registers - confirm against the C constant registration.
     */
    const TIKE_CONFIG = 'tika.config';

    /**
     * If specified, the prefix will be prepended to all Metadata, such that it would be possible to setup a
     * dynamic field to automatically capture it.
     */
    const UNKNOWN_FIELD_PREFIX = 'uprefix';

    /**
     * Restrict the extracted parts of a document to be indexed by passing in an XPath expression.
     */
    const XPATH_EXPRESSION = 'xpath';

    /**
     * Mapping Tika metadata to Solr fields. (parameter prefix)
     */
    const FIELD_MAPPING_PREFIX = 'fmap.';

    /**
     * Boost value for the name of the field. (parameter prefix)
     */
    const FIELD_BOOST_PREFIX = 'boost.';

    /**
     * Pass in literal values to be added to the document, as is. (parameter prefix)
     */
    const LITERALS_PREFIX = 'literal.';

    /**
     * Instances are obtained through createFromFile() / createFromStream(); the constructor is private.
     */
    private function __construct() {}

    /**
     * Create request from a file
     *
     * @param string $filename
     * @param SolrModifiableParams $params
     *
     * @return SolrExtractRequest
     */
    public static function createFromFile($filename, SolrModifiableParams $params) {}

    /**
     * Create request from binary stream
     *
     * @param string $content
     * @param string $contentType
     * @param SolrModifiableParams $params
     *
     * @return SolrExtractRequest
     */
    public static function createFromStream($content, $contentType, SolrModifiableParams $params) {}
}
/**
*
* @author Israel Ekpo <iekpo@php.net>

View File

@@ -54,6 +54,7 @@ zend_class_entry *solr_ce_SolrObject;
zend_class_entry *solr_ce_SolrInputDocument;
zend_class_entry *solr_ce_SolrDocument;
zend_class_entry *solr_ce_SolrDocumentField;
zend_class_entry *solr_ce_SolrExtractRequest;
zend_class_entry *solr_ce_SolrClient;
zend_class_entry *solr_ce_SolrParams;
zend_class_entry *solr_ce_SolrModifiableParams;
@@ -80,6 +81,7 @@ zend_object_handlers solr_input_document_object_handlers;
zend_object_handlers solr_client_object_handlers;
zend_object_handlers solr_response_object_handlers;
zend_object_handlers solr_collapse_function_object_handlers;
zend_object_handlers solr_extract_request_object_handlers;
/* }}} */
/* {{{ static void php_solr_globals_ctor(zend_solr_globals *solr_globals_arg TSRMLS_DC)
@@ -96,6 +98,7 @@ static void php_solr_globals_ctor(zend_solr_globals *solr_globals_arg TSRMLS_DC)
solr_globals_arg->clients = NULL;
solr_globals_arg->params = NULL;
solr_globals_arg->functions = NULL;
solr_globals_arg->ustreams = NULL;
}
/* }}} */
@@ -245,6 +248,10 @@ ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, overwrite)
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, commitWithin)
ZEND_END_ARG_INFO()
/* Argument info for SolrClient::sendUpdateStream(): one required argument, `request`,
 * which must be a SolrExtractRequest object (NULL is not allowed). */
ZEND_BEGIN_ARG_INFO_EX(SolrClient_sendUpdateStream_args, SOLR_ARG_PASS_REMAINING_BY_REF_FALSE, SOLR_METHOD_RETURN_REFERENCE_TRUE, 1)
ZEND_ARG_OBJ_INFO(SOLR_ARG_PASS_BY_REF_TRUE, request, SolrExtractRequest, SOLR_ARG_ALLOW_NULL_FALSE)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(SolrClient_request_args, SOLR_ARG_PASS_REMAINING_BY_REF_FALSE, SOLR_METHOD_RETURN_REFERENCE_TRUE, 1)
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, raw_request)
ZEND_END_ARG_INFO()
@@ -489,6 +496,18 @@ ZEND_BEGIN_ARG_INFO_EX(SolrCollapseFunction_set_null_policy_args, SOLR_ARG_PASS_
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, policy)
ZEND_END_ARG_INFO()
/* Argument info for SolrExtractRequest::createFromFile(): two required arguments,
 * `filename` (untyped) and `params` (a non-null SolrModifiableParams object). */
ZEND_BEGIN_ARG_INFO_EX(SolrExtractRequest_createFromFile_args, SOLR_ARG_PASS_REMAINING_BY_REF_FALSE, SOLR_METHOD_RETURN_REFERENCE_FALSE, 2)
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, filename)
ZEND_ARG_OBJ_INFO(SOLR_ARG_PASS_BY_REF_TRUE, params, SolrModifiableParams, SOLR_ARG_ALLOW_NULL_FALSE)
ZEND_END_ARG_INFO()
/* Argument info for SolrExtractRequest::createFromStream(): three required arguments,
 * `content` and `mime_type` (both untyped) and `params` (a non-null SolrModifiableParams object). */
ZEND_BEGIN_ARG_INFO_EX(SolrExtractRequest_createFromStream_args, SOLR_ARG_PASS_REMAINING_BY_REF_FALSE, SOLR_METHOD_RETURN_REFERENCE_FALSE, 3)
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, content)
ZEND_ARG_INFO(SOLR_ARG_PASS_BY_REF_FALSE, mime_type)
ZEND_ARG_OBJ_INFO(SOLR_ARG_PASS_BY_REF_TRUE, params, SolrModifiableParams, SOLR_ARG_ALLOW_NULL_FALSE)
ZEND_END_ARG_INFO()
/* }}} */
/* {{{ solr_functions[] */
@@ -652,6 +671,7 @@ static zend_function_entry solr_client_methods[] = {
PHP_ME(SolrClient, query, SolrClient_query_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, addDocument, SolrClient_addDocument_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, addDocuments, SolrClient_addDocuments_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, sendUpdateStream, SolrClient_sendUpdateStream_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, request, SolrClient_request_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, setResponseWriter, SolrClient_setResponseWriter_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrClient, deleteById, SolrClient_deleteById_args, ZEND_ACC_PUBLIC)
@@ -742,6 +762,17 @@ static zend_function_entry solr_modifiable_params_methods[] = {
};
/* }}} */
/* {{{ solr_extract_request_methods
 * Method table for the SolrExtractRequest class: a private constructor, a public destructor,
 * the two public static factories, and the __clone/__sleep/__wakeup magic methods. */
static zend_function_entry solr_extract_request_methods[] = {
PHP_ME(SolrExtractRequest, __construct, Solr_no_args, ZEND_ACC_PRIVATE | ZEND_ACC_CTOR)
PHP_ME(SolrExtractRequest, __destruct, Solr_no_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrExtractRequest, createFromFile, SolrExtractRequest_createFromFile_args, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
PHP_ME(SolrExtractRequest, createFromStream, SolrExtractRequest_createFromStream_args, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
PHP_ME(SolrExtractRequest, __clone, Solr_no_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrExtractRequest, __sleep, Solr_no_args, ZEND_ACC_PUBLIC)
PHP_ME(SolrExtractRequest, __wakeup, Solr_no_args, ZEND_ACC_PUBLIC)
PHP_FE_END
};
/* }}} */
/* {{{ solr_query_methods. */
static zend_function_entry solr_query_methods[] = {
SOLR_CTOR(SolrQuery, __construct, SolrQuery_constructor_args)
@@ -1085,6 +1116,9 @@ PHP_MINIT_FUNCTION(solr)
memcpy(&solr_input_document_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
memcpy(&solr_client_object_handlers, &solr_input_document_object_handlers, sizeof(zend_object_handlers));
memcpy(&solr_collapse_function_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
memcpy(&solr_extract_request_object_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers));
solr_extract_request_object_handlers.offset = XtOffsetOf(solr_ustream_t, std);
solr_collapse_function_object_handlers.clone_obj = solr_collapse_function_object_handler_clone;
solr_input_document_object_handlers.clone_obj = solr_document_object_handler_clone;
@@ -1181,6 +1215,16 @@ PHP_MINIT_FUNCTION(solr)
init_solr_dismax_query(TSRMLS_C);
solr_query_register_class_constants(solr_ce_SolrQuery TSRMLS_CC);
/* Register SolrExtractRequest */
INIT_CLASS_ENTRY(ce, PHP_SOLR_EXTRACTREQUEST_CLASSNAME, solr_extract_request_methods);
solr_ce_SolrExtractRequest = zend_register_internal_class(&ce TSRMLS_CC);
solr_ce_SolrExtractRequest->ce_flags |= ZEND_ACC_FINAL;
solr_ce_SolrExtractRequest->create_object = solr_extract_create_object_handler;
zend_declare_property_long(solr_ce_SolrExtractRequest, SOLR_INDEX_PROPERTY_NAME, sizeof(SOLR_INDEX_PROPERTY_NAME)-1, 0L, ZEND_ACC_PRIVATE TSRMLS_CC);
zend_declare_property_null(solr_ce_SolrExtractRequest, "params", sizeof("params")-1, ZEND_ACC_PRIVATE);
solr_extract_register_class_constants(solr_ce_SolrExtractRequest TSRMLS_CC);
/* Register the SolrCollapseFunction class */
INIT_CLASS_ENTRY(ce, PHP_SOLR_COLLAPSE_FUNCTION_CLASSNAME, solr_collapse_function_methods);
solr_ce_SolrCollapseFunction = zend_register_internal_class_ex(&ce, solr_ce_SolrCollapseFunction);
@@ -1261,6 +1305,7 @@ PHP_RINIT_FUNCTION(solr)
ALLOC_HASHTABLE(SOLR_GLOBAL(clients));
ALLOC_HASHTABLE(SOLR_GLOBAL(params));
ALLOC_HASHTABLE(SOLR_GLOBAL(functions));
ALLOC_HASHTABLE(SOLR_GLOBAL(ustreams));
/* Initialize the HashTable for directory for SolrInputDocuments */
zend_hash_init(SOLR_GLOBAL(documents), nSize, NULL, solr_destroy_document, persistent);
@@ -1269,6 +1314,7 @@ PHP_RINIT_FUNCTION(solr)
zend_hash_init(SOLR_GLOBAL(params), nSize, NULL, solr_destroy_params, persistent);
zend_hash_init(SOLR_GLOBAL(functions), nSize, NULL, solr_destroy_function, persistent);
zend_hash_init(SOLR_GLOBAL(ustreams), nSize, NULL, solr_destroy_ustream, persistent);
return SUCCESS;
}
@@ -1281,11 +1327,13 @@ PHP_RSHUTDOWN_FUNCTION(solr)
zend_hash_destroy(SOLR_GLOBAL(clients));
zend_hash_destroy(SOLR_GLOBAL(params));
zend_hash_destroy(SOLR_GLOBAL(functions));
zend_hash_destroy(SOLR_GLOBAL(ustreams));
FREE_HASHTABLE(SOLR_GLOBAL(documents));
FREE_HASHTABLE(SOLR_GLOBAL(clients));
FREE_HASHTABLE(SOLR_GLOBAL(params));
FREE_HASHTABLE(SOLR_GLOBAL(functions));
FREE_HASHTABLE(SOLR_GLOBAL(ustreams));
return SUCCESS;
}

View File

@@ -216,6 +216,16 @@ PHP_METHOD(SolrDocument, getInputDocument);
PHP_METHOD(SolrDocument, hasChildDocuments);
PHP_METHOD(SolrDocument, getChildDocuments);
PHP_METHOD(SolrDocument, getChildDocumentsCount);
/* }}} */
/* {{{ SolrExtractRequest methods */
PHP_METHOD(SolrExtractRequest, __construct);
PHP_METHOD(SolrExtractRequest, createFromFile);
PHP_METHOD(SolrExtractRequest, createFromStream);
PHP_METHOD(SolrExtractRequest, __destruct);
PHP_METHOD(SolrExtractRequest, __clone);
PHP_METHOD(SolrExtractRequest, __sleep);
PHP_METHOD(SolrExtractRequest, __wakeup);
/* }}} */
@@ -268,6 +278,7 @@ PHP_METHOD(SolrClient, addDocuments);
PHP_METHOD(SolrClient, setServlet);
PHP_METHOD(SolrClient, setResponseWriter);
PHP_METHOD(SolrClient, request);
PHP_METHOD(SolrClient, sendUpdateStream);
PHP_METHOD(SolrClient, ping);
PHP_METHOD(SolrClient, system);
PHP_METHOD(SolrClient, threads);
@@ -618,6 +629,7 @@ int solr_curl_debug_callback(CURL *curl_handle, curl_infotype infotype, solr_cha
PHP_SOLR_API int solr_init_options(solr_client_options_t *options TSRMLS_DC);
PHP_SOLR_API int solr_init_handle(solr_curl_t *sch, solr_client_options_t *options TSRMLS_DC);
PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t request_type TSRMLS_DC);
PHP_SOLR_API int solr_make_update_stream_request(solr_client_t *client, solr_ustream_t* stream_data, solr_string_t *request_params);
PHP_SOLR_API void solr_free_handle(solr_curl_t *sch);
PHP_SOLR_API void solr_free_option(solr_client_options_t *options);
@@ -625,6 +637,7 @@ PHP_SOLR_API void solr_free_option(solr_client_options_t *options);
PHP_SOLR_API void solr_extension_register_constants(int type, int module_number TSRMLS_DC);
PHP_SOLR_API void solr_document_register_class_constants(zend_class_entry *ce TSRMLS_DC);
PHP_SOLR_API void solr_extract_register_class_constants(zend_class_entry *ce TSRMLS_DC);
PHP_SOLR_API void solr_client_register_class_constants(zend_class_entry *ce TSRMLS_DC);
PHP_SOLR_API void solr_query_register_class_constants(zend_class_entry *ce TSRMLS_DC);
PHP_SOLR_API void solr_collapse_function_register_class_constants(zend_class_entry *ce TSRMLS_DC);
@@ -638,9 +651,26 @@ PHP_SOLR_API void solr_throw_exception(zend_class_entry *exception_ce, char *mes
PHP_SOLR_API void solr_throw_solr_server_exception(solr_client_t *client,const char *requestType TSRMLS_DC);
/* {{{ init functions called in constructors - allocate/initialize params_t */
PHP_SOLR_API int solr_params_obj_ctor(zval *obj TSRMLS_DC);
PHP_SOLR_API void solr_params_obj_dtor(zval *obj);
PHP_SOLR_API int solr_init_params(solr_params_t *solr_params, long int index);
/* Object handlers of SolrExtractRequest: allocation (create_object) and release (free_obj).
 * The create handler was previously declared twice; one declaration is enough. TSRMLS_DC is the
 * declaration-side macro (TSRMLS_CC is meant for call sites). */
zend_object *solr_extract_create_object_handler(zend_class_entry *ce TSRMLS_DC);
static void solr_extract_free_object_handler(zend_object *obj);
/* }}} */
PHP_SOLR_API int solr_init_ustream(zval *obj);
PHP_SOLR_API void solr_destroy_ustream_ex(solr_ustream_t *stream);
PHP_SOLR_API void solr_destroy_ustream_zv(zval *obj);
PHP_SOLR_API int solr_fetch_ustream_entry(zval *objptr, solr_ustream_t **stream_entry TSRMLS_DC);
#ifdef PHP_7
#define solr_destroy_ustream solr_destroy_ustream_zv
#else
#define solr_destroy_ustream solr_destroy_ustream_ex
#endif
/* {{{ solr_document_t based objects methods*/
PHP_SOLR_API solr_document_t *solr_init_document(long int document_index);
PHP_SOLR_API solr_document_t *solr_input_doc_ctor(zval *objptr);

View File

@@ -45,6 +45,7 @@ static void solr_client_init_urls(solr_client_t *solr_client)
/* Release all previously allocated URL values, if any */
solr_string_free(&(options->update_url));
solr_string_free(&(options->extract_url));
solr_string_free(&(options->search_url));
solr_string_free(&(options->thread_url));
solr_string_free(&(options->ping_url));
@@ -72,6 +73,7 @@ static void solr_client_init_urls(solr_client_t *solr_client)
/* Copying over the prefixes */
solr_string_append_solr_string(&(options->update_url), &url_prefix);
solr_string_append_solr_string(&(options->extract_url), &url_prefix);
solr_string_append_solr_string(&(options->search_url), &url_prefix);
solr_string_append_solr_string(&(options->thread_url), &url_prefix);
solr_string_append_solr_string(&(options->ping_url), &url_prefix);
@@ -81,6 +83,7 @@ static void solr_client_init_urls(solr_client_t *solr_client)
/* Making http://hostname:host_port/path/servlet/ */
solr_string_append_solr_string(&(options->update_url), &(options->update_servlet));
solr_string_append_solr_string(&(options->extract_url), &(options->extract_servlet));
solr_string_append_solr_string(&(options->search_url), &(options->search_servlet));
solr_string_append_solr_string(&(options->thread_url), &(options->thread_servlet));
solr_string_append_solr_string(&(options->ping_url), &(options->ping_servlet));
@@ -89,6 +92,7 @@ static void solr_client_init_urls(solr_client_t *solr_client)
solr_string_append_solr_string(&(options->get_url), &(options->get_servlet));
solr_string_append_const(&(options->update_url), "/?version=2.2&indent=on&wt=");
solr_string_append_const(&(options->extract_url), "/?version=2.2&indent=on&wt=");
solr_string_append_const(&(options->search_url), "/?version=2.2&indent=on&wt=");
solr_string_append_const(&(options->thread_url), "/?version=2.2&indent=on&wt=");
solr_string_append_const(&(options->ping_url), "/?version=2.2&indent=on&wt=");
@@ -97,6 +101,7 @@ static void solr_client_init_urls(solr_client_t *solr_client)
solr_string_append_const(&(options->get_url), "/?version=2.2&indent=on&wt=");
solr_string_append_solr_string(&(options->update_url), &(options->response_writer));
solr_string_append_solr_string(&(options->extract_url), &(options->response_writer));
solr_string_append_solr_string(&(options->search_url), &(options->response_writer));
solr_string_append_solr_string(&(options->thread_url), &(options->response_writer));
solr_string_append_solr_string(&(options->ping_url), &(options->response_writer));
@@ -109,22 +114,13 @@ static void solr_client_init_urls(solr_client_t *solr_client)
/* }}} */
/* {{{ static int solr_http_build_query(solr_string_t *buffer, zval *params_objptr, const solr_char_t *delimiter, int delimiter_length TSRMLS_DC) */
static int solr_http_build_query(solr_string_t *buffer, zval *params_objptr, const solr_char_t *delimiter, int delimiter_length TSRMLS_DC)
static int solr_http_build_query(solr_string_t *buffer, solr_params_t *solr_params, const solr_char_t *delimiter, int delimiter_length TSRMLS_DC)
{
solr_params_t *solr_params = NULL;
HashTable *params = NULL;
solr_param_t *solr_param = NULL;
if (solr_fetch_params_entry(params_objptr, &solr_params TSRMLS_CC) == FAILURE) {
php_error_docref(NULL TSRMLS_CC, E_ERROR, "Unable to retrieve solr_params_t");
return FAILURE;
}
params = solr_params->params;
ZEND_HASH_FOREACH_PTR(params, solr_param)
{
@@ -229,6 +225,7 @@ PHP_METHOD(SolrClient, __construct)
solr_string_append_const(&(client_options->response_writer), SOLR_XML_RESPONSE_WRITER);
solr_string_append_const(&(client_options->update_servlet), SOLR_DEFAULT_UPDATE_SERVLET);
solr_string_append_const(&(client_options->extract_servlet), SOLR_DEFAULT_EXTRACT_SERVLET);
solr_string_append_const(&(client_options->search_servlet), SOLR_DEFAULT_SEARCH_SERVLET);
solr_string_append_const(&(client_options->thread_servlet), SOLR_DEFAULT_THREADS_SERVLET);
solr_string_append_const(&(client_options->ping_servlet), SOLR_DEFAULT_PING_SERVLET);
@@ -613,7 +610,7 @@ PHP_METHOD(SolrClient, query)
/* Remove wt if any */
zend_hash_str_del(solr_params->params, "wt", sizeof("wt")-1);
if (solr_http_build_query(buffer, solr_params_obj, delimiter, delimiter_length TSRMLS_CC) == FAILURE)
if (solr_http_build_query(buffer, solr_params, delimiter, delimiter_length TSRMLS_CC) == FAILURE)
{
solr_throw_exception_ex(solr_ce_SolrException, SOLR_ERROR_1003 TSRMLS_CC, SOLR_FILE_LINE_FUNC, "Error building HTTP query from parameters");
@@ -972,6 +969,64 @@ PHP_METHOD(SolrClient, request)
}
/* }}} */
/* {{{ proto SolrUpdateResponse SolrClient::sendUpdateStream(SolrExtractRequest request)
sends an update stream request. */
PHP_METHOD(SolrClient, sendUpdateStream)
{
zval *request_zv = NULL, *params_zv = NULL;
solr_ustream_t *stream = NULL;
solr_client_t *client = NULL;
solr_string_t *qs_buffer; /* query string buffer */
solr_char_t *delimiter = NULL;
size_t delimiter_length = 0L;
solr_params_t *params = NULL;
zend_bool success = 1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &request_zv) == FAILURE) {
return;
}
if (solr_fetch_client_entry(getThis(), &client) == FAILURE) {
return;
}
stream = Z_USTREAM_P(Z_REFVAL_P(request_zv));
params_zv = zend_read_property(solr_ce_SolrModifiableParams, Z_REFVAL_P(request_zv), "params", sizeof("params"), 0, NULL);
if (params_zv && IS_NULL != Z_TYPE_P(params_zv)) {
solr_fetch_params_entry(params_zv, &params);
}
/* Always reset the URLs before making any request */
solr_client_init_urls(client);
qs_buffer = &(client->handle.request_body.buffer);
/* Get rid of all the data from the previous request */
solr_string_free(qs_buffer);
delimiter = client->options.qs_delimiter.str;
delimiter_length = client->options.qs_delimiter.len;
if (solr_http_build_query(qs_buffer, params, delimiter, delimiter_length) == FAILURE){
solr_throw_exception_ex(solr_ce_SolrException, SOLR_ERROR_1003 TSRMLS_CC, SOLR_FILE_LINE_FUNC, SOLR_ERROR_1003_MSG);
return;
}
if (solr_make_update_stream_request(client, stream, qs_buffer) == FAILURE) {
success = 0;
/* if there was an error with the http request solr_make_request throws an exception by itself
* if it wasn't a curl connection error, throw exception (omars)
*/
HANDLE_SOLR_SERVER_ERROR(client,"extract");
}
object_init_ex(return_value, solr_ce_SolrUpdateResponse);
solr_set_response_object_properties(solr_ce_SolrUpdateResponse, return_value, client, &(client->options.extract_url), success TSRMLS_CC);
}
/* }}} */
/* {{{ proto SolrUpdateResponse SolrClient::deleteById(string id)
Allows the user to delete a document by id */
PHP_METHOD(SolrClient, deleteById)

163
src/php7/php_solr_extract.c Normal file
View File

@@ -0,0 +1,163 @@
/*
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2016 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Omar Shaban <omars@php.net> |
+----------------------------------------------------------------------+
*/
#include "php_solr.h"
extern zend_class_entry *solr_ce_SolrExtractRequest;
extern zend_object_handlers solr_extract_request_object_handlers;
extern zend_class_entry *solr_ce_SolrIllegalArgumentException;
extern zend_class_entry *solr_ce_SolrIllegalOperationException;
extern HashTable *ustreams;
/*
 * Shared body of the SolrExtractRequest factories. Expects `return_value`, `stream_entry` and
 * `params` to be in scope at the expansion site: initialises return_value as a new
 * SolrExtractRequest, points stream_entry at its native storage and stores the params zval in
 * the object's "params" property.
 *
 * The temporary property-name string is released once the property has been written (it was
 * previously allocated persistently and never released). The macro is a plain do { } while (0)
 * so that `SOLR_EXTRACT_OBJ_CTOR();` behaves as a single statement.
 *
 * NOTE(review): the name length is sizeof("params"), i.e. it includes the trailing NUL;
 * SolrClient::sendUpdateStream reads the property with the same length, so the two must only be
 * changed together.
 */
#define SOLR_EXTRACT_OBJ_CTOR() \
    do { \
        zend_string *params_property_name = zend_string_init("params", sizeof("params"), 0); \
        object_init_ex(return_value, solr_ce_SolrExtractRequest); \
        stream_entry = Z_USTREAM_P(return_value); \
        zend_update_property_ex(solr_ce_SolrExtractRequest, return_value, params_property_name, params); \
        zend_string_release(params_property_name); \
    } while (0)
/*
 * Allocates a new solr_ustream_t and registers it in the request-global `ustreams` table under
 * a fresh unique index.
 *
 * `obj` is currently not used by this function.
 *
 * Returns SUCCESS when the entry was stored, FAILURE otherwise. On failure the freshly
 * allocated entry is released again instead of being leaked.
 */
PHP_SOLR_API int solr_init_ustream(zval *obj)
{
    ulong index = SOLR_UNIQUE_USTREAM_INDEX();
    /* Zero-initialised so that no field of the entry is left indeterminate. */
    solr_ustream_t *ustream_entry = pecalloc(1, sizeof(solr_ustream_t), 0);

    if (zend_hash_index_update_ptr(SOLR_GLOBAL(ustreams), index, ustream_entry) == NULL) {
        pefree(ustream_entry, 0);

        return FAILURE;
    }

    return SUCCESS;
}
/* {{{ solr_extract_obj_handlers */
/*
 * free_obj handler of SolrExtractRequest: releases the strings held in content_info, the
 * content_info block itself and the standard object members.
 *
 * The memory of the object itself is NOT freed here: with a non-zero handlers->offset the engine
 * releases the enclosing allocation after free_obj returns, so an efree(intern) in this handler
 * would free the same block twice.
 */
static void solr_extract_free_object_handler(zend_object *obj)
{
    solr_ustream_t *intern = solr_get_ustream_object(obj);

    if (intern->content_info != NULL) {
        solr_string_free(&(intern->content_info->filename));
        solr_string_free(&(intern->content_info->stream_info.binary_content));
        solr_string_free(&(intern->content_info->stream_info.mime_type));
        efree(intern->content_info);
        intern->content_info = NULL;
    }

    zend_object_std_dtor(obj);
}
/*
 * create_object handler of SolrExtractRequest: allocates the native solr_ustream_t together with
 * the property slots of `ce`, initialises the embedded zend_object and the content_info block,
 * and installs the class-specific object handlers.
 *
 * Returns the embedded zend_object.
 */
zend_object *solr_extract_create_object_handler(zend_class_entry *ce TSRMLS_CC)
{
    /* ecalloc already zero-fills the allocation, so no additional memset is needed. */
    solr_ustream_t *intern = ecalloc(1, sizeof(solr_ustream_t) + zend_object_properties_size(ce));

    zend_object_std_init(&intern->std, ce);
    object_properties_init(&intern->std, ce);

    /* Zero-filled so that every member (including `filename`, which is freed by the free handler
     * and written by createFromFile) starts out in a defined state. */
    intern->content_info = ecalloc(1, sizeof(solr_cuv_t));

    solr_string_init(&(intern->content_info->stream_info.mime_type));
    solr_string_init(&(intern->content_info->stream_info.binary_content));

    solr_extract_request_object_handlers.free_obj = solr_extract_free_object_handler;

    intern->std.handlers = &solr_extract_request_object_handlers;

    return &intern->std;
}
/* }}} */
/* {{{ proto SolrExtractRequest::__construct(void)
   Intentionally empty: the method body performs no work. */
PHP_METHOD(SolrExtractRequest, __construct)
{
}
/* }}} */
/* {{{ proto SolrExtractRequest::__destruct(void)
   Intentionally empty: the method body performs no work. */
PHP_METHOD(SolrExtractRequest, __destruct)
{
}
/* }}} */
/* {{{ proto SolrExtractRequest::createFromFile(string filename, SolrModifiableParams params)
   Create request from file.

   Argument errors are raised as SolrIllegalArgumentException. On success return_value is a new
   SolrExtractRequest whose content type is SOLR_EXTRACT_CONTENT_FILE and whose filename is a
   copy of the given string. */
PHP_METHOD(SolrExtractRequest, createFromFile)
{
    char *filename;
    COMPAT_ARG_SIZE_T filename_length = 0;
    solr_ustream_t *stream_entry = NULL;
    zval *params = NULL;
    zend_error_handling error_handling;

    zend_replace_error_handling(EH_THROW, solr_ce_SolrIllegalArgumentException, &error_handling TSRMLS_CC);

    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz", &filename, &filename_length, &params) == FAILURE) {
        zend_restore_error_handling(&error_handling);

        return;
    }

    /* Restore the previous error handling on the success path too; otherwise the EH_THROW mode
     * installed above would stay active after this method returns. */
    zend_restore_error_handling(&error_handling);

    SOLR_EXTRACT_OBJ_CTOR();

    stream_entry->content_type = SOLR_EXTRACT_CONTENT_FILE;
    solr_string_set_ex(&(stream_entry->content_info->filename), filename, filename_length);
}
/* }}} */
/* {{{ proto SolrExtractRequest::createFromStream(string content, string contentType, SolrModifiableParams params)
   Create request from binary stream.

   Argument errors are raised as SolrIllegalArgumentException. On success return_value is a new
   SolrExtractRequest whose content type is SOLR_EXTRACT_CONTENT_STREAM and which holds copies of
   the given content and MIME type. */
PHP_METHOD(SolrExtractRequest, createFromStream)
{
    char *content, *content_type;
    COMPAT_ARG_SIZE_T content_length = 0, content_type_length = 0;
    solr_ustream_t *stream_entry = NULL;
    zval *params = NULL;
    zend_error_handling error_handling;

    zend_replace_error_handling(EH_THROW, solr_ce_SolrIllegalArgumentException, &error_handling TSRMLS_CC);

    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ssz", &content, &content_length, &content_type, &content_type_length, &params) == FAILURE) {
        zend_restore_error_handling(&error_handling);

        return;
    }

    /* Restore the previous error handling on the success path too; otherwise the EH_THROW mode
     * installed above would stay active after this method returns. */
    zend_restore_error_handling(&error_handling);

    SOLR_EXTRACT_OBJ_CTOR();

    stream_entry->content_type = SOLR_EXTRACT_CONTENT_STREAM;

    solr_string_set_ex(&(stream_entry->content_info->stream_info.mime_type), content_type, content_type_length);
    solr_string_set_ex(&(stream_entry->content_info->stream_info.binary_content), content, content_length);
}
/* }}} */
/* {{{ proto SolrExtractRequest::__clone(void)
   Not meant to be invoked directly: always throws SolrIllegalOperationException,
   because cloning is not supported. */
PHP_METHOD(SolrExtractRequest, __clone)
{
    solr_throw_exception_ex(
        solr_ce_SolrIllegalOperationException,
        SOLR_ERROR_4001 TSRMLS_CC,
        SOLR_FILE_LINE_FUNC,
        "Cloning of SolrExtractRequest objects is currently not supported"
    );
}
/* }}} */
/* {{{ proto SolrExtractRequest::__sleep(void)
   Not meant to be invoked directly: always throws SolrIllegalOperationException,
   because serialization is not supported. */
PHP_METHOD(SolrExtractRequest, __sleep)
{
    solr_throw_exception_ex(
        solr_ce_SolrIllegalOperationException,
        SOLR_ERROR_4001 TSRMLS_CC,
        SOLR_FILE_LINE_FUNC,
        "SolrExtractRequest objects cannot be serialized or unserialized"
    );
}
/* }}} */
/* {{{ proto SolrExtractRequest::__wakeup(void)
   Not meant to be invoked directly: always throws SolrIllegalOperationException,
   because deserialization is not supported. */
PHP_METHOD(SolrExtractRequest, __wakeup)
{
    solr_throw_exception_ex(
        solr_ce_SolrIllegalOperationException,
        SOLR_ERROR_4001 TSRMLS_CC,
        SOLR_FILE_LINE_FUNC,
        "SolrExtractRequest objects cannot be serialized or unserialized"
    );
}
/* }}} */

View File

@@ -573,6 +573,34 @@ loop_complete:
}
/* }}} */
/* {{{ SolrParams object constructor inner
   Reserves a unique params index, initialises a params entry for it through solr_init_params()
   and records the index on `obj` in the SOLR_INDEX_PROPERTY_NAME property.
   Returns SUCCESS, or FAILURE when solr_init_params() fails (the property is then left untouched). */
PHP_SOLR_API int solr_params_obj_ctor(zval *obj TSRMLS_DC)
{
    solr_params_t entry;
    long int index = SOLR_UNIQUE_PARAMS_INDEX();

    if (FAILURE == solr_init_params(&entry, index)) {
        return FAILURE;
    }

    zend_update_property_long(
        Z_OBJCE_P(obj),
        obj,
        SOLR_INDEX_PROPERTY_NAME,
        sizeof(SOLR_INDEX_PROPERTY_NAME) - 1,
        index TSRMLS_CC
    );

    return SUCCESS;
}
/* }}} */
/* {{{ SolrParams object destructor inner
   Looks up the params entry that belongs to `obj` and removes it from the request-global
   params table. Does nothing when no entry can be fetched for the object. */
PHP_SOLR_API void solr_params_obj_dtor(zval *obj)
{
    solr_params_t *entry = NULL;

    /* Nothing to remove if this object has no params entry */
    if (solr_fetch_params_entry(obj, &entry TSRMLS_CC) != SUCCESS) {
        return;
    }

    zend_hash_index_del(SOLR_GLOBAL(params), entry->params_index);
}
/* }}} */
/* {{{ proto SolrParams::__clone(void)
Should never be called directly. Throws exceptions whenever there is an attempt to clone a SolrParams instance */
PHP_METHOD(SolrParams, __clone)
@@ -936,14 +964,7 @@ PHP_METHOD(SolrParams, unserialize)
Constructor. */
PHP_METHOD(SolrModifiableParams, __construct)
{
long int params_index = SOLR_UNIQUE_PARAMS_INDEX();
solr_params_t solr_params;
if (solr_init_params(&solr_params, params_index) == FAILURE)
{
return;
}
zend_update_property_long(solr_ce_SolrModifiableParams, getThis(), SOLR_INDEX_PROPERTY_NAME, sizeof(SOLR_INDEX_PROPERTY_NAME) - 1, params_index TSRMLS_CC);
solr_params_obj_ctor(getThis() TSRMLS_CC);
}
/* }}} */
@@ -951,15 +972,7 @@ PHP_METHOD(SolrModifiableParams, __construct)
Destructor. */
PHP_METHOD(SolrModifiableParams, __destruct)
{
solr_params_t *solr_params = NULL;
/* Retrieve the document entry for this SolrDocument */
if (solr_fetch_params_entry(getThis(), &solr_params TSRMLS_CC) == SUCCESS) {
zend_hash_index_del(SOLR_GLOBAL(params), solr_params->params_index);
return ;
}
solr_params_obj_dtor(getThis());
}
/* }}} */

View File

@@ -29,6 +29,7 @@
/* {{{ Constants for the SolrClient with*/
#define SOLR_DEFAULT_UPDATE_SERVLET "update"
#define SOLR_DEFAULT_EXTRACT_SERVLET "update/extract"
#define SOLR_DEFAULT_SEARCH_SERVLET "select"
#define SOLR_DEFAULT_THREADS_SERVLET "admin/threads"
#define SOLR_DEFAULT_PING_SERVLET "admin/ping"
@@ -55,6 +56,7 @@
#define PHP_SOLR_DOCUMENT_CLASSNAME "SolrDocument"
#define PHP_SOLR_DOCUMENT_FIELD_CLASSNAME "SolrDocumentField"
#define PHP_SOLR_INPUT_DOCUMENT_CLASSNAME "SolrInputDocument"
#define PHP_SOLR_EXTRACTREQUEST_CLASSNAME "SolrExtractRequest"
#define PHP_SOLR_CLIENT_CLASSNAME "SolrClient"
#define PHP_SOLR_PARAMS_CLASSNAME "SolrParams"
#define PHP_SOLR_QUERY_CLASSNAME "SolrQuery"

View File

@@ -317,6 +317,126 @@ int solr_curl_debug_callback(CURL *curl_handle, curl_infotype infotype, solr_cha
}
/* }}} */
/* {{{ solr_curl_request_reset
   Prepares `sch` for a new request: frees the per-request buffers, resets the cURL easy handle,
   re-applies the initial handle options derived from `options`, and finally clears the
   request-specific cURL options (method, body, URL, headers). */
PHP_SOLR_API void solr_curl_request_reset(solr_curl_t *sch, solr_client_options_t *options) {
/* Reset the buffers */
solr_string_free(&sch->request_header.buffer);
solr_string_free(&sch->request_body_debug.buffer);
solr_string_free(&sch->response_body.buffer);
solr_string_free(&sch->response_header.buffer);
solr_string_free(&sch->debug_data_buffer);
/* Drop every option currently set on the easy handle before re-applying the initial ones */
curl_easy_reset(sch->curl_handle);
/* NOTE(review): the helper receives the address of the local `sch` pointer; confirm this matches its signature */
solr_set_initial_curl_handle_options(&(sch), options TSRMLS_CC);
/* Reset the CURL options if the handle is reused */
curl_easy_setopt(sch->curl_handle, CURLOPT_HEADER, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POST, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPGET, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_NOBODY, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDSIZE, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDS, NULL);
curl_easy_setopt(sch->curl_handle, CURLOPT_URL, NULL);
curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPHEADER, NULL);
}
/* }}} */
/* {{{ solr_curl_init_header_list
   Builds the list of default HTTP headers sent with a request.

   Returns the head of the newly built list, or NULL if a header could not be appended.
   curl_slist_append() returns NULL on failure and leaves the existing list untouched, so its
   result is checked before it replaces the list pointer; on failure the partial list is freed
   instead of being lost. */
PHP_SOLR_API solr_http_header_list_t *solr_curl_init_header_list()
{
    static const char *const default_headers[] = {
        "Accept-Charset: utf-8",
        "Keep-Alive: 300",
        "Connection: keep-alive",
        /* Disable the Expect: 100-continue header. Jetty gets confused with this header */
        "Expect:"
    };
    solr_http_header_list_t *header_list = NULL;
    size_t i;

    for (i = 0; i < sizeof(default_headers) / sizeof(default_headers[0]); i++) {
        solr_http_header_list_t *extended = curl_slist_append(header_list, default_headers[i]);

        if (extended == NULL) {
            /* Safe on a NULL list as well */
            curl_slist_free_all(header_list);

            return NULL;
        }

        header_list = extended;
    }

    return header_list;
}
/* }}} */
/* {{{ solr_is_request_successful
   Evaluates the outcome of a finished transfer.

   Three conditions are checked in order, each of which turns the result into FAILURE:
   - `info_status` is not CURLE_OK (a SolrClientException is thrown),
   - `sch->result_code` is not CURLE_OK (a SolrClientException carrying the cURL error text is thrown),
   - the HTTP response code stored on `sch` is not 200 (no exception is thrown here).
   Returns SUCCESS only when none of them applies. */
PHP_SOLR_API int solr_is_request_successful(CURLcode info_status, solr_curl_t *sch)
{
    int outcome = SUCCESS;

    if (CURLE_OK != info_status) {
        solr_throw_exception_ex(solr_ce_SolrClientException, SOLR_ERROR_1004 TSRMLS_CC, SOLR_FILE_LINE_FUNC, "HTTP Transfer status could not be retrieved successfully");
        outcome = FAILURE;
    }

    if (CURLE_OK != sch->result_code) {
        solr_throw_exception_ex(solr_ce_SolrClientException, SOLR_ERROR_1004 TSRMLS_CC, SOLR_FILE_LINE_FUNC, "Solr HTTP Error %d: '%s' ", sch->result_code, curl_easy_strerror(sch->result_code));
        outcome = FAILURE;
    }

    /* Anything other than HTTP 200 counts as a failed request */
    if (200L != sch->response_header.response_code) {
        outcome = FAILURE;
    }

    return outcome;
}
/* }}} */
/* {{{ PHP_SOLR_API int solr_make_update_stream_request(solr_client_t *client, solr_ustream_t* stream_data, solr_string_t *request_params)
   Sends the content described by stream_data to the client's extract URL.

   client         - supplies the cURL handle (client->handle) and the options holding extract_url
   stream_data    - either raw content plus mime type (SOLR_EXTRACT_CONTENT_STREAM) or a filename
   request_params - request parameters, appended to the extract URL after a '&'

   Returns the result of solr_is_request_successful() for the finished transfer. */
PHP_SOLR_API int solr_make_update_stream_request(solr_client_t *client, solr_ustream_t* stream_data, solr_string_t *request_params)
{
    solr_curl_t *sch = &(client->handle);
    solr_client_options_t *options = &(client->options);
    int return_status = SUCCESS;
    CURLcode info_status = CURLE_OK;
    struct curl_httppost *formpost = NULL, *lastptr = NULL;
    int is_binary = stream_data->content_type == SOLR_EXTRACT_CONTENT_STREAM;
    solr_string_t content_type_header;
    solr_http_header_list_t *header_list = solr_curl_init_header_list();

    solr_curl_request_reset(sch, options);

    /* The parameters are appended to options->extract_url in place and nothing below removes them again.
     * NOTE(review): a second call on the same client would therefore append another '&' + parameter set to
     * the already extended URL. Callers may read the extended URL after this call, so it is left as is here;
     * confirm and fix together with the call site. */
    solr_string_appendc(&(options->extract_url), '&');
    solr_string_append_solr_string(&(options->extract_url), request_params);

    curl_easy_setopt(sch->curl_handle, CURLOPT_URL, options->extract_url.str);

    if (is_binary) {
        /* Raw content: POST the bytes as the request body with the caller supplied mime type */
        solr_string_init(&content_type_header);
        solr_string_appends(&content_type_header, "Content-Type: ", sizeof("Content-Type: ")-1);
        solr_string_append_solr_string(&content_type_header, &(stream_data->content_info->stream_info.mime_type));

        /* curl_slist_append() copies the string, so the temporary buffer can be released below */
        header_list = curl_slist_append(header_list, content_type_header.str);

        curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDS, stream_data->content_info->stream_info.binary_content.str);
        /* libcurl reads this option as a long from the variadic argument list, hence the explicit cast */
        curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDSIZE, (long) stream_data->content_info->stream_info.binary_content.len);

        solr_string_free_ex(&content_type_header);
    } else {
        /* File on disk: let libcurl upload it as a multipart form part */
        curl_formadd(&formpost, &lastptr,
            CURLFORM_COPYNAME, "PHPSOLRCLIENT",
            CURLFORM_FILE, (const char *) stream_data->content_info->filename.str,
            CURLFORM_END
        );
        curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPPOST, formpost);
    }

    curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPHEADER, header_list);

    sch->result_code = curl_easy_perform(sch->curl_handle);

    info_status = curl_easy_getinfo(sch->curl_handle, CURLINFO_RESPONSE_CODE, &(sch->response_header.response_code));

    return_status = solr_is_request_successful(info_status, sch);

    /* Release the per-request resources only after the transfer has finished */
    curl_slist_free_all(header_list);

    if (!is_binary) {
        curl_formfree(formpost);
    }

    return return_status;
}
/* }}} */
/* {{{ PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t request_type TSRMLS_DC) */
PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t request_type TSRMLS_DC)
{
@@ -326,34 +446,8 @@ PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t re
int return_status = SUCCESS;
CURLcode info_status = CURLE_OK;
header_list = curl_slist_append(header_list, "Accept-Charset: utf-8");
header_list = curl_slist_append(header_list, "Keep-Alive: 300");
header_list = curl_slist_append(header_list, "Connection: keep-alive");
/* Disable the Expect: 100-continue header. Jetty gets confused with this header */
header_list = curl_slist_append(header_list, "Expect:");
/* Reset the buffers */
solr_string_free(&sch->request_header.buffer);
solr_string_free(&sch->request_body_debug.buffer);
solr_string_free(&sch->response_body.buffer);
solr_string_free(&sch->response_header.buffer);
solr_string_free(&sch->debug_data_buffer);
curl_easy_reset(sch->curl_handle);
solr_set_initial_curl_handle_options(&(sch), options TSRMLS_CC);
/* Reset the CURL options if the handle is reused */
curl_easy_setopt(sch->curl_handle, CURLOPT_HEADER, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POST, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPGET, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_NOBODY, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDSIZE, 0L);
curl_easy_setopt(sch->curl_handle, CURLOPT_POSTFIELDS, NULL);
curl_easy_setopt(sch->curl_handle, CURLOPT_URL, NULL);
curl_easy_setopt(sch->curl_handle, CURLOPT_HTTPHEADER, NULL);
solr_curl_request_reset(sch, options);
header_list = solr_curl_init_header_list();
switch(request_type)
{
@@ -385,7 +479,7 @@ PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t re
case SOLR_REQUEST_UPDATE : /* HTTP XML POST */
{
header_list = curl_slist_append(header_list, "Content-Type: text/xml;charset=UTF-8");
header_list = curl_slist_append(header_list, "Content-Type: text/xml;charset=UTF-8");
curl_easy_setopt(sch->curl_handle, CURLOPT_POST, 1L);
@@ -442,26 +536,7 @@ PHP_SOLR_API int solr_make_request(solr_client_t *client, solr_request_type_t re
info_status = curl_easy_getinfo(sch->curl_handle, CURLINFO_RESPONSE_CODE, &(sch->response_header.response_code));
if (info_status != CURLE_OK)
{
php_error_docref(NULL TSRMLS_CC, E_WARNING, "HTTP Transfer status could not be retrieved successfully");
return_status = FAILURE;
}
if (sch->result_code != CURLE_OK)
{
/* commented by: Omar Shaban <omars@php.net> */
/* php_error_docref(NULL TSRMLS_CC, E_WARNING, "Solr HTTP Error : '%s' ", curl_easy_strerror(sch->result_code)); */
solr_throw_exception_ex(solr_ce_SolrClientException, SOLR_ERROR_1004 TSRMLS_CC, SOLR_FILE_LINE_FUNC, "Solr HTTP Error %d: '%s' ",sch->result_code, curl_easy_strerror(sch->result_code));
return_status = FAILURE;
}
if (sch->response_header.response_code != 200L)
{
return_status = FAILURE;
}
return_status = solr_is_request_successful(info_status, sch);
curl_slist_free_all(header_list);
@@ -513,6 +588,8 @@ PHP_SOLR_API void solr_free_options(solr_client_options_t *options)
solr_string_free(&((options)->terms_url));
solr_string_free(&((options)->system_url));
solr_string_free(&((options)->get_url));
solr_string_free(&((options)->extract_url));
solr_string_free(&((options)->update_servlet));
solr_string_free(&((options)->search_servlet));
@@ -521,6 +598,7 @@ PHP_SOLR_API void solr_free_options(solr_client_options_t *options)
solr_string_free(&((options)->terms_servlet));
solr_string_free(&((options)->system_servlet));
solr_string_free(&((options)->get_servlet));
solr_string_free(&((options)->extract_servlet));
}
/* }}} */

View File

@@ -147,6 +147,30 @@ PHP_SOLR_API void solr_query_register_class_constants(zend_class_entry *ce TSRML
}
/* }}} */
/* {{{ PHP_SOLR_API void solr_extract_register_class_constants(zend_class_entry *ce TSRMLS_DC)
   Declares the extract request parameter names as string class constants on ce */
PHP_SOLR_API void solr_extract_register_class_constants(zend_class_entry *ce TSRMLS_DC)
{
/* Declares one string constant; the name length is derived from the literal itself */
#define SOLR_EXTRACT_DECLARE_CONSTANT(const_name, const_value) \
    zend_declare_class_constant_string(ce, const_name, sizeof(const_name)-1, const_value TSRMLS_CC)

    SOLR_EXTRACT_DECLARE_CONSTANT("CAPTURE_ELEMENTS", "capture");
    SOLR_EXTRACT_DECLARE_CONSTANT("CAPTURE_ATTRIBUTES", "captureAttr");
    SOLR_EXTRACT_DECLARE_CONSTANT("COMMIT_WITHIN", "commitWithin");
    SOLR_EXTRACT_DECLARE_CONSTANT("DATE_FORMATS", "date.formats");
    SOLR_EXTRACT_DECLARE_CONSTANT("DEFAULT_FIELD", "defaultField");
    SOLR_EXTRACT_DECLARE_CONSTANT("EXTRACT_ONLY", "extractOnly");
    SOLR_EXTRACT_DECLARE_CONSTANT("EXTRACT_FORMAT", "extractFormat");
    SOLR_EXTRACT_DECLARE_CONSTANT("IGNORE_TIKA_EXCEPTION", "ignoreTikaException");
    SOLR_EXTRACT_DECLARE_CONSTANT("LITERALS_OVERRIDE", "literalsOverride");
    SOLR_EXTRACT_DECLARE_CONSTANT("LOWERNAMES", "lowernames");
    SOLR_EXTRACT_DECLARE_CONSTANT("MULTIPART_UPLOAD_LIMIT", "multipartUploadLimitInKB");
    SOLR_EXTRACT_DECLARE_CONSTANT("PASSWORD_MAP_FILE", "passwordsFile");
    SOLR_EXTRACT_DECLARE_CONSTANT("RESOURCE_NAME", "resource.name");
    SOLR_EXTRACT_DECLARE_CONSTANT("RESOURCE_PASSWORD", "resource.password");
    SOLR_EXTRACT_DECLARE_CONSTANT("TIKA_CONFIG", "tika.config");
    /* Misspelled name, kept as an alias so scripts already using it keep working */
    SOLR_EXTRACT_DECLARE_CONSTANT("TIKE_CONFIG", "tika.config");
    SOLR_EXTRACT_DECLARE_CONSTANT("UNKNOWN_FIELD_PREFIX", "uprefix");
    SOLR_EXTRACT_DECLARE_CONSTANT("XPATH_EXPRESSION", "xpath");

    /* Prefixes: the field name is appended by the caller, e.g. LITERALS_PREFIX . 'id' */
    SOLR_EXTRACT_DECLARE_CONSTANT("FIELD_MAPPING_PREFIX", "fmap.");
    SOLR_EXTRACT_DECLARE_CONSTANT("FIELD_BOOST_PREFIX", "boost.");
    SOLR_EXTRACT_DECLARE_CONSTANT("LITERALS_PREFIX", "literal.");

#undef SOLR_EXTRACT_DECLARE_CONSTANT
}
/* }}} */
/** ************************************************************************ **/
/** UTILITY FUNCTIONS **/
/** ************************************************************************ **/
@@ -1533,6 +1557,36 @@ PHP_SOLR_API void solr_solrfunc_to_string(solr_function_t *function, solr_string
/* todo handle localParams argument */
}
/* {{{ PHP_SOLR_API void solr_destroy_ustream_ex(solr_ustream_t *stream)
   Releases the strings owned by an update stream entry, then its content_info and the entry itself */
PHP_SOLR_API void solr_destroy_ustream_ex(solr_ustream_t *stream)
{
    solr_cuv_t *content = stream->content_info;

    /* content_info is a union, so content_type decides which members are live.
     * Freeing only filename releases the stream's content buffer (it occupies the same storage)
     * but never its mime type string, so stream entries are handled separately. */
    if (stream->content_type == SOLR_EXTRACT_CONTENT_STREAM) {
        if (content->stream_info.binary_content.len > 0) {
            solr_string_free(&content->stream_info.binary_content);
        }
        if (content->stream_info.mime_type.len > 0) {
            solr_string_free(&content->stream_info.mime_type);
        }
    } else if (content->filename.len > 0) {
        solr_string_free(&content->filename);
    }

    pefree(content, 0);
    pefree(stream, 0);
}
/* }}} */
/* {{{ PHP_SOLR_API void solr_destroy_ustream_zv(zval *obj)
   zval based wrapper: destroys the update stream entry stored as the pointer value of obj */
PHP_SOLR_API void solr_destroy_ustream_zv(zval *obj)
{
    solr_destroy_ustream_ex((solr_ustream_t *) Z_PTR_P(obj));
}
/* }}} */
/* {{{ PHP_SOLR_API int solr_fetch_ustream_entry(zval *objptr, solr_ustream_t **stream_entry TSRMLS_DC)
   Looks up the update stream entry that belongs to objptr.

   The object's index property is read and used as the key into the ustreams HashTable of the module globals.
   On success *stream_entry points to the entry and SUCCESS is returned; when no entry exists for the index,
   *stream_entry is NULL, two warnings are emitted and FAILURE is returned. */
PHP_SOLR_API int solr_fetch_ustream_entry(zval *objptr, solr_ustream_t **stream_entry TSRMLS_DC)
{
    zval rv;
    zval *index_zv = zend_read_property(Z_OBJCE_P(objptr), objptr, SOLR_INDEX_PROPERTY_NAME, sizeof(SOLR_INDEX_PROPERTY_NAME)-1, 1, &rv);
    ulong index = Z_LVAL_P(index_zv);

    *stream_entry = zend_hash_index_find_ptr(SOLR_GLOBAL(ustreams), index);

    if (*stream_entry == NULL) {
        /* index is unsigned, so it is printed with %lu rather than %ld */
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Update Stream Index %lu. HashTable index does not exist.", index);
        php_error_docref(NULL TSRMLS_CC, E_WARNING, SOLR_ERROR_1008_MSG, SOLR_FILE_LINE_FUNC);

        return FAILURE;
    }

    return SUCCESS;
}
/* }}} */
/*
* Local variables:
* tab-width: 4

View File

@@ -41,6 +41,9 @@
/* Retrieve a unique hash index for this SolrFunction instance. Unique for this thread */
#define SOLR_UNIQUE_FUNCTION_INDEX() solr_hashtable_get_new_index(SOLR_GLOBAL(functions) TSRMLS_CC)
/* Retrieve a unique hash index for this update stream instance. Unique for this thread */
#define SOLR_UNIQUE_USTREAM_INDEX() solr_hashtable_get_new_index(SOLR_GLOBAL(ustreams) TSRMLS_CC)
/* Iterates through the HashTable pointer */
#define SOLR_HASHTABLE_FOR_LOOP(HashTablePtr) \
if (HashTablePtr) for ( zend_hash_internal_pointer_reset((HashTablePtr)); \
@@ -159,6 +162,13 @@
}
/* }}} */
/* {{{ static inline solr_ustream_t *solr_get_ustream_object(zend_object *obj)
   Maps a zend_object that is embedded as the std member of a solr_ustream_t back to that solr_ustream_t */
static inline solr_ustream_t *solr_get_ustream_object(zend_object *obj)
{
    char *base = (char *) obj;

    /* Step back by the offset of std to reach the start of the enclosing structure */
    base -= XtOffsetOf(solr_ustream_t, std);

    return (solr_ustream_t *) base;
}
/* }}} */
/* Fetches the solr_ustream_t that embeds the zend_object of zv.
 * NOTE(review): the expansion ends with a semicolon, so the macro only works as a complete statement or as
 * the last thing on the right-hand side of an assignment; it cannot be used inside a larger expression such
 * as Z_USTREAM_P(zv)->content_type. Consider dropping the semicolon after checking all call sites. */
#define Z_USTREAM_P(zv) solr_get_ustream_object(Z_OBJ_P(zv));
#endif /* SOLR_MACROS_H */
/*

View File

@@ -62,6 +62,7 @@ typedef enum {
SOLR_REQUEST_TERMS = 5, /** 5 **/
SOLR_REQUEST_SYSTEM = 6, /** 6 **/
SOLR_REQUEST_GET = 7,
SOLR_REQUEST_EXTRACT = 8,
SOLR_REQUEST_END
} solr_request_type_t;
@@ -76,6 +77,7 @@ typedef enum {
SOLR_SERVLET_TYPE_TERMS = 16,
SOLR_SERVLET_TYPE_SYSTEM = 32,
SOLR_SERVLET_TYPE_GET = 64,
SOLR_SERVLET_TYPE_EXTRACT = 128,
SOLR_SERVLET_TYPE_END
} solr_servlet_type_t;
@@ -224,6 +226,8 @@ typedef struct {
solr_string_t update_url; /* URL used for updates */
solr_string_t extract_url; /* URL used for file indexing using extract */
solr_string_t search_url; /* URL used for queries */
solr_string_t thread_url; /* URL used for thread monitoring */
@@ -238,6 +242,8 @@ typedef struct {
solr_string_t update_servlet; /* The update servlet */
solr_string_t extract_servlet; /* The update servlet appended with extract Req. handler */
solr_string_t search_servlet; /* The search servlet */
solr_string_t thread_servlet; /* The thread servlet */
@@ -428,7 +434,28 @@ typedef struct {
HashTable *params; /* The HashTable<solr_string_t> for storing function key-val parameters */
} solr_function_t;
/* }}} */
/* {{{ content stream/extract types */
/* Values for solr_ustream_t.content_type */
#define SOLR_EXTRACT_CONTENT_FILE 0
#define SOLR_EXTRACT_CONTENT_STREAM 1
/* Content passed in directly instead of being read from a file */
typedef struct {
solr_string_t binary_content; /* actual contents */
solr_string_t mime_type; /* mime type */
} solr_cuv_binary_t;
/* Content description; the two members share storage, so only one of them is valid at a time */
typedef union {
solr_string_t filename;
solr_cuv_binary_t stream_info;
} solr_cuv_t;
/* An update stream (extract request) entry */
typedef struct {
solr_cuv_t *content_info; /* stores the content data whether filename or stream_info */
int content_type; /* stores the content type from the constants above */
zend_object std;
} solr_ustream_t;
/* }}} */
/* }}} */
@@ -452,6 +479,8 @@ ZEND_BEGIN_MODULE_GLOBALS(solr)
HashTable *functions; /* HashTable for storing solr_function_t */
HashTable *ustreams; /* HashTable for storing solr_ustream_t */
ZEND_END_MODULE_GLOBALS(solr)
/* }}} */

View File

@@ -0,0 +1,41 @@
--TEST--
SolrClient::sendUpdateStream - send SolrExtractRequest (bin)
--SKIPIF--
<?php require_once 'skip.if.server_not_configured.inc'; ?>
--FILE--
<?php
require_once "bootstrap.inc";

// Connect to the core that is used for the file extraction tests
$clientOptions = [
    'hostname' => SOLR_SERVER_HOSTNAME,
    'login'    => SOLR_SERVER_USERNAME,
    'password' => SOLR_SERVER_PASSWORD,
    'port'     => SOLR_SERVER_PORT,
    'path'     => SOLR_SERVER_FILES_PATH,
];
$solrClient = new SolrClient($clientOptions);

// Extraction parameters; their order shows up in the request URL checked below
$params = new SolrModifiableParams();
$params
    ->set(SolrExtractRequest::LITERALS_PREFIX . 'id', 'doc1')
    ->set(SolrExtractRequest::CAPTURE_ELEMENTS, 'p')
    ->set(SolrExtractRequest::FIELD_MAPPING_PREFIX . 'p', 'text');

// Send the raw file contents together with an explicit mime type
$pdfBytes = file_get_contents(EXTRACT_FILE_1);
$request = SolrExtractRequest::createFromStream($pdfBytes, 'application/pdf', $params);
$updateResponse = $solrClient->sendUpdateStream($request);

// Roll back after the update so the test document is not committed
$solrClient->rollback();

echo $updateResponse->getHttpStatus(), PHP_EOL;
echo $updateResponse->getRequestUrl(), PHP_EOL;
?>
--EXPECTF--
200
http://%s/update/extract/?version=2.2&indent=on&wt=xml&literal.id=doc1&capture=p&fmap.p=text

View File

@@ -0,0 +1,39 @@
--TEST--
SolrClient::sendUpdateStream - send SolrExtractRequest (file)
--SKIPIF--
<?php require_once 'skip.if.server_not_configured.inc'; ?>
--FILE--
<?php
require_once "bootstrap.inc";

// Connect to the core that is used for the file extraction tests
$clientOptions = [
    'hostname' => SOLR_SERVER_HOSTNAME,
    'login'    => SOLR_SERVER_USERNAME,
    'password' => SOLR_SERVER_PASSWORD,
    'port'     => SOLR_SERVER_PORT,
    'path'     => SOLR_SERVER_FILES_PATH,
];
$solrClient = new SolrClient($clientOptions);

// Extraction parameters; their order shows up in the request URL checked below
$params = new SolrModifiableParams();
$params
    ->set(SolrExtractRequest::LITERALS_PREFIX . 'id', 'doc1')
    ->set(SolrExtractRequest::CAPTURE_ELEMENTS, 'p')
    ->set(SolrExtractRequest::FIELD_MAPPING_PREFIX . 'p', 'text');

// Build the request from a file path rather than from in-memory content
$request = SolrExtractRequest::createFromFile(EXTRACT_FILE_1, $params);
$updateResponse = $solrClient->sendUpdateStream($request);

// Roll back after the update so the test document is not committed
$solrClient->rollback();

echo $updateResponse->getHttpStatus(), PHP_EOL;
echo $updateResponse->getRequestUrl(), PHP_EOL;
?>
--EXPECTF--
200
http://%s/update/extract/?version=2.2&indent=on&wt=xml&literal.id=doc1&capture=p&fmap.p=text

View File

@@ -0,0 +1,18 @@
--TEST--
SolrExtractRequest - clone [exception]
--FILE--
<?php
$extractParams = new SolrModifiableParams();
$original = SolrExtractRequest::createFromFile('test', $extractParams);

// Cloning is expected to be rejected with an exception
try {
    $copy = clone $original;
} catch (SolrIllegalOperationException $exception) {
    printf("Exception %d: %s", $exception->getCode(), $exception->getMessage());
    echo PHP_EOL;
}
?>
--EXPECTF--
Exception 4001: Cloning of SolrExtractRequest objects is currently not supported

View File

@@ -0,0 +1,17 @@
--TEST--
SolrExtractRequest - serialize [exception]
--FILE--
<?php
$extractParams = new SolrModifiableParams();
$extractRequest = SolrExtractRequest::createFromFile('test', $extractParams);

// Serializing is expected to be rejected with an exception
try {
    var_dump(serialize($extractRequest));
} catch (SolrIllegalOperationException $exception) {
    printf("Exception %d: %s", $exception->getCode(), $exception->getMessage());
    echo PHP_EOL;
}
?>
--EXPECTF--
Exception 4001: SolrExtractRequest objects cannot be serialized or unserialized

Binary file not shown.

View File

@@ -8,6 +8,7 @@ define('EXAMPLE_RESPONSE_XML_1', ROOT_DIRECTORY . '/files/response_xml.1.xml');
define('EXAMPLE_RESPONSE_XML_2', ROOT_DIRECTORY . '/files/response_xml.2.xml');
define('EXAMPLE_RESPONSE_XML_3', ROOT_DIRECTORY . '/files/response_xml.3.xml');
define('EXAMPLE_RESPONSE_XML_4', ROOT_DIRECTORY . '/files/response_xml.4.xml');
define('EXTRACT_FILE_1', ROOT_DIRECTORY . '/files/extract_file.1.pdf');
/* Whether or not to run in secure mode */
define('SOLR_SECURE', false);
@@ -24,6 +25,8 @@ define('SOLR_SERVER_PATH', 'solr/collection1');
/* used to test child documents */
define('SOLR_SERVER_STORE_PATH', 'solr/metal_store');
define('SOLR_SERVER_FILES_PATH', 'solr/myfiles');
/* HTTP Basic Authentication Username */
define('SOLR_SERVER_USERNAME', 'admin');