Source for file mysql.php
Documentation is available at mysql.php
* @package Joomla.Administrator
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
* Indexer class supporting MySQL(i) for the Finder indexer package.
* The indexer class provides the core functionality of the Finder
* search engine. It is responsible for adding and updating the
* content links table; extracting and scoring tokens; and maintaining
* all referential information for the content.
* Note: All exceptions thrown from within this class should be caught
* @package Joomla.Administrator
* Method to index a content item.
* @param FinderIndexerResult $item The content item to index.
* @param string $format The format of the content. [optional]
* @return integer The ID of the record in the links table.
* @throws Exception on database error.
public function index($item, $format =
'html')
// Mark beforeIndexing in the profiler.
static::$profiler ?
static::$profiler->mark('beforeIndexing') :
null;
$nd =
$db->getNullDate();
// Check if the item is in the database.
$query =
$db->getQuery(true)
->select($db->quoteName('link_id') .
', ' .
$db->quoteName('md5sum'))
->from($db->quoteName('#__finder_links'))
->where($db->quoteName('url') .
' = ' .
$db->quote($item->url));
// Load the item from the database.
$link =
$db->loadObject();
// Get the indexer state.
$state =
static::getState();
// Get the signatures of the item.
$curSig =
static::getSignature($item);
$oldSig = isset
($link->md5sum) ?
$link->md5sum :
null;
// Get the other item information.
$linkId =
empty($link->link_id) ?
null :
$link->link_id;
$isNew =
empty($link->link_id) ?
true :
false;
// Check the signatures. If they match, the item is up to date.
if (!$isNew &&
$curSig ==
$oldSig)
* If the link already exists, flush all the term maps for the item.
* Maps are stored in 16 tables so we need to iterate through and flush
* each table one at a time.
for ($i =
0; $i <=
15; $i++
)
// Flush the maps for the link.
->delete($db->quoteName('#__finder_links_terms' .
dechex($i)))
->where($db->quoteName('link_id') .
' = ' . (int)
$linkId);
// Remove the taxonomy maps.
// Mark afterUnmapping in the profiler.
static::$profiler ?
static::$profiler->mark('afterUnmapping') :
null;
// Perform cleanup on the item data.
$item->publish_start_date = (int)
$item->publish_start_date !=
0 ?
$item->publish_start_date :
$nd;
$item->publish_end_date = (int)
$item->publish_end_date !=
0 ?
$item->publish_end_date :
$nd;
$item->start_date = (int)
$item->start_date !=
0 ?
$item->start_date :
$nd;
$item->end_date = (int)
$item->end_date !=
0 ?
$item->end_date :
$nd;
// Prepare the item description.
* Now, we need to enter the item into the links table. If the item
* already exists in the database, we need to use an UPDATE query.
* Otherwise, we need to use an INSERT to get the link id back.
$db->quoteName('url'), $db->quoteName('route'), $db->quoteName('title'), $db->quoteName('description'),
$db->quoteName('indexdate'), $db->quoteName('published'), $db->quoteName('state'), $db->quoteName('access'),
$db->quoteName('language'), $db->quoteName('type_id'), $db->quoteName('object'), $db->quoteName('publish_start_date'),
$db->quoteName('publish_end_date'), $db->quoteName('start_date'), $db->quoteName('end_date'), $db->quoteName('list_price'),
$db->quoteName('sale_price')
->insert($db->quoteName('#__finder_links'))
$db->quote($item->url) .
', '
.
$db->quote($item->route) .
', '
.
$db->quote($item->title) .
', '
.
$db->quote($item->description) .
', '
.
$query->currentTimestamp() .
', '
. (int)
$item->state .
', '
. (int)
$item->access .
', '
.
$db->quote($item->language) .
', '
. (int)
$item->type_id .
', '
.
$db->quote($item->publish_start_date) .
', '
.
$db->quote($item->publish_end_date) .
', '
.
$db->quote($item->start_date) .
', '
.
$db->quote($item->end_date) .
', '
. (double)
($item->list_price ?
$item->list_price :
0) .
', '
. (double)
($item->sale_price ?
$item->sale_price :
0)
$linkId = (int)
$db->insertid();
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('route') .
' = ' .
$db->quote($item->route))
->set($db->quoteName('title') .
' = ' .
$db->quote($item->title))
->set($db->quoteName('description') .
' = ' .
$db->quote($item->description))
->set($db->quoteName('indexdate') .
' = ' .
$query->currentTimestamp())
->set($db->quoteName('state') .
' = ' . (int)
$item->state)
->set($db->quoteName('access') .
' = ' . (int)
$item->access)
->set($db->quoteName('language') .
' = ' .
$db->quote($item->language))
->set($db->quoteName('type_id') .
' = ' . (int)
$item->type_id)
->set($db->quoteName('object') .
' = ' .
$db->quote(serialize($item)))
->set($db->quoteName('publish_start_date') .
' = ' .
$db->quote($item->publish_start_date))
->set($db->quoteName('publish_end_date') .
' = ' .
$db->quote($item->publish_end_date))
->set($db->quoteName('start_date') .
' = ' .
$db->quote($item->start_date))
->set($db->quoteName('end_date') .
' = ' .
$db->quote($item->end_date))
->set($db->quoteName('list_price') .
' = ' . (double)
($item->list_price ?
$item->list_price :
0))
->set($db->quoteName('sale_price') .
' = ' . (double)
($item->sale_price ?
$item->sale_price :
0))
->where('link_id = ' . (int)
$linkId);
// Set up the variables we will need during processing.
// Mark afterLinking in the profiler.
static::$profiler ?
static::$profiler->mark('afterLinking') :
null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
* Process the item's content. The items can customize their
* processing instructions to define extra properties to process
* or rearrange how properties are weighted.
foreach ($item->getInstructions() as $group =>
$properties)
// Iterate through the properties of the group.
foreach ($properties as $property)
// Check if the property exists in the item.
if (empty($item->$property))
// Tokenize the property.
// Tokenize an array of content and add it to the database.
foreach ($item->$property as $ip)
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
if ($group ===
static::PATH_CONTEXT)
$ip =
JFile::stripExt($ip);
$ip =
str_replace('/', ' ', $ip);
// Tokenize a string of content and add it to the database.
$count +=
$this->tokenizeToDB($ip, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count >
static::$state->options->get('memory_table_limit', 30000))
$this->toggleTables(false);
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
if ($group ===
static::PATH_CONTEXT)
$item->$property =
JFile::stripExt($item->$property);
$item->$property =
str_replace('/', ' ', $item->$property);
$item->$property =
str_replace('-', ' ', $item->$property);
// Tokenize a string of content and add it to the database.
$count +=
$this->tokenizeToDB($item->$property, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count >
static::$state->options->get('memory_table_limit', 30000))
$this->toggleTables(false);
* Process the item's taxonomy. The items can customize their
* taxonomy mappings to define extra properties to map.
foreach ($item->getTaxonomy() as $branch =>
$nodes)
// Iterate through the nodes and map them to the branch.
foreach ($nodes as $node)
// Add the node to the tree.
// Add the link => node map.
// Tokenize the node title and add them to the database.
$count +=
$this->tokenizeToDB($node->title, static::META_CONTEXT, $item->language, $format);
// Mark afterProcessing in the profiler.
static::$profiler ?
static::$profiler->mark('afterProcessing') :
null;
* At this point, all of the item's content has been parsed, tokenized
* and inserted into the #__finder_tokens table. Now, we need to
* aggregate all the data into that table into a more usable form. The
* aggregated data will be inserted into #__finder_tokens_aggregate
$query =
'INSERT INTO ' .
$db->quoteName('#__finder_tokens_aggregate') .
' (' .
$db->quoteName('term_id') .
', ' .
$db->quoteName('term') .
', ' .
$db->quoteName('stem') .
', ' .
$db->quoteName('common') .
', ' .
$db->quoteName('phrase') .
', ' .
$db->quoteName('term_weight') .
', ' .
$db->quoteName('context') .
', ' .
$db->quoteName('context_weight') .
', ' .
$db->quoteName('language') .
')' .
' t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, ' .
' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, t1.language' .
' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' FROM ' .
$db->quoteName('#__finder_tokens') .
' AS t1' .
' WHERE t1.context = %d' .
' JOIN ' .
$db->quoteName('#__finder_tokens') .
' AS t2 ON t2.term = t1.term' .
' LEFT JOIN ' .
$db->quoteName('#__finder_terms') .
' AS t ON t.term = t1.term' .
' WHERE t2.context = %d' .
' ORDER BY t1.term DESC';
// Iterate through the contexts and aggregate the tokens per context.
foreach ($state->weights as $context =>
$multiplier)
// Run the query to aggregate the tokens for this context..
$db->setQuery(sprintf($query, $multiplier, $context, $context));
// Mark afterAggregating in the profiler.
static::$profiler ?
static::$profiler->mark('afterAggregating') :
null;
* When we pulled down all of the aggregate data, we did a LEFT JOIN
* over the terms table to try to find all the term ids that
* already exist for our tokens. If any of the rows in the aggregate
* table have a term of 0, then no term record exists for that
* term so we need to add it to the terms table.
'INSERT IGNORE INTO ' .
$db->quoteName('#__finder_terms') .
' (' .
$db->quoteName('term') .
', ' .
$db->quoteName('stem') .
', ' .
$db->quoteName('common') .
', ' .
$db->quoteName('phrase') .
', ' .
$db->quoteName('weight') .
', ' .
$db->quoteName('soundex') .
', ' .
$db->quoteName('language') .
')' .
' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
' FROM ' .
$db->quoteName('#__finder_tokens_aggregate') .
' AS ta' .
' WHERE ta.term_id = 0' .
* Now, we just inserted a bunch of new records into the terms table
* so we need to go back and update the aggregate table with all the
$query =
$db->getQuery(true)
->update($db->quoteName('#__finder_tokens_aggregate') .
' AS ta')
->join('INNER', $db->quoteName('#__finder_terms') .
' AS t ON t.term = ta.term')
->set('ta.term_id = t.term_id')
->where('ta.term_id = 0');
// Mark afterTerms in the profiler.
static::$profiler ?
static::$profiler->mark('afterTerms') :
null;
* After we've made sure that all of the terms are in the terms table
* and the aggregate table has the correct term ids, we need to update
* the links counter for each term by one.
->update($db->quoteName('#__finder_terms') .
' AS t')
->join('INNER', $db->quoteName('#__finder_tokens_aggregate') .
' AS ta ON ta.term_id = t.term_id')
->set('t.' .
$db->quoteName('links') .
' = t.links + 1');
// Mark afterTerms in the profiler.
static::$profiler ?
static::$profiler->mark('afterTerms') :
null;
* Before we can insert all of the mapping rows, we have to figure out
* which mapping table the rows need to be inserted into. The mapping
* table for each term is based on the first character of the md5 of
* the first character of the term. In php, it would be expressed as
* substr(md5(substr($token, 0, 1)), 0, 1)
->update($db->quoteName('#__finder_tokens_aggregate'))
->set($db->quoteName('map_suffix') .
' = SUBSTR(MD5(SUBSTR(' .
$db->quoteName('term') .
', 1, 1)), 1, 1)');
* At this point, the aggregate table contains a record for each
* term in each context. So, we're going to pull down all of that
* data while grouping the records by term and add all of the
* sub-totals together to arrive at the final total for each token for
* this link. Then, we insert all of that data into the appropriate
for ($i =
0; $i <=
15; $i++
)
// Get the mapping table suffix.
* We have to run this query 16 times, one for each link => term
'INSERT INTO ' .
$db->quoteName('#__finder_links_terms' .
$suffix) .
' (' .
$db->quoteName('link_id') .
', ' .
$db->quoteName('term_id') .
', ' .
$db->quoteName('weight') .
')' .
' SELECT ' . (int)
$linkId .
', ' .
$db->quoteName('term_id') .
',' .
' ROUND(SUM(' .
$db->quoteName('context_weight') .
'), 8)' .
' FROM ' .
$db->quoteName('#__finder_tokens_aggregate') .
' WHERE ' .
$db->quoteName('map_suffix') .
' = ' .
$db->quote($suffix) .
' GROUP BY ' .
$db->quoteName('term') .
' ORDER BY ' .
$db->quoteName('term') .
' DESC'
// Mark afterMapping in the profiler.
static::$profiler ?
static::$profiler->mark('afterMapping') :
null;
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('md5sum') .
' = ' .
$db->quote($curSig))
->where($db->quoteName('link_id') .
' = ' .
$db->quote($linkId));
// Mark afterSigning in the profiler.
static::$profiler ?
static::$profiler->mark('afterSigning') :
null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
// Toggle the token tables back to memory tables.
// Mark afterTruncating in the profiler.
static::$profiler ?
static::$profiler->mark('afterTruncating') :
null;
* Method to remove a link from the index.
* @param integer $linkId The id of the link.
* @return boolean True on success.
* @throws Exception on database error.
public function remove($linkId)
$query =
$db->getQuery(true);
// Update the link counts and remove the mapping records.
for ($i =
0; $i <=
15; $i++
)
// Update the link counts for the terms.
$query->update($db->quoteName('#__finder_terms') .
' AS t')
->join('INNER', $db->quoteName('#__finder_links_terms' .
dechex($i)) .
' AS m ON m.term_id = t.term_id')
->set('t.links = t.links - 1')
->where('m.link_id = ' .
$db->quote((int)
$linkId));
// Remove all records from the mapping tables.
->delete($db->quoteName('#__finder_links_terms' .
dechex($i)))
->where($db->quoteName('link_id') .
' = ' . (int)
$linkId);
// Delete all orphaned terms.
->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') .
' <= 0');
// Delete the link from the index.
->delete($db->quoteName('#__finder_links'))
->where($db->quoteName('link_id') .
' = ' .
$db->quote((int)
$linkId));
// Remove the taxonomy maps.
// Remove the orphaned taxonomy nodes.
* Method to optimize the index. We use this method to remove unused terms
* and any other optimizations that might be necessary.
* @return boolean True on success.
* @throws Exception on database error.
// Get the database object.
$query =
$db->getQuery(true);
// Delete all orphaned terms.
$query->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') .
' <= 0');
// Optimize the links table.
$db->setQuery('OPTIMIZE TABLE ' .
$db->quoteName('#__finder_links'));
for ($i =
0; $i <=
15; $i++
)
// Optimize the terms mapping table.
$db->setQuery('OPTIMIZE TABLE ' .
$db->quoteName('#__finder_links_terms' .
dechex($i)));
// Optimize the terms mapping table.
$db->setQuery('OPTIMIZE TABLE ' .
$db->quoteName('#__finder_links_terms'));
// Remove the orphaned taxonomy nodes.
// Optimize the taxonomy mapping table.
$db->setQuery('OPTIMIZE TABLE ' .
$db->quoteName('#__finder_taxonomy_map'));
* Method to add a set of tokens to the database.
* @param mixed $tokens An array or single FinderIndexerToken object.
* @param mixed $context The context of the tokens. See context constants. [optional]
* @return integer The number of tokens inserted into the database.
* @throws Exception on database error.
// Get the database object.
$query =
$db->getQuery(true);
// Force tokens to an array.
$tokens =
is_array($tokens) ?
$tokens :
array($tokens);
// Count the number of token values.
// Insert the tokens into the database.
$query->insert($db->quoteName('#__finder_tokens'))
$db->quoteName('common'),
$db->quoteName('phrase'),
$db->quoteName('weight'),
$db->quoteName('context'),
$db->quoteName('language')
// Iterate through the tokens to create SQL value sets.
foreach ($tokens as $token)
$db->quote($token->term) .
', '
.
$db->quote($token->stem) .
', '
. (int)
$token->common .
', '
. (int)
$token->phrase .
', '
. (float)
$token->weight .
', '
.
$db->quote($token->language)
* Method to switch the token tables from Memory tables to MyISAM tables
* when they are close to running out of memory.
* @param boolean $memory Flag to control how they should be toggled.
* @return boolean True on success.
* @throws Exception on database error.
// Get the database adapter.
// Check if we are setting the tables to the Memory engine.
if ($memory ===
true &&
$state !==
true)
// Set the tokens table to Memory.
$db->setQuery('ALTER TABLE ' .
$db->quoteName('#__finder_tokens') .
' ENGINE = MEMORY');
// Set the tokens aggregate table to Memory.
$db->setQuery('ALTER TABLE ' .
$db->quoteName('#__finder_tokens_aggregate') .
' ENGINE = MEMORY');
// Set the internal state.
// We must be setting the tables to the MyISAM engine.
elseif ($memory ===
false &&
$state !==
false)
// Set the tokens table to MyISAM.
$db->setQuery('ALTER TABLE ' .
$db->quoteName('#__finder_tokens') .
' ENGINE = MYISAM');
// Set the tokens aggregate table to MyISAM.
$db->setQuery('ALTER TABLE ' .
$db->quoteName('#__finder_tokens_aggregate') .
' ENGINE = MYISAM');
// Set the internal state.
Documentation generated on Tue, 19 Nov 2013 15:09:06 +0100 by phpDocumentor 1.4.3