Skip to content

Instantly share code, notes, and snippets.

@cute
Created March 30, 2012 08:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cute/2249640 to your computer and use it in GitHub Desktop.
Save cute/2249640 to your computer and use it in GitHub Desktop.
Sphinx 2.0.4 GetDocinfo Patch
diff -rup sphinx-2.0.4-release/api/sphinxapi.php 2.0.4/api/sphinxapi.php
--- sphinx-2.0.4-release/api/sphinxapi.php 2012-01-31 07:07:35.000000000 +0800
+++ 2.0.4/api/sphinxapi.php 2012-03-30 16:39:25.000000000 +0800
@@ -27,6 +27,7 @@ define ( "SEARCHD_COMMAND_KEYWORDS", 3 )
define ( "SEARCHD_COMMAND_PERSIST", 4 );
define ( "SEARCHD_COMMAND_STATUS", 5 );
define ( "SEARCHD_COMMAND_FLUSHATTRS", 7 );
+define ( "SEARCHD_COMMAND_DOCINFO", 9 );
/// current client-side command implementation versions
define ( "VER_COMMAND_SEARCH", 0x119 );
@@ -36,6 +37,7 @@ define ( "VER_COMMAND_KEYWORDS", 0x100 )
define ( "VER_COMMAND_STATUS", 0x100 );
define ( "VER_COMMAND_QUERY", 0x100 );
define ( "VER_COMMAND_FLUSHATTRS", 0x100 );
+define ( "VER_COMMAND_DOCINFO", 0x100 );
/// known searchd status codes
define ( "SEARCHD_OK", 0 );
@@ -1705,6 +1707,115 @@ class SphinxClient
$this->_MBPop ();
return $tag;
}
+
+	/**
+	 * Connect to searchd and fetch the stored attributes of a single document.
+	 * @param int $doc_id document ID to look up
+	 * @param string $index name of the index to read the document from
+	 * @return array|null|false attribute name => value map; null when searchd reports zero attributes; false on connection or protocol failure
+	 */
+	function GetDocinfo($doc_id, $index)
+	{
+		$this->_MBPush();
+
+		if (!($fp = $this->_Connect()))
+		{
+			$this->_MBPop();
+			return false;
+		}
+
+		$req = sphPackU64($doc_id); // request body: 64-bit document ID...
+		$req .= pack('N', strlen($index)) . $index; // ...then the length-prefixed index name
+
+		$len = strlen($req);
+		$req = pack('nnN', SEARCHD_COMMAND_DOCINFO, VER_COMMAND_DOCINFO, $len) . $req; // add header
+		if (!($this->_Send($fp, $req, $len + 8))
+			|| !($response = $this->_GetResponse($fp, VER_COMMAND_DOCINFO)))
+		{
+			$this->_MBPop();
+			return false;
+		}
+
+		$pos = 0;
+		$attrs = array();
+		$rlen = strlen($response);
+		list(, $nattrs) = unpack('N*', substr($response, $pos, 4));
+		$pos += 4;
+
+		if ($nattrs == 0)
+		{
+			$this->_MBPop(); // every exit path must undo _MBPush()
+			return null;
+		}
+
+		for ($i = 0; $i < $nattrs; $i++) // schema: (length-prefixed name, type) per attribute
+		{
+			list(, $len) = unpack('N*', substr($response, $pos, 4));
+			$pos += 4;
+			$field = substr($response, $pos, $len);
+			$pos += $len;
+			list(, $type) = unpack('N*', substr($response, $pos, 4));
+			$pos += 4;
+			$attrs[$field] = $type;
+		}
+
+		$doc = array();
+		foreach ($attrs as $attr => $type) // values follow in the same order as the schema
+		{
+			if ($type == SPH_ATTR_BIGINT)
+			{
+				$doc[$attr] = sphUnpackI64(substr($response, $pos, 8));
+				$pos += 8;
+				continue;
+			}
+
+			// handle floats
+			if ($type == SPH_ATTR_FLOAT)
+			{
+				list(, $uval) = unpack('N*', substr($response, $pos, 4));
+				$pos += 4;
+				list(, $fval) = unpack('f*', pack('L', $uval));
+				$doc[$attr] = $fval;
+				continue;
+			}
+
+			// handle everything else as unsigned ints
+			list(, $val) = unpack('N*', substr($response, $pos, 4));
+			$pos += 4;
+			if ($type == SPH_ATTR_MULTI)
+			{
+				$doc[$attr] = array();
+				$nvalues = $val;
+				while ($nvalues -- > 0 && $pos < $rlen)
+				{
+					list(, $val) = unpack('N*', substr($response, $pos, 4));
+					$pos += 4;
+					$doc[$attr][] = sphFixUint($val);
+				}
+			}
+			elseif ($type == SPH_ATTR_MULTI64)
+			{
+				$doc[$attr] = array();
+				for ($nvalues = $val; $nvalues > 0 && $pos < $rlen; $nvalues -= 2) // count is in 32-bit words, two per value
+				{
+					$doc[$attr][] = sphUnpackU64(substr($response, $pos, 8));
+					$pos += 8;
+				}
+			}
+			elseif ($type == SPH_ATTR_STRING)
+			{
+				$doc[$attr] = substr($response, $pos, $val);
+				$pos += $val;
+			}
+			else
+			{
+				$doc[$attr] = sphFixUint($val);
+			}
+		}
+
+		$this->_MBPop();
+		return $doc;
+	}
}
//
diff -rup sphinx-2.0.4-release/src/searchd.cpp 2.0.4/src/searchd.cpp
--- sphinx-2.0.4-release/src/searchd.cpp 2012-03-01 08:26:39.000000000 +0800
+++ 2.0.4/src/searchd.cpp 2012-03-30 15:33:42.000000000 +0800
@@ -469,7 +469,7 @@ enum SearchdCommand_e
SEARCHD_COMMAND_STATUS = 5,
SEARCHD_COMMAND_FLUSHATTRS = 7,
SEARCHD_COMMAND_SPHINXQL = 8,
-
+ SEARCHD_COMMAND_DOCINFO = 9,
SEARCHD_COMMAND_TOTAL
};
@@ -483,7 +483,8 @@ enum
VER_COMMAND_KEYWORDS = 0x100,
VER_COMMAND_STATUS = 0x100,
VER_COMMAND_FLUSHATTRS = 0x100,
- VER_COMMAND_SPHINXQL = 0x100
+ VER_COMMAND_SPHINXQL = 0x100,
+ VER_COMMAND_DOCINFO = 0x100
};
@@ -7424,6 +7425,108 @@ void SendSearchResponse ( SearchHandler_
}
+/// handle SEARCHD_COMMAND_DOCINFO: reply with the attribute count, a (name, type) pair per attribute, then the values in schema order
+void HandleCommandDocinfo ( int iSock, int iVer, InputBuffer_c & tReq )
+{
+	if ( !CheckCommandVersion ( iVer, VER_COMMAND_DOCINFO, tReq ) )
+		return;
+	SphDocID_t uDocID = tReq.GetUint64 ();
+	CSphString sIndex = tReq.GetString ();
+
+	const ServedIndex_t * pIndex = g_pIndexes->GetRlockedEntry ( sIndex );
+	if ( !pIndex )
+	{
+		tReq.SendErrorReply ( "unknown local index '%s' in search request", sIndex.cstr() );
+		return;
+	}
+
+	CSphQueryResult tResult; // automatic storage, so it is released on every return path
+
+	int iRespLen = 4; // the leading attribute-count dword
+	NetOutputBuffer_c tOut ( iSock );
+	tOut.SendWord ( SEARCHD_OK );
+	tOut.SendWord ( VER_COMMAND_DOCINFO );
+
+	if ( !pIndex->m_pIndex->GetDocinfo ( &tResult, uDocID ) )
+	{
+		tOut.SendInt ( iRespLen );
+		tOut.SendInt ( 0 ); // zero attributes: the client treats this as "no such document"
+		tOut.Flush ();
+		pIndex->Unlock();
+		return;
+	}
+
+	const CSphMatch & tMatch = tResult.m_dMatches [0];
+	const BYTE * pStrings = tResult.m_pStrings;
+	const int iAttrs = tResult.m_tSchema.GetAttrsCount();
+
+	iRespLen += iAttrs * 12; // per attribute: name length dword + type dword + one 4-byte value slot
+
+	for ( int j=0; j<iAttrs; j++ )
+	{
+		const CSphColumnInfo & tCol = tResult.m_tSchema.GetAttr(j);
+		const DWORD uOffset = (DWORD) tMatch.GetAttr ( tCol.m_tLocator ); // used below only as a string offset
+		iRespLen += strlen ( tCol.m_sName.cstr() );
+
+		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET ) {
+			const DWORD * pValues = tMatch.GetAttrMVA ( tCol.m_tLocator, tResult.m_pMva );
+			if ( pValues )
+				iRespLen += pValues[0] * 4;
+		} else if ( tCol.m_eAttrType==SPH_ATTR_STRING ) {
+			if ( uOffset )
+				iRespLen += sphUnpackStr ( pStrings + uOffset, NULL );
+		} else if ( tCol.m_eAttrType==SPH_ATTR_BIGINT ) {
+			iRespLen += 4; // sent as 8 bytes, i.e. 4 more than the base value slot
+		}
+	}
+
+	tOut.SendInt ( iRespLen );
+	tOut.SendInt ( iAttrs );
+
+	for ( int i=0; i<iAttrs; i++ ) {
+		const CSphColumnInfo & tCol = tResult.m_tSchema.GetAttr(i);
+		tOut.SendString ( tCol.m_sName.cstr() );
+		tOut.SendDword ( tCol.m_eAttrType );
+	}
+
+	for ( int j=0; j<iAttrs; j++ )
+	{
+		const CSphColumnInfo & tCol = tResult.m_tSchema.GetAttr(j);
+		const DWORD uOffset = (DWORD) tMatch.GetAttr ( tCol.m_tLocator ); // used below only as a string offset
+		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET ) {
+			const DWORD * pValues = tMatch.GetAttrMVA ( tCol.m_tLocator, tResult.m_pMva );
+			if ( !pValues ) {
+				tOut.SendDword ( 0 ); // empty value list
+			} else {
+				int iValues = *pValues++;
+				tOut.SendDword ( iValues );
+				while ( iValues-- ) {
+					tOut.SendDword ( *pValues++ );
+				}
+			}
+		} else if ( tCol.m_eAttrType==SPH_ATTR_STRING ) {
+			if ( !uOffset ) {
+				tOut.SendDword ( 0 ); // null string
+			} else {
+				const BYTE * pStr;
+				int iLen = sphUnpackStr ( pStrings + uOffset, &pStr );
+				tOut.SendDword ( iLen );
+				tOut.SendBytes ( pStr, iLen );
+			}
+		} else { // NOTE(review): SPH_ATTR_UINT64SET is not special-cased here although the PHP client decodes SPH_ATTR_MULTI64 - confirm
+			if ( tCol.m_eAttrType==SPH_ATTR_FLOAT )
+				tOut.SendFloat ( tMatch.GetAttrFloat ( tCol.m_tLocator ) ); // reinterpret the stored bits, do not convert the integer numerically
+			else if ( tCol.m_eAttrType==SPH_ATTR_BIGINT )
+				tOut.SendUint64 ( tMatch.GetAttr ( tCol.m_tLocator ) ); // full 64-bit value, not a DWORD-truncated copy
+			else
+				tOut.SendDword ( (DWORD) tMatch.GetAttr ( tCol.m_tLocator ) );
+		}
+	}
+	pIndex->Unlock();
+	tOut.Flush ();
+}
+
+
void HandleCommandSearch ( int iSock, int iVer, InputBuffer_c & tReq )
{
MEMORY ( SPH_MEM_SEARCH_NONSQL );
@@ -9907,6 +10010,7 @@ void HandleClientSphinx ( int iSock, con
switch ( iCommand )
{
case SEARCHD_COMMAND_SEARCH: HandleCommandSearch ( iSock, iCommandVer, tBuf ); break;
+ case SEARCHD_COMMAND_DOCINFO: HandleCommandDocinfo ( iSock, iCommandVer, tBuf ); break;
case SEARCHD_COMMAND_EXCERPT: HandleCommandExcerpt ( iSock, iCommandVer, tBuf ); break;
case SEARCHD_COMMAND_KEYWORDS: HandleCommandKeywords ( iSock, iCommandVer, tBuf ); break;
case SEARCHD_COMMAND_UPDATE: HandleCommandUpdate ( iSock, iCommandVer, tBuf ); break;
diff -rup sphinx-2.0.4-release/src/sphinx.cpp 2.0.4/src/sphinx.cpp
--- sphinx-2.0.4-release/src/sphinx.cpp 2012-03-02 03:34:23.000000000 +0800
+++ 2.0.4/src/sphinx.cpp 2012-03-30 15:35:36.000000000 +0800
@@ -1365,6 +1365,7 @@ public:
virtual SphAttr_t * GetKillList () const;
virtual int GetKillListSize () const { return m_iKillListSize; }
virtual bool HasDocid ( SphDocID_t uDocid ) const;
+ virtual bool GetDocinfo (CSphQueryResult *pResult ,SphDocID_t uDocid) const; // fills pResult with a single match for uDocid; returns false when FindDocinfo() has no row for it
virtual const CSphSourceStats & GetStats () const { return m_tStats; }
@@ -12179,6 +12180,29 @@ bool CSphIndex_VLN::HasDocid ( SphDocID_
}
+/// look up uDocid via FindDocinfo() and, if present, describe it in pResult as a one-match result; returns false when not found
+bool CSphIndex_VLN::GetDocinfo ( CSphQueryResult * pResult, SphDocID_t uDocid ) const
+{
+	const DWORD * pRow = FindDocinfo ( uDocid );
+	if ( pRow==NULL )
+		return false;
+
+	// build a single match whose static attribute pointer refers to the found docinfo row
+	CSphSwapVector<CSphMatch> dFound;
+	CSphMatch & tMatch = dFound.Add();
+	tMatch.m_iDocID = uDocid;
+	tMatch.m_pStatic = DOCINFO2ATTRS(pRow);
+
+	// pass on the schema, the match list, and the pointers obtained from m_pMva / m_pStrings
+	pResult->m_tSchema = m_tSchema;
+	pResult->m_dMatches = dFound;
+	pResult->m_iCount = 1;
+	pResult->m_pMva = m_pMva.GetWritePtr();
+	pResult->m_pStrings = m_pStrings.GetWritePtr();
+	return true;
+}
+
+
const DWORD * CSphIndex_VLN::FindDocinfo ( SphDocID_t uDocID ) const
{
if ( m_uDocinfo<=0 )
diff -rup sphinx-2.0.4-release/src/sphinx.h 2.0.4/src/sphinx.h
--- sphinx-2.0.4-release/src/sphinx.h 2012-03-01 17:04:19.000000000 +0800
+++ 2.0.4/src/sphinx.h 2012-03-30 15:36:34.000000000 +0800
@@ -2622,6 +2622,7 @@ public:
virtual int GetKillListSize () const = 0;
virtual bool HasDocid ( SphDocID_t uDocid ) const = 0;
virtual bool IsRT() const { return false; }
+ virtual bool GetDocinfo ( CSphQueryResult *pResult, SphDocID_t uDocid ) const = 0; // pure virtual: each index type supplies its own per-document attribute lookup into pResult
virtual void SetEnableStar ( bool bEnableStar ) { m_bEnableStar = bEnableStar; }
bool IsStarEnabled () const { return m_bEnableStar; }
diff -rup sphinx-2.0.4-release/src/sphinxrt.cpp 2.0.4/src/sphinxrt.cpp
--- sphinx-2.0.4-release/src/sphinxrt.cpp 2012-03-01 09:44:34.000000000 +0800
+++ 2.0.4/src/sphinxrt.cpp 2012-03-30 15:37:38.000000000 +0800
@@ -1052,6 +1052,7 @@ public:
virtual SphAttr_t * GetKillList () const { return NULL; }
virtual int GetKillListSize () const { return 0; }
virtual bool HasDocid ( SphDocID_t ) const { assert ( 0 ); return false; }
+ virtual bool GetDocinfo ( CSphQueryResult *, SphDocID_t ) const { return false; } // not implemented for this index type; report "not found" rather than assert, because the DOCINFO network command can reach this with any served index
virtual int Build ( const CSphVector<CSphSource*> & dSources, int iMemoryLimit, int iWriteBuffer ) { return 0; }
virtual bool Merge ( CSphIndex * pSource, CSphVector<CSphFilterSettings> & dFilters, bool bMergeKillLists ) { return false; }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment