Context Navigation

← Previous Changeset
Next Changeset →

Changeset 685224

Timestamp:

03/21/2013 02:30:23 PM (13 years ago)

Author:

chr15

Message:

Updated Core Library to 2.0.5

Location:

summy/trunk/lib/Summy

Files:

6 edited

Core.php (modified) (9 diffs)
Filter/En/Stemmer.php (modified) (2 diffs)
Filter/En/Text.php (modified) (2 diffs)
Filter/Gr/Stemmer.php (modified) (25 diffs)
Score/Position.php (modified) (4 diffs)
Score/Term.php (modified) (7 diffs)

Legend:

Unmodified
Added
Removed

summy/trunk/lib/Summy/Core.php

-                      r664156
+                      r685224
 /**
  * @package     Summy
  * @version     $Id: Core.php 105 2013-02-05 00:44:51Z Tefra $
+ * @version     $Id: Core.php 128 2013-03-17 23:44:07Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
             'language' => 'gr',
             'termScore' => 'tfisf',
             'positionScore' => 'news',
+            'positionScore' => 'article',
             'minWordsLimit' => 6,
             'maxWordsLimit' => 20,
 …
         $text = $textFilter->clear($text);
         $text = $textFilter->process($text);
         $terms = array_unique(explode(" ", $text));
+        $terms = array_filter(explode(" ", $text));
         foreach($terms AS $term)
+        {
 …
+            {
                 $term = $filter->filter($term);
+                if($term === false)
+                {
+                    break;
+                }
+            }
 …
             $i = 0;
             $total = ceil(($this->config['rate'] / 100)  * $this->totalSentences);
+            $total = ceil(($this->config['rate'] / 100) * $this->totalSentences);
             $indexes = array_keys($this->sentenceScores);
             //Grab the top x sentences
 …
     /**
      * Process senteces, words and produce the core statistics
+     * Process sentences, words and produce the core statistics
      * - Loop through sentences
      * - Extra text filtering
 …
     /**
      * Fetch the summary body
+     * Returns the summary body
      * @return string
      */
 …
     /**
      * Fetch the original text with the summary sentences highlighted
+     * Returns the original text with the summary sentences highlighted
      * @return string
      */
 …
     /**
+     *
      * @var type
+     * @var type
      */
     static $instances = array();
     /**
+     *
+     *
      * @return type
      */

summy/trunk/lib/Summy/Filter/En/Stemmer.php

-                      r664156
+                      r685224
+ *
  * @package     Summy
  * @version     $Id: Stemmer.php 88 2013-02-04 13:25:53Z Tefra $
+ * @version     $Id: Stemmer.php 128 2013-03-17 23:44:07Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
                 break;
             case 'S':
+            case 'T':
                 self::replace($word, 'BILITI', 'BLE', 0) OR self::replace($word, 'ALITI', 'AL', 0) OR self::replace($word, 'IVITI', 'IVE', 0);
                 break;

summy/trunk/lib/Summy/Filter/En/Text.php

-                      r664156
+                      r685224
 /**
  * @package     Summy
  * @version     $Id: Text.php 103 2013-02-04 21:56:04Z Tefra $
+ * @version     $Id: Text.php 128 2013-03-17 23:44:07Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
             return $string;
+        }
         // Make Paragraphs from html
         $string = str_replace('</p>', "\n", $string);

summy/trunk/lib/Summy/Filter/Gr/Stemmer.php

-                      r664156
+                      r685224
+ *
  * @package     Summy
  * @version     $Id: Stemmer.php 88 2013-02-04 13:25:53Z Tefra $
+ * @version     $Id: Stemmer.php 112 2013-03-06 21:56:52Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
         //Step1
+        if(preg_match($this->step1regexp, $w))
+        {
+            preg_match($this->step1regexp, $w, $fp);
+        if(preg_match($this->step1regexp, $w, $fp))
+        {
             $stem = $fp[1];
             $suffix = $fp[2];
 …
         // Step 2a
         $re = '/^(.+?)(ΑΔΕΣ|ΑΔΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 2b
         $re = '/^(.+?)(ΕΔΕΣ|ΕΔΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 2c
         $re = '/^(.+?)(ΟΥΔΕΣ|ΟΥΔΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 2d
         $re = '/^(.+?)(ΕΩΣ|ΕΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 3
         $re = '/^(.+?)(ΙΑ|ΙΟΥ|ΙΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 4
         $re = '/^(.+?)(ΙΚΑ|ΙΚΟ|ΙΚΟΥ|ΙΚΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
+        }
+        if(preg_match($re2, $w))
+        {
+            preg_match($re2, $w, $fp);
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re2, $w, $fp))
+        {
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re3 = '/^(.+?)(ΑΓΑΝΕ|ΗΣΑΝΕ|ΟΥΣΑΝΕ|ΙΟΝΤΑΝΕ|ΙΟΤΑΝΕ|ΙΟΥΝΤΑΝΕ|ΟΝΤΑΝΕ|ΟΤΑΝΕ|ΟΥΝΤΑΝΕ|ΗΚΑΝΕ|ΗΘΗΚΑΝΕ)$/u';
+        if(preg_match($re3, $w))
+        {
+            preg_match($re3, $w, $fp);
+        if(preg_match($re3, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
+        }
+        if(preg_match($re2, $w))
+        {
+            preg_match($re2, $w, $fp);
+        if(preg_match($re2, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re4 = '/^(.+?)(ΗΣΕΤΕ)$/u';
+        if(preg_match($re4, $w))
+        {
+            preg_match($re4, $w, $fp);
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re3, $w))
+        {
+            preg_match($re3, $w, $fp);
+        if(preg_match($re4, $w, $fp))
+        {
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re3, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5d
         $re = '/^(.+?)(ΟΝΤΑΣ|ΩΝΤΑΣ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5e
         $re = '/^(.+?)(ΟΜΑΣΤΕ|ΙΟΜΑΣΤΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re2 = '/^(.+?)(ΙΕΣΤΕ)$/u';
+        if(preg_match($re2, $w))
+        {
+            preg_match($re2, $w, $fp);
+        if(preg_match($re2, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
+        }
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re2 = '/^(.+?)(ΗΘΗΚΑ|ΗΘΗΚΕΣ|ΗΘΗΚΕ)$/u';
+        if(preg_match($re2, $w))
+        {
+            preg_match($re2, $w, $fp);
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re2, $w, $fp))
+        {
+            $stem = $fp[1];
+            $w = $stem;
+            $test1 = false;
+        }
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5h
         $re = '/^(.+?)(ΟΥΣΑ|ΟΥΣΕΣ|ΟΥΣΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re = '/^(.+?)(ΑΓΑ|ΑΓΕΣ|ΑΓΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5j
         $re = '/^(.+?)(ΗΣΕ|ΗΣΟΥ|ΗΣΑ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5k
         $re = '/^(.+?)(ΗΣΤΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5l
         $re = '/^(.+?)(ΟΥΝΕ|ΗΣΟΥΝΕ|ΗΘΟΥΝΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         //Step 5l
         $re = '/^(.+?)(ΟΥΜΕ|ΗΣΟΥΜΕ|ΗΘΟΥΜΕ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         $re = '/^(.+?)(ΜΑΤΑ|ΜΑΤΩΝ|ΜΑΤΟΣ)$/u';
         $re2 = '/^(.+?)(Α|ΑΓΑΤΕ|ΑΓΑΝ|ΑΕΙ|ΑΜΑΙ|ΑΝ|ΑΣ|ΑΣΑΙ|ΑΤΑΙ|ΑΩ|Ε|ΕΙ|ΕΙΣ|ΕΙΤΕ|ΕΣΑΙ|ΕΣ|ΕΤΑΙ|Ι|ΙΕΜΑΙ|ΙΕΜΑΣΤΕ|ΙΕΤΑΙ|ΙΕΣΑΙ|ΙΕΣΑΣΤΕ|ΙΟΜΑΣΤΑΝ|ΙΟΜΟΥΝ|ΙΟΜΟΥΝΑ|ΙΟΝΤΑΝ|ΙΟΝΤΟΥΣΑΝ|ΙΟΣΑΣΤΑΝ|ΙΟΣΑΣΤΕ|ΙΟΣΟΥΝ|ΙΟΣΟΥΝΑ|ΙΟΤΑΝ|ΙΟΥΜΑ|ΙΟΥΜΑΣΤΕ|ΙΟΥΝΤΑΙ|ΙΟΥΝΤΑΝ|Η|ΗΔΕΣ|ΗΔΩΝ|ΗΘΕΙ|ΗΘΕΙΣ|ΗΘΕΙΤΕ|ΗΘΗΚΑΤΕ|ΗΘΗΚΑΝ|ΗΘΟΥΝ|ΗΘΩ|ΗΚΑΤΕ|ΗΚΑΝ|ΗΣ|ΗΣΑΝ|ΗΣΑΤΕ|ΗΣΕΙ|ΗΣΕΣ|ΗΣΟΥΝ|ΗΣΩ|Ο|ΟΙ|ΟΜΑΙ|ΟΜΑΣΤΑΝ|ΟΜΟΥΝ|ΟΜΟΥΝΑ|ΟΝΤΑΙ|ΟΝΤΑΝ|ΟΝΤΟΥΣΑΝ|ΟΣ|ΟΣΑΣΤΑΝ|ΟΣΑΣΤΕ|ΟΣΟΥΝ|ΟΣΟΥΝΑ|ΟΤΑΝ|ΟΥ|ΟΥΜΑΙ|ΟΥΜΑΣΤΕ|ΟΥΝ|ΟΥΝΤΑΙ|ΟΥΝΤΑΝ|ΟΥΣ|ΟΥΣΑΝ|ΟΥΣΑΤΕ|Υ|ΥΣ|Ω|ΩΝ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem . "ΜΑ";
+        }
+        if(preg_match($re2, $w) && $test1)
+        {
+            preg_match($re2, $w, $fp);
+        if(preg_match($re2, $w, $fp) && $test1)
+        {
             $stem = $fp[1];
             $w = $stem;
 …
         // Step 7 (ΠΑΡΑΘΕΤΙΚΑ)
         $re = '/^(.+?)(ΕΣΤΕΡ|ΕΣΤΑΤ|ΟΤΕΡ|ΟΤΑΤ|ΥΤΕΡ|ΥΤΑΤ|ΩΤΕΡ|ΩΤΑΤ)$/u';
+        if(preg_match($re, $w))
+        {
+            preg_match($re, $w, $fp);
+        if(preg_match($re, $w, $fp))
+        {
             $stem = $fp[1];
             $w = $stem;

summy/trunk/lib/Summy/Score/Position.php

-                      r664156
+                      r685224
 /**
  * @package     Summy
  * @version     $Id: Position.php 93 2013-02-04 18:26:25Z Tefra $
+ * @version     $Id: Position.php 117 2013-03-16 16:37:14Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
     /**
      * Return the Position Weight for a sentence based on the Baxendale investigation
+     * Returns the Position Weight for a sentence based on the Baxendale investigation
+     *
      * Baxendale (1958) investigated a sample of 200 paragraphs to determine
 …
     /**
      * Return the Position Weight for a sentence based on the hypothesis that
      * first paragraphs/sentences are the most meaningfull to a document, which
+     * Returns the Position Weight for a sentence based on the hypothesis that
+     * first paragraphs/sentences are the most meaningful to a document, which
      * applies to small articles, like newspaper news.
+     *
 …
      * @return float
      */
     public function news($totalParagraphs, $sentencesInParagraph, $paragraph, $sentence)
+    public function article($totalParagraphs, $sentencesInParagraph, $paragraph, $sentence)
+    {
         return (($totalParagraphs - $paragraph + 1) / $totalParagraphs) * (($sentencesInParagraph - $sentence + 1) / $sentencesInParagraph);

summy/trunk/lib/Summy/Score/Term.php

-                      r664156
+                      r685224
 /**
  * @package     Summy
  * @version     $Id: Term.php 93 2013-02-04 18:26:25Z Tefra $
+ * @version     $Id: Term.php 125 2013-03-16 17:49:26Z Tefra $
  * @author      Christodoulos Tsoulloftas
  * @copyright   Copyright 2011-2013, http://www.komposta.net
 …
     /**
      * Language identifier (gr,en)
      * @var string
+     * @var string
      */
     private $_language = null;
 …
     /**
      * Database adapter
      * @var object
+     * @var object
      */
     private $_dbAdapter = null;
 …
     /**
      * Rank sentences by TF-ISF http://en.wikipedia.org/wiki/Tf-idf
+     * Rank sentences by TF-IDF http://en.wikipedia.org/wiki/Tf-idf
+     *
      * @param integer $totalSentences
 …
         $sql = $db->query("SELECT COUNT(*) as total FROM document WHERE language = ?", array($language))->current();
         $totalDocuments = $sql['total'] + 1;
+        $terms = $db->platform->quoteValueList(array_keys($tf));
+        $sql = $db->query("SELECT id, documents FROM term WHERE id IN ({$terms}) AND language = ?", array($language));
+        $terms = implode(',', array_map(array($db->driver->getConnection()->getResource(), 'quote'), array_keys($tf)));
+        $sql = $db->query("SELECT term, documents FROM term WHERE term IN ({$terms}) AND language = ?", array($language));
         foreach($sql AS $row)
+        {
             $docFreq[$row['id']] = $row['documents'];
+            $docFreq[$row['term']] = $row['documents'];
+        }
 …
     public function tfridf($totalSentences, $tf = array(), $sf = array())
+    {
         $wordScore = $docFreq =  $totFreq = array();
+        $wordScore = $docFreq = $totFreq = array();
         $totalWords = array_sum($tf);
         $db = $this->getAdapter();
 …
         $sql = $db->query("SELECT COUNT(*) as total FROM document WHERE language = ?", array($language))->current();
         $totalDocuments = $sql['total'] + 1;
         $terms = $db->platform->quoteValueList(array_keys($tf));
         $sql = $db->query("SELECT id, frequency, documents FROM term WHERE id IN ({$terms}) AND language = ?", array($language));
+        $terms = implode(',', array_map(array($db->driver->getConnection()->getResource(), 'quote'), array_keys($tf)));
+        $sql = $db->query("SELECT term, frequency, documents FROM term WHERE term IN ({$terms}) AND language = ?", array($language));
         foreach($sql AS $row)
+        {
             $docFreq[$row['id']] = $row['documents'];
             $totFreq[$row['id']] = $row['frequency'];
+            $docFreq[$row['term']] = $row['documents'];
+            $totFreq[$row['term']] = $row['frequency'];
+        }
         foreach($tf AS $word => $frequency)
+        {
             $docFreq[$word] += 1;
             $totFreq[$word] += $frequency;
+            $docFreq[$word] = isset($docFreq[$word]) ? $docFreq[$word] + 1 : 1;
+            $totFreq[$word] = isset($totFreq[$word]) ? $totFreq[$word] + $frequency : $frequency;
             // Normalized frequency
             $_tf = $frequency / $totalWords;

Note: See TracChangeset for help on using the changeset viewer.

Trac UI Preferences

Download in other formats: