Readability.php 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. <?php
  2. require_once '../Readability.php';
  3. header('Content-Type: text/plain; charset=utf-8');
  4. // get latest Medialens alert
  5. // (change this URL to whatever you'd like to test)
  6. $url = 'http://www.medialens.org/index.php/alerts/alert-archive/alerts-2013/729-thatcher.html';
  7. $html = file_get_contents($url);
  8. // Note: PHP Readability expects UTF-8 encoded content.
  9. // If your content is not UTF-8 encoded, convert it
  10. // first before passing it to PHP Readability.
  11. // Both iconv() and mb_convert_encoding() can do this.
  12. // If we've got Tidy, let's clean up input.
  13. // This step is highly recommended - PHP's default HTML parser
  14. // often does a terrible job and results in strange output.
  15. if (function_exists('tidy_parse_string')) {
  16. $tidy = tidy_parse_string($html, array(), 'UTF8');
  17. $tidy->cleanRepair();
  18. $html = $tidy->value;
  19. }
  20. // give it to Readability
  21. $readability = new Readability($html, $url);
  22. // print debug output?
  23. // useful to compare against Arc90's original JS version -
  24. // simply click the bookmarklet with FireBug's console window open
  25. $readability->debug = false;
  26. // convert links to footnotes?
  27. $readability->convertLinksToFootnotes = true;
  28. // process it
  29. $result = $readability->init();
  30. // does it look like we found what we wanted?
  31. if ($result) {
  32. echo "== Title =====================================\n";
  33. echo $readability->getTitle()->textContent, "\n\n";
  34. echo "== Body ======================================\n";
  35. $content = $readability->getContent()->innerHTML;
  36. // if we've got Tidy, let's clean it up for output
  37. if (function_exists('tidy_parse_string')) {
  38. $tidy = tidy_parse_string($content, array('indent'=>true, 'show-body-only' => true), 'UTF8');
  39. $tidy->cleanRepair();
  40. $content = $tidy->value;
  41. }
  42. echo $content;
  43. } else {
  44. echo 'Looks like we couldn\'t find the content. :(';
  45. }
  46. ?>