6
6
class ExportService
7
7
{
8
8
9
-
10
9
/**
 * Convert a page to a self-contained HTML file.
 * Includes required CSS & image content. Images are base64 encoded into the HTML.
 *
 * The export stylesheet is read from the public directory and passed to the
 * 'pages/export' view as a string; the rendered markup is then handed to
 * containHtml() for bundling.
 *
 * @param Page $page The page to export.
 * @return string The rendered HTML after it has been passed through containHtml().
 */
public function pageToContainedHtml(Page $page)
{
    $cssContent = file_get_contents(public_path('/css/export-styles.css'));
    $pageHtml = view('pages/export', ['page' => $page, 'css' => $cssContent])->render();
    return $this->containHtml($pageHtml);
}
21
+
22
/**
 * Convert a page to a PDF file.
 *
 * Renders the 'pages/pdf' view with the export stylesheet, bundles the markup
 * via containHtml() and passes the result to the \PDF facade.
 *
 * @param Page $page The page to export.
 * @return mixed|string Whatever the PDF renderer's output() yields; presumably
 *                      the raw PDF document as a string (TODO confirm against
 *                      the \PDF facade in use).
 */
public function pageToPdf(Page $page)
{
    $cssContent = file_get_contents(public_path('/css/export-styles.css'));
    $pageHtml = view('pages/pdf', ['page' => $page, 'css' => $cssContent])->render();

    // Bundle the markup first so the renderer receives the contained version.
    $containedHtml = $this->containHtml($pageHtml);
    $pdf = \PDF::loadHTML($containedHtml);
    return $pdf->output();
}
20
35
36
+ /**
37
+ * Bundle the contents of an HTML file so that it is self-contained.
38
+ * @param $htmlContent
39
+ * @return mixed|string
40
+ */
41
+ protected function containHtml ($ htmlContent )
42
+ {
21
43
$ imageTagsOutput = [];
22
- preg_match_all ("/\<img.*src\=(\'| \")(.*?)(\'| \").*?\>/i " , $ pageHtml , $ imageTagsOutput );
44
+ preg_match_all ("/\<img.*src\=(\'| \")(.*?)(\'| \").*?\>/i " , $ htmlContent , $ imageTagsOutput );
23
45
24
46
// Replace image src with base64 encoded image strings
25
47
if (isset ($ imageTagsOutput [0 ]) && count ($ imageTagsOutput [0 ]) > 0 ) {
@@ -34,12 +56,12 @@ public function pageToContainedHtml(Page $page)
34
56
$ imageContent = file_get_contents ($ pathString );
35
57
$ imageEncoded = 'data:image/ ' . pathinfo ($ pathString , PATHINFO_EXTENSION ) . ';base64, ' . base64_encode ($ imageContent );
36
58
$ newImageString = str_replace ($ srcString , $ imageEncoded , $ oldImgString );
37
- $ pageHtml = str_replace ($ oldImgString , $ newImageString , $ pageHtml );
59
+ $ htmlContent = str_replace ($ oldImgString , $ newImageString , $ htmlContent );
38
60
}
39
61
}
40
62
41
63
$ linksOutput = [];
42
- preg_match_all ("/\<a.*href\=(\'| \")(.*?)(\'| \").*?\>/i " , $ pageHtml , $ linksOutput );
64
+ preg_match_all ("/\<a.*href\=(\'| \")(.*?)(\'| \").*?\>/i " , $ htmlContent , $ linksOutput );
43
65
44
66
// Replace relative link hrefs with absolute URLs
45
67
if (isset ($ linksOutput [0 ]) && count ($ linksOutput [0 ]) > 0 ) {
@@ -49,13 +71,45 @@ public function pageToContainedHtml(Page $page)
49
71
if (strpos (trim ($ srcString ), 'http ' ) !== 0 ) {
50
72
$ newSrcString = url ($ srcString );
51
73
$ newLinkString = str_replace ($ srcString , $ newSrcString , $ oldLinkString );
52
- $ pageHtml = str_replace ($ oldLinkString , $ newLinkString , $ pageHtml );
74
+ $ htmlContent = str_replace ($ oldLinkString , $ newLinkString , $ htmlContent );
53
75
}
54
76
}
55
77
}
56
78
57
79
// Replace any relative links with system domain
58
- return $ pageHtml ;
80
+ return $ htmlContent ;
81
+ }
82
+
83
/**
 * Converts the page contents into simple plain text.
 * This method filters any bad looking content to
 * provide a nice final output.
 *
 * Runs of spaces are reduced to a single space, runs of line breaks and
 * non-breaking spaces are reduced to one blank line, HTML entities are
 * decoded, and the page name is prepended as a title.
 *
 * @param Page $page The page whose text and name are read.
 * @return string The page name, a blank line, then the cleaned text.
 */
public function pageToPlainText(Page $page)
{
    // Cast so that a null text value never reaches the regex functions.
    $text = (string) $page->text;

    // Replace multiple spaces with single spaces.
    // preg_replace() returns null on failure; keep the previous text then
    // instead of silently discarding the page content.
    $singleSpaced = preg_replace('/\ {2,}/', ' ', $text);
    if ($singleSpaced !== null) {
        $text = $singleSpaced;
    }

    // Reduce multiple horrid whitespace characters (line feed, carriage
    // return, non-breaking space) to a single blank line. The /u modifier
    // makes preg_replace() fail on invalid UTF-8, hence the null guard.
    $collapsed = preg_replace('/[\n\r\x{A0}]{2,}/su', "\n\n", $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }

    // Explicit flags and charset: the defaults differ between PHP versions,
    // and older defaults leave single-quote entities encoded.
    $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Add title
    return $page->name . "\n\n" . $text;
}
60
102
61
- }
103
+ }
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
0 commit comments