@@ -3,14 +3,18 @@ const Tesseract = require('tesseract.js');
33const { find_markdown_files } = require ( './shared' ) ;
44
55const generate_descriptions = async ( chapter_match ) => {
6+ // Escape special regex characters in a user-supplied string so it is treated literally.
7+ const escapeRegExp = ( str ) => str . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' ) ;
8+
69 let re ;
710 if ( chapter_match ) {
811 chapter_match = chapter_match . replace ( / \. m d $ / , '' ) ;
912 chapter_match = chapter_match . replace ( / ^ c o n t e n t [ / \\ ] * / , '' ) ;
10- chapter_match = ( process . platform != 'win32' )
11- ? 'content/' + '(' + chapter_match . replace ( / \/ / g, ')/(' ) + ').md'
12- : 'content\\\\' + '(' + chapter_match . replace ( / \/ / g, ')\\\\(' ) + ').md' ;
13- re = new RegExp ( chapter_match ) ;
13+ const escapedChapter = escapeRegExp ( chapter_match ) ;
14+ const pattern = ( process . platform != 'win32' )
15+ ? 'content/' + '(' + escapedChapter . replace ( / \/ / g, ')/(' ) + ').md'
16+ : 'content\\\\' + '(' + escapedChapter . replace ( / \/ / g, ')\\\\(' ) + ').md' ;
17+ re = new RegExp ( pattern ) ;
1418 } else {
1519 console . log ( 'Please provide an argument of the form: en/2020/performance' ) ;
1620 process . exit ( 1 ) ;
@@ -65,11 +69,12 @@ const generate_descriptions = async (chapter_match) => {
6569 const result = await Tesseract . recognize ( image_path , 'eng' ) ;
6670 const text = result . data . text ;
6771
68- // Basic cleanup: remove newlines, collapse spaces, escape quotes
72+ // Basic cleanup: remove newlines, collapse spaces, escape backslashes and quotes
6973 const description = text
7074 . replace ( / \n / g, ' ' )
7175 . replace ( / \s + / g, ' ' )
7276 . trim ( )
77+ . replace ( / \\ / g, '\\\\' )
7378 . replace ( / " / g, '\\"' ) ;
7479
7580 if ( description ) {
0 commit comments