#!/bin/python3
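"""Scan the TypeScript sources under ./ts/ for usages of every localization key
defined in _locales/en/messages.json.

Results are written to the --output-dir directory:
  - found_strings.csv       keys that are used, with their file:line locations
  - not_found_strings.txt   keys with no detected usage
  - potential_matches.csv   single-quoted occurrences of the not-found keys
  - not_in_master_list.csv  used keys missing from the master string list

Example invocation (the script path below is illustrative; run from the repository root):
    python3 tools/localization/find_strings.py --debug
"""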
import os
import sys
import csv
import re
import glob
import argparse
import json
# Allow importing from the localization and util directories.
# NOTE: auto-import tools will also prepend "tools." to these import paths; that
# will not work and needs to be removed from the import paths.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from util.time import ExecutionTimer

timer = ExecutionTimer()

from localization.parseDictionary import parse_dictionary
from localization.regex import localization_regex
from util.listUtils import missingFromSet, removeFromSet
from util.fileUtils import makeDirIfNotExists, removeFileIfExists
from util.logger import console

parser = argparse.ArgumentParser()
parser.add_argument(
    "--debug", action="store_true", help="Enable debug mode, print debug messages"
)
parser.add_argument(
    "--output-dir",
    type=str,
    default="./tools/localization/analysis",
    help="Output directory for the results",
)
parser.add_argument(
    "--master-strings",
    type=str,
    default="./tools/localization/input/master_string_list.txt",
    help="Path to the master string list",
)
parser.add_argument(
    "--to-be-removed",
    type=str,
    default="./tools/localization/input/to_be_removed_list.txt",
    help="Path to the list of strings to be removed",
)
args = parser.parse_args()

# Configuration
intentionallyUnusedStrings = []
DEBUG = args.debug

if DEBUG:
    console.enableDebug()

OUTPUT_DIR = args.output_dir
FOUND_STRINGS_PATH = os.path.join(OUTPUT_DIR, "found_strings.csv")
NOT_FOUND_STRINGS_PATH = os.path.join(OUTPUT_DIR, "not_found_strings.txt")
POTENTIAL_MATCHES_PATH = os.path.join(OUTPUT_DIR, "potential_matches.csv")
NOT_IN_MASTER_LIST_PATH = os.path.join(OUTPUT_DIR, "not_in_master_list.csv")
EN_PATH = "_locales/en/messages.json"
MASTER_STRINGS_PATH = args.master_strings
TO_BE_REMOVED_PATH = args.to_be_removed

# Remove files that are to be generated if they exist
removeFileIfExists(FOUND_STRINGS_PATH)
removeFileIfExists(NOT_FOUND_STRINGS_PATH)
removeFileIfExists(POTENTIAL_MATCHES_PATH)
removeFileIfExists(NOT_IN_MASTER_LIST_PATH)


def flush():
    if not DEBUG:
        sys.stdout.flush()


# File search setup
console.info("Scanning for localized strings...")
files = []
files_to_ignore = ["LocalizerKeys.ts"]
ignore_patterns = [re.compile(pattern) for pattern in files_to_ignore]
console.debug(f"Ignoring files: {', '.join(files_to_ignore)}")


def should_ignore_file(file_path):
    return any(pattern.search(file_path) for pattern in ignore_patterns)


for extension in ("*.ts", "*.tsx"):
    files.extend(
        [
            y
            for x in os.walk("./ts/")
            for y in glob.glob(os.path.join(x[0], extension))
            if not should_ignore_file(y)
        ]
    )
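
# At this point `files` holds every *.ts / *.tsx file under ./ts/, excluding the ignored files.
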
foundStringsAndLocations = {}  # Dictionary to store found strings and their locations
notFoundStrings = set()  # Set to store not found strings

total_files = len(files) * 1.1
bar_length = 25
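

# Render two inline progress bars on one line: "Overall" tracks the outer loop
# (localization keys, or not-found strings in the second pass) and "Stage" tracks
# progress through the file list. Does nothing in debug mode so bar redraws do
# not interleave with debug log output.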
def progress_bar(current, total, overallCurrent, overallTotal):
    if DEBUG:
        return
    percent = 100.0 * current / total
    percentOverall = 100.0 * overallCurrent / overallTotal
    sys.stdout.write("\r")
    sys.stdout.write(
        "Overall: [{:{}}] {:>3}%".format(
            "=" * int(percentOverall / (100.0 / bar_length)),
            bar_length,
            int(percentOverall),
        )
    )
    sys.stdout.write(
        " Stage: [{:{}}] {:>3}%".format(
            "=" * int(percent / (100.0 / bar_length)), bar_length, int(percent)
        )
    )
    sys.stdout.flush()


current_line_number = 0
current_file_number = 0
line_count = 0
keys = []

with open(EN_PATH, "r", encoding="utf-8") as messages_file:
    messages_dict = json.load(messages_file)

# Read the json file line by line and collect all keys
with open(EN_PATH, "r", encoding="utf-8") as messages_file:
    for line in messages_file:
        for match in re.finditer(r'"([^"]+)":', line):
            keys.append(match.group(1))

total_line_numbers = len(keys)
console.debug(f"Total keys: {total_line_numbers}")
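

# Strip the leading "./" so the reported file:line locations render as clickable
# links (e.g. in the VS Code terminal).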
def format_vscode_path(file_path):
    return file_path.replace("./", "")


# Search every key across all source files, recording file:line locations
for key in keys:
    if key in intentionallyUnusedStrings:
        continue

    searchedLine = localization_regex(key)
    locations = []
    current_file_number = 0  # To keep track of the current file number for the progress bar

    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as file_content:
            content = file_content.read()
            for line_number, line in enumerate(content.split("\n"), start=1):
                if searchedLine.search(line):
                    locations.append(f"{format_vscode_path(file_path)}:{line_number}")
        current_file_number += 1
        progress_bar(
            current_file_number, total_files, current_line_number, total_line_numbers
        )
    current_line_number += 1

    if locations:
        console.debug(f"{key} - Found in {len(locations)} location(s)")
        foundStringsAndLocations[key] = locations
    else:
        console.debug(f"{key} - Not Found")
        notFoundStrings.add(key)

progress_bar(1, 1, 1, 1)
flush()

# Writing found strings and their locations to a CSV file
makeDirIfNotExists(FOUND_STRINGS_PATH)
with open(FOUND_STRINGS_PATH, "w", encoding="utf-8", newline="") as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["String", "Phrase", "Locations"])  # Header row
    for foundString, locations in foundStringsAndLocations.items():
        # Write each found string and its locations; locations are joined into a single string for CSV simplicity
        csvwriter.writerow(
            [foundString, messages_dict[foundString], "; ".join(locations)]
        )

# Writing not found strings to a text file as before
makeDirIfNotExists(NOT_FOUND_STRINGS_PATH)
with open(NOT_FOUND_STRINGS_PATH, "w", encoding="utf-8") as not_found_file:
    for notFound in notFoundStrings:
        not_found_file.write(f"{notFound}\n")

sys.stdout.write("\n")

# Print the result statistics and file paths (linkable)
console.info(f"Found {len(foundStringsAndLocations)} strings in {len(files)} files")
console.info(f"Found strings and their locations written to: {FOUND_STRINGS_PATH}")
console.info(
    f"Identified {len(notFoundStrings)} not-found strings; written to: {NOT_FOUND_STRINGS_PATH}"
)

# Search for not found strings in single quotes across all files
console.info("Searching for potential matches for not found strings...")
current_not_found_number = 0
current_file_number = 0
total_not_found_strings = len(notFoundStrings)
potentialMatches = {}  # Dictionary to store potential matches: {string: [file1, file2, ...]}

for string in notFoundStrings:
    console.debug(f"Searching for: {string}")
    current_file_number = 0
    # Pattern to search for 'STRING' (escaped so regex metacharacters in keys are matched literally)
    quotedStringPattern = re.compile(r"'{}'".format(re.escape(string)))
    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as file_content:
            if quotedStringPattern.search(file_content.read()):
                console.debug(f"Potential match found: {string} in {file_path}")
                if string not in potentialMatches:
                    potentialMatches[string] = []
                potentialMatches[string].append(file_path)
        current_file_number += 1
        progress_bar(
            current_file_number,
            total_files,
            current_not_found_number,
            total_not_found_strings,
        )
    current_not_found_number += 1


# Find the line numbers of pattern matches within a specific file
def find_line_numbers(file_path, pattern):
    line_numbers = []
    with open(file_path, "r", encoding="utf-8") as file:
        for i, line in enumerate(file, start=1):
            if pattern.search(line):
                line_numbers.append(i)
    return line_numbers


# Process the matched files to add line numbers
for string, matched_files in potentialMatches.items():
    match_details = []
    for file_path in matched_files:
        quotedStringPattern = re.compile(r"'{}'".format(re.escape(string)))
        match_details.extend(
            f"{file_path}:{line}"
            for line in find_line_numbers(file_path, quotedStringPattern)
        )
    potentialMatches[string] = match_details  # Replace file list with detailed file:line matches

# Writing potential matches to CSV, now with line numbers
makeDirIfNotExists(POTENTIAL_MATCHES_PATH)
with open(POTENTIAL_MATCHES_PATH, "w", encoding="utf-8", newline="") as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["String", "Potential File Matches"])
    for string, matches in potentialMatches.items():
        csvwriter.writerow([string, "; ".join(matches)])

sys.stdout.write("\n")

# Print the result statistics and file paths (linkable)
console.info(
    f"Potential matches found for {len(potentialMatches)}/{len(notFoundStrings)} not found strings"
)
console.info(f"Potential matches written to: {POTENTIAL_MATCHES_PATH}")

# Identify found strings that are not in the master string list
try:
    masterStringList = set()
    with open(MASTER_STRINGS_PATH, "r", encoding="utf-8") as masterListFile:
        for line in masterListFile:
            masterStringList.add(line.strip())
    notInMasterList = missingFromSet(
        set(foundStringsAndLocations.keys()), masterStringList
    )

    try:
        slatedForRemovalList = set()
        with open(TO_BE_REMOVED_PATH, "r", encoding="utf-8") as slatedForRemovalFile:
            for line in slatedForRemovalFile:
                slatedForRemovalList.add(line.strip())
        notInMasterList = removeFromSet(notInMasterList, slatedForRemovalList)
    except FileNotFoundError:
        console.warn(
            f"Strings to be removed list not found at: {TO_BE_REMOVED_PATH}. Skipping comparison."
        )

    # Output the found strings not in the master list to a CSV file
    makeDirIfNotExists(NOT_IN_MASTER_LIST_PATH)
    with open(NOT_IN_MASTER_LIST_PATH, "w", encoding="utf-8", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["String", "Phrase", "Locations"])  # Header row
        for notInMaster in notInMasterList:
            # Write each string with its phrase; locations are joined into a single string for CSV simplicity
            csvwriter.writerow(
                [
                    notInMaster,
                    messages_dict[notInMaster],
                    "; ".join(foundStringsAndLocations[notInMaster]),
                ]
            )

    console.info(f"Found {len(notInMasterList)} strings not in the master list")
    console.info(
        f"Found strings not in the master list written to: {NOT_IN_MASTER_LIST_PATH}"
    )
except FileNotFoundError:
    console.warn(
        f"Master string list not found at: {MASTER_STRINGS_PATH}. Skipping comparison."
    )

if DEBUG:
    console.warn(
        "This script ran with debug enabled. Please disable debug mode for a cleaner output and faster execution."
    )

timer.stop()