@@ -482,7 +482,6 @@ def _find_dr_record_by_name(vd, path, encoding):
482482 return root_dir_record
483483
484484 splitpath = utils .split_path (path )
485-
486485 currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
487486
488487 entry = root_dir_record
@@ -505,7 +504,6 @@ def _find_dr_record_by_name(vd, path, encoding):
505504 index = lo
506505 if index != len (thelist ) and thelist [index ].file_ident == currpath :
507506 child = thelist [index ]
508-
509507 if child is None :
510508 # We failed to find this component of the path, so break out of the
511509 # loop and fail.
@@ -520,7 +518,6 @@ def _find_dr_record_by_name(vd, path, encoding):
520518 # We found the last child we are looking for; return it.
521519 if not splitpath :
522520 return child
523-
524521 if not child .is_dir ():
525522 break
526523 entry = child
@@ -704,9 +701,9 @@ def _seek_to_extent(self, extent):
704701 """
705702 self ._cdfp .seek (extent * self .logical_block_size )
706703
707- @functools . lru_cache (maxsize = 256 )
708- def _find_iso_record (self , iso_path ):
709- # type: (bytes) -> dr.DirectoryRecord
704+ @lru_cache (maxsize = 256 )
705+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
706+ # type: (bytes, str ) -> dr.DirectoryRecord
710707 """
711708 An internal method to find a directory record on the ISO given an ISO
712709 path. If the entry is found, it returns the directory record object
@@ -718,11 +715,11 @@ def _find_iso_record(self, iso_path):
718715 Returns:
719716 The directory record entry representing the entry on the ISO.
720717 """
721- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
718+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
722719
723- @functools . lru_cache (maxsize = 256 )
724- def _find_rr_record (self , rr_path ):
725- # type: (bytes) -> dr.DirectoryRecord
720+ @lru_cache (maxsize = 256 )
721+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
722+ # type: (bytes, str ) -> dr.DirectoryRecord
726723 """
727724 An internal method to find a directory record on the ISO given a Rock
728725 Ridge path. If the entry is found, it returns the directory record
@@ -742,7 +739,7 @@ def _find_rr_record(self, rr_path):
742739
743740 splitpath = utils .split_path (rr_path )
744741
745- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
742+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
746743
747744 entry = root_dir_record
748745
@@ -793,13 +790,13 @@ def _find_rr_record(self, rr_path):
793790 if not child .is_dir ():
794791 break
795792 entry = child
796- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
793+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
797794
798795 raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
799796
800- @functools . lru_cache (maxsize = 256 )
801- def _find_joliet_record (self , joliet_path ):
802- # type: (bytes) -> dr.DirectoryRecord
797+ @lru_cache (maxsize = 256 )
798+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
799+ # type: (bytes, str ) -> dr.DirectoryRecord
803800 """
804801 An internal method to find a directory record on the ISO given a Joliet
805802 path. If the entry is found, it returns the directory record object
@@ -813,7 +810,7 @@ def _find_joliet_record(self, joliet_path):
813810 """
814811 if self .joliet_vd is None :
815812 raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
816- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
813+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
817814
818815 @functools .lru_cache (maxsize = 256 )
819816 def _find_udf_record (self , udf_path ):
@@ -2412,8 +2409,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24122409 utils .copy_data (data_len , blocksize , data_fp , outfp )
24132410
24142411 def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2415- joliet_path ):
2416- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2412+ joliet_path , encoding = None ):
2413+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str]) -> None
24172414 """
24182415 An internal method to fetch a single file from the ISO and write it out
24192416 to the file object.
@@ -2433,13 +2430,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24332430 if joliet_path is not None :
24342431 if self .joliet_vd is None :
24352432 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2436- found_record = self ._find_joliet_record (joliet_path )
2433+ encoding = encoding or 'utf-16_be'
2434+ found_record = self ._find_joliet_record (joliet_path , encoding )
24372435 elif rr_path is not None :
24382436 if not self .rock_ridge :
24392437 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2440- found_record = self ._find_rr_record (rr_path )
2438+ encoding = encoding or 'utf-8'
2439+ found_record = self ._find_rr_record (rr_path , encoding )
24412440 elif iso_path is not None :
2442- found_record = self ._find_iso_record (iso_path )
2441+ encoding = encoding or 'utf-8'
2442+ found_record = self ._find_iso_record (iso_path , encoding )
24432443 else :
24442444 raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
24452445
@@ -3471,8 +3471,8 @@ def _rm_joliet_dir(self, joliet_path):
34713471
34723472 return num_bytes_to_remove
34733473
3474- def _get_iso_entry (self , iso_path ):
3475- # type: (bytes) -> dr.DirectoryRecord
3474+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3475+ # type: (bytes, str ) -> dr.DirectoryRecord
34763476 """
34773477 Internal method to get the directory record for an ISO path.
34783478
@@ -3484,10 +3484,10 @@ def _get_iso_entry(self, iso_path):
34843484 if self ._needs_reshuffle :
34853485 self ._reshuffle_extents ()
34863486
3487- return self ._find_iso_record (iso_path )
3487+ return self ._find_iso_record (iso_path , encoding )
34883488
3489- def _get_rr_entry (self , rr_path ):
3490- # type: (bytes) -> dr.DirectoryRecord
3489+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3490+ # type: (bytes, str ) -> dr.DirectoryRecord
34913491 """
34923492 Internal method to get the directory record for a Rock Ridge path.
34933493
@@ -3500,10 +3500,10 @@ def _get_rr_entry(self, rr_path):
35003500 if self ._needs_reshuffle :
35013501 self ._reshuffle_extents ()
35023502
3503- return self ._find_rr_record (rr_path )
3503+ return self ._find_rr_record (rr_path , encoding )
35043504
3505- def _get_joliet_entry (self , joliet_path ):
3506- # type: (bytes) -> dr.DirectoryRecord
3505+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3506+ # type: (bytes, str ) -> dr.DirectoryRecord
35073507 """
35083508 Internal method to get the directory record for a Joliet path.
35093509
@@ -3516,7 +3516,7 @@ def _get_joliet_entry(self, joliet_path):
35163516 if self ._needs_reshuffle :
35173517 self ._reshuffle_extents ()
35183518
3519- return self ._find_joliet_record (joliet_path )
3519+ return self ._find_joliet_record (joliet_path , encoding )
35203520
35213521 def _get_udf_entry (self , udf_path ):
35223522 # type: (str) -> udfmod.UDFFileEntry
@@ -4183,6 +4183,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41834183 iso_path = None
41844184 rr_path = None
41854185 udf_path = None
4186+ encoding = None
41864187 num_paths = 0
41874188 for key , value in kwargs .items ():
41884189 if key == 'blocksize' :
@@ -4213,6 +4214,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42134214 num_paths += 1
42144215 elif value is not None :
42154216 raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4217+ elif key == 'encoding' :
4218+ encoding = value
42164219 else :
42174220 raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
42184221
@@ -4223,7 +4226,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42234226 self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
42244227 else :
42254228 self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4226- joliet_path )
4229+ joliet_path , encoding )
42274230
42284231 def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
42294232 # type: (str, str, int) -> None
@@ -5459,6 +5462,8 @@ def list_children(self, **kwargs):
54595462 if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
54605463 if value is not None :
54615464 num_paths += 1
5465+ elif key == 'encoding' :
5466+ continue
54625467 else :
54635468 raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
54645469
@@ -5476,12 +5481,15 @@ def list_children(self, **kwargs):
54765481 else :
54775482 use_rr = False
54785483 if 'joliet_path' in kwargs :
5479- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5484+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5485+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
54805486 elif 'rr_path' in kwargs :
5481- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5487+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5488+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
54825489 use_rr = True
54835490 else :
5484- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5491+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5492+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
54855493
54865494 for c in _yield_children (rec , use_rr ):
54875495 yield c
@@ -5626,8 +5634,8 @@ def rm_isohybrid(self):
56265634
56275635 self .isohybrid_mbr = None
56285636
5629- def full_path_from_dirrecord (self , rec , rockridge = False ):
5630- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5637+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5638+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str]) -> str
56315639 """
56325640 Get the absolute path of a directory record.
56335641
@@ -5646,6 +5654,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56465654 if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
56475655 encoding = 'utf-16_be'
56485656
5657+ if user_encoding :
5658+ encoding = user_encoding
56495659 # A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
56505660 # Always return / here.
56515661 if rec .is_root :
@@ -5685,6 +5695,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56855695 encoding = rec .file_ident .encoding
56865696 else :
56875697 encoding = 'utf-8'
5698+ if user_encoding :
5699+ encoding = user_encoding
56885700 udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
56895701 while udf_rec is not None :
56905702 ident = udf_rec .file_identifier ()
@@ -5893,13 +5905,13 @@ def walk(self, **kwargs):
58935905 while dirs :
58945906 dir_record = dirs .popleft ()
58955907
5896- relpath = self .full_path_from_dirrecord (dir_record ,
5897- rockridge = path_type == 'rr_path' )
5908+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5909+ user_encoding = user_encoding )
58985910 dirlist = []
58995911 filelist = []
59005912 dirdict = {}
59015913
5902- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5914+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
59035915 if child is None or child .is_dot () or child .is_dotdot ():
59045916 continue
59055917
0 commit comments