@@ -495,7 +495,6 @@ def _find_dr_record_by_name(vd, path, encoding):
495495 return root_dir_record
496496
497497 splitpath = utils .split_path (path )
498-
499498 currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
500499
501500 entry = root_dir_record
@@ -518,7 +517,6 @@ def _find_dr_record_by_name(vd, path, encoding):
518517 index = lo
519518 if index != len (thelist ) and thelist [index ].file_ident == currpath :
520519 child = thelist [index ]
521-
522520 if child is None :
523521 # We failed to find this component of the path, so break out of the
524522 # loop and fail.
@@ -533,7 +531,6 @@ def _find_dr_record_by_name(vd, path, encoding):
533531 # We found the last child we are looking for; return it.
534532 if not splitpath :
535533 return child
536-
537534 if not child .is_dir ():
538535 break
539536 entry = child
@@ -718,8 +715,8 @@ def _seek_to_extent(self, extent):
718715 self ._cdfp .seek (extent * self .logical_block_size )
719716
720717 @lru_cache (maxsize = 256 )
721- def _find_iso_record (self , iso_path ):
722- # type: (bytes) -> dr.DirectoryRecord
718+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
719+ # type: (bytes, str ) -> dr.DirectoryRecord
723720 """
724721 An internal method to find a directory record on the ISO given an ISO
725722 path. If the entry is found, it returns the directory record object
@@ -731,11 +728,11 @@ def _find_iso_record(self, iso_path):
731728 Returns:
732729 The directory record entry representing the entry on the ISO.
733730 """
734- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
731+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
735732
736733 @lru_cache (maxsize = 256 )
737- def _find_rr_record (self , rr_path ):
738- # type: (bytes) -> dr.DirectoryRecord
734+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
735+ # type: (bytes, str ) -> dr.DirectoryRecord
739736 """
740737 An internal method to find a directory record on the ISO given a Rock
741738 Ridge path. If the entry is found, it returns the directory record
@@ -755,7 +752,7 @@ def _find_rr_record(self, rr_path):
755752
756753 splitpath = utils .split_path (rr_path )
757754
758- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
755+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
759756
760757 entry = root_dir_record
761758
@@ -806,13 +803,13 @@ def _find_rr_record(self, rr_path):
806803 if not child .is_dir ():
807804 break
808805 entry = child
809- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
806+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
810807
811808 raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
812809
813810 @lru_cache (maxsize = 256 )
814- def _find_joliet_record (self , joliet_path ):
815- # type: (bytes) -> dr.DirectoryRecord
811+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
812+ # type: (bytes, str ) -> dr.DirectoryRecord
816813 """
817814 An internal method to find a directory record on the ISO given a Joliet
818815 path. If the entry is found, it returns the directory record object
@@ -826,7 +823,7 @@ def _find_joliet_record(self, joliet_path):
826823 """
827824 if self .joliet_vd is None :
828825 raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
829- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
826+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
830827
831828 @lru_cache (maxsize = 256 )
832829 def _find_udf_record (self , udf_path ):
@@ -2425,8 +2422,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24252422 utils .copy_data (data_len , blocksize , data_fp , outfp )
24262423
24272424 def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2428- joliet_path ):
2429- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2425+ joliet_path , encoding = None ):
2426+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], str ) -> None
24302427 """
24312428 An internal method to fetch a single file from the ISO and write it out
24322429 to the file object.
@@ -2446,13 +2443,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24462443 if joliet_path is not None :
24472444 if self .joliet_vd is None :
24482445 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2449- found_record = self ._find_joliet_record (joliet_path )
2446+ encoding = encoding or 'utf-16_be'
2447+ found_record = self ._find_joliet_record (joliet_path , encoding )
24502448 elif rr_path is not None :
24512449 if not self .rock_ridge :
24522450 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2453- found_record = self ._find_rr_record (rr_path )
2451+ encoding = encoding or 'utf-8'
2452+ found_record = self ._find_rr_record (rr_path , encoding )
24542453 elif iso_path is not None :
2455- found_record = self ._find_iso_record (iso_path )
2454+ encoding = encoding or 'utf-8'
2455+ found_record = self ._find_iso_record (iso_path , encoding )
24562456 else :
24572457 raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
24582458
@@ -3487,8 +3487,8 @@ def _rm_joliet_dir(self, joliet_path):
34873487
34883488 return num_bytes_to_remove
34893489
3490- def _get_iso_entry (self , iso_path ):
3491- # type: (bytes) -> dr.DirectoryRecord
3490+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3491+ # type: (bytes, str ) -> dr.DirectoryRecord
34923492 """
34933493 Internal method to get the directory record for an ISO path.
34943494
@@ -3500,10 +3500,10 @@ def _get_iso_entry(self, iso_path):
35003500 if self ._needs_reshuffle :
35013501 self ._reshuffle_extents ()
35023502
3503- return self ._find_iso_record (iso_path )
3503+ return self ._find_iso_record (iso_path , encoding )
35043504
3505- def _get_rr_entry (self , rr_path ):
3506- # type: (bytes) -> dr.DirectoryRecord
3505+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3506+ # type: (bytes, str ) -> dr.DirectoryRecord
35073507 """
35083508 Internal method to get the directory record for a Rock Ridge path.
35093509
@@ -3516,10 +3516,10 @@ def _get_rr_entry(self, rr_path):
35163516 if self ._needs_reshuffle :
35173517 self ._reshuffle_extents ()
35183518
3519- return self ._find_rr_record (rr_path )
3519+ return self ._find_rr_record (rr_path , encoding )
35203520
3521- def _get_joliet_entry (self , joliet_path ):
3522- # type: (bytes) -> dr.DirectoryRecord
3521+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3522+ # type: (bytes, str ) -> dr.DirectoryRecord
35233523 """
35243524 Internal method to get the directory record for a Joliet path.
35253525
@@ -3532,7 +3532,7 @@ def _get_joliet_entry(self, joliet_path):
35323532 if self ._needs_reshuffle :
35333533 self ._reshuffle_extents ()
35343534
3535- return self ._find_joliet_record (joliet_path )
3535+ return self ._find_joliet_record (joliet_path , encoding )
35363536
35373537 def _get_udf_entry (self , udf_path ):
35383538 # type: (str) -> udfmod.UDFFileEntry
@@ -4199,6 +4199,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41994199 iso_path = None
42004200 rr_path = None
42014201 udf_path = None
4202+ encoding = None
42024203 num_paths = 0
42034204 for key , value in kwargs .items ():
42044205 if key == 'blocksize' :
@@ -4229,6 +4230,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42294230 num_paths += 1
42304231 elif value is not None :
42314232 raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4233+ elif key == 'encoding' :
4234+ encoding = value
42324235 else :
42334236 raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
42344237
@@ -4239,7 +4242,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42394242 self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
42404243 else :
42414244 self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4242- joliet_path )
4245+ joliet_path , encoding )
42434246
42444247 def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
42454248 # type: (str, str, int) -> None
@@ -5475,6 +5478,8 @@ def list_children(self, **kwargs):
54755478 if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
54765479 if value is not None :
54775480 num_paths += 1
5481+ elif key in ('encoding' ):
5482+ continue
54785483 else :
54795484 raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
54805485
@@ -5492,12 +5497,15 @@ def list_children(self, **kwargs):
54925497 else :
54935498 use_rr = False
54945499 if 'joliet_path' in kwargs :
5495- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5500+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5501+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
54965502 elif 'rr_path' in kwargs :
5497- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5503+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5504+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
54985505 use_rr = True
54995506 else :
5500- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5507+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5508+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
55015509
55025510 for c in _yield_children (rec , use_rr ):
55035511 yield c
@@ -5642,8 +5650,8 @@ def rm_isohybrid(self):
56425650
56435651 self .isohybrid_mbr = None
56445652
5645- def full_path_from_dirrecord (self , rec , rockridge = False ):
5646- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5653+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5654+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, str ) -> str
56475655 """
56485656 Get the absolute path of a directory record.
56495657
@@ -5662,6 +5670,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56625670 if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
56635671 encoding = 'utf-16_be'
56645672
5673+ if user_encoding :
5674+ encoding = user_encoding
56655675 # A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
56665676 # Always return / here.
56675677 if rec .is_root :
@@ -5701,6 +5711,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
57015711 encoding = rec .file_ident .encoding
57025712 else :
57035713 encoding = 'utf-8'
5714+ if user_encoding :
5715+ encoding = user_encoding
57045716 udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
57055717 while udf_rec is not None :
57065718 ident = udf_rec .file_identifier ()
@@ -5913,13 +5925,13 @@ def walk(self, **kwargs):
59135925 while dirs :
59145926 dir_record = dirs .popleft ()
59155927
5916- relpath = self .full_path_from_dirrecord (dir_record ,
5917- rockridge = path_type == 'rr_path' )
5928+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5929+ user_encoding = user_encoding )
59185930 dirlist = []
59195931 filelist = []
59205932 dirdict = {}
59215933
5922- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5934+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
59235935 if child is None or child .is_dot () or child .is_dotdot ():
59245936 continue
59255937
0 commit comments