Hi, for this purpose I made two scripts. The first one deciphers all the video data:
Code:
import os
import shutil
# Size in bytes of the XOR-obfuscated header that is read from the end of each file.
headerSize = 48


def decrypt_file(encryptedFile, decryptedFile):
    """Write a playable copy of *encryptedFile* to *decryptedFile*.

    The source file is copied, then the copy is patched in place:

    1. A 4-byte ID field located ``headerSize * 2 - 3`` bytes before the end
       of the file is overwritten with zeros (the user ID, per the original
       author's note).
    2. The last ``headerSize`` bytes are read, XOR-ed with ``0x2E`` and
       written over the first ``headerSize`` bytes of the file.

    The trailing bytes are left in place; the file is not truncated.

    Args:
        encryptedFile: Path of the downloaded, obfuscated video file.
        decryptedFile: Path of the patched copy to create (overwritten if it
            already exists).

    Raises:
        ValueError: If the source file is too small to contain the trailer.
        OSError: If the source cannot be read or the destination written.
    """
    # Distance from the end of the file to the start of the ID field.
    idOffsetFromEnd = headerSize * 2 - 3

    # Validate before copying so a bad input never leaves a useless
    # destination file behind (seeking before the start of a file fails).
    fileSize = os.path.getsize(encryptedFile)
    if fileSize < idOffsetFromEnd:
        raise ValueError(
            f"{encryptedFile!r} is {fileSize} bytes; expected at least "
            f"{idOffsetFromEnd} bytes"
        )

    shutil.copy(encryptedFile, decryptedFile)
    with open(decryptedFile, "r+b") as videoFile:
        # Blank out the ID field.
        videoFile.seek(-idOffsetFromEnd, os.SEEK_END)
        videoFile.write(bytes(4))

        # Read the obfuscated header stored at the end of the file.
        videoFile.seek(-headerSize, os.SEEK_END)
        encryptedHeader = videoFile.read(headerSize)

        # Undo the single-byte XOR.
        decryptedHeader = bytes(b ^ 0x2E for b in encryptedHeader)

        # Restore the header at the start of the file.
        videoFile.seek(0, os.SEEK_SET)
        videoFile.write(decryptedHeader)
# Batch mode: decrypt every downloaded video found in inputVideoDir into a
# sibling directory named "<inputVideoDir basename>_clear".
inputVideoDir = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}"
destVideoDir = os.path.join(os.path.dirname(inputVideoDir), f"{os.path.basename(inputVideoDir)}_clear")
# exist_ok lets the script be re-run without failing on the existing output directory.
os.makedirs(destVideoDir, exist_ok=True)
for file in os.listdir(inputVideoDir):
    inputVideoFile = os.path.join(inputVideoDir, file)
    # Only extension-less regular files are processed; names containing a dot
    # and sub-directories are skipped.
    if "." in file or not os.path.isfile(inputVideoFile):
        continue
    mp4VideoFile = os.path.join(destVideoDir, f"{file}.mp4")
    decrypt_file(inputVideoFile, mp4VideoFile)

# Single file
# inputVideoFile = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}\{VIDEO_ID}"
# mp4VideoFile = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}\{VIDEO_ID}.mp4"
# decrypt_file(inputVideoFile, mp4VideoFile)
Then I made another script that reads the file "page.html" (see line 103), creates the proper folder structure and renames the decrypted files.
For the moment I copy/paste the content into page.html by hand; I get it from the page source of the online video player. The URL is usually https://fr.tuto.com/compte/achats/video/{TUTORIAL_ID}/player/
Code:
import os
import shutil
from html.parser import HTMLParser
# Directory holding the decrypted "<uuid>.mp4" files to sort and rename.
DIR_TO_RENAME = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}_clear"


class MyHTMLParser(HTMLParser):
    """Rename decrypted videos while parsing a saved player page.

    The parser reacts to three kinds of elements:

    * ``<strong class="title-16">``: its text becomes the current chapter
      folder name.
    * ``<div class="player-toc-chapter-video">``: its ``id`` (``video-<n>``)
      gives the name of the file on disk and its ``data-playlist-index``
      gives the position used as a prefix of the new name.
    * ``<span class="player-toc-chapter-video-title">``: its text is the video
      title; when it is seen, ``<n>.mp4`` is moved to
      ``<chapter>/<index>_<title>.mp4``.

    Args:
        dir_to_rename: Directory containing the ``<n>.mp4`` files. When
            ``None`` (the default) the module-level ``DIR_TO_RENAME`` is used.
    """

    # Translation table deleting the characters removed from generated names.
    _FORBIDDEN_TABLE = str.maketrans("", "", '\\/:*?"<>| ')

    def __init__(self, dir_to_rename=None):
        super().__init__()
        self.dir_to_rename = dir_to_rename
        self.read_next_chapter = False
        self.last_folder = ""
        # Initialises attrs, read_next_title, _video_uuid and _video_id.
        self.reset_parsing_data()

    def clean_filename(self, filename):
        """Return *filename* stripped and with the forbidden characters removed."""
        return filename.strip().translate(self._FORBIDDEN_TABLE)

    def reset_parsing_data(self):
        """Forget the per-video state once a video has been handled."""
        self.attrs = []
        self.read_next_title = False
        self._video_uuid = -1  # Name of the file
        self._video_id = -1  # Order of the file in the TOC

    @property
    def video_uuid(self):
        """Numeric file name of the current video.

        Raises:
            ValueError: If no video element has been seen yet.
        """
        if self._video_uuid == -1:
            raise ValueError("Video Uuid is not defined")
        return self._video_uuid

    @video_uuid.setter
    def video_uuid(self, value):
        self._video_uuid = int(value.replace("video-", ""))

    @property
    def video_id(self):
        """Position of the current video in the table of contents.

        Raises:
            ValueError: If no video element has been seen yet.
        """
        if self._video_id == -1:
            raise ValueError("Video Id is not defined")
        return self._video_id

    @video_id.setter
    def video_id(self, value):
        self._video_id = int(value)

    def get_attr(self, attr_name):
        """Return the value of *attr_name* on the last start tag, or ``None``."""
        for name, value in self.attrs:
            if name == attr_name:
                return value
        return None

    def handle_starttag(self, tag, attrs):
        """Record the tag's attributes and arm the matching state flag."""
        self.attrs = attrs
        css_class = self.get_attr("class")
        if tag == "div":
            if css_class == "player-toc-chapter-video":
                self.video_uuid = self.get_attr("id")
                self.video_id = self.get_attr("data-playlist-index")
        elif tag == "strong":
            if css_class == "title-16":
                self.read_next_chapter = True
        elif tag == "span":
            if css_class == "player-toc-chapter-video-title":
                self.read_next_title = True

    def handle_data(self, data):
        """Use *data* as a video title and/or chapter name, per the armed flags."""
        if self.read_next_title:
            base_dir = DIR_TO_RENAME if self.dir_to_rename is None else self.dir_to_rename
            # Read through the validating properties so a title that is not
            # preceded by a video element fails with a clear ValueError
            # instead of trying to move a file named "-1.mp4".
            original_file = os.path.join(base_dir, f"{self.video_uuid}.mp4")
            video_name = self.clean_filename(f"{self.video_id}_{data}.mp4")
            final_dir = os.path.join(base_dir, self.last_folder)
            os.makedirs(final_dir, exist_ok=True)
            shutil.move(original_file, os.path.join(final_dir, video_name))
            self.reset_parsing_data()
        if self.read_next_chapter:
            self.last_folder = self.clean_filename(data)
            self.read_next_chapter = False
# Parse the saved player page located next to this script; the parser moves
# and renames the video files as it encounters each title.
parser = MyHTMLParser()
html_page = os.path.join(os.path.dirname(__file__), "page.html")
with open(html_page, "r", encoding="utf-8") as f:
    parser.feed(f.read())
# Flush any text still buffered by the parser after the last tag.
parser.close()
So my usual way to dump a tutorial is to open the Tuto.com application, download the full course to my local drive, and run the first script.
Then I open the tuto.com website, log in, open the video course, dump the web page, copy/paste it into page.html, and run the second script.
Final note: I'm in the process of checking whether source_file also contains the user ID (for the moment I think not, but I'm not 100% sure). I will get back once I have checked it.