View Single Post
  #16  
Old 10-07-2021, 18:40
yologuy yologuy is offline
Friend
 
Join Date: Nov 2016
Posts: 18
Rept. Given: 0
Rept. Rcvd 0 Times in 0 Posts
Thanks Given: 5
Thanks Rcvd at 23 Times in 9 Posts
yologuy Reputation: 0
Hi, for this purpose I made 2 scripts. The first one deciphers all the video data:

Code:
import os
import shutil

# Size in bytes of the XOR-encrypted file header stored at the end of each file.
headerSize = 48


def decrypt_file(encryptedFile, decryptedFile):
    """Write a decrypted copy of ``encryptedFile`` to ``decryptedFile``.

    The source file is left untouched. On the copy, three things happen:

    1. The 4 bytes located ``headerSize * 2 - 3`` bytes before the end of the
       file are overwritten with zeros (per the original author's comment,
       this removes the user ID).
    2. The last ``headerSize`` bytes are read and XOR-ed with ``0x2E``.
    3. The result is written over the first ``headerSize`` bytes of the file.

    The trailing bytes are not truncated, so the output has the same size as
    the input.

    Args:
        encryptedFile: Path of the encrypted input file.
        decryptedFile: Path the decrypted copy is written to.

    Raises:
        ValueError: If the input is too small to contain the trailing data.
            Nothing is written in that case.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Distance from the end of the file to the 4-byte field that is zeroed.
    id_offset = headerSize * 2 - 3

    # Validate before copying so a bad input never leaves a junk output file
    # behind (seeking before the start of the file would fail half-way).
    if os.path.getsize(encryptedFile) < id_offset:
        raise ValueError(
            f"{encryptedFile!r} is too small to be an encrypted video "
            f"(needs at least {id_offset} bytes)"
        )

    shutil.copy(encryptedFile, decryptedFile)

    with open(decryptedFile, "r+b") as video:
        # Remove the user ID
        video.seek(-id_offset, os.SEEK_END)
        video.write(bytes(4))

        # Read the encrypted header stored at the very end of the file
        video.seek(-headerSize, os.SEEK_END)
        encrypted_header = video.read(headerSize)

        # XOR every byte with the fixed key to recover the original header
        decrypted_header = bytes(b ^ 0x2E for b in encrypted_header)

        # Write the original file header back at the start of the file
        video.seek(0, os.SEEK_SET)
        video.write(decrypted_header)


# Directory holding the downloaded (encrypted) videos of one tutorial.
inputVideoDir = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}"
# Decrypted copies go to a sibling directory named "<input dir name>_clear".
destVideoDir = os.path.join(os.path.dirname(inputVideoDir), f"{os.path.basename(inputVideoDir)}_clear")
# exist_ok: allow re-running the script after a previous (partial) run.
os.makedirs(destVideoDir, exist_ok=True)

for file in os.listdir(inputVideoDir):
    inputVideoFile = os.path.join(inputVideoDir, file)

    # Only extension-less regular files are treated as encrypted videos;
    # anything with a dot in its name, and any sub-directory, is skipped.
    if "." in file or not os.path.isfile(inputVideoFile):
        continue

    mp4VideoFile = os.path.join(destVideoDir, f"{file}.mp4")
    decrypt_file(inputVideoFile, mp4VideoFile)

# Single file
# inputVideoFile = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}\{VIDEO_ID}"
# mp4VideoFile = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}\{VIDEO_ID}.mp4"
# decrypt_file(inputVideoFile, mp4VideoFile)

Then I made another script that reads the file "page.html" (see line 103), creates the proper folder structure and renames the decrypted files.
For the moment I copy/paste the content into page.html; I get that content from the source code of the online video page. Usually the URL is https://fr.tuto.com/compte/achats/video/{TUTORIAL_ID}/player/

Code:
import os
import shutil
from html.parser import HTMLParser

# Default directory containing the decrypted "<uuid>.mp4" files to organise.
DIR_TO_RENAME = r"D:\tuto.com\{USER_ID}\{TUTORIAL_ID}_clear"


class MyHTMLParser(HTMLParser):
    """Rename and sort decrypted videos using the table of contents of a page.

    While the HTML is fed to the parser it reacts to three kinds of elements:

    * ``<strong class="title-16">``: the following text becomes the current
      chapter folder name.
    * ``<div class="player-toc-chapter-video">``: its ``id`` ("video-<n>")
      and ``data-playlist-index`` attributes identify the next video.
    * ``<span class="player-toc-chapter-video-title">``: the following text
      is the video title; ``<uuid>.mp4`` is then moved to
      ``<chapter folder>/<index>_<title>.mp4``.

    Args:
        root_dir: Directory holding the ``<uuid>.mp4`` files. When omitted,
            the module-level ``DIR_TO_RENAME`` is used.
    """

    def __init__(self, root_dir=None):
        super().__init__()
        self.root_dir = root_dir
        self.read_next_chapter = False
        self.last_folder = ""
        # Initialises attrs, read_next_title, _video_uuid and _video_id.
        self.reset_parsing_data()

    def clean_filename(self, filename):
        """Return ``filename`` without characters that are unsafe in a name.

        Surrounding whitespace is stripped, the characters
        ``\\ / : * ? " < > |`` are removed, and every pair of consecutive
        spaces is removed as well.
        """
        filename = filename.strip()
        forbidden_chars = ["\\",
                           "/",
                           ":",
                           "*",
                           "?",
                           "\"",
                           "<",
                           ">",
                           "|",
                           "  ",
                           ]
        for char in forbidden_chars:
            filename = filename.replace(char, "")

        # Removing a trailing forbidden character can expose a trailing
        # space (e.g. "Title ?"), so strip once more.
        return filename.strip()

    def reset_parsing_data(self):
        """Forget the per-video state once a video has been handled."""
        self.attrs = []
        self.read_next_title = False
        self._video_uuid = -1   # Name of the file
        self._video_id = -1     # Order of the file in the TOC

    @property
    def video_uuid(self):
        """Numeric name of the current video file.

        Raises:
            ValueError: If no video element has been seen yet.
        """
        if self._video_uuid == -1:
            raise ValueError("Video Uuid is not defined")
        return self._video_uuid

    @video_uuid.setter
    def video_uuid(self, value):
        # The element id looks like "video-<number>"; keep the number only.
        self._video_uuid = int(value.replace("video-", ""))

    @property
    def video_id(self):
        """Position of the current video in the table of contents.

        Raises:
            ValueError: If no video element has been seen yet.
        """
        if self._video_id == -1:
            raise ValueError("Video Id is not defined")
        return self._video_id

    @video_id.setter
    def video_id(self, value):
        self._video_id = int(value)

    def get_attr(self, attr_name):
        """Return the value of ``attr_name`` on the last start tag, or None."""
        for name, value in self.attrs:
            if name == attr_name:
                return value
        return None

    def handle_starttag(self, tag, attrs):
        """Record the tag's attributes and flag what the next text means."""
        self.attrs = attrs
        if tag == "div":
            if self.get_attr("class") == "player-toc-chapter-video":
                self.video_uuid = self.get_attr("id")
                self.video_id = self.get_attr("data-playlist-index")
        elif tag == "strong":
            if self.get_attr("class") == "title-16":
                self.read_next_chapter = True
        elif tag == "span":
            if self.get_attr("class") == "player-toc-chapter-video-title":
                self.read_next_title = True

    def handle_data(self, data):
        """Use a text node as a video title and/or chapter name when flagged.

        Raises:
            ValueError: If a title is found before any video element.
            OSError: If the video file cannot be moved.
        """
        # Whitespace-only nodes (indentation between tags) carry no name and
        # must not consume the pending title/chapter flag.
        if not data.strip():
            return

        if self.read_next_title:
            root = DIR_TO_RENAME if self.root_dir is None else self.root_dir
            # Go through the properties so a missing video element fails
            # loudly instead of silently targeting "-1.mp4".
            original_file = os.path.join(root, f"{self.video_uuid}.mp4")

            # Clean the title on its own: cleaning the already-assembled
            # "<id>_<title>.mp4" would leave the title's surrounding
            # whitespace/newlines inside the file name.
            video_name = f"{self.video_id}_{self.clean_filename(data)}.mp4"
            final_dir = os.path.join(root, self.last_folder)
            os.makedirs(final_dir, exist_ok=True)

            shutil.move(original_file, os.path.join(final_dir, video_name))
            self.reset_parsing_data()

        if self.read_next_chapter:
            self.last_folder = self.clean_filename(data)
            self.read_next_chapter = False


parser = MyHTMLParser()

# page.html (the saved source of the tutorial's player page) is read from the
# directory containing this script.
html_page = os.path.join(os.path.dirname(__file__), "page.html")
with open(html_page, "r", encoding="utf-8") as f:
    parser.feed(f.read())
# Flush any text the parser is still buffering after the last tag.
parser.close()
So my usual way to dump a tutorial is to open the Tuto.com application, download the full course to my local drive, and run the first script.
Then I open the tuto.com website, log in, access the video course (dump the webpage), copy/paste it into page.html, and run the second script.

Final note: I'm in the process of checking whether source_file also contains the User ID (for the moment I think not, but I'm not 100% sure). I will get back once I have checked it.
Reply With Quote
The Following 3 Users Say Thank You to yologuy For This Useful Post:
bolo2002 (10-09-2021), niculaita (10-08-2021), Stingered (10-08-2021)