fixed video upload and reencoding, refactored court processing
bzhang1945 committed Sep 29, 2023
1 parent 3a6e5c2 commit 80eb10a
Showing 6 changed files with 153 additions and 99 deletions.
4 changes: 3 additions & 1 deletion src/main.py
@@ -38,8 +38,10 @@ def main(video_path):
modelrunner.run()
people_output, ball_output = modelrunner.fetch_output()
output_video_path = 'tmp/court_video.mp4'
output_video_path_reenc = 'tmp/court_video_reenc.mp4'

processrunner = ProcessRunner(video_path, people_output, ball_output, output_video_path)
processrunner = ProcessRunner(video_path, people_output, ball_output, output_video_path,
output_video_path_reenc)
processrunner.run()
results = processrunner.get_results()
return results
17 changes: 9 additions & 8 deletions src/modelrunner.py
@@ -17,14 +17,14 @@ def __init__(self, video_path, model_vars) -> None:
self.frame_reduction_factor = model_vars['frame_reduction_factor']


def drop_frames(self, input_path) -> None:
def drop_frames(self, input_path) -> str:
"""
Alters the input video fps to 1 / reduction_factor. Irreversible operation.
Alters the input video fps to 1 / reduction_factor. Stores the reduced video at output_path and returns that path.
"""
dummy_path = 'tmp/temp.mp4'
output_path = 'tmp/temp.mp4'
video = cv2.VideoCapture(input_path)
nframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
output_video = cv2.VideoWriter(dummy_path, cv2.VideoWriter_fourcc(*'mp4v'),
output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
int(video.get(cv2.CAP_PROP_FPS)/2), (int(video.get(
cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
for i in range(nframes):
@@ -36,17 +36,18 @@ def drop_frames(self, input_path) -> None:

video.release()
output_video.release()
os.remove(input_path)
os.rename(dummy_path, input_path)
#os.remove(input_path)
#os.rename(output_path, input_path)
return output_path


def run(self):
"""
Executes StrongSORT models and its related video pre- and post- processing.
"""
# comment first two lines out to exclude running the model
self.drop_frames(self.video_path)
subprocess.run(['bash', 'src/StrongSORT-YOLO/run_tracker.sh'])
# self.drop_frames(self.video_path)
# subprocess.run(['bash', 'src/StrongSORT-YOLO/run_tracker.sh'])
with open('tmp/output.pickle', 'rb') as f:
self.output_dict = pickle.load(f)

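The body of the frame-dropping loop is collapsed in the drop_frames hunk above. A self-contained sketch of the same halve-the-FPS pattern follows (illustrative only — the committed loop may differ in detail, and like the visible code it hard-codes FPS/2 rather than using frame_reduction_factor):

import cv2

def drop_frames_sketch(input_path: str, output_path: str = 'tmp/temp.mp4') -> str:
    """Write every other frame at half the source FPS (illustrative only)."""
    video = cv2.VideoCapture(input_path)
    nframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                             int(video.get(cv2.CAP_PROP_FPS) / 2), size)
    for i in range(nframes):
        ok, frame = video.read()
        if not ok:
            break
        if i % 2 == 0:  # keep every other frame
            writer.write(frame)
    video.release()
    writer.release()
    return output_path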
85 changes: 3 additions & 82 deletions src/processing/courtline_detect.py
@@ -82,77 +82,9 @@ def __init__(self, video_path:str, display_images:bool=False):
else:
self._HOMOGRAPHY = self._detect_courtlines()

def render_video(self,states:list,players:dict,filename:str,fps:int=30):
'''
Takes into player position data, applied homography,
and renders video stored in filename
@param states, list of dictionaries,
each represent a frame with state info in chronological order
@param players, dictionary of players where keys are players
@param filename, file path from project root where video is saved
@param fps, frames per second expected of produced video
'''
players=players.keys()
# Create a blank image to use as the background for each frame
background = cv.cvtColor(self._TRUTH_COURT_MAP,cv.COLOR_GRAY2BGR)
height, width, _ = background.shape

# Initialize the video writer
fourcc = cv.VideoWriter_fourcc(*'mp4v')
video_writer = cv.VideoWriter(filename, fourcc, fps, (width,height))

# Define initial positions for each player
player_state = {}
for player in players:
player_state.update({player:{'pos':(0,0),
'color':(random.randint(0,256),random.randint(0,256),random.randint(0,256))}})

# find duration of video
dur = states[-1]["frameno"]
states += [{"frameno":dur+fps,"players":{}}]
frame_index = 0
# Loop through each time step
for t in range(1,dur+10):
# Create a copy of the background image to draw the points on
frame = background.copy()

# Get dictionary of positions at each frame
while (states[frame_index]["frameno"]<=t):
state = states[frame_index]
player_info = state['players']
for player in players:
if player in player_info:
pd = player_info[player]
ps = player_state[player]
x, y = (pd['xmin']+pd['xmax'])/2.0, pd['ymax']-5
x1, y1 = self._transform_point(x,y)
x0, y0 = ps['pos']
x1, y1 = (2*x1+x0)/3.0, (2*y1+y0)/3.0
ps.update({'pos':(x1, y1)})
if frame_index>=len(states)-2>= 0 or states[frame_index+1]["frameno"] > t: # release if at end of contig
break
frame_index += 1


# Loop through each point and draw it on the frame
for player in players:
pos = player_state[player]['pos']
pos = (int(pos[0]),int(pos[1]))
color = player_state[player]['color']
font = cv.FONT_HERSHEY_SIMPLEX
thickness = 2
font_scale = 1
radius = 10
text_width = cv.getTextSize(player, font, font_scale, thickness)[0][0]
cv.circle(img=frame, center=pos, radius=radius, color=color, thickness=-1)
cv.putText(img=frame,text=player,org=(pos[0]-(text_width//2),pos[1]-radius-10),
fontFace=font,fontScale=font_scale,color=color,thickness=thickness,lineType=cv.LINE_AA)

# Write the frame to the video writer
video_writer.write(frame)

# Release the video writer
video_writer.release()
def get_homography(self):
return self._HOMOGRAPHY


def _detect_courtlines(self):
'Finds best homography'
@@ -552,17 +484,6 @@ def _apply_hough(self,img:np.ndarray, lines:list):
cv.line(out,(x1,y1),(x2,y2),[0,0,255])
return out

def _transform_point(self,x:float,y:float):
'''
Applies court homography to single point
@param x,y pixel positions of point on court video
@returns transformed pixels x,y positions on true court
'''
point = np.array([x, y], dtype=np.float32)
point = point.reshape((1, 1, 2))
transformed_point = cv.perspectiveTransform(point, self._HOMOGRAPHY)
tx, ty = transformed_point[0, 0]
return tx, ty

def _apply_gray_homography(self,im_src:np.ndarray, pts_src:list, pts_dst=None, or_mask=False):
'''
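The rendering code removed here is not gone: it moves into the new src/processing/video_render.py below, with the homography handed off explicitly. A rough usage sketch of the new split (paraphrasing the ProcessRunner changes further down; the file paths and the pre-populated GameState are illustrative):

from state import GameState
from processing import courtline_detect, video_render

state = GameState()                                      # assumed to be populated by the detection steps
court = courtline_detect.Render('tmp/user_upload.mp4')   # court line detection only
homography = court.get_homography()                      # 3x3 projective transform to the true court map
renderer = video_render.VideoRender(homography)          # rendering and re-encoding now live here
renderer.render_video(state.states, state.players, 'tmp/court_video.mp4')
renderer.reencode('tmp/court_video.mp4', 'tmp/court_video_reenc.mp4')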
106 changes: 106 additions & 0 deletions src/processing/video_render.py
@@ -0,0 +1,106 @@
import cv2 as cv
import random
import os
import numpy as np
# Renders the top-down court video from a supplied homography matrix
# and handles video re-encoding.
class VideoRender:
def __init__(self, homography):
self._TRUE_PATH = os.path.join('data','true_map.png')
self._TRUTH_COURT_MAP = cv.imread(self._TRUE_PATH,cv.IMREAD_GRAYSCALE)
self._HOMOGRAPHY = homography


def reencode(self, input_path, output_path):
"""
Re-encodes an MPEG-4 video file to H.264. Overwrites any existing output video.
Deletes the unprocessed video when complete.
"""
reencode_command = f'ffmpeg -y -i {input_path} -vcodec libx264 -c:a copy {output_path}'
os.system(reencode_command)
# os.remove(input_path)
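A hedged alternative to the os.system call above (not part of the commit): the same ffmpeg invocation via subprocess with an argument list, which avoids shell quoting problems if the paths ever contain spaces.

import subprocess

def reencode_sketch(input_path: str, output_path: str) -> None:
    # Identical flags to the command above; check=True raises if ffmpeg fails.
    subprocess.run(
        ['ffmpeg', '-y', '-i', input_path,
         '-vcodec', 'libx264', '-c:a', 'copy', output_path],
        check=True,
    )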


def render_video(self,states:list,players:dict,filename:str,fps:int=30):
'''
Takes in player position data, applies the court homography,
and renders the result to a video saved at filename
@param states, list of dictionaries,
each representing a frame with state info in chronological order
@param players, dictionary of players whose keys are player identifiers
@param filename, file path from project root where the video is saved
@param fps, frames per second of the produced video
'''
players=players.keys()
# Create a blank image to use as the background for each frame
background = cv.cvtColor(self._TRUTH_COURT_MAP,cv.COLOR_GRAY2BGR)
height, width, _ = background.shape

# Initialize the video writer
fourcc = cv.VideoWriter_fourcc(*'H264')
video_writer = cv.VideoWriter(filename, fourcc, fps, (width,height))

# Define initial positions for each player
player_state = {}
for player in players:
player_state.update({player:{'pos':(0,0),
'color':(random.randint(0,256),random.randint(0,256),random.randint(0,256))}})

# find duration of video
dur = states[-1]["frameno"]
states += [{"frameno":dur+fps,"players":{}}]
frame_index = 0
# Loop through each time step
for t in range(1,dur+10):
# Create a copy of the background image to draw the points on
frame = background.copy()

# Get dictionary of positions at each frame
while (states[frame_index]["frameno"]<=t):
state = states[frame_index]
player_info = state['players']
for player in players:
if player in player_info:
pd = player_info[player]
ps = player_state[player]
x, y = (pd['xmin']+pd['xmax'])/2.0, pd['ymax']-5
x1, y1 = self._transform_point(x,y)
x0, y0 = ps['pos']
x1, y1 = (2*x1+x0)/3.0, (2*y1+y0)/3.0
ps.update({'pos':(x1, y1)})
if frame_index>=len(states)-2>= 0 or states[frame_index+1]["frameno"] > t: # release if at end of contig
break
frame_index += 1


# Loop through each point and draw it on the frame
for player in players:
pos = player_state[player]['pos']
pos = (int(pos[0]),int(pos[1]))
color = player_state[player]['color']
font = cv.FONT_HERSHEY_SIMPLEX
thickness = 2
font_scale = 1
radius = 10
text_width = cv.getTextSize(player, font, font_scale, thickness)[0][0]
cv.circle(img=frame, center=pos, radius=radius, color=color, thickness=-1)
cv.putText(img=frame,text=player,org=(pos[0]-(text_width//2),pos[1]-radius-10),
fontFace=font,fontScale=font_scale,color=color,thickness=thickness,lineType=cv.LINE_AA)

# Write the frame to the video writer
video_writer.write(frame)

# Release the video writer
video_writer.release()

def _transform_point(self,x:float,y:float):
'''
Applies court homography to single point
@param x,y pixel positions of point on court video
@returns transformed pixels x,y positions on true court
'''
point = np.array([x, y], dtype=np.float32)
point = point.reshape((1, 1, 2))
transformed_point = cv.perspectiveTransform(point, self._HOMOGRAPHY)
tx, ty = transformed_point[0, 0]
return tx, ty
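A note on _transform_point: cv.perspectiveTransform expects points as a float32 array of shape (N, 1, 2), which is why the method reshapes before the call. A tiny sanity check with an identity homography (illustrative only):

import numpy as np
import cv2 as cv

H = np.eye(3, dtype=np.float32)                        # identity homography: output == input
pt = np.array([[[120.0, 340.0]]], dtype=np.float32)    # shape (1, 1, 2), as required
tx, ty = cv.perspectiveTransform(pt, H)[0, 0]
print(tx, ty)                                          # 120.0 340.0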
20 changes: 16 additions & 4 deletions src/processrunner.py
@@ -2,18 +2,20 @@
Runner module for processing and statistics
"""
from state import GameState
from processing import general_detect, team_detect, shot_detect, courtline_detect
from processing import general_detect, team_detect, shot_detect, courtline_detect, video_render
class ProcessRunner:
"""
Runner class taking in: original video file path, 2 model output files, render destination path
Performs player, team, shot, and courtline detection in sequence.
Effect: updates GameState with statistics and produces courtline video.
"""
def __init__(self, video_path, players_tracking, ball_tracking, output_video_path) -> None:
def __init__(self, video_path, players_tracking, ball_tracking, output_video_path,
output_video_path_reenc) -> None:
self.video_path = video_path
self.players_tracking = players_tracking
self.ball_tracking = ball_tracking
self.output_video_path = output_video_path
self.output_video_path_reenc = output_video_path_reenc
self.state = GameState()


@@ -55,9 +57,18 @@ def run_shot_detect(self):


def run_courtline_detect(self):
"""Runs courtline detection and renders video."""
"""Runs courtline detection."""
court = courtline_detect.Render(self.video_path)
court.render_video(self.state.states, self.state.players, self.output_video_path)
self.homography = court.get_homography()
# court.render_video(self.state.states, self.state.players, self.output_video_path)


def run_video_render(self):
"""Runs video rendering and reencodes, stores to output_video_path_reenc."""
videoRender = video_render.VideoRender(self.homography)
videoRender.render_video(self.state.states, self.state.players, self.output_video_path)
videoRender.reencode(self.output_video_path,
self.output_video_path_reenc)


def run(self):
@@ -68,6 +79,7 @@ def run(self):
self.run_team_detect()
self.run_shot_detect()
self.run_courtline_detect()
self.run_video_render()


def get_results(self):
20 changes: 16 additions & 4 deletions src/view/app.py
@@ -36,6 +36,10 @@ def process_video(video_file):
if response.status_code == 200:
data = response.json()
st.session_state.upload_name = data.get('message')
# temp fix
with open('tmp/user_upload.mp4', 'wb') as f:
#f.write(video_file.value)
f.write(video_file.getvalue())
else:
print('error uploading file') # maybe make an error handler in frontend
st.session_state.is_downloaded = False
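For context on the "temp fix" above: video_file is presumably the object returned by Streamlit's file uploader, and UploadedFile.getvalue() returns the raw bytes, so the upload can be persisted for the local pipeline roughly like this (a sketch with an assumed uploader widget; the path mirrors the one used above):

import streamlit as st

uploaded = st.file_uploader('Upload a game video', type=['mp4'])  # assumed widget
if uploaded is not None:
    # Persist the upload so main() can read it from disk later.
    with open('tmp/user_upload.mp4', 'wb') as f:
        f.write(uploaded.getvalue())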
@@ -179,10 +183,9 @@ def fetch_result_video():
'''
Updates and returns the resulting video to be displayed.
TODO change to calling backend instead of accessing from repo
TODO currently display does not work properly. change encoding?
'''
if st.session_state.processed_video is None:
st.session_state.processed_video = 'tmp/court_video.mp4'
st.session_state.processed_video = 'tmp/court_video_reenc.mp4'
return st.session_state.processed_video


@@ -191,8 +194,17 @@ def fetch_result_string():
Updates and returns the resulting statistics in string format.
TODO change to calling backend instead of accessing from repo
'''
if st.session_state.result_string is None:
st.session_state.result_string = main('data/training_data.mp4')
# if st.session_state.result_string is None:
# response = requests.get(SERVER_URL+f"download/{st.session_state.upload_name}", files=
# {'file_name': st.session_state.upload_name, 'download_path':
# 'tmp/user_upload.mp4'}, timeout=30)
# if response.status_code == 200:
# st.session_state.result_string = main('tmp/user_upload.mp4')
# else:
# print('error downloading file') # maybe make an error handler in frontend
# st.session_state.result_string = main('data/training_data.mp4')

st.session_state.result_string = main('tmp/user_upload.mp4')
return st.session_state.result_string

