from pathlib import Path
import hashlib
import re
from datetime import timedelta
from typing import Optional


def get_gravatar_url(email):
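    """
    Build a Gravatar avatar URL from an email address.

    Illustrative sketch of the output shape (the address is hypothetical and
    the digest shown is a placeholder, not a real hash):

        get_gravatar_url(" User@Example.com ")
        -> "https://www.gravatar.com/avatar/<sha256 of 'user@example.com'>?d=mp"
    """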
    # Trim leading and trailing whitespace from
    # an email address and force all characters
    # to lower case
    address = str(email).strip().lower()

    # Create a SHA256 hash of the final string
    hash_object = hashlib.sha256(address.encode())
    hash_hex = hash_object.hexdigest()

    # Grab the actual image URL
    return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"


def calculate_sha256(file):
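    """
    Stream a binary file-like object in chunks and return its SHA-256 hex digest.

    Illustrative usage (the file path is hypothetical):

        with open("upload.bin", "rb") as f:
            digest = calculate_sha256(f)
    """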
    sha256 = hashlib.sha256()
    # Read the file in chunks to efficiently handle large files
    for chunk in iter(lambda: file.read(8192), b""):
        sha256.update(chunk)
    return sha256.hexdigest()


def calculate_sha256_string(string):
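    """
    Return the SHA-256 hex digest of a text string (UTF-8 encoded).

    Example (digest shown for the literal string "hello"):

        calculate_sha256_string("hello")
        -> "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
    """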
    # Create a new SHA-256 hash object
    sha256_hash = hashlib.sha256()
    # Update the hash object with the bytes of the input string
    sha256_hash.update(string.encode("utf-8"))
    # Get the hexadecimal representation of the hash
    hashed_string = sha256_hash.hexdigest()
    return hashed_string


def validate_email_format(email: str) -> bool:
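    """
    Loosely check that the address looks like ``something@domain.tld``.

    Examples (illustrative addresses):

        validate_email_format("user@example.com")  -> True
        validate_email_format("not-an-email")      -> False
    """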
    if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
        return False
    return True


def sanitize_filename(file_name):
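    """
    Lowercase a filename, strip punctuation, and replace whitespace with dashes.

    Example (hypothetical name; note that dots are stripped as well, so the
    extension is not preserved):

        sanitize_filename("My Report (Final).PDF")  -> "my-report-finalpdf"
    """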
    # Convert to lowercase
    lower_case_file_name = file_name.lower()

    # Remove special characters using regular expression
    sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)

    # Replace spaces with dashes
    final_file_name = re.sub(r"\s+", "-", sanitized_file_name)

    return final_file_name


def extract_folders_after_data_docs(path):
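    """
    Return cumulative folder tags for the path segments after ``data/docs``.

    Example (hypothetical path):

        extract_folders_after_data_docs("/data/docs/projects/2024/report.pdf")
        -> ["projects", "projects/2024"]
    """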
    # Convert the path to a Path object if it's not already
    path = Path(path)

    # Extract parts of the path
    parts = path.parts

    # Find the index of '/data/docs' in the path
    try:
        index_data_docs = parts.index("data") + 1
        index_docs = parts.index("docs", index_data_docs) + 1
    except ValueError:
        return []

    # Exclude the filename and accumulate folder names
    tags = []

    folders = parts[index_docs:-1]
    for idx, part in enumerate(folders):
        tags.append("/".join(folders[: idx + 1]))

    return tags


def parse_duration(duration: str) -> Optional[timedelta]:
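    """
    Parse a duration string such as "30s", "1.5h", or "1h30m" into a timedelta.

    "0" and "-1" are treated as "no duration" and return None; a string with no
    recognizable number/unit pairs raises ValueError.

    Examples (illustrative):

        parse_duration("1h30m")  -> timedelta(hours=1, minutes=30)
        parse_duration("-1")     -> None
    """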
    if duration == "-1" or duration == "0":
        return None

    # Regular expression to find number and unit pairs
    pattern = r"(-?\d+(\.\d+)?)(ms|s|m|h|d|w)"
    matches = re.findall(pattern, duration)

    if not matches:
        raise ValueError("Invalid duration string")

    total_duration = timedelta()

    for number, _, unit in matches:
        number = float(number)
        if unit == "ms":
            total_duration += timedelta(milliseconds=number)
        elif unit == "s":
            total_duration += timedelta(seconds=number)
        elif unit == "m":
            total_duration += timedelta(minutes=number)
        elif unit == "h":
            total_duration += timedelta(hours=number)
        elif unit == "d":
            total_duration += timedelta(days=number)
        elif unit == "w":
            total_duration += timedelta(weeks=number)

    return total_duration