Merge pull request #535 from bhulston/fix/chat-imports

Add validation for ChatGPT imports so that incompatible chats no longer break the import process
This commit is contained in:
Timothy Jaeryang Baek 2024-01-21 13:44:11 -08:00 committed by GitHub
commit daa6bedcf9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -206,25 +206,32 @@ const convertOpenAIMessages = (convo) => {
const mapping = convo['mapping']; const mapping = convo['mapping'];
const messages = []; const messages = [];
let currentId = ''; let currentId = '';
let lastId = null;
for (let message_id in mapping) { for (let message_id in mapping) {
const message = mapping[message_id]; const message = mapping[message_id];
currentId = message_id; currentId = message_id;
if (message['message'] == null || message['message']['content']['parts'][0] == '') { try {
// Skip chat messages with no content if (messages.length == 0 && (message['message'] == null ||
continue; (message['message']['content']['parts']?.[0] == '' && message['message']['content']['text'] == null))) {
} else { // Skip chat messages with no content
const new_chat = { continue;
id: message_id, } else {
parentId: messages.length > 0 && message['parent'] in mapping ? message['parent'] : null, const new_chat = {
childrenIds: message['children'] || [], id: message_id,
role: message['message']?.['author']?.['role'] !== 'user' ? 'assistant' : 'user', parentId: lastId,
content: message['message']?.['content']?.['parts']?.[0] || '', childrenIds: message['children'] || [],
model: 'gpt-3.5-turbo', role: message['message']?.['author']?.['role'] !== 'user' ? 'assistant' : 'user',
done: true, content: message['message']?.['content']?.['parts']?.[0] || message['message']?.['content']?.['text'] || '',
context: null model: 'gpt-3.5-turbo',
}; done: true,
messages.push(new_chat); context: null
};
messages.push(new_chat);
lastId = currentId;
}
} catch (error) {
console.log("Error with", message, "\nError:", error);
} }
} }
@ -245,13 +252,45 @@ const convertOpenAIMessages = (convo) => {
return chat; return chat;
}; };
/**
 * Check whether a converted chat is structurally sound enough to import.
 *
 * ChatGPT exports sometimes contain features we can't use (like DALL-E) or
 * corrupted messages, so each chat is validated before it is accepted.
 * Malformed input (missing `messages`, null entries, missing `childrenIds`)
 * is reported as invalid rather than raising, so one bad conversation cannot
 * abort the caller's loop.
 *
 * @param {object} chat - Chat object whose `messages` array is inspected.
 * @returns {boolean} `true` only when every check below passes.
 */
const validateChat = (chat) => {
	const messages = chat?.messages;

	// A missing, non-array, or empty message list cannot be imported
	if (!Array.isArray(messages) || messages.length === 0) {
		return false;
	}

	// Last message's children should be an empty array
	const lastMessage = messages[messages.length - 1];
	if (!Array.isArray(lastMessage?.childrenIds) || lastMessage.childrenIds.length !== 0) {
		return false;
	}

	// First message's parent should be null (strictly null, not undefined)
	const firstMessage = messages[0];
	if (firstMessage?.parentId !== null) {
		return false;
	}

	// Every message's content should be a string
	return messages.every((message) => typeof message?.content === 'string');
};
export const convertOpenAIChats = (_chats) => { export const convertOpenAIChats = (_chats) => {
// Create a list of dictionaries with each conversation from import // Create a list of dictionaries with each conversation from import
const chats = []; const chats = [];
let failed = 0;
for (let convo of _chats) { for (let convo of _chats) {
const chat = convertOpenAIMessages(convo); const chat = convertOpenAIMessages(convo);
if (Object.keys(chat.history.messages).length > 0) { if (validateChat(chat)) {
chats.push({ chats.push({
id: convo['id'], id: convo['id'],
user_id: '', user_id: '',
@ -259,7 +298,8 @@ export const convertOpenAIChats = (_chats) => {
chat: chat, chat: chat,
timestamp: convo['timestamp'] timestamp: convo['timestamp']
}); });
} } else { failed ++}
} }
console.log(failed, "Conversations could not be imported");
return chats; return chats;
}; };