<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed. The dc, content and atom prefixes are declared once, here on the root element. -->
<rss version="2.0"
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:content="http://purl.org/rss/1.0/modules/content/"
     xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
        <!-- Channel-level metadata: site name, description and home page URL. -->
        <title><![CDATA[Misha Laskin]]></title>
        <description><![CDATA[Misha Laskin]]></description>
        <link>https://mishalaskin.com</link>
        <generator>RSS for Node</generator>
        <lastBuildDate>Sat, 05 Aug 2023 17:58:15 GMT</lastBuildDate>
        <!-- Self-reference: the URL of this feed document itself. -->
        <atom:link href="https://mishalaskin.com/feed.xml" rel="self" type="application/rss+xml"/>
        <item>
            <!-- link and guid are absolute URLs built on the channel link (https://mishalaskin.com);
                 a guid marked isPermaLink="true" has to be a URL that a reader can open directly. -->
            <title><![CDATA[Training Deep Networks with Data Parallelism in Jax]]></title>
            <description><![CDATA[Train deep nets efficiently by parallelizing batch data in jax.]]></description>
            <link>https://mishalaskin.com/posts/data_parallel</link>
            <guid isPermaLink="true">https://mishalaskin.com/posts/data_parallel</guid>
            <category><![CDATA[large model engineering]]></category>
            <!-- NOTE(review): "You" looks like an unfilled generator default; confirm the intended author name. -->
            <dc:creator><![CDATA[You]]></dc:creator>
            <pubDate>Sun, 19 Feb 2023 00:00:00 GMT</pubDate>
        </item>
        <item>
            <!-- link and guid are absolute URLs built on the channel link (https://mishalaskin.com);
                 a guid marked isPermaLink="true" has to be a URL that a reader can open directly. -->
            <title><![CDATA[Sharding Large Models with Tensor Parallelism]]></title>
            <description><![CDATA[State-of-the-art language models are too large to fit on a single GPU, even if you use data parallelism. This post explains tensor parallelism, a technique that splits large models across multiple GPUs.]]></description>
            <link>https://mishalaskin.com/posts/tensor_parallel</link>
            <guid isPermaLink="true">https://mishalaskin.com/posts/tensor_parallel</guid>
            <category><![CDATA[large model engineering]]></category>
            <!-- NOTE(review): "You" looks like an unfilled generator default; confirm the intended author name. -->
            <dc:creator><![CDATA[You]]></dc:creator>
            <pubDate>Sun, 05 Mar 2023 00:00:00 GMT</pubDate>
        </item>
    </channel>
</rss>